document-dataply 0.0.3-alpha.1 → 0.0.3-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,3 +1,6 @@
1
+ ![node.js workflow](https://github.com/izure1/document-dataply/actions/workflows/node.js.yml/badge.svg)
2
+ ![License](https://img.shields.io/badge/license-MIT-blue.svg)
3
+
1
4
  # Document-Dataply
2
5
 
3
6
  > [!WARNING]
@@ -175,6 +178,27 @@ Returns a new `Transaction` object.
175
178
  ### `db.close()`
176
179
  Flushes changes and closes the database files.
177
180
 
181
+ ## Benchmark
182
+
183
+ The following benchmarks were conducted using the `npm run benchmark` command. The results show the performance of various operations on a dataset of **10,000** documents.
184
+
185
+ - **Package Version**: `0.0.3-alpha.1`
186
+ - **Batch Size**: 1,000 items per batch (for bulk operations)
187
+ - **Iterations**: 5 full lifecycle runs
188
+
189
+ ### Benchmark Results
190
+
191
+ | Operation | Count | Avg Time (ms) | Ops/s | Min (ms) | Max (ms) | Remarks |
192
+ | :--- | :---: | :---: | :---: | :---: | :---: | :--- |
193
+ | **InsertBatch** | 10,000 | 6499.22 | **1,538** | 5891.53 | 7403.67 | 1000 items per batch |
194
+ | **Select** | 100 | 30.09 | **3,323** | 26.09 | 37.16 | Indexed Equality |
195
+ | **Partial Update** | 100 | 86.44 | **1,157** | 74.15 | 115.22 | Bulk Update |
196
+ | **Full Update** | 1 | 20.77 | **48** | 16.10 | 25.61 | Single Update |
197
+ | **Delete** | 100 | 5893.65 | **17** | 5523.06 | 6173.39 | Page Reclamation |
198
+
199
+ > [!NOTE]
200
+ > **Performance Analysis**: The `Delete` operation is much slower than the other operations because it involves intensive index-tree restructuring and physical **Page Reclamation (Garbage Collection)** logic, which keeps storage efficient after large-scale data removal.
201
+
178
202
  ## License
179
203
 
180
204
  MIT
package/dist/cjs/index.js CHANGED
@@ -7944,20 +7944,32 @@ var require_cjs = __commonJS({
7944
7944
  } else {
7945
7945
  let nextPageId = manager.getNextPageId(page);
7946
7946
  if (nextPageId !== -1) {
7947
- let pendingFreePageId = nextPageId;
7948
- while (pendingFreePageId !== -1) {
7949
- const pendingPage = await this.get(pendingFreePageId, tx);
7950
- const pendingManager = this.pageFactory.getManager(pendingPage);
7951
- const next = pendingManager.getNextPageId(pendingPage);
7952
- await this.setFreePage(pendingFreePageId, tx);
7953
- pendingFreePageId = next;
7954
- }
7947
+ await this.freeChain(nextPageId, tx);
7955
7948
  manager.setNextPageId(page, -1);
7956
7949
  await this.setPage(currentPageId, page, tx);
7957
7950
  }
7958
7951
  }
7959
7952
  }
7960
7953
  }
7954
+ /**
7955
+ * Frees every page in a linked chain of pages, starting at `startPageId`.
7956
+ * @param startPageId Start page ID
7957
+ * @param tx Transaction
7958
+ */
7959
+ async freeChain(startPageId, tx) {
7960
+ let currentPageId = startPageId;
7961
+ const visited = /* @__PURE__ */ new Set();
7962
+ while (currentPageId !== -1 && currentPageId !== 0) {
7963
+ if (visited.has(currentPageId)) {
7964
+ break;
7965
+ }
7966
+ visited.add(currentPageId);
7967
+ const page = await this.get(currentPageId, tx);
7968
+ const nextPageId = this.pageFactory.getManager(page).getNextPageId(page);
7969
+ await this.setFreePage(currentPageId, tx);
7970
+ currentPageId = nextPageId;
7971
+ }
7972
+ }
7961
7973
  /**
7962
7974
  * Frees the page and marks it as available in the bitmap.
7963
7975
  * It also adds the page to the linked list of free pages in metadata.
@@ -7965,6 +7977,7 @@ var require_cjs = __commonJS({
7965
7977
  * @param tx Transaction
7966
7978
  */
7967
7979
  async setFreePage(pageId, tx) {
7980
+ if (pageId <= 0) return;
7968
7981
  await tx.__acquireWriteLock(0);
7969
7982
  await tx.__acquireWriteLock(pageId);
7970
7983
  const metadata = await this.getMetadata(tx);
@@ -8137,17 +8150,8 @@ var require_cjs = __commonJS({
8137
8150
  }
8138
8151
  async delete(id) {
8139
8152
  const tx = this.txContext.get();
8140
- const manager = this.factory.getManagerFromType(PageManager.CONSTANT.PAGE_TYPE_INDEX);
8141
- let pageId = +id;
8142
- while (true) {
8143
- const page = await this.pfs.get(pageId, tx);
8144
- const nextPageId = manager.getNextPageId(page);
8145
- await this.pfs.setFreePage(pageId, tx);
8146
- if (nextPageId === -1) {
8147
- break;
8148
- }
8149
- pageId = nextPageId;
8150
- }
8153
+ const pageId = +id;
8154
+ await this.pfs.freeChain(pageId, tx);
8151
8155
  }
8152
8156
  async readHead() {
8153
8157
  const tx = this.txContext.get();
@@ -8568,14 +8572,8 @@ var require_cjs = __commonJS({
8568
8572
  return;
8569
8573
  }
8570
8574
  if (this.rowManager.getOverflowFlag(row)) {
8571
- let overflowPageId = bytesToNumber(this.rowManager.getBody(row));
8572
- while (overflowPageId !== -1) {
8573
- const overflowPage = await this.pfs.get(overflowPageId, tx);
8574
- const manager = this.factory.getManager(overflowPage);
8575
- const nextPageId = manager.getNextPageId(overflowPage);
8576
- await this.pfs.setFreePage(overflowPageId, tx);
8577
- overflowPageId = nextPageId;
8578
- }
8575
+ const overflowPageId = bytesToNumber(this.rowManager.getBody(row));
8576
+ await this.pfs.freeChain(overflowPageId, tx);
8579
8577
  }
8580
8578
  this.rowManager.setDeletedFlag(row, true);
8581
8579
  await this.pfs.setPage(pageId, page, tx);
@@ -8606,7 +8604,7 @@ var require_cjs = __commonJS({
8606
8604
  }
8607
8605
  }
8608
8606
  if (allDeleted) {
8609
- await this.pfs.setFreePage(pageId, tx);
8607
+ await this.pfs.freeChain(pageId, tx);
8610
8608
  }
8611
8609
  }
8612
8610
  /**
@@ -9532,6 +9530,15 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
9532
9530
  return tx;
9533
9531
  });
9534
9532
  }
9533
+ async getDocument(pk, tx) {
9534
+ return this.runWithDefault(async (tx2) => {
9535
+ const row = await this.select(pk, false, tx2);
9536
+ if (!row) {
9537
+ throw new Error(`Document not found with PK: ${pk}`);
9538
+ }
9539
+ return JSON.parse(row);
9540
+ }, tx);
9541
+ }
9535
9542
  async readLock(fn) {
9536
9543
  let lockId;
9537
9544
  return this.lock.readLock(async (_lockId) => {
@@ -9550,15 +9557,6 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
9550
9557
  this.lock.writeUnlock(lockId);
9551
9558
  });
9552
9559
  }
9553
- async getDocument(pk, tx) {
9554
- return this.runWithDefault(async (tx2) => {
9555
- const row = await this.select(pk, false, tx2);
9556
- if (!row) {
9557
- throw new Error(`Document not found with PK: ${pk}`);
9558
- }
9559
- return JSON.parse(row);
9560
- }, tx);
9561
- }
9562
9560
  /**
9563
9561
  * Backfill indices for fields that were added with `true` option after data was inserted.
9564
9562
  * This method should be called after `init()` if you want to index existing documents
@@ -9796,9 +9794,16 @@ var DocumentDataply = class _DocumentDataply {
9796
9794
  const tree = this.api.trees.get(field);
9797
9795
  if (!tree) continue;
9798
9796
  const condition = query[field];
9799
- candidates.push({ tree, condition, field });
9797
+ const treeTx = await tree.createTransaction();
9798
+ candidates.push({ tree: treeTx, condition, field });
9800
9799
  }
9800
+ const rollback = () => {
9801
+ for (const { tree } of candidates) {
9802
+ tree.rollback();
9803
+ }
9804
+ };
9801
9805
  if (candidates.length === 0) {
9806
+ rollback();
9802
9807
  return null;
9803
9808
  }
9804
9809
  if (orderByField) {
@@ -9806,7 +9811,8 @@ var DocumentDataply = class _DocumentDataply {
9806
9811
  if (orderByCandidate) {
9807
9812
  return {
9808
9813
  driver: orderByCandidate,
9809
- others: candidates.filter((c) => c.field !== orderByField)
9814
+ others: candidates.filter((c) => c.field !== orderByField),
9815
+ rollback
9810
9816
  };
9811
9817
  }
9812
9818
  }
@@ -9817,9 +9823,41 @@ var DocumentDataply = class _DocumentDataply {
9817
9823
  if (!res) return null;
9818
9824
  return {
9819
9825
  driver: res,
9820
- others: candidates.filter((c) => c.tree !== res.tree)
9826
+ others: candidates.filter((c) => c.tree !== res.tree),
9827
+ rollback
9821
9828
  };
9822
9829
  }
9830
+ /**
9831
+ * Get Primary Keys based on query and index selection.
9832
+ * Internal common method to unify query optimization.
9833
+ */
9834
+ async getKeys(query, orderBy, sortOrder = "asc") {
9835
+ const isQueryEmpty = Object.keys(query).length === 0;
9836
+ const normalizedQuery = isQueryEmpty ? { _id: { gte: 0 } } : query;
9837
+ const verbose = this.verboseQuery(normalizedQuery);
9838
+ const selectivity = await this.getSelectivityCandidate(
9839
+ verbose,
9840
+ orderBy
9841
+ );
9842
+ if (!selectivity) return /* @__PURE__ */ new Set();
9843
+ const { driver, others, rollback } = selectivity;
9844
+ const isDriverOrderByField = orderBy === void 0 || driver.field === orderBy;
9845
+ if (isDriverOrderByField) {
9846
+ let keys = await driver.tree.keys(driver.condition, void 0, sortOrder);
9847
+ for (const { tree, condition } of others) {
9848
+ keys = await tree.keys(condition, keys, sortOrder);
9849
+ }
9850
+ rollback();
9851
+ return keys;
9852
+ } else {
9853
+ let keys = await driver.tree.keys(driver.condition, void 0);
9854
+ for (const { tree, condition } of others) {
9855
+ keys = await tree.keys(condition, keys);
9856
+ }
9857
+ rollback();
9858
+ return keys;
9859
+ }
9860
+ }
9823
9861
  async insertDocument(document, tx) {
9824
9862
  const metadata = await this.api.getDocumentInnerMetadata(tx);
9825
9863
  const id = ++metadata.lastId;
@@ -9897,7 +9935,10 @@ var DocumentDataply = class _DocumentDataply {
9897
9935
  console.error(`BPTree indexing failed for field: ${field}`, error);
9898
9936
  }
9899
9937
  }
9900
- await treeTx.commit();
9938
+ const res = await treeTx.commit();
9939
+ if (!res.success) {
9940
+ throw res.error;
9941
+ }
9901
9942
  }
9902
9943
  return ids;
9903
9944
  }, tx));
@@ -9910,37 +9951,42 @@ var DocumentDataply = class _DocumentDataply {
9910
9951
  * @returns The number of updated documents
9911
9952
  */
9912
9953
  async updateInternal(query, computeUpdatedDoc, tx) {
9913
- const idTree = this.api.trees.get("_id");
9914
- if (!idTree) {
9915
- throw new Error("ID tree not found");
9916
- }
9917
- const { stream } = this.select(query, {}, tx);
9954
+ const pks = await this.getKeys(query);
9918
9955
  let updatedCount = 0;
9919
- for await (const doc of stream) {
9920
- const id = doc._id;
9921
- let pk = null;
9922
- for await (const [entryPk] of idTree.whereStream({ primaryEqual: { v: id } })) {
9923
- pk = entryPk;
9924
- break;
9925
- }
9926
- if (pk === null) continue;
9956
+ const treeTxs = /* @__PURE__ */ new Map();
9957
+ for (const [field, tree] of this.api.trees) {
9958
+ treeTxs.set(field, await tree.createTransaction());
9959
+ }
9960
+ treeTxs.delete("_id");
9961
+ for (const pk of pks) {
9962
+ const doc = await this.api.getDocument(pk, tx);
9963
+ if (!doc) continue;
9927
9964
  const updatedDoc = computeUpdatedDoc(doc);
9928
9965
  const oldFlatDoc = this.api.flattenDocument(doc);
9929
9966
  const newFlatDoc = this.api.flattenDocument(updatedDoc);
9930
- for (const [field, tree] of this.api.trees) {
9967
+ for (const [field, treeTx] of treeTxs) {
9931
9968
  const oldV = oldFlatDoc[field];
9932
9969
  const newV = newFlatDoc[field];
9933
9970
  if (oldV === newV) continue;
9934
- if (oldV !== void 0) {
9935
- await tree.delete(pk, { k: pk, v: oldV });
9971
+ if (field in oldFlatDoc) {
9972
+ await treeTx.delete(pk, { k: pk, v: oldV });
9936
9973
  }
9937
- if (newV !== void 0) {
9938
- await tree.insert(pk, { k: pk, v: newV });
9974
+ if (field in newFlatDoc) {
9975
+ await treeTx.insert(pk, { k: pk, v: newV });
9939
9976
  }
9940
9977
  }
9941
9978
  await this.api.update(pk, JSON.stringify(updatedDoc), tx);
9942
9979
  updatedCount++;
9943
9980
  }
9981
+ for (const [field, treeTx] of treeTxs) {
9982
+ const result = await treeTx.commit();
9983
+ if (!result.success) {
9984
+ for (const rollbackTx of treeTxs.values()) {
9985
+ rollbackTx.rollback();
9986
+ }
9987
+ throw result.error;
9988
+ }
9989
+ }
9944
9990
  return updatedCount;
9945
9991
  }
9946
9992
  /**
@@ -9951,7 +9997,7 @@ var DocumentDataply = class _DocumentDataply {
9951
9997
  * @returns The number of updated documents
9952
9998
  */
9953
9999
  async fullUpdate(query, newRecord, tx) {
9954
- return this.api.writeLock(() => this.api.runWithDefault(async (tx2) => {
10000
+ return await this.api.writeLock(() => this.api.runWithDefault(async (tx2) => {
9955
10001
  return this.updateInternal(query, (doc) => {
9956
10002
  const newDoc = typeof newRecord === "function" ? newRecord(doc) : newRecord;
9957
10003
  return { _id: doc._id, ...newDoc };
@@ -9982,20 +10028,11 @@ var DocumentDataply = class _DocumentDataply {
9982
10028
  */
9983
10029
  async delete(query, tx) {
9984
10030
  return this.api.writeLock(() => this.api.runWithDefault(async (tx2) => {
9985
- const idTree = this.api.trees.get("_id");
9986
- if (!idTree) {
9987
- throw new Error("ID tree not found");
9988
- }
9989
- const { stream } = this.select(query, {}, tx2);
10031
+ const pks = await this.getKeys(query);
9990
10032
  let deletedCount = 0;
9991
- for await (const doc of stream) {
9992
- const id = doc._id;
9993
- let pk = null;
9994
- for await (const [entryPk] of idTree.whereStream({ primaryEqual: { v: id } })) {
9995
- pk = entryPk;
9996
- break;
9997
- }
9998
- if (pk === null) continue;
10033
+ for (const pk of pks) {
10034
+ const doc = await this.api.getDocument(pk, tx2);
10035
+ if (!doc) continue;
9999
10036
  const flatDoc = this.api.flattenDocument(doc);
10000
10037
  for (const [field, tree] of this.api.trees) {
10001
10038
  const v = flatDoc[field];
@@ -10028,47 +10065,30 @@ var DocumentDataply = class _DocumentDataply {
10028
10065
  }
10029
10066
  const {
10030
10067
  limit = Infinity,
10031
- sortOrder = "asc"
10068
+ sortOrder = "asc",
10069
+ orderBy: orderByField
10032
10070
  } = options;
10033
10071
  const self = this;
10034
10072
  const stream = this.api.streamWithDefault(async function* (tx2) {
10035
- const isQueryEmpty = Object.keys(query).length === 0;
10036
- const normalizedQuery = isQueryEmpty ? { _id: { gte: 0 } } : query;
10037
- const verbose = self.verboseQuery(normalizedQuery);
10073
+ const keys = await self.getKeys(query, orderByField, sortOrder);
10038
10074
  const selectivity = await self.getSelectivityCandidate(
10039
- verbose,
10040
- orderBy
10075
+ self.verboseQuery(query),
10076
+ orderByField
10041
10077
  );
10042
- if (!selectivity) return;
10043
- const { driver, others } = selectivity;
10044
- const isDriverOrderByField = orderBy === void 0 || driver.field === orderBy;
10045
- if (isDriverOrderByField) {
10046
- let keys = await driver.tree.keys(driver.condition, void 0, sortOrder);
10047
- for (const { tree, condition } of others) {
10048
- keys = await tree.keys(condition, keys, sortOrder);
10049
- }
10050
- let i = 0;
10051
- for (const key of keys) {
10052
- if (i >= limit) break;
10053
- const stringified = await self.api.select(key, false, tx2);
10054
- if (!stringified) continue;
10055
- yield JSON.parse(stringified);
10056
- i++;
10057
- }
10058
- } else {
10078
+ const isDriverOrderByField = orderByField === void 0 || selectivity && selectivity.driver.field === orderByField;
10079
+ if (selectivity) {
10080
+ selectivity.rollback();
10081
+ }
10082
+ if (!isDriverOrderByField && orderByField) {
10059
10083
  const results = [];
10060
- let keys = await driver.tree.keys(driver.condition, void 0);
10061
- for (const { tree, condition } of others) {
10062
- keys = await tree.keys(condition, keys);
10063
- }
10064
10084
  for (const key of keys) {
10065
10085
  const stringified = await self.api.select(key, false, tx2);
10066
10086
  if (!stringified) continue;
10067
10087
  results.push(JSON.parse(stringified));
10068
10088
  }
10069
10089
  results.sort((a, b) => {
10070
- const aVal = a[orderBy] ?? a._id;
10071
- const bVal = b[orderBy] ?? b._id;
10090
+ const aVal = a[orderByField] ?? a._id;
10091
+ const bVal = b[orderByField] ?? b._id;
10072
10092
  const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
10073
10093
  return sortOrder === "asc" ? cmp : -cmp;
10074
10094
  });
@@ -10076,6 +10096,15 @@ var DocumentDataply = class _DocumentDataply {
10076
10096
  for (const doc of limitedResults) {
10077
10097
  yield doc;
10078
10098
  }
10099
+ } else {
10100
+ let i = 0;
10101
+ for (const key of keys) {
10102
+ if (i >= limit) break;
10103
+ const stringified = await self.api.select(key, false, tx2);
10104
+ if (!stringified) continue;
10105
+ yield JSON.parse(stringified);
10106
+ i++;
10107
+ }
10079
10108
  }
10080
10109
  }, tx);
10081
10110
  const drain = async () => {
@@ -10,9 +10,9 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON, IC extends Index
10
10
  private pendingBackfillFields;
11
11
  private readonly lock;
12
12
  constructor(file: string, options: DocumentDataplyOptions<T, IC>);
13
+ getDocument(pk: number, tx?: Transaction): Promise<DataplyDocument<T>>;
13
14
  readLock<T>(fn: () => T): Promise<T>;
14
15
  writeLock<T>(fn: () => T): Promise<T>;
15
- getDocument(pk: number, tx?: Transaction): Promise<DataplyDocument<T>>;
16
16
  /**
17
17
  * Backfill indices for fields that were added with `true` option after data was inserted.
18
18
  * This method should be called after `init()` if you want to index existing documents
@@ -96,7 +96,13 @@ export declare class DocumentDataply<T extends DocumentJSON, IC extends IndexCon
96
96
  condition: Partial<DocumentDataplyCondition<U>>;
97
97
  field: string;
98
98
  }[];
99
+ rollback: () => void;
99
100
  } | null>;
101
+ /**
102
+ * Get Primary Keys based on query and index selection.
103
+ * Internal common method to unify query optimization.
104
+ */
105
+ private getKeys;
100
106
  private insertDocument;
101
107
  /**
102
108
  * Insert a document into the database
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "document-dataply",
3
- "version": "0.0.3-alpha.1",
3
+ "version": "0.0.3-alpha.2",
4
4
  "description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
5
5
  "license": "MIT",
6
6
  "author": "izure <admin@izure.org>",
@@ -23,6 +23,9 @@
23
23
  },
24
24
  "scripts": {
25
25
  "test": "jest -i",
26
+ "pretest": "esbuild test/wal_crash_sim.ts --bundle --platform=node --outfile=test/wal_crash_sim_bundled.js",
27
+ "benchmark": "npx tsx benchmark/MassiveInsert.bench.ts",
28
+ "prebenchmark": "npm run pretest",
26
29
  "build": "node build/index.js && tsc"
27
30
  },
28
31
  "keywords": [
@@ -39,7 +42,7 @@
39
42
  "dataply"
40
43
  ],
41
44
  "dependencies": {
42
- "dataply": "^0.0.18"
45
+ "dataply": "^0.0.19-alpha.0"
43
46
  },
44
47
  "devDependencies": {
45
48
  "@types/jest": "^30.0.0",