document-dataply 0.0.9 → 0.0.10-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -6478,65 +6478,22 @@ var require_cjs = __commonJS({
6478
6478
  }
6479
6479
  return (crc ^ -1) >>> 0;
6480
6480
  }
6481
- function calcThreshold(sortedGaps, n) {
6482
- const gLen = sortedGaps.length;
6483
- if (gLen === 0) return 0;
6484
- const median = sortedGaps[Math.floor(gLen * 0.5)];
6485
- const q1 = sortedGaps[Math.floor(gLen * 0.25)];
6486
- const q3 = sortedGaps[Math.floor(gLen * 0.75)];
6487
- const iqr = q3 - q1;
6488
- const logN = Math.max(1, Math.log10(n));
6489
- if (iqr > 0) {
6490
- const threshold2 = q3 + iqr * 1.5 * logN;
6491
- const minJump = Math.max(median * 5, 20);
6492
- return Math.max(threshold2, minJump);
6493
- }
6494
- const baseGap = median > 0 ? median : 1;
6495
- const p90 = sortedGaps[Math.floor(gLen * 0.9)];
6496
- if (p90 > baseGap) {
6497
- const threshold2 = baseGap + (p90 - baseGap) * 0.5 * logN;
6498
- return Math.max(threshold2, baseGap * 5, 20);
6499
- }
6500
- let mean = 0;
6501
- for (let i = 0; i < gLen; i++) mean += sortedGaps[i];
6502
- mean /= gLen;
6503
- let variance = 0;
6504
- for (let i = 0; i < gLen; i++) {
6505
- const d = sortedGaps[i] - mean;
6506
- variance += d * d;
6507
- }
6508
- const stddev = Math.sqrt(variance / gLen);
6509
- if (stddev === 0) {
6510
- return baseGap * 2;
6511
- }
6512
- const threshold = mean + stddev * logN;
6513
- return Math.max(threshold, baseGap * 5, 20);
6514
- }
6515
- function clusterNumbers(numbers, maxGap) {
6481
+ function clusterNumbersByPagination(numbers, pagingSize, startPageId = 0) {
6516
6482
  const n = numbers.length;
6517
6483
  if (n === 0) return [];
6518
6484
  if (n === 1) return [new Float64Array([numbers[0]])];
6519
6485
  const sorted = (numbers instanceof Float64Array ? numbers.slice() : Float64Array.from(numbers)).sort();
6520
- const gaps = new Float64Array(n - 1);
6521
- for (let i = 0, len = n - 1; i < len; i++) {
6522
- gaps[i] = sorted[i + 1] - sorted[i];
6523
- }
6524
- const sortedGaps = gaps.slice().sort();
6525
- let threshold;
6526
- if (maxGap !== void 0) {
6527
- threshold = maxGap;
6528
- } else {
6529
- threshold = calcThreshold(sortedGaps, n);
6530
- }
6531
6486
  const clusters = [];
6532
- let clusterStart = 0;
6487
+ let start = 0;
6533
6488
  for (let i = 0, len = n - 1; i < len; i++) {
6534
- if (gaps[i] > threshold) {
6535
- clusters.push(sorted.subarray(clusterStart, i + 1));
6536
- clusterStart = i + 1;
6489
+ const paginationIndex = Math.floor((sorted[i] - startPageId) / pagingSize);
6490
+ const nextPaginationIndex = Math.floor((sorted[i + 1] - startPageId) / pagingSize);
6491
+ if (paginationIndex !== nextPaginationIndex) {
6492
+ clusters.push(sorted.subarray(start, i + 1));
6493
+ start = i + 1;
6537
6494
  }
6538
6495
  }
6539
- clusters.push(sorted.subarray(clusterStart));
6496
+ clusters.push(sorted.subarray(start));
6540
6497
  return clusters;
6541
6498
  }
6542
6499
  var Row = class _Row {
@@ -9196,10 +9153,15 @@ var require_cjs = __commonJS({
9196
9153
  }
9197
9154
  const metadataPage = await this.pfs.getMetadata(tx);
9198
9155
  const manager = this.factory.getManagerFromType(MetadataPageManager.CONSTANT.PAGE_TYPE_METADATA);
9156
+ const pageSize = manager.getPageSize(metadataPage);
9157
+ const pageCount = manager.getPageCount(metadataPage);
9158
+ const rowCount = manager.getRowCount(metadataPage);
9159
+ const usage = pageCount / Math.pow(2, 32);
9199
9160
  return {
9200
- pageSize: manager.getPageSize(metadataPage),
9201
- pageCount: manager.getPageCount(metadataPage),
9202
- rowCount: manager.getRowCount(metadataPage)
9161
+ pageSize,
9162
+ pageCount,
9163
+ rowCount,
9164
+ usage
9203
9165
  };
9204
9166
  }
9205
9167
  /**
@@ -9470,19 +9432,34 @@ var require_cjs = __commonJS({
9470
9432
  * @returns Array of raw data of the rows in the same order as input PKs
9471
9433
  */
9472
9434
  async selectMany(pks, tx) {
9435
+ const collections = await this.collectItemsByPage(pks, tx);
9436
+ return this.fetchRowsByRids(collections, pks.length, tx);
9437
+ }
9438
+ /**
9439
+ * Collects items by page ID to minimize I/O.
9440
+ * @param pks Array of PKs to look up
9441
+ * @param tx Transaction
9442
+ * @returns Map of page ID to array of {pk, slotIndex, index} pairs
9443
+ */
9444
+ async collectItemsByPage(pks, tx) {
9473
9445
  if (pks.length === 0) {
9474
- return [];
9446
+ return /* @__PURE__ */ new Map();
9475
9447
  }
9476
9448
  const pkIndexMap = /* @__PURE__ */ new Map();
9477
9449
  for (let i = 0, len = pks.length; i < len; i++) {
9478
9450
  pkIndexMap.set(pks[i], i);
9479
9451
  }
9480
- const validCount = pks.length;
9481
- const pkArray = new Float64Array(validCount).fill(0);
9482
- const ridArray = new Float64Array(validCount).fill(0);
9483
- const indexArray = new Float64Array(validCount).fill(0);
9484
9452
  const btx = await this.getBPTreeTransaction(tx);
9485
- const clusters = clusterNumbers(pks, this.order / 2);
9453
+ const clusters = clusterNumbersByPagination(pks, this.order, 1);
9454
+ const collections = /* @__PURE__ */ new Map();
9455
+ const insertToCollections = (pk, rid, index) => {
9456
+ const slotIndex = rid % 65536;
9457
+ const pageId = Math.floor(rid / 65536);
9458
+ if (!collections.has(pageId)) {
9459
+ collections.set(pageId, []);
9460
+ }
9461
+ collections.get(pageId).push({ pk, slotIndex, index });
9462
+ };
9486
9463
  for (let i = 0, len = clusters.length; i < len; i++) {
9487
9464
  const cluster = clusters[i];
9488
9465
  const minPk = cluster[0];
@@ -9493,9 +9470,7 @@ var require_cjs = __commonJS({
9493
9470
  const rid = keys.values().next().value;
9494
9471
  const index = pkIndexMap.get(minPk);
9495
9472
  if (index !== void 0) {
9496
- pkArray[index] = minPk;
9497
- ridArray[index] = rid;
9498
- indexArray[index] = index;
9473
+ insertToCollections(minPk, rid, index);
9499
9474
  }
9500
9475
  }
9501
9476
  continue;
@@ -9504,13 +9479,11 @@ var require_cjs = __commonJS({
9504
9479
  for await (const [rid, pk] of stream) {
9505
9480
  const index = pkIndexMap.get(pk);
9506
9481
  if (index !== void 0) {
9507
- pkArray[index] = pk;
9508
- ridArray[index] = rid;
9509
- indexArray[index] = index;
9482
+ insertToCollections(pk, rid, index);
9510
9483
  }
9511
9484
  }
9512
9485
  }
9513
- return this.fetchRowsByRids(validCount, pkArray, ridArray, indexArray, tx);
9486
+ return collections;
9514
9487
  }
9515
9488
  /**
9516
9489
  * Fetches multiple rows by their RID and PK combinations, grouping by page ID to minimize I/O.
@@ -9518,25 +9491,12 @@ var require_cjs = __commonJS({
9518
9491
  * @param tx Transaction
9519
9492
  * @returns Array of row data in the same order as input PKs
9520
9493
  */
9521
- async fetchRowsByRids(validCount, pkArray, ridArray, indexArray, tx) {
9522
- const result = new Array(validCount).fill(null);
9523
- if (validCount === 0) return result;
9524
- const pageGroupMap = /* @__PURE__ */ new Map();
9525
- for (let i = 0; i < validCount; i++) {
9526
- const pk = pkArray[i];
9527
- const rid = ridArray[i];
9528
- const index = indexArray[i];
9529
- if (pk === 0 && rid === 0 && index === 0) continue;
9530
- const slotIndex = rid % 65536;
9531
- const pageId = Math.floor(rid / 65536);
9532
- if (!pageGroupMap.has(pageId)) {
9533
- pageGroupMap.set(pageId, []);
9534
- }
9535
- pageGroupMap.get(pageId).push({ pk, slotIndex, index });
9536
- }
9537
- const sortedPageIds = Array.from(pageGroupMap.keys()).sort((a, b) => a - b);
9494
+ async fetchRowsByRids(collections, itemsCount, tx) {
9495
+ const result = new Array(itemsCount).fill(null);
9496
+ if (itemsCount === 0) return result;
9497
+ const sortedPageIds = Array.from(collections.keys()).sort((a, b) => a - b);
9538
9498
  await Promise.all(sortedPageIds.map(async (pageId) => {
9539
- const items = pageGroupMap.get(pageId);
9499
+ const items = collections.get(pageId);
9540
9500
  const page = await this.pfs.get(pageId, tx);
9541
9501
  if (!this.factory.isDataPage(page)) {
9542
9502
  throw new Error(`Page ${pageId} is not a data page`);
@@ -10283,9 +10243,11 @@ var require_cjs = __commonJS({
10283
10243
  if (!this.initialized) {
10284
10244
  throw new Error("Dataply instance is not initialized");
10285
10245
  }
10286
- return this.hook.trigger("close", void 0, async () => {
10287
- await this.pfs.close();
10288
- import_node_fs3.default.closeSync(this.fileHandle);
10246
+ return this.runWithDefaultWrite(() => {
10247
+ return this.hook.trigger("close", void 0, async () => {
10248
+ await this.pfs.close();
10249
+ import_node_fs3.default.closeSync(this.fileHandle);
10250
+ });
10289
10251
  });
10290
10252
  }
10291
10253
  };
@@ -10502,38 +10464,50 @@ var DocumentSerializeStrategyAsync = class extends import_dataply.SerializeStrat
10502
10464
 
10503
10465
  // src/core/bptree/documentComparator.ts
10504
10466
  var import_dataply2 = __toESM(require_cjs());
10505
- function compareDiff(a, b) {
10506
- if (typeof a !== "string" && typeof b !== "string") {
10507
- return +a - +b;
10508
- }
10509
- return (a + "").localeCompare(b + "");
10510
- }
10511
- function compareValue(a, b) {
10512
- const aArr = Array.isArray(a);
10513
- const bArr = Array.isArray(b);
10514
- if (!aArr && !bArr) {
10515
- return compareDiff(a, b);
10516
- }
10517
- const aList = aArr ? a : [a];
10518
- const bList = bArr ? b : [b];
10519
- const len = Math.min(aList.length, bList.length);
10520
- for (let i = 0; i < len; i++) {
10521
- const diff = compareDiff(aList[i], bList[i]);
10522
- if (diff !== 0) return diff;
10523
- }
10524
- return 0;
10525
- }
10526
10467
  var DocumentValueComparator = class extends import_dataply2.ValueComparator {
10468
+ _intlComparator = new Intl.Collator(void 0, {
10469
+ numeric: true,
10470
+ sensitivity: "variant",
10471
+ usage: "sort"
10472
+ });
10527
10473
  primaryAsc(a, b) {
10528
- return compareValue(a.v, b.v);
10474
+ return this._compareValue(a.v, b.v);
10529
10475
  }
10530
10476
  asc(a, b) {
10531
- const diff = compareValue(a.v, b.v);
10477
+ const diff = this._compareValue(a.v, b.v);
10532
10478
  return diff === 0 ? a.k - b.k : diff;
10533
10479
  }
10534
10480
  match(value) {
10535
10481
  return value.v + "";
10536
10482
  }
10483
+ /**
10484
+ * 두 Primitive 값을 비교합니다.
10485
+ */
10486
+ _compareDiff(a, b) {
10487
+ if (typeof a === "string" && typeof b === "string") {
10488
+ return this._intlComparator.compare(a, b);
10489
+ }
10490
+ return +a - +b;
10491
+ }
10492
+ /**
10493
+ * 두 v 값을 비교합니다. v는 Primitive 또는 Primitive[] (복합 인덱스)일 수 있습니다.
10494
+ * 배열인 경우 element-by-element로 비교합니다.
10495
+ */
10496
+ _compareValue(a, b) {
10497
+ const aArr = Array.isArray(a);
10498
+ const bArr = Array.isArray(b);
10499
+ if (!aArr && !bArr) {
10500
+ return this._compareDiff(a, b);
10501
+ }
10502
+ const aList = aArr ? a : [a];
10503
+ const bList = bArr ? b : [b];
10504
+ const len = Math.min(aList.length, bList.length);
10505
+ for (let i = 0; i < len; i++) {
10506
+ const diff = this._compareDiff(aList[i], bList[i]);
10507
+ if (diff !== 0) return diff;
10508
+ }
10509
+ return 0;
10510
+ }
10537
10511
  };
10538
10512
 
10539
10513
  // src/utils/catchPromise.ts
@@ -12000,7 +11974,7 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
12000
11974
  orderBy: orderByField
12001
11975
  } = options;
12002
11976
  const self = this;
12003
- const stream = this.streamWithDefault(async function* (tx2) {
11977
+ const stream = () => this.streamWithDefault(async function* (tx2) {
12004
11978
  const ftsConditions = [];
12005
11979
  for (const field in query) {
12006
11980
  const q = query[field];
@@ -12102,7 +12076,7 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
12102
12076
  }, tx);
12103
12077
  const drain = async () => {
12104
12078
  const result = [];
12105
- for await (const document of stream) {
12079
+ for await (const document of stream()) {
12106
12080
  result.push(document);
12107
12081
  }
12108
12082
  return result;
@@ -1,7 +1,17 @@
1
1
  import type { DataplyTreeValue, Primitive } from '../../types';
2
2
  import { ValueComparator } from 'dataply';
3
3
  export declare class DocumentValueComparator<T extends DataplyTreeValue<U>, U extends Primitive> extends ValueComparator<T> {
4
+ private readonly _intlComparator;
4
5
  primaryAsc(a: T, b: T): number;
5
6
  asc(a: T, b: T): number;
6
7
  match(value: T): string;
8
+ /**
9
+ * 두 Primitive 값을 비교합니다.
10
+ */
11
+ private _compareDiff;
12
+ /**
13
+ * 두 v 값을 비교합니다. v는 Primitive 또는 Primitive[] (복합 인덱스)일 수 있습니다.
14
+ * 배열인 경우 element-by-element로 비교합니다.
15
+ */
16
+ private _compareValue;
7
17
  }
@@ -122,7 +122,7 @@ export declare class DocumentDataply<T extends DocumentJSON> {
122
122
  * @throws Error if query or orderBy contains non-indexed fields
123
123
  */
124
124
  select(query: Partial<DocumentDataplyQuery<T>>, options?: DocumentDataplyQueryOptions, tx?: Transaction): {
125
- stream: AsyncIterableIterator<DataplyDocument<T>>;
125
+ stream: () => AsyncIterableIterator<DataplyDocument<T>>;
126
126
  drain: () => Promise<DataplyDocument<T>[]>;
127
127
  };
128
128
  /**
@@ -290,7 +290,7 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON> extends DataplyA
290
290
  * @throws Error if query or orderBy contains non-indexed fields
291
291
  */
292
292
  selectDocuments(query: Partial<DocumentDataplyQuery<T>>, options?: DocumentDataplyQueryOptions, tx?: Transaction): {
293
- stream: AsyncIterableIterator<DataplyDocument<T>>;
293
+ stream: () => AsyncIterableIterator<DataplyDocument<T>>;
294
294
  drain: () => Promise<DataplyDocument<T>[]>;
295
295
  };
296
296
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "document-dataply",
3
- "version": "0.0.9",
3
+ "version": "0.0.10-alpha.1",
4
4
  "description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
5
5
  "license": "MIT",
6
6
  "author": "izure <admin@izure.org>",
@@ -42,7 +42,7 @@
42
42
  "dataply"
43
43
  ],
44
44
  "dependencies": {
45
- "dataply": "^0.0.24"
45
+ "dataply": "^0.0.25-alpha.1"
46
46
  },
47
47
  "devDependencies": {
48
48
  "@types/jest": "^30.0.0",
package/readme.md CHANGED
@@ -8,7 +8,7 @@
8
8
  > **This project is currently in the Alpha stage.**
9
9
  > APIs and internal structures may change significantly between versions. Use with caution in production environments.
10
10
 
11
- `document-dataply` is a **pure JavaScript** high-performance document-oriented database library built on top of the [`dataply`](https://github.com/izure1/dataply) record storage engine. It is designed to handle **millions of rows** with high stability, providing a structured way to store, index, and query JSON-style documents.
11
+ `document-dataply` is a **pure JavaScript** high-performance document-oriented database library built on top of the [`dataply`](https://github.com/izure1/dataply) record storage engine. It is designed to handle at least millions of documents with high stability, providing a structured way to store, index, and query JSON-style documents.
12
12
 
13
13
  ## Key Features
14
14
 
@@ -85,7 +85,7 @@ async function main() {
85
85
  // Get all results
86
86
  const allResults = await query.drain();
87
87
  // Or iterate through results
88
- for await (const doc of query.stream) {
88
+ for await (const doc of query.stream()) {
89
89
  console.log(doc);
90
90
  }
91
91
 
@@ -161,7 +161,7 @@ For detailed usage and error handling patterns, see the [Transaction Guide (TRAN
161
161
 
162
162
  ### Updating and Deleting
163
163
 
164
- `document-dataply` provides flexible ways to update or delete documents based on query results. All these operations are **Stream-based**, allowing you to handle millions of records without memory concerns.
164
+ `document-dataply` provides flexible ways to update or delete documents based on query results. All these operations are **Stream-based**, allowing you to handle at least millions of records without memory concerns.
165
165
 
166
166
  - **Partial Update**: Modify only specific fields or use a function for dynamic updates.
167
167
  - **Full Update**: Replace the entire document while preserving the original `_id`.
@@ -188,7 +188,7 @@ For more information on performance optimization and advanced features, see [TIP
188
188
 
189
189
  - **Query Optimization**: Automatic index selection for maximum performance.
190
190
  - **Sorting and Pagination**: Detailed usage of `limit`, `orderBy`, and `sortOrder`.
191
- - **Memory Management**: When to use `stream` vs `drain()`.
191
+ - **Memory Management**: When to use `stream()` vs `drain()`.
192
192
  - **Performance**: Optimizing bulk data insertion using `insertBatch`.
193
193
  - **Indexing Policies**: Dynamic index creation and automatic backfilling.
194
194
  - **Composite Indexes**: Indexing multiple fields for complex queries.
@@ -226,7 +226,7 @@ Inserts multiple documents efficiently. Returns an array of `_ids` (`number[]`).
226
226
  ### `db.select(query, options?, tx?)`
227
227
  Searches for documents matching the query. Passing an empty object (`{}`) as the `query` retrieves all documents.
228
228
  Returns an object `{ stream, drain }`.
229
- - `stream`: An async iterator to traverse results one by one.
229
+ - `stream()`: An async iterator to traverse results one by one.
230
230
  - `drain()`: A promise that resolves to an array of all matching documents.
231
231
 
232
232
  ### `db.partialUpdate(query, newFields, tx?)`