document-dataply 0.0.9 → 0.0.10-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -6478,65 +6478,22 @@ var require_cjs = __commonJS({
6478
6478
  }
6479
6479
  return (crc ^ -1) >>> 0;
6480
6480
  }
6481
- function calcThreshold(sortedGaps, n) {
6482
- const gLen = sortedGaps.length;
6483
- if (gLen === 0) return 0;
6484
- const median = sortedGaps[Math.floor(gLen * 0.5)];
6485
- const q1 = sortedGaps[Math.floor(gLen * 0.25)];
6486
- const q3 = sortedGaps[Math.floor(gLen * 0.75)];
6487
- const iqr = q3 - q1;
6488
- const logN = Math.max(1, Math.log10(n));
6489
- if (iqr > 0) {
6490
- const threshold2 = q3 + iqr * 1.5 * logN;
6491
- const minJump = Math.max(median * 5, 20);
6492
- return Math.max(threshold2, minJump);
6493
- }
6494
- const baseGap = median > 0 ? median : 1;
6495
- const p90 = sortedGaps[Math.floor(gLen * 0.9)];
6496
- if (p90 > baseGap) {
6497
- const threshold2 = baseGap + (p90 - baseGap) * 0.5 * logN;
6498
- return Math.max(threshold2, baseGap * 5, 20);
6499
- }
6500
- let mean = 0;
6501
- for (let i = 0; i < gLen; i++) mean += sortedGaps[i];
6502
- mean /= gLen;
6503
- let variance = 0;
6504
- for (let i = 0; i < gLen; i++) {
6505
- const d = sortedGaps[i] - mean;
6506
- variance += d * d;
6507
- }
6508
- const stddev = Math.sqrt(variance / gLen);
6509
- if (stddev === 0) {
6510
- return baseGap * 2;
6511
- }
6512
- const threshold = mean + stddev * logN;
6513
- return Math.max(threshold, baseGap * 5, 20);
6514
- }
6515
- function clusterNumbers(numbers, maxGap) {
6481
+ function clusterNumbersByPagination(numbers, pagingSize, startPageId = 0) {
6516
6482
  const n = numbers.length;
6517
6483
  if (n === 0) return [];
6518
6484
  if (n === 1) return [new Float64Array([numbers[0]])];
6519
6485
  const sorted = (numbers instanceof Float64Array ? numbers.slice() : Float64Array.from(numbers)).sort();
6520
- const gaps = new Float64Array(n - 1);
6521
- for (let i = 0, len = n - 1; i < len; i++) {
6522
- gaps[i] = sorted[i + 1] - sorted[i];
6523
- }
6524
- const sortedGaps = gaps.slice().sort();
6525
- let threshold;
6526
- if (maxGap !== void 0) {
6527
- threshold = maxGap;
6528
- } else {
6529
- threshold = calcThreshold(sortedGaps, n);
6530
- }
6531
6486
  const clusters = [];
6532
- let clusterStart = 0;
6487
+ let start = 0;
6533
6488
  for (let i = 0, len = n - 1; i < len; i++) {
6534
- if (gaps[i] > threshold) {
6535
- clusters.push(sorted.subarray(clusterStart, i + 1));
6536
- clusterStart = i + 1;
6489
+ const paginationIndex = Math.floor((sorted[i] - startPageId) / pagingSize);
6490
+ const nextPaginationIndex = Math.floor((sorted[i + 1] - startPageId) / pagingSize);
6491
+ if (paginationIndex !== nextPaginationIndex) {
6492
+ clusters.push(sorted.subarray(start, i + 1));
6493
+ start = i + 1;
6537
6494
  }
6538
6495
  }
6539
- clusters.push(sorted.subarray(clusterStart));
6496
+ clusters.push(sorted.subarray(start));
6540
6497
  return clusters;
6541
6498
  }
6542
6499
  var Row = class _Row {
@@ -9470,19 +9427,34 @@ var require_cjs = __commonJS({
9470
9427
  * @returns Array of raw data of the rows in the same order as input PKs
9471
9428
  */
9472
9429
  async selectMany(pks, tx) {
9430
+ const collections = await this.collectItemsByPage(pks, tx);
9431
+ return this.fetchRowsByRids(collections, pks.length, tx);
9432
+ }
9433
+ /**
9434
+ * Collects items by page ID to minimize I/O.
9435
+ * @param pks Array of PKs to look up
9436
+ * @param tx Transaction
9437
+ * @returns Map of page ID to array of {pk, slotIndex, index} pairs
9438
+ */
9439
+ async collectItemsByPage(pks, tx) {
9473
9440
  if (pks.length === 0) {
9474
- return [];
9441
+ return /* @__PURE__ */ new Map();
9475
9442
  }
9476
9443
  const pkIndexMap = /* @__PURE__ */ new Map();
9477
9444
  for (let i = 0, len = pks.length; i < len; i++) {
9478
9445
  pkIndexMap.set(pks[i], i);
9479
9446
  }
9480
- const validCount = pks.length;
9481
- const pkArray = new Float64Array(validCount).fill(0);
9482
- const ridArray = new Float64Array(validCount).fill(0);
9483
- const indexArray = new Float64Array(validCount).fill(0);
9484
9447
  const btx = await this.getBPTreeTransaction(tx);
9485
- const clusters = clusterNumbers(pks, this.order / 2);
9448
+ const clusters = clusterNumbersByPagination(pks, this.order, 1);
9449
+ const collections = /* @__PURE__ */ new Map();
9450
+ const insertToCollections = (pk, rid, index) => {
9451
+ const slotIndex = rid % 65536;
9452
+ const pageId = Math.floor(rid / 65536);
9453
+ if (!collections.has(pageId)) {
9454
+ collections.set(pageId, []);
9455
+ }
9456
+ collections.get(pageId).push({ pk, slotIndex, index });
9457
+ };
9486
9458
  for (let i = 0, len = clusters.length; i < len; i++) {
9487
9459
  const cluster = clusters[i];
9488
9460
  const minPk = cluster[0];
@@ -9493,9 +9465,7 @@ var require_cjs = __commonJS({
9493
9465
  const rid = keys.values().next().value;
9494
9466
  const index = pkIndexMap.get(minPk);
9495
9467
  if (index !== void 0) {
9496
- pkArray[index] = minPk;
9497
- ridArray[index] = rid;
9498
- indexArray[index] = index;
9468
+ insertToCollections(minPk, rid, index);
9499
9469
  }
9500
9470
  }
9501
9471
  continue;
@@ -9504,13 +9474,11 @@ var require_cjs = __commonJS({
9504
9474
  for await (const [rid, pk] of stream) {
9505
9475
  const index = pkIndexMap.get(pk);
9506
9476
  if (index !== void 0) {
9507
- pkArray[index] = pk;
9508
- ridArray[index] = rid;
9509
- indexArray[index] = index;
9477
+ insertToCollections(pk, rid, index);
9510
9478
  }
9511
9479
  }
9512
9480
  }
9513
- return this.fetchRowsByRids(validCount, pkArray, ridArray, indexArray, tx);
9481
+ return collections;
9514
9482
  }
9515
9483
  /**
9516
9484
  * Fetches multiple rows by their RID and PK combinations, grouping by page ID to minimize I/O.
@@ -9518,25 +9486,12 @@ var require_cjs = __commonJS({
9518
9486
  * @param tx Transaction
9519
9487
  * @returns Array of row data in the same order as input PKs
9520
9488
  */
9521
- async fetchRowsByRids(validCount, pkArray, ridArray, indexArray, tx) {
9522
- const result = new Array(validCount).fill(null);
9523
- if (validCount === 0) return result;
9524
- const pageGroupMap = /* @__PURE__ */ new Map();
9525
- for (let i = 0; i < validCount; i++) {
9526
- const pk = pkArray[i];
9527
- const rid = ridArray[i];
9528
- const index = indexArray[i];
9529
- if (pk === 0 && rid === 0 && index === 0) continue;
9530
- const slotIndex = rid % 65536;
9531
- const pageId = Math.floor(rid / 65536);
9532
- if (!pageGroupMap.has(pageId)) {
9533
- pageGroupMap.set(pageId, []);
9534
- }
9535
- pageGroupMap.get(pageId).push({ pk, slotIndex, index });
9536
- }
9537
- const sortedPageIds = Array.from(pageGroupMap.keys()).sort((a, b) => a - b);
9489
+ async fetchRowsByRids(collections, itemsCount, tx) {
9490
+ const result = new Array(itemsCount).fill(null);
9491
+ if (itemsCount === 0) return result;
9492
+ const sortedPageIds = Array.from(collections.keys()).sort((a, b) => a - b);
9538
9493
  await Promise.all(sortedPageIds.map(async (pageId) => {
9539
- const items = pageGroupMap.get(pageId);
9494
+ const items = collections.get(pageId);
9540
9495
  const page = await this.pfs.get(pageId, tx);
9541
9496
  if (!this.factory.isDataPage(page)) {
9542
9497
  throw new Error(`Page ${pageId} is not a data page`);
@@ -10502,38 +10457,50 @@ var DocumentSerializeStrategyAsync = class extends import_dataply.SerializeStrat
10502
10457
 
10503
10458
  // src/core/bptree/documentComparator.ts
10504
10459
  var import_dataply2 = __toESM(require_cjs());
10505
- function compareDiff(a, b) {
10506
- if (typeof a !== "string" && typeof b !== "string") {
10507
- return +a - +b;
10508
- }
10509
- return (a + "").localeCompare(b + "");
10510
- }
10511
- function compareValue(a, b) {
10512
- const aArr = Array.isArray(a);
10513
- const bArr = Array.isArray(b);
10514
- if (!aArr && !bArr) {
10515
- return compareDiff(a, b);
10516
- }
10517
- const aList = aArr ? a : [a];
10518
- const bList = bArr ? b : [b];
10519
- const len = Math.min(aList.length, bList.length);
10520
- for (let i = 0; i < len; i++) {
10521
- const diff = compareDiff(aList[i], bList[i]);
10522
- if (diff !== 0) return diff;
10523
- }
10524
- return 0;
10525
- }
10526
10460
  var DocumentValueComparator = class extends import_dataply2.ValueComparator {
10461
+ _intlComparator = new Intl.Collator(void 0, {
10462
+ numeric: true,
10463
+ sensitivity: "variant",
10464
+ usage: "sort"
10465
+ });
10527
10466
  primaryAsc(a, b) {
10528
- return compareValue(a.v, b.v);
10467
+ return this._compareValue(a.v, b.v);
10529
10468
  }
10530
10469
  asc(a, b) {
10531
- const diff = compareValue(a.v, b.v);
10470
+ const diff = this._compareValue(a.v, b.v);
10532
10471
  return diff === 0 ? a.k - b.k : diff;
10533
10472
  }
10534
10473
  match(value) {
10535
10474
  return value.v + "";
10536
10475
  }
10476
+ /**
10477
+ * 두 Primitive 값을 비교합니다.
10478
+ */
10479
+ _compareDiff(a, b) {
10480
+ if (typeof a !== "string" && typeof b !== "string") {
10481
+ return +a - +b;
10482
+ }
10483
+ return this._intlComparator.compare(a + "", b + "");
10484
+ }
10485
+ /**
10486
+ * 두 v 값을 비교합니다. v는 Primitive 또는 Primitive[] (복합 인덱스)일 수 있습니다.
10487
+ * 배열인 경우 element-by-element로 비교합니다.
10488
+ */
10489
+ _compareValue(a, b) {
10490
+ const aArr = Array.isArray(a);
10491
+ const bArr = Array.isArray(b);
10492
+ if (!aArr && !bArr) {
10493
+ return this._compareDiff(a, b);
10494
+ }
10495
+ const aList = aArr ? a : [a];
10496
+ const bList = bArr ? b : [b];
10497
+ const len = Math.min(aList.length, bList.length);
10498
+ for (let i = 0; i < len; i++) {
10499
+ const diff = this._compareDiff(aList[i], bList[i]);
10500
+ if (diff !== 0) return diff;
10501
+ }
10502
+ return 0;
10503
+ }
10537
10504
  };
10538
10505
 
10539
10506
  // src/utils/catchPromise.ts
@@ -12000,7 +11967,7 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
12000
11967
  orderBy: orderByField
12001
11968
  } = options;
12002
11969
  const self = this;
12003
- const stream = this.streamWithDefault(async function* (tx2) {
11970
+ const stream = () => this.streamWithDefault(async function* (tx2) {
12004
11971
  const ftsConditions = [];
12005
11972
  for (const field in query) {
12006
11973
  const q = query[field];
@@ -12102,7 +12069,7 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
12102
12069
  }, tx);
12103
12070
  const drain = async () => {
12104
12071
  const result = [];
12105
- for await (const document of stream) {
12072
+ for await (const document of stream()) {
12106
12073
  result.push(document);
12107
12074
  }
12108
12075
  return result;
@@ -1,7 +1,17 @@
1
1
  import type { DataplyTreeValue, Primitive } from '../../types';
2
2
  import { ValueComparator } from 'dataply';
3
3
  export declare class DocumentValueComparator<T extends DataplyTreeValue<U>, U extends Primitive> extends ValueComparator<T> {
4
+ private readonly _intlComparator;
4
5
  primaryAsc(a: T, b: T): number;
5
6
  asc(a: T, b: T): number;
6
7
  match(value: T): string;
8
+ /**
9
+ * 두 Primitive 값을 비교합니다.
10
+ */
11
+ private _compareDiff;
12
+ /**
13
+ * 두 v 값을 비교합니다. v는 Primitive 또는 Primitive[] (복합 인덱스)일 수 있습니다.
14
+ * 배열인 경우 element-by-element로 비교합니다.
15
+ */
16
+ private _compareValue;
7
17
  }
@@ -122,7 +122,7 @@ export declare class DocumentDataply<T extends DocumentJSON> {
122
122
  * @throws Error if query or orderBy contains non-indexed fields
123
123
  */
124
124
  select(query: Partial<DocumentDataplyQuery<T>>, options?: DocumentDataplyQueryOptions, tx?: Transaction): {
125
- stream: AsyncIterableIterator<DataplyDocument<T>>;
125
+ stream: () => AsyncIterableIterator<DataplyDocument<T>>;
126
126
  drain: () => Promise<DataplyDocument<T>[]>;
127
127
  };
128
128
  /**
@@ -290,7 +290,7 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON> extends DataplyA
290
290
  * @throws Error if query or orderBy contains non-indexed fields
291
291
  */
292
292
  selectDocuments(query: Partial<DocumentDataplyQuery<T>>, options?: DocumentDataplyQueryOptions, tx?: Transaction): {
293
- stream: AsyncIterableIterator<DataplyDocument<T>>;
293
+ stream: () => AsyncIterableIterator<DataplyDocument<T>>;
294
294
  drain: () => Promise<DataplyDocument<T>[]>;
295
295
  };
296
296
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "document-dataply",
3
- "version": "0.0.9",
3
+ "version": "0.0.10-alpha.0",
4
4
  "description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
5
5
  "license": "MIT",
6
6
  "author": "izure <admin@izure.org>",
@@ -42,7 +42,7 @@
42
42
  "dataply"
43
43
  ],
44
44
  "dependencies": {
45
- "dataply": "^0.0.24"
45
+ "dataply": "^0.0.25-alpha.0"
46
46
  },
47
47
  "devDependencies": {
48
48
  "@types/jest": "^30.0.0",
@@ -51,4 +51,4 @@
51
51
  "ts-jest": "^29.4.6",
52
52
  "typescript": "^5.9.3"
53
53
  }
54
- }
54
+ }
package/readme.md CHANGED
@@ -85,7 +85,7 @@ async function main() {
85
85
  // Get all results
86
86
  const allResults = await query.drain();
87
87
  // Or iterate through results
88
- for await (const doc of query.stream) {
88
+ for await (const doc of query.stream()) {
89
89
  console.log(doc);
90
90
  }
91
91
 
@@ -188,7 +188,7 @@ For more information on performance optimization and advanced features, see [TIP
188
188
 
189
189
  - **Query Optimization**: Automatic index selection for maximum performance.
190
190
  - **Sorting and Pagination**: Detailed usage of `limit`, `orderBy`, and `sortOrder`.
191
- - **Memory Management**: When to use `stream` vs `drain()`.
191
+ - **Memory Management**: When to use `stream()` vs `drain()`.
192
192
  - **Performance**: Optimizing bulk data insertion using `insertBatch`.
193
193
  - **Indexing Policies**: Dynamic index creation and automatic backfilling.
194
194
  - **Composite Indexes**: Indexing multiple fields for complex queries.
@@ -226,7 +226,7 @@ Inserts multiple documents efficiently. Returns an array of `_ids` (`number[]`).
226
226
  ### `db.select(query, options?, tx?)`
227
227
  Searches for documents matching the query. Passing an empty object (`{}`) as the `query` retrieves all documents.
228
228
  Returns an object `{ stream, drain }`.
229
- - `stream`: An async iterator to traverse results one by one.
229
+ - `stream()`: An async iterator to traverse results one by one.
230
230
  - `drain()`: A promise that resolves to an array of all matching documents.
231
231
 
232
232
  ### `db.partialUpdate(query, newFields, tx?)`