document-dataply 0.0.4-alpha.4 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -7611,9 +7611,7 @@ var require_cjs = __commonJS({
7611
7611
  }
7612
7612
  const cached = this.cache.get(pageId);
7613
7613
  if (cached) {
7614
- const copy = new Uint8Array(this.pageSize);
7615
- copy.set(cached);
7616
- return copy;
7614
+ return cached;
7617
7615
  }
7618
7616
  const buffer = new Uint8Array(this.pageSize);
7619
7617
  const pageStartPos = pageId * this.pageSize;
@@ -8101,6 +8099,7 @@ var require_cjs = __commonJS({
8101
8099
  }
8102
8100
  }
8103
8101
  };
8102
+ var import_node_os = __toESM2(require("node:os"));
8104
8103
  var TextCodec = class _TextCodec {
8105
8104
  static TextEncoder = new TextEncoder();
8106
8105
  static TextDecoder = new TextDecoder();
@@ -8340,11 +8339,14 @@ var require_cjs = __commonJS({
8340
8339
  this.maxBodySize = this.pfs.pageSize - DataPageManager.CONSTANT.SIZE_PAGE_HEADER;
8341
8340
  this.order = this.getOptimalOrder(pfs.pageSize, IndexPageManager.CONSTANT.SIZE_KEY, IndexPageManager.CONSTANT.SIZE_VALUE);
8342
8341
  this.strategy = new RowIdentifierStrategy(this.order, pfs, txContext);
8342
+ const budget = import_node_os.default.freemem() * 0.1;
8343
+ const nodeMemory = this.order * 24 + 256;
8344
+ const capacity = Math.max(1e3, Math.min(1e6, Math.floor(budget / nodeMemory)));
8343
8345
  this.bptree = new BPTreeAsync2(
8344
8346
  this.strategy,
8345
8347
  new NumericComparator(),
8346
8348
  {
8347
- capacity: this.options.pageCacheCapacity
8349
+ capacity
8348
8350
  }
8349
8351
  );
8350
8352
  }
@@ -8711,6 +8713,30 @@ var require_cjs = __commonJS({
8711
8713
  }
8712
8714
  return this.fetchRowByRid(pk, rid, tx);
8713
8715
  }
8716
+ /**
8717
+ * Selects multiple rows by their PKs in a single B+ Tree traversal.
8718
+ * @param pks Array of PKs to look up
8719
+ * @param tx Transaction
8720
+ * @returns Array of raw data of the rows in the same order as input PKs
8721
+ */
8722
+ async selectMany(pks, tx) {
8723
+ if (pks.length === 0) {
8724
+ return [];
8725
+ }
8726
+ const minPk = Math.min(...pks);
8727
+ const maxPk = Math.max(...pks);
8728
+ const pkSet = new Set(pks);
8729
+ const resultMap = /* @__PURE__ */ new Map();
8730
+ const btx = await this.getBPTreeTransaction(tx);
8731
+ const stream = btx.whereStream({ gte: minPk, lte: maxPk });
8732
+ for await (const [rid, pk] of stream) {
8733
+ if (pkSet.has(pk)) {
8734
+ const rowData = await this.fetchRowByRid(pk, rid, tx);
8735
+ resultMap.set(pk, rowData);
8736
+ }
8737
+ }
8738
+ return pks.map((pk) => resultMap.get(pk) ?? null);
8739
+ }
8714
8740
  async fetchRowByRid(pk, rid, tx) {
8715
8741
  this.keyManager.setBufferFromKey(rid, this.ridBuffer);
8716
8742
  const pageId = this.keyManager.getPageId(this.ridBuffer);
@@ -9336,6 +9362,19 @@ var require_cjs = __commonJS({
9336
9362
  return this.textCodec.decode(data);
9337
9363
  }, tx);
9338
9364
  }
9365
+ async selectMany(pks, asRaw = false, tx) {
9366
+ if (!this.initialized) {
9367
+ throw new Error("Dataply instance is not initialized");
9368
+ }
9369
+ return this.runWithDefault(async (tx2) => {
9370
+ const results = await this.rowTableEngine.selectMany(pks, tx2);
9371
+ return results.map((data) => {
9372
+ if (data === null) return null;
9373
+ if (asRaw) return data;
9374
+ return this.textCodec.decode(data);
9375
+ });
9376
+ }, tx);
9377
+ }
9339
9378
  /**
9340
9379
  * Closes the dataply file.
9341
9380
  */
@@ -9424,6 +9463,9 @@ var require_cjs = __commonJS({
9424
9463
  async select(pk, asRaw = false, tx) {
9425
9464
  return this.api.select(pk, asRaw, tx);
9426
9465
  }
9466
+ async selectMany(pks, asRaw = false, tx) {
9467
+ return this.api.selectMany(pks, asRaw, tx);
9468
+ }
9427
9469
  /**
9428
9470
  * Closes the dataply file.
9429
9471
  */
@@ -9484,6 +9526,7 @@ __export(src_exports, {
9484
9526
  module.exports = __toCommonJS(src_exports);
9485
9527
 
9486
9528
  // src/core/document.ts
9529
+ var os = __toESM(require("node:os"));
9487
9530
  var import_dataply3 = __toESM(require_cjs());
9488
9531
 
9489
9532
  // src/core/bptree/documentStrategy.ts
@@ -9573,6 +9616,71 @@ async function catchPromise(promise) {
9573
9616
  return promise.then((res) => [void 0, res]).catch((reason) => [reason]);
9574
9617
  }
9575
9618
 
9619
+ // src/utils/heap.ts
9620
+ var BinaryHeap = class {
9621
+ constructor(comparator) {
9622
+ this.comparator = comparator;
9623
+ }
9624
+ heap = [];
9625
+ get size() {
9626
+ return this.heap.length;
9627
+ }
9628
+ peek() {
9629
+ return this.heap[0];
9630
+ }
9631
+ push(value) {
9632
+ this.heap.push(value);
9633
+ this.bubbleUp(this.heap.length - 1);
9634
+ }
9635
+ pop() {
9636
+ if (this.size === 0) return void 0;
9637
+ const top = this.heap[0];
9638
+ const bottom = this.heap.pop();
9639
+ if (this.size > 0) {
9640
+ this.heap[0] = bottom;
9641
+ this.sinkDown(0);
9642
+ }
9643
+ return top;
9644
+ }
9645
+ /**
9646
+ * Replace the root element with a new value and re-heapify.
9647
+ * Faster than pop() followed by push().
9648
+ */
9649
+ replace(value) {
9650
+ const top = this.heap[0];
9651
+ this.heap[0] = value;
9652
+ this.sinkDown(0);
9653
+ return top;
9654
+ }
9655
+ toArray() {
9656
+ return [...this.heap];
9657
+ }
9658
+ bubbleUp(index) {
9659
+ while (index > 0) {
9660
+ const parentIndex = Math.floor((index - 1) / 2);
9661
+ if (this.comparator(this.heap[index], this.heap[parentIndex]) >= 0) break;
9662
+ [this.heap[index], this.heap[parentIndex]] = [this.heap[parentIndex], this.heap[index]];
9663
+ index = parentIndex;
9664
+ }
9665
+ }
9666
+ sinkDown(index) {
9667
+ while (true) {
9668
+ let smallest = index;
9669
+ const left = 2 * index + 1;
9670
+ const right = 2 * index + 2;
9671
+ if (left < this.size && this.comparator(this.heap[left], this.heap[smallest]) < 0) {
9672
+ smallest = left;
9673
+ }
9674
+ if (right < this.size && this.comparator(this.heap[right], this.heap[smallest]) < 0) {
9675
+ smallest = right;
9676
+ }
9677
+ if (smallest === index) break;
9678
+ [this.heap[index], this.heap[smallest]] = [this.heap[smallest], this.heap[index]];
9679
+ index = smallest;
9680
+ }
9681
+ }
9682
+ };
9683
+
9576
9684
  // src/core/document.ts
9577
9685
  var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
9578
9686
  indices = {};
@@ -10196,7 +10304,10 @@ var DocumentDataply = class _DocumentDataply {
10196
10304
  } = options;
10197
10305
  const self = this;
10198
10306
  const stream = this.api.streamWithDefault(async function* (tx2) {
10199
- const keys = await self.getKeys(query, orderByField, sortOrder);
10307
+ const keySet = await self.getKeys(query, orderByField, sortOrder);
10308
+ const keys = new Uint32Array(keySet);
10309
+ const totalKeys = keys.length;
10310
+ if (totalKeys === 0) return;
10200
10311
  const selectivity = await self.getSelectivityCandidate(
10201
10312
  self.verboseQuery(query),
10202
10313
  orderByField
@@ -10205,14 +10316,72 @@ var DocumentDataply = class _DocumentDataply {
10205
10316
  if (selectivity) {
10206
10317
  selectivity.rollback();
10207
10318
  }
10319
+ let CHUNK_SIZE = 100;
10208
10320
  if (!isDriverOrderByField && orderByField) {
10321
+ const isTopK = limit !== Infinity;
10322
+ const heapSizeLimit = isTopK ? limit + offset : Infinity;
10323
+ const heapComparator = (a, b) => {
10324
+ const aVal = a[orderByField] ?? a._id;
10325
+ const bVal = b[orderByField] ?? b._id;
10326
+ const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
10327
+ return sortOrder === "asc" ? -cmp : cmp;
10328
+ };
10329
+ const heap = isTopK ? new BinaryHeap(heapComparator) : null;
10209
10330
  const results = [];
10210
- for (const key of keys) {
10211
- const stringified = await self.api.select(key, false, tx2);
10212
- if (!stringified) continue;
10213
- results.push(JSON.parse(stringified));
10331
+ let i = 0;
10332
+ const firstChunk = Array.from(keys.subarray(i, i + 100));
10333
+ const firstResults = await self.api.selectMany(firstChunk, false, tx2);
10334
+ let totalBytes = 0;
10335
+ let count = 0;
10336
+ for (const s of firstResults) {
10337
+ if (!s) continue;
10338
+ totalBytes += s.length;
10339
+ count++;
10340
+ const doc = JSON.parse(s);
10341
+ if (heap) {
10342
+ if (heap.size < heapSizeLimit) {
10343
+ heap.push(doc);
10344
+ } else if (heapComparator(doc, heap.peek()) > 0) {
10345
+ heap.replace(doc);
10346
+ }
10347
+ } else {
10348
+ results.push(doc);
10349
+ }
10350
+ }
10351
+ const avgSize = count > 0 ? totalBytes / count : 1024;
10352
+ CHUNK_SIZE = Math.max(32, Math.floor(os.freemem() * 0.1 / avgSize));
10353
+ i += firstChunk.length;
10354
+ let nextSortChunkPromise = null;
10355
+ if (i < totalKeys) {
10356
+ const nextChunk = Array.from(keys.subarray(i, i + CHUNK_SIZE));
10357
+ nextSortChunkPromise = self.api.selectMany(nextChunk, false, tx2);
10358
+ i += nextChunk.length;
10359
+ }
10360
+ while (nextSortChunkPromise) {
10361
+ const stringifiedResults = await nextSortChunkPromise;
10362
+ if (i < totalKeys) {
10363
+ const nextChunk = Array.from(keys.subarray(i, i + CHUNK_SIZE));
10364
+ nextSortChunkPromise = self.api.selectMany(nextChunk, false, tx2);
10365
+ i += nextChunk.length;
10366
+ } else {
10367
+ nextSortChunkPromise = null;
10368
+ }
10369
+ for (const stringified of stringifiedResults) {
10370
+ if (!stringified) continue;
10371
+ const doc = JSON.parse(stringified);
10372
+ if (heap) {
10373
+ if (heap.size < heapSizeLimit) {
10374
+ heap.push(doc);
10375
+ } else if (heapComparator(doc, heap.peek()) > 0) {
10376
+ heap.replace(doc);
10377
+ }
10378
+ } else {
10379
+ results.push(doc);
10380
+ }
10381
+ }
10214
10382
  }
10215
- results.sort((a, b) => {
10383
+ const finalResults = heap ? heap.toArray() : results;
10384
+ finalResults.sort((a, b) => {
10216
10385
  const aVal = a[orderByField] ?? a._id;
10217
10386
  const bVal = b[orderByField] ?? b._id;
10218
10387
  const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
@@ -10220,24 +10389,60 @@ var DocumentDataply = class _DocumentDataply {
10220
10389
  });
10221
10390
  const start = offset;
10222
10391
  const end = limit === Infinity ? void 0 : start + limit;
10223
- const limitedResults = results.slice(start, end);
10392
+ const limitedResults = finalResults.slice(start, end);
10224
10393
  for (const doc of limitedResults) {
10225
10394
  yield doc;
10226
10395
  }
10227
10396
  } else {
10228
- let i = 0;
10229
10397
  let yieldedCount = 0;
10230
- for (const key of keys) {
10398
+ let i = offset;
10399
+ if (i >= totalKeys || yieldedCount >= limit) return;
10400
+ const pksToFetchCount = Math.min(100, limit - yieldedCount);
10401
+ const firstChunk = Array.from(keys.subarray(i, i + pksToFetchCount));
10402
+ const firstResults = await self.api.selectMany(firstChunk, false, tx2);
10403
+ let totalBytes = 0;
10404
+ let count = 0;
10405
+ for (const s of firstResults) {
10406
+ if (!s) continue;
10407
+ yield JSON.parse(s);
10408
+ yieldedCount++;
10409
+ totalBytes += s.length;
10410
+ count++;
10231
10411
  if (yieldedCount >= limit) break;
10232
- if (i < offset) {
10233
- i++;
10234
- continue;
10412
+ }
10413
+ const avgSize = count > 0 ? totalBytes / count : 1024;
10414
+ CHUNK_SIZE = Math.max(32, Math.floor(os.freemem() * 0.1 / avgSize));
10415
+ i += firstChunk.length;
10416
+ let nextStreamChunkPromise = null;
10417
+ if (i < totalKeys && yieldedCount < limit) {
10418
+ const nextPksToFetchCount = Math.min(CHUNK_SIZE, limit - yieldedCount);
10419
+ const nextChunk = Array.from(keys.subarray(i, i + nextPksToFetchCount));
10420
+ nextStreamChunkPromise = self.api.selectMany(nextChunk, false, tx2);
10421
+ i += nextChunk.length;
10422
+ }
10423
+ while (nextStreamChunkPromise) {
10424
+ const stringifiedResults = await nextStreamChunkPromise;
10425
+ if (i < totalKeys && yieldedCount < limit) {
10426
+ const nextPksToFetchCount = Math.min(CHUNK_SIZE, limit - (yieldedCount + stringifiedResults.filter(Boolean).length));
10427
+ if (nextPksToFetchCount > 0) {
10428
+ const nextChunk = Array.from(keys.subarray(i, i + nextPksToFetchCount));
10429
+ nextStreamChunkPromise = self.api.selectMany(nextChunk, false, tx2);
10430
+ i += nextChunk.length;
10431
+ } else {
10432
+ nextStreamChunkPromise = null;
10433
+ }
10434
+ } else {
10435
+ nextStreamChunkPromise = null;
10436
+ }
10437
+ for (const stringified of stringifiedResults) {
10438
+ if (!stringified) continue;
10439
+ yield JSON.parse(stringified);
10440
+ yieldedCount++;
10441
+ if (yieldedCount >= limit) {
10442
+ nextStreamChunkPromise = null;
10443
+ break;
10444
+ }
10235
10445
  }
10236
- const stringified = await self.api.select(key, false, tx2);
10237
- if (!stringified) continue;
10238
- yield JSON.parse(stringified);
10239
- yieldedCount++;
10240
- i++;
10241
10446
  }
10242
10447
  }
10243
10448
  }, tx);
@@ -0,0 +1,21 @@
1
+ export type Comparator<T> = (a: T, b: T) => number;
2
+ /**
3
+ * A simple Binary Heap implementation.
4
+ */
5
+ export declare class BinaryHeap<T> {
6
+ private comparator;
7
+ private heap;
8
+ constructor(comparator: Comparator<T>);
9
+ get size(): number;
10
+ peek(): T | undefined;
11
+ push(value: T): void;
12
+ pop(): T | undefined;
13
+ /**
14
+ * Replace the root element with a new value and re-heapify.
15
+ * Faster than pop() followed by push().
16
+ */
17
+ replace(value: T): T | undefined;
18
+ toArray(): T[];
19
+ private bubbleUp;
20
+ private sinkDown;
21
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "document-dataply",
3
- "version": "0.0.4-alpha.4",
3
+ "version": "0.0.4",
4
4
  "description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
5
5
  "license": "MIT",
6
6
  "author": "izure <admin@izure.org>",
@@ -42,7 +42,7 @@
42
42
  "dataply"
43
43
  ],
44
44
  "dependencies": {
45
- "dataply": "^0.0.20-alpha.4"
45
+ "dataply": "^0.0.20"
46
46
  },
47
47
  "devDependencies": {
48
48
  "@types/jest": "^30.0.0",
@@ -51,4 +51,4 @@
51
51
  "ts-jest": "^29.4.6",
52
52
  "typescript": "^5.9.3"
53
53
  }
54
- }
54
+ }