document-dataply 0.0.4-alpha.4 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.js +226 -21
- package/dist/types/utils/heap.d.ts +21 -0
- package/package.json +3 -3
package/dist/cjs/index.js
CHANGED
|
@@ -7611,9 +7611,7 @@ var require_cjs = __commonJS({
|
|
|
7611
7611
|
}
|
|
7612
7612
|
const cached = this.cache.get(pageId);
|
|
7613
7613
|
if (cached) {
|
|
7614
|
-
|
|
7615
|
-
copy.set(cached);
|
|
7616
|
-
return copy;
|
|
7614
|
+
return cached;
|
|
7617
7615
|
}
|
|
7618
7616
|
const buffer = new Uint8Array(this.pageSize);
|
|
7619
7617
|
const pageStartPos = pageId * this.pageSize;
|
|
@@ -8101,6 +8099,7 @@ var require_cjs = __commonJS({
|
|
|
8101
8099
|
}
|
|
8102
8100
|
}
|
|
8103
8101
|
};
|
|
8102
|
+
var import_node_os = __toESM2(require("node:os"));
|
|
8104
8103
|
var TextCodec = class _TextCodec {
|
|
8105
8104
|
static TextEncoder = new TextEncoder();
|
|
8106
8105
|
static TextDecoder = new TextDecoder();
|
|
@@ -8340,11 +8339,14 @@ var require_cjs = __commonJS({
|
|
|
8340
8339
|
this.maxBodySize = this.pfs.pageSize - DataPageManager.CONSTANT.SIZE_PAGE_HEADER;
|
|
8341
8340
|
this.order = this.getOptimalOrder(pfs.pageSize, IndexPageManager.CONSTANT.SIZE_KEY, IndexPageManager.CONSTANT.SIZE_VALUE);
|
|
8342
8341
|
this.strategy = new RowIdentifierStrategy(this.order, pfs, txContext);
|
|
8342
|
+
const budget = import_node_os.default.freemem() * 0.1;
|
|
8343
|
+
const nodeMemory = this.order * 24 + 256;
|
|
8344
|
+
const capacity = Math.max(1e3, Math.min(1e6, Math.floor(budget / nodeMemory)));
|
|
8343
8345
|
this.bptree = new BPTreeAsync2(
|
|
8344
8346
|
this.strategy,
|
|
8345
8347
|
new NumericComparator(),
|
|
8346
8348
|
{
|
|
8347
|
-
capacity
|
|
8349
|
+
capacity
|
|
8348
8350
|
}
|
|
8349
8351
|
);
|
|
8350
8352
|
}
|
|
@@ -8711,6 +8713,30 @@ var require_cjs = __commonJS({
|
|
|
8711
8713
|
}
|
|
8712
8714
|
return this.fetchRowByRid(pk, rid, tx);
|
|
8713
8715
|
}
|
|
8716
|
+
/**
|
|
8717
|
+
* Selects multiple rows by their PKs in a single B+ Tree traversal.
|
|
8718
|
+
* @param pks Array of PKs to look up
|
|
8719
|
+
* @param tx Transaction
|
|
8720
|
+
* @returns Array of raw data of the rows in the same order as input PKs
|
|
8721
|
+
*/
|
|
8722
|
+
async selectMany(pks, tx) {
|
|
8723
|
+
if (pks.length === 0) {
|
|
8724
|
+
return [];
|
|
8725
|
+
}
|
|
8726
|
+
const minPk = Math.min(...pks);
|
|
8727
|
+
const maxPk = Math.max(...pks);
|
|
8728
|
+
const pkSet = new Set(pks);
|
|
8729
|
+
const resultMap = /* @__PURE__ */ new Map();
|
|
8730
|
+
const btx = await this.getBPTreeTransaction(tx);
|
|
8731
|
+
const stream = btx.whereStream({ gte: minPk, lte: maxPk });
|
|
8732
|
+
for await (const [rid, pk] of stream) {
|
|
8733
|
+
if (pkSet.has(pk)) {
|
|
8734
|
+
const rowData = await this.fetchRowByRid(pk, rid, tx);
|
|
8735
|
+
resultMap.set(pk, rowData);
|
|
8736
|
+
}
|
|
8737
|
+
}
|
|
8738
|
+
return pks.map((pk) => resultMap.get(pk) ?? null);
|
|
8739
|
+
}
|
|
8714
8740
|
async fetchRowByRid(pk, rid, tx) {
|
|
8715
8741
|
this.keyManager.setBufferFromKey(rid, this.ridBuffer);
|
|
8716
8742
|
const pageId = this.keyManager.getPageId(this.ridBuffer);
|
|
@@ -9336,6 +9362,19 @@ var require_cjs = __commonJS({
|
|
|
9336
9362
|
return this.textCodec.decode(data);
|
|
9337
9363
|
}, tx);
|
|
9338
9364
|
}
|
|
9365
|
+
async selectMany(pks, asRaw = false, tx) {
|
|
9366
|
+
if (!this.initialized) {
|
|
9367
|
+
throw new Error("Dataply instance is not initialized");
|
|
9368
|
+
}
|
|
9369
|
+
return this.runWithDefault(async (tx2) => {
|
|
9370
|
+
const results = await this.rowTableEngine.selectMany(pks, tx2);
|
|
9371
|
+
return results.map((data) => {
|
|
9372
|
+
if (data === null) return null;
|
|
9373
|
+
if (asRaw) return data;
|
|
9374
|
+
return this.textCodec.decode(data);
|
|
9375
|
+
});
|
|
9376
|
+
}, tx);
|
|
9377
|
+
}
|
|
9339
9378
|
/**
|
|
9340
9379
|
* Closes the dataply file.
|
|
9341
9380
|
*/
|
|
@@ -9424,6 +9463,9 @@ var require_cjs = __commonJS({
|
|
|
9424
9463
|
async select(pk, asRaw = false, tx) {
|
|
9425
9464
|
return this.api.select(pk, asRaw, tx);
|
|
9426
9465
|
}
|
|
9466
|
+
async selectMany(pks, asRaw = false, tx) {
|
|
9467
|
+
return this.api.selectMany(pks, asRaw, tx);
|
|
9468
|
+
}
|
|
9427
9469
|
/**
|
|
9428
9470
|
* Closes the dataply file.
|
|
9429
9471
|
*/
|
|
@@ -9484,6 +9526,7 @@ __export(src_exports, {
|
|
|
9484
9526
|
module.exports = __toCommonJS(src_exports);
|
|
9485
9527
|
|
|
9486
9528
|
// src/core/document.ts
|
|
9529
|
+
var os = __toESM(require("node:os"));
|
|
9487
9530
|
var import_dataply3 = __toESM(require_cjs());
|
|
9488
9531
|
|
|
9489
9532
|
// src/core/bptree/documentStrategy.ts
|
|
@@ -9573,6 +9616,71 @@ async function catchPromise(promise) {
|
|
|
9573
9616
|
return promise.then((res) => [void 0, res]).catch((reason) => [reason]);
|
|
9574
9617
|
}
|
|
9575
9618
|
|
|
9619
|
+
// src/utils/heap.ts
|
|
9620
|
+
var BinaryHeap = class {
|
|
9621
|
+
constructor(comparator) {
|
|
9622
|
+
this.comparator = comparator;
|
|
9623
|
+
}
|
|
9624
|
+
heap = [];
|
|
9625
|
+
get size() {
|
|
9626
|
+
return this.heap.length;
|
|
9627
|
+
}
|
|
9628
|
+
peek() {
|
|
9629
|
+
return this.heap[0];
|
|
9630
|
+
}
|
|
9631
|
+
push(value) {
|
|
9632
|
+
this.heap.push(value);
|
|
9633
|
+
this.bubbleUp(this.heap.length - 1);
|
|
9634
|
+
}
|
|
9635
|
+
pop() {
|
|
9636
|
+
if (this.size === 0) return void 0;
|
|
9637
|
+
const top = this.heap[0];
|
|
9638
|
+
const bottom = this.heap.pop();
|
|
9639
|
+
if (this.size > 0) {
|
|
9640
|
+
this.heap[0] = bottom;
|
|
9641
|
+
this.sinkDown(0);
|
|
9642
|
+
}
|
|
9643
|
+
return top;
|
|
9644
|
+
}
|
|
9645
|
+
/**
|
|
9646
|
+
* Replace the root element with a new value and re-heapify.
|
|
9647
|
+
* Faster than pop() followed by push().
|
|
9648
|
+
*/
|
|
9649
|
+
replace(value) {
|
|
9650
|
+
const top = this.heap[0];
|
|
9651
|
+
this.heap[0] = value;
|
|
9652
|
+
this.sinkDown(0);
|
|
9653
|
+
return top;
|
|
9654
|
+
}
|
|
9655
|
+
toArray() {
|
|
9656
|
+
return [...this.heap];
|
|
9657
|
+
}
|
|
9658
|
+
bubbleUp(index) {
|
|
9659
|
+
while (index > 0) {
|
|
9660
|
+
const parentIndex = Math.floor((index - 1) / 2);
|
|
9661
|
+
if (this.comparator(this.heap[index], this.heap[parentIndex]) >= 0) break;
|
|
9662
|
+
[this.heap[index], this.heap[parentIndex]] = [this.heap[parentIndex], this.heap[index]];
|
|
9663
|
+
index = parentIndex;
|
|
9664
|
+
}
|
|
9665
|
+
}
|
|
9666
|
+
sinkDown(index) {
|
|
9667
|
+
while (true) {
|
|
9668
|
+
let smallest = index;
|
|
9669
|
+
const left = 2 * index + 1;
|
|
9670
|
+
const right = 2 * index + 2;
|
|
9671
|
+
if (left < this.size && this.comparator(this.heap[left], this.heap[smallest]) < 0) {
|
|
9672
|
+
smallest = left;
|
|
9673
|
+
}
|
|
9674
|
+
if (right < this.size && this.comparator(this.heap[right], this.heap[smallest]) < 0) {
|
|
9675
|
+
smallest = right;
|
|
9676
|
+
}
|
|
9677
|
+
if (smallest === index) break;
|
|
9678
|
+
[this.heap[index], this.heap[smallest]] = [this.heap[smallest], this.heap[index]];
|
|
9679
|
+
index = smallest;
|
|
9680
|
+
}
|
|
9681
|
+
}
|
|
9682
|
+
};
|
|
9683
|
+
|
|
9576
9684
|
// src/core/document.ts
|
|
9577
9685
|
var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
|
|
9578
9686
|
indices = {};
|
|
@@ -10196,7 +10304,10 @@ var DocumentDataply = class _DocumentDataply {
|
|
|
10196
10304
|
} = options;
|
|
10197
10305
|
const self = this;
|
|
10198
10306
|
const stream = this.api.streamWithDefault(async function* (tx2) {
|
|
10199
|
-
const
|
|
10307
|
+
const keySet = await self.getKeys(query, orderByField, sortOrder);
|
|
10308
|
+
const keys = new Uint32Array(keySet);
|
|
10309
|
+
const totalKeys = keys.length;
|
|
10310
|
+
if (totalKeys === 0) return;
|
|
10200
10311
|
const selectivity = await self.getSelectivityCandidate(
|
|
10201
10312
|
self.verboseQuery(query),
|
|
10202
10313
|
orderByField
|
|
@@ -10205,14 +10316,72 @@ var DocumentDataply = class _DocumentDataply {
|
|
|
10205
10316
|
if (selectivity) {
|
|
10206
10317
|
selectivity.rollback();
|
|
10207
10318
|
}
|
|
10319
|
+
let CHUNK_SIZE = 100;
|
|
10208
10320
|
if (!isDriverOrderByField && orderByField) {
|
|
10321
|
+
const isTopK = limit !== Infinity;
|
|
10322
|
+
const heapSizeLimit = isTopK ? limit + offset : Infinity;
|
|
10323
|
+
const heapComparator = (a, b) => {
|
|
10324
|
+
const aVal = a[orderByField] ?? a._id;
|
|
10325
|
+
const bVal = b[orderByField] ?? b._id;
|
|
10326
|
+
const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
|
|
10327
|
+
return sortOrder === "asc" ? -cmp : cmp;
|
|
10328
|
+
};
|
|
10329
|
+
const heap = isTopK ? new BinaryHeap(heapComparator) : null;
|
|
10209
10330
|
const results = [];
|
|
10210
|
-
|
|
10211
|
-
|
|
10212
|
-
|
|
10213
|
-
|
|
10331
|
+
let i = 0;
|
|
10332
|
+
const firstChunk = Array.from(keys.subarray(i, i + 100));
|
|
10333
|
+
const firstResults = await self.api.selectMany(firstChunk, false, tx2);
|
|
10334
|
+
let totalBytes = 0;
|
|
10335
|
+
let count = 0;
|
|
10336
|
+
for (const s of firstResults) {
|
|
10337
|
+
if (!s) continue;
|
|
10338
|
+
totalBytes += s.length;
|
|
10339
|
+
count++;
|
|
10340
|
+
const doc = JSON.parse(s);
|
|
10341
|
+
if (heap) {
|
|
10342
|
+
if (heap.size < heapSizeLimit) {
|
|
10343
|
+
heap.push(doc);
|
|
10344
|
+
} else if (heapComparator(doc, heap.peek()) > 0) {
|
|
10345
|
+
heap.replace(doc);
|
|
10346
|
+
}
|
|
10347
|
+
} else {
|
|
10348
|
+
results.push(doc);
|
|
10349
|
+
}
|
|
10350
|
+
}
|
|
10351
|
+
const avgSize = count > 0 ? totalBytes / count : 1024;
|
|
10352
|
+
CHUNK_SIZE = Math.max(32, Math.floor(os.freemem() * 0.1 / avgSize));
|
|
10353
|
+
i += firstChunk.length;
|
|
10354
|
+
let nextSortChunkPromise = null;
|
|
10355
|
+
if (i < totalKeys) {
|
|
10356
|
+
const nextChunk = Array.from(keys.subarray(i, i + CHUNK_SIZE));
|
|
10357
|
+
nextSortChunkPromise = self.api.selectMany(nextChunk, false, tx2);
|
|
10358
|
+
i += nextChunk.length;
|
|
10359
|
+
}
|
|
10360
|
+
while (nextSortChunkPromise) {
|
|
10361
|
+
const stringifiedResults = await nextSortChunkPromise;
|
|
10362
|
+
if (i < totalKeys) {
|
|
10363
|
+
const nextChunk = Array.from(keys.subarray(i, i + CHUNK_SIZE));
|
|
10364
|
+
nextSortChunkPromise = self.api.selectMany(nextChunk, false, tx2);
|
|
10365
|
+
i += nextChunk.length;
|
|
10366
|
+
} else {
|
|
10367
|
+
nextSortChunkPromise = null;
|
|
10368
|
+
}
|
|
10369
|
+
for (const stringified of stringifiedResults) {
|
|
10370
|
+
if (!stringified) continue;
|
|
10371
|
+
const doc = JSON.parse(stringified);
|
|
10372
|
+
if (heap) {
|
|
10373
|
+
if (heap.size < heapSizeLimit) {
|
|
10374
|
+
heap.push(doc);
|
|
10375
|
+
} else if (heapComparator(doc, heap.peek()) > 0) {
|
|
10376
|
+
heap.replace(doc);
|
|
10377
|
+
}
|
|
10378
|
+
} else {
|
|
10379
|
+
results.push(doc);
|
|
10380
|
+
}
|
|
10381
|
+
}
|
|
10214
10382
|
}
|
|
10215
|
-
|
|
10383
|
+
const finalResults = heap ? heap.toArray() : results;
|
|
10384
|
+
finalResults.sort((a, b) => {
|
|
10216
10385
|
const aVal = a[orderByField] ?? a._id;
|
|
10217
10386
|
const bVal = b[orderByField] ?? b._id;
|
|
10218
10387
|
const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
|
|
@@ -10220,24 +10389,60 @@ var DocumentDataply = class _DocumentDataply {
|
|
|
10220
10389
|
});
|
|
10221
10390
|
const start = offset;
|
|
10222
10391
|
const end = limit === Infinity ? void 0 : start + limit;
|
|
10223
|
-
const limitedResults =
|
|
10392
|
+
const limitedResults = finalResults.slice(start, end);
|
|
10224
10393
|
for (const doc of limitedResults) {
|
|
10225
10394
|
yield doc;
|
|
10226
10395
|
}
|
|
10227
10396
|
} else {
|
|
10228
|
-
let i = 0;
|
|
10229
10397
|
let yieldedCount = 0;
|
|
10230
|
-
|
|
10398
|
+
let i = offset;
|
|
10399
|
+
if (i >= totalKeys || yieldedCount >= limit) return;
|
|
10400
|
+
const pksToFetchCount = Math.min(100, limit - yieldedCount);
|
|
10401
|
+
const firstChunk = Array.from(keys.subarray(i, i + pksToFetchCount));
|
|
10402
|
+
const firstResults = await self.api.selectMany(firstChunk, false, tx2);
|
|
10403
|
+
let totalBytes = 0;
|
|
10404
|
+
let count = 0;
|
|
10405
|
+
for (const s of firstResults) {
|
|
10406
|
+
if (!s) continue;
|
|
10407
|
+
yield JSON.parse(s);
|
|
10408
|
+
yieldedCount++;
|
|
10409
|
+
totalBytes += s.length;
|
|
10410
|
+
count++;
|
|
10231
10411
|
if (yieldedCount >= limit) break;
|
|
10232
|
-
|
|
10233
|
-
|
|
10234
|
-
|
|
10412
|
+
}
|
|
10413
|
+
const avgSize = count > 0 ? totalBytes / count : 1024;
|
|
10414
|
+
CHUNK_SIZE = Math.max(32, Math.floor(os.freemem() * 0.1 / avgSize));
|
|
10415
|
+
i += firstChunk.length;
|
|
10416
|
+
let nextStreamChunkPromise = null;
|
|
10417
|
+
if (i < totalKeys && yieldedCount < limit) {
|
|
10418
|
+
const nextPksToFetchCount = Math.min(CHUNK_SIZE, limit - yieldedCount);
|
|
10419
|
+
const nextChunk = Array.from(keys.subarray(i, i + nextPksToFetchCount));
|
|
10420
|
+
nextStreamChunkPromise = self.api.selectMany(nextChunk, false, tx2);
|
|
10421
|
+
i += nextChunk.length;
|
|
10422
|
+
}
|
|
10423
|
+
while (nextStreamChunkPromise) {
|
|
10424
|
+
const stringifiedResults = await nextStreamChunkPromise;
|
|
10425
|
+
if (i < totalKeys && yieldedCount < limit) {
|
|
10426
|
+
const nextPksToFetchCount = Math.min(CHUNK_SIZE, limit - (yieldedCount + stringifiedResults.filter(Boolean).length));
|
|
10427
|
+
if (nextPksToFetchCount > 0) {
|
|
10428
|
+
const nextChunk = Array.from(keys.subarray(i, i + nextPksToFetchCount));
|
|
10429
|
+
nextStreamChunkPromise = self.api.selectMany(nextChunk, false, tx2);
|
|
10430
|
+
i += nextChunk.length;
|
|
10431
|
+
} else {
|
|
10432
|
+
nextStreamChunkPromise = null;
|
|
10433
|
+
}
|
|
10434
|
+
} else {
|
|
10435
|
+
nextStreamChunkPromise = null;
|
|
10436
|
+
}
|
|
10437
|
+
for (const stringified of stringifiedResults) {
|
|
10438
|
+
if (!stringified) continue;
|
|
10439
|
+
yield JSON.parse(stringified);
|
|
10440
|
+
yieldedCount++;
|
|
10441
|
+
if (yieldedCount >= limit) {
|
|
10442
|
+
nextStreamChunkPromise = null;
|
|
10443
|
+
break;
|
|
10444
|
+
}
|
|
10235
10445
|
}
|
|
10236
|
-
const stringified = await self.api.select(key, false, tx2);
|
|
10237
|
-
if (!stringified) continue;
|
|
10238
|
-
yield JSON.parse(stringified);
|
|
10239
|
-
yieldedCount++;
|
|
10240
|
-
i++;
|
|
10241
10446
|
}
|
|
10242
10447
|
}
|
|
10243
10448
|
}, tx);
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * Ordering function for heap elements: a negative result means `a` is placed
 * closer to the root than `b`.
 */
export type Comparator<T> = (a: T, b: T) => number;
/**
 * A simple Binary Heap implementation.
 * The element that compares lowest under the comparator is kept at the root.
 */
export declare class BinaryHeap<T> {
    private comparator;
    private heap;
    /** @param comparator Ordering used to arrange the elements. */
    constructor(comparator: Comparator<T>);
    /** Number of elements currently stored. */
    get size(): number;
    /** Returns the root without removing it, or `undefined` when empty. */
    peek(): T | undefined;
    /** Inserts a value. */
    push(value: T): void;
    /** Removes and returns the root, or `undefined` when empty. */
    pop(): T | undefined;
    /**
     * Replace the root element with a new value and re-heapify.
     * Faster than pop() followed by push().
     * @returns The previous root, or `undefined` if the heap was empty.
     */
    replace(value: T): T | undefined;
    /** Returns a copy of the elements in internal heap order (not sorted). */
    toArray(): T[];
    private bubbleUp;
    private sinkDown;
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "document-dataply",
|
|
3
|
-
"version": "0.0.4-alpha.4",
|
|
3
|
+
"version": "0.0.4",
|
|
4
4
|
"description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "izure <admin@izure.org>",
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
"dataply"
|
|
43
43
|
],
|
|
44
44
|
"dependencies": {
|
|
45
|
-
"dataply": "^0.0.20
|
|
45
|
+
"dataply": "^0.0.20"
|
|
46
46
|
},
|
|
47
47
|
"devDependencies": {
|
|
48
48
|
"@types/jest": "^30.0.0",
|
|
@@ -51,4 +51,4 @@
|
|
|
51
51
|
"ts-jest": "^29.4.6",
|
|
52
52
|
"typescript": "^5.9.3"
|
|
53
53
|
}
|
|
54
|
-
}
|
|
54
|
+
}
|