document-dataply 0.0.9 → 0.0.10-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js
CHANGED
|
@@ -6478,65 +6478,22 @@ var require_cjs = __commonJS({
|
|
|
6478
6478
|
}
|
|
6479
6479
|
return (crc ^ -1) >>> 0;
|
|
6480
6480
|
}
|
|
6481
|
-
function
|
|
6482
|
-
const gLen = sortedGaps.length;
|
|
6483
|
-
if (gLen === 0) return 0;
|
|
6484
|
-
const median = sortedGaps[Math.floor(gLen * 0.5)];
|
|
6485
|
-
const q1 = sortedGaps[Math.floor(gLen * 0.25)];
|
|
6486
|
-
const q3 = sortedGaps[Math.floor(gLen * 0.75)];
|
|
6487
|
-
const iqr = q3 - q1;
|
|
6488
|
-
const logN = Math.max(1, Math.log10(n));
|
|
6489
|
-
if (iqr > 0) {
|
|
6490
|
-
const threshold2 = q3 + iqr * 1.5 * logN;
|
|
6491
|
-
const minJump = Math.max(median * 5, 20);
|
|
6492
|
-
return Math.max(threshold2, minJump);
|
|
6493
|
-
}
|
|
6494
|
-
const baseGap = median > 0 ? median : 1;
|
|
6495
|
-
const p90 = sortedGaps[Math.floor(gLen * 0.9)];
|
|
6496
|
-
if (p90 > baseGap) {
|
|
6497
|
-
const threshold2 = baseGap + (p90 - baseGap) * 0.5 * logN;
|
|
6498
|
-
return Math.max(threshold2, baseGap * 5, 20);
|
|
6499
|
-
}
|
|
6500
|
-
let mean = 0;
|
|
6501
|
-
for (let i = 0; i < gLen; i++) mean += sortedGaps[i];
|
|
6502
|
-
mean /= gLen;
|
|
6503
|
-
let variance = 0;
|
|
6504
|
-
for (let i = 0; i < gLen; i++) {
|
|
6505
|
-
const d = sortedGaps[i] - mean;
|
|
6506
|
-
variance += d * d;
|
|
6507
|
-
}
|
|
6508
|
-
const stddev = Math.sqrt(variance / gLen);
|
|
6509
|
-
if (stddev === 0) {
|
|
6510
|
-
return baseGap * 2;
|
|
6511
|
-
}
|
|
6512
|
-
const threshold = mean + stddev * logN;
|
|
6513
|
-
return Math.max(threshold, baseGap * 5, 20);
|
|
6514
|
-
}
|
|
6515
|
-
function clusterNumbers(numbers, maxGap) {
|
|
6481
|
+
function clusterNumbersByPagination(numbers, pagingSize, startPageId = 0) {
|
|
6516
6482
|
const n = numbers.length;
|
|
6517
6483
|
if (n === 0) return [];
|
|
6518
6484
|
if (n === 1) return [new Float64Array([numbers[0]])];
|
|
6519
6485
|
const sorted = (numbers instanceof Float64Array ? numbers.slice() : Float64Array.from(numbers)).sort();
|
|
6520
|
-
const gaps = new Float64Array(n - 1);
|
|
6521
|
-
for (let i = 0, len = n - 1; i < len; i++) {
|
|
6522
|
-
gaps[i] = sorted[i + 1] - sorted[i];
|
|
6523
|
-
}
|
|
6524
|
-
const sortedGaps = gaps.slice().sort();
|
|
6525
|
-
let threshold;
|
|
6526
|
-
if (maxGap !== void 0) {
|
|
6527
|
-
threshold = maxGap;
|
|
6528
|
-
} else {
|
|
6529
|
-
threshold = calcThreshold(sortedGaps, n);
|
|
6530
|
-
}
|
|
6531
6486
|
const clusters = [];
|
|
6532
|
-
let
|
|
6487
|
+
let start = 0;
|
|
6533
6488
|
for (let i = 0, len = n - 1; i < len; i++) {
|
|
6534
|
-
|
|
6535
|
-
|
|
6536
|
-
|
|
6489
|
+
const paginationIndex = Math.floor((sorted[i] - startPageId) / pagingSize);
|
|
6490
|
+
const nextPaginationIndex = Math.floor((sorted[i + 1] - startPageId) / pagingSize);
|
|
6491
|
+
if (paginationIndex !== nextPaginationIndex) {
|
|
6492
|
+
clusters.push(sorted.subarray(start, i + 1));
|
|
6493
|
+
start = i + 1;
|
|
6537
6494
|
}
|
|
6538
6495
|
}
|
|
6539
|
-
clusters.push(sorted.subarray(
|
|
6496
|
+
clusters.push(sorted.subarray(start));
|
|
6540
6497
|
return clusters;
|
|
6541
6498
|
}
|
|
6542
6499
|
var Row = class _Row {
|
|
@@ -9470,19 +9427,34 @@ var require_cjs = __commonJS({
|
|
|
9470
9427
|
* @returns Array of raw data of the rows in the same order as input PKs
|
|
9471
9428
|
*/
|
|
9472
9429
|
async selectMany(pks, tx) {
|
|
9430
|
+
const collections = await this.collectItemsByPage(pks, tx);
|
|
9431
|
+
return this.fetchRowsByRids(collections, pks.length, tx);
|
|
9432
|
+
}
|
|
9433
|
+
/**
|
|
9434
|
+
* Collects items by page ID to minimize I/O.
|
|
9435
|
+
* @param pks Array of PKs to look up
|
|
9436
|
+
* @param tx Transaction
|
|
9437
|
+
* @returns Map of page ID to array of {pk, slotIndex, index} pairs
|
|
9438
|
+
*/
|
|
9439
|
+
async collectItemsByPage(pks, tx) {
|
|
9473
9440
|
if (pks.length === 0) {
|
|
9474
|
-
return
|
|
9441
|
+
return /* @__PURE__ */ new Map();
|
|
9475
9442
|
}
|
|
9476
9443
|
const pkIndexMap = /* @__PURE__ */ new Map();
|
|
9477
9444
|
for (let i = 0, len = pks.length; i < len; i++) {
|
|
9478
9445
|
pkIndexMap.set(pks[i], i);
|
|
9479
9446
|
}
|
|
9480
|
-
const validCount = pks.length;
|
|
9481
|
-
const pkArray = new Float64Array(validCount).fill(0);
|
|
9482
|
-
const ridArray = new Float64Array(validCount).fill(0);
|
|
9483
|
-
const indexArray = new Float64Array(validCount).fill(0);
|
|
9484
9447
|
const btx = await this.getBPTreeTransaction(tx);
|
|
9485
|
-
const clusters =
|
|
9448
|
+
const clusters = clusterNumbersByPagination(pks, this.order, 1);
|
|
9449
|
+
const collections = /* @__PURE__ */ new Map();
|
|
9450
|
+
const insertToCollections = (pk, rid, index) => {
|
|
9451
|
+
const slotIndex = rid % 65536;
|
|
9452
|
+
const pageId = Math.floor(rid / 65536);
|
|
9453
|
+
if (!collections.has(pageId)) {
|
|
9454
|
+
collections.set(pageId, []);
|
|
9455
|
+
}
|
|
9456
|
+
collections.get(pageId).push({ pk, slotIndex, index });
|
|
9457
|
+
};
|
|
9486
9458
|
for (let i = 0, len = clusters.length; i < len; i++) {
|
|
9487
9459
|
const cluster = clusters[i];
|
|
9488
9460
|
const minPk = cluster[0];
|
|
@@ -9493,9 +9465,7 @@ var require_cjs = __commonJS({
|
|
|
9493
9465
|
const rid = keys.values().next().value;
|
|
9494
9466
|
const index = pkIndexMap.get(minPk);
|
|
9495
9467
|
if (index !== void 0) {
|
|
9496
|
-
|
|
9497
|
-
ridArray[index] = rid;
|
|
9498
|
-
indexArray[index] = index;
|
|
9468
|
+
insertToCollections(minPk, rid, index);
|
|
9499
9469
|
}
|
|
9500
9470
|
}
|
|
9501
9471
|
continue;
|
|
@@ -9504,13 +9474,11 @@ var require_cjs = __commonJS({
|
|
|
9504
9474
|
for await (const [rid, pk] of stream) {
|
|
9505
9475
|
const index = pkIndexMap.get(pk);
|
|
9506
9476
|
if (index !== void 0) {
|
|
9507
|
-
|
|
9508
|
-
ridArray[index] = rid;
|
|
9509
|
-
indexArray[index] = index;
|
|
9477
|
+
insertToCollections(pk, rid, index);
|
|
9510
9478
|
}
|
|
9511
9479
|
}
|
|
9512
9480
|
}
|
|
9513
|
-
return
|
|
9481
|
+
return collections;
|
|
9514
9482
|
}
|
|
9515
9483
|
/**
|
|
9516
9484
|
* Fetches multiple rows by their RID and PK combinations, grouping by page ID to minimize I/O.
|
|
@@ -9518,25 +9486,12 @@ var require_cjs = __commonJS({
|
|
|
9518
9486
|
* @param tx Transaction
|
|
9519
9487
|
* @returns Array of row data in the same order as input PKs
|
|
9520
9488
|
*/
|
|
9521
|
-
async fetchRowsByRids(
|
|
9522
|
-
const result = new Array(
|
|
9523
|
-
if (
|
|
9524
|
-
const
|
|
9525
|
-
for (let i = 0; i < validCount; i++) {
|
|
9526
|
-
const pk = pkArray[i];
|
|
9527
|
-
const rid = ridArray[i];
|
|
9528
|
-
const index = indexArray[i];
|
|
9529
|
-
if (pk === 0 && rid === 0 && index === 0) continue;
|
|
9530
|
-
const slotIndex = rid % 65536;
|
|
9531
|
-
const pageId = Math.floor(rid / 65536);
|
|
9532
|
-
if (!pageGroupMap.has(pageId)) {
|
|
9533
|
-
pageGroupMap.set(pageId, []);
|
|
9534
|
-
}
|
|
9535
|
-
pageGroupMap.get(pageId).push({ pk, slotIndex, index });
|
|
9536
|
-
}
|
|
9537
|
-
const sortedPageIds = Array.from(pageGroupMap.keys()).sort((a, b) => a - b);
|
|
9489
|
+
async fetchRowsByRids(collections, itemsCount, tx) {
|
|
9490
|
+
const result = new Array(itemsCount).fill(null);
|
|
9491
|
+
if (itemsCount === 0) return result;
|
|
9492
|
+
const sortedPageIds = Array.from(collections.keys()).sort((a, b) => a - b);
|
|
9538
9493
|
await Promise.all(sortedPageIds.map(async (pageId) => {
|
|
9539
|
-
const items =
|
|
9494
|
+
const items = collections.get(pageId);
|
|
9540
9495
|
const page = await this.pfs.get(pageId, tx);
|
|
9541
9496
|
if (!this.factory.isDataPage(page)) {
|
|
9542
9497
|
throw new Error(`Page ${pageId} is not a data page`);
|
|
@@ -10502,38 +10457,50 @@ var DocumentSerializeStrategyAsync = class extends import_dataply.SerializeStrat
|
|
|
10502
10457
|
|
|
10503
10458
|
// src/core/bptree/documentComparator.ts
|
|
10504
10459
|
var import_dataply2 = __toESM(require_cjs());
|
|
10505
|
-
function compareDiff(a, b) {
|
|
10506
|
-
if (typeof a !== "string" && typeof b !== "string") {
|
|
10507
|
-
return +a - +b;
|
|
10508
|
-
}
|
|
10509
|
-
return (a + "").localeCompare(b + "");
|
|
10510
|
-
}
|
|
10511
|
-
function compareValue(a, b) {
|
|
10512
|
-
const aArr = Array.isArray(a);
|
|
10513
|
-
const bArr = Array.isArray(b);
|
|
10514
|
-
if (!aArr && !bArr) {
|
|
10515
|
-
return compareDiff(a, b);
|
|
10516
|
-
}
|
|
10517
|
-
const aList = aArr ? a : [a];
|
|
10518
|
-
const bList = bArr ? b : [b];
|
|
10519
|
-
const len = Math.min(aList.length, bList.length);
|
|
10520
|
-
for (let i = 0; i < len; i++) {
|
|
10521
|
-
const diff = compareDiff(aList[i], bList[i]);
|
|
10522
|
-
if (diff !== 0) return diff;
|
|
10523
|
-
}
|
|
10524
|
-
return 0;
|
|
10525
|
-
}
|
|
10526
10460
|
var DocumentValueComparator = class extends import_dataply2.ValueComparator {
|
|
10461
|
+
_intlComparator = new Intl.Collator(void 0, {
|
|
10462
|
+
numeric: true,
|
|
10463
|
+
sensitivity: "variant",
|
|
10464
|
+
usage: "sort"
|
|
10465
|
+
});
|
|
10527
10466
|
primaryAsc(a, b) {
|
|
10528
|
-
return
|
|
10467
|
+
return this._compareValue(a.v, b.v);
|
|
10529
10468
|
}
|
|
10530
10469
|
asc(a, b) {
|
|
10531
|
-
const diff =
|
|
10470
|
+
const diff = this._compareValue(a.v, b.v);
|
|
10532
10471
|
return diff === 0 ? a.k - b.k : diff;
|
|
10533
10472
|
}
|
|
10534
10473
|
match(value) {
|
|
10535
10474
|
return value.v + "";
|
|
10536
10475
|
}
|
|
10476
|
+
/**
|
|
10477
|
+
* 두 Primitive 값을 비교합니다.
|
|
10478
|
+
*/
|
|
10479
|
+
_compareDiff(a, b) {
|
|
10480
|
+
if (typeof a !== "string" && typeof b !== "string") {
|
|
10481
|
+
return +a - +b;
|
|
10482
|
+
}
|
|
10483
|
+
return this._intlComparator.compare(a + "", b + "");
|
|
10484
|
+
}
|
|
10485
|
+
/**
|
|
10486
|
+
* 두 v 값을 비교합니다. v는 Primitive 또는 Primitive[] (복합 인덱스)일 수 있습니다.
|
|
10487
|
+
* 배열인 경우 element-by-element로 비교합니다.
|
|
10488
|
+
*/
|
|
10489
|
+
_compareValue(a, b) {
|
|
10490
|
+
const aArr = Array.isArray(a);
|
|
10491
|
+
const bArr = Array.isArray(b);
|
|
10492
|
+
if (!aArr && !bArr) {
|
|
10493
|
+
return this._compareDiff(a, b);
|
|
10494
|
+
}
|
|
10495
|
+
const aList = aArr ? a : [a];
|
|
10496
|
+
const bList = bArr ? b : [b];
|
|
10497
|
+
const len = Math.min(aList.length, bList.length);
|
|
10498
|
+
for (let i = 0; i < len; i++) {
|
|
10499
|
+
const diff = this._compareDiff(aList[i], bList[i]);
|
|
10500
|
+
if (diff !== 0) return diff;
|
|
10501
|
+
}
|
|
10502
|
+
return 0;
|
|
10503
|
+
}
|
|
10537
10504
|
};
|
|
10538
10505
|
|
|
10539
10506
|
// src/utils/catchPromise.ts
|
|
@@ -12000,7 +11967,7 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
|
|
|
12000
11967
|
orderBy: orderByField
|
|
12001
11968
|
} = options;
|
|
12002
11969
|
const self = this;
|
|
12003
|
-
const stream = this.streamWithDefault(async function* (tx2) {
|
|
11970
|
+
const stream = () => this.streamWithDefault(async function* (tx2) {
|
|
12004
11971
|
const ftsConditions = [];
|
|
12005
11972
|
for (const field in query) {
|
|
12006
11973
|
const q = query[field];
|
|
@@ -12102,7 +12069,7 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
|
|
|
12102
12069
|
}, tx);
|
|
12103
12070
|
const drain = async () => {
|
|
12104
12071
|
const result = [];
|
|
12105
|
-
for await (const document of stream) {
|
|
12072
|
+
for await (const document of stream()) {
|
|
12106
12073
|
result.push(document);
|
|
12107
12074
|
}
|
|
12108
12075
|
return result;
|
|
@@ -1,7 +1,17 @@
|
|
|
1
1
|
import type { DataplyTreeValue, Primitive } from '../../types';
|
|
2
2
|
import { ValueComparator } from 'dataply';
|
|
3
3
|
export declare class DocumentValueComparator<T extends DataplyTreeValue<U>, U extends Primitive> extends ValueComparator<T> {
|
|
4
|
+
private readonly _intlComparator;
|
|
4
5
|
primaryAsc(a: T, b: T): number;
|
|
5
6
|
asc(a: T, b: T): number;
|
|
6
7
|
match(value: T): string;
|
|
8
|
+
/**
|
|
9
|
+
* 두 Primitive 값을 비교합니다.
|
|
10
|
+
*/
|
|
11
|
+
private _compareDiff;
|
|
12
|
+
/**
|
|
13
|
+
* 두 v 값을 비교합니다. v는 Primitive 또는 Primitive[] (복합 인덱스)일 수 있습니다.
|
|
14
|
+
* 배열인 경우 element-by-element로 비교합니다.
|
|
15
|
+
*/
|
|
16
|
+
private _compareValue;
|
|
7
17
|
}
|
|
@@ -122,7 +122,7 @@ export declare class DocumentDataply<T extends DocumentJSON> {
|
|
|
122
122
|
* @throws Error if query or orderBy contains non-indexed fields
|
|
123
123
|
*/
|
|
124
124
|
select(query: Partial<DocumentDataplyQuery<T>>, options?: DocumentDataplyQueryOptions, tx?: Transaction): {
|
|
125
|
-
stream: AsyncIterableIterator<DataplyDocument<T>>;
|
|
125
|
+
stream: () => AsyncIterableIterator<DataplyDocument<T>>;
|
|
126
126
|
drain: () => Promise<DataplyDocument<T>[]>;
|
|
127
127
|
};
|
|
128
128
|
/**
|
|
@@ -290,7 +290,7 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON> extends DataplyA
|
|
|
290
290
|
* @throws Error if query or orderBy contains non-indexed fields
|
|
291
291
|
*/
|
|
292
292
|
selectDocuments(query: Partial<DocumentDataplyQuery<T>>, options?: DocumentDataplyQueryOptions, tx?: Transaction): {
|
|
293
|
-
stream: AsyncIterableIterator<DataplyDocument<T>>;
|
|
293
|
+
stream: () => AsyncIterableIterator<DataplyDocument<T>>;
|
|
294
294
|
drain: () => Promise<DataplyDocument<T>[]>;
|
|
295
295
|
};
|
|
296
296
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "document-dataply",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.10-alpha.0",
|
|
4
4
|
"description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "izure <admin@izure.org>",
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
"dataply"
|
|
43
43
|
],
|
|
44
44
|
"dependencies": {
|
|
45
|
-
"dataply": "^0.0.
|
|
45
|
+
"dataply": "^0.0.25-alpha.0"
|
|
46
46
|
},
|
|
47
47
|
"devDependencies": {
|
|
48
48
|
"@types/jest": "^30.0.0",
|
|
@@ -51,4 +51,4 @@
|
|
|
51
51
|
"ts-jest": "^29.4.6",
|
|
52
52
|
"typescript": "^5.9.3"
|
|
53
53
|
}
|
|
54
|
-
}
|
|
54
|
+
}
|
package/readme.md
CHANGED
|
@@ -85,7 +85,7 @@ async function main() {
|
|
|
85
85
|
// Get all results
|
|
86
86
|
const allResults = await query.drain();
|
|
87
87
|
// Or iterate through results
|
|
88
|
-
for await (const doc of query.stream) {
|
|
88
|
+
for await (const doc of query.stream()) {
|
|
89
89
|
console.log(doc);
|
|
90
90
|
}
|
|
91
91
|
|
|
@@ -188,7 +188,7 @@ For more information on performance optimization and advanced features, see [TIP
|
|
|
188
188
|
|
|
189
189
|
- **Query Optimization**: Automatic index selection for maximum performance.
|
|
190
190
|
- **Sorting and Pagination**: Detailed usage of `limit`, `orderBy`, and `sortOrder`.
|
|
191
|
-
- **Memory Management**: When to use `stream` vs `drain()`.
|
|
191
|
+
- **Memory Management**: When to use `stream()` vs `drain()`.
|
|
192
192
|
- **Performance**: Optimizing bulk data insertion using `insertBatch`.
|
|
193
193
|
- **Indexing Policies**: Dynamic index creation and automatic backfilling.
|
|
194
194
|
- **Composite Indexes**: Indexing multiple fields for complex queries.
|
|
@@ -226,7 +226,7 @@ Inserts multiple documents efficiently. Returns an array of `_ids` (`number[]`).
|
|
|
226
226
|
### `db.select(query, options?, tx?)`
|
|
227
227
|
Searches for documents matching the query. Passing an empty object (`{}`) as the `query` retrieves all documents.
|
|
228
228
|
Returns an object `{ stream, drain }`.
|
|
229
|
-
- `stream`: An async iterator to traverse results one by one.
|
|
229
|
+
- `stream()`: An async iterator to traverse results one by one.
|
|
230
230
|
- `drain()`: A promise that resolves to an array of all matching documents.
|
|
231
231
|
|
|
232
232
|
### `db.partialUpdate(query, newFields, tx?)`
|