dataply 0.0.24 → 0.0.25-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js
CHANGED
|
@@ -6463,65 +6463,22 @@ function crc32(buf) {
|
|
|
6463
6463
|
}
|
|
6464
6464
|
|
|
6465
6465
|
// src/utils/array.ts
|
|
6466
|
-
function
|
|
6467
|
-
const gLen = sortedGaps.length;
|
|
6468
|
-
if (gLen === 0) return 0;
|
|
6469
|
-
const median = sortedGaps[Math.floor(gLen * 0.5)];
|
|
6470
|
-
const q1 = sortedGaps[Math.floor(gLen * 0.25)];
|
|
6471
|
-
const q3 = sortedGaps[Math.floor(gLen * 0.75)];
|
|
6472
|
-
const iqr = q3 - q1;
|
|
6473
|
-
const logN = Math.max(1, Math.log10(n));
|
|
6474
|
-
if (iqr > 0) {
|
|
6475
|
-
const threshold2 = q3 + iqr * 1.5 * logN;
|
|
6476
|
-
const minJump = Math.max(median * 5, 20);
|
|
6477
|
-
return Math.max(threshold2, minJump);
|
|
6478
|
-
}
|
|
6479
|
-
const baseGap = median > 0 ? median : 1;
|
|
6480
|
-
const p90 = sortedGaps[Math.floor(gLen * 0.9)];
|
|
6481
|
-
if (p90 > baseGap) {
|
|
6482
|
-
const threshold2 = baseGap + (p90 - baseGap) * 0.5 * logN;
|
|
6483
|
-
return Math.max(threshold2, baseGap * 5, 20);
|
|
6484
|
-
}
|
|
6485
|
-
let mean = 0;
|
|
6486
|
-
for (let i = 0; i < gLen; i++) mean += sortedGaps[i];
|
|
6487
|
-
mean /= gLen;
|
|
6488
|
-
let variance = 0;
|
|
6489
|
-
for (let i = 0; i < gLen; i++) {
|
|
6490
|
-
const d = sortedGaps[i] - mean;
|
|
6491
|
-
variance += d * d;
|
|
6492
|
-
}
|
|
6493
|
-
const stddev = Math.sqrt(variance / gLen);
|
|
6494
|
-
if (stddev === 0) {
|
|
6495
|
-
return baseGap * 2;
|
|
6496
|
-
}
|
|
6497
|
-
const threshold = mean + stddev * logN;
|
|
6498
|
-
return Math.max(threshold, baseGap * 5, 20);
|
|
6499
|
-
}
|
|
6500
|
-
function clusterNumbers(numbers, maxGap) {
|
|
6466
|
+
function clusterNumbersByPagination(numbers, pagingSize, startPageId = 0) {
|
|
6501
6467
|
const n = numbers.length;
|
|
6502
6468
|
if (n === 0) return [];
|
|
6503
6469
|
if (n === 1) return [new Float64Array([numbers[0]])];
|
|
6504
6470
|
const sorted = (numbers instanceof Float64Array ? numbers.slice() : Float64Array.from(numbers)).sort();
|
|
6505
|
-
const gaps = new Float64Array(n - 1);
|
|
6506
|
-
for (let i = 0, len = n - 1; i < len; i++) {
|
|
6507
|
-
gaps[i] = sorted[i + 1] - sorted[i];
|
|
6508
|
-
}
|
|
6509
|
-
const sortedGaps = gaps.slice().sort();
|
|
6510
|
-
let threshold;
|
|
6511
|
-
if (maxGap !== void 0) {
|
|
6512
|
-
threshold = maxGap;
|
|
6513
|
-
} else {
|
|
6514
|
-
threshold = calcThreshold(sortedGaps, n);
|
|
6515
|
-
}
|
|
6516
6471
|
const clusters = [];
|
|
6517
|
-
let
|
|
6472
|
+
let start = 0;
|
|
6518
6473
|
for (let i = 0, len = n - 1; i < len; i++) {
|
|
6519
|
-
|
|
6520
|
-
|
|
6521
|
-
|
|
6474
|
+
const paginationIndex = Math.floor((sorted[i] - startPageId) / pagingSize);
|
|
6475
|
+
const nextPaginationIndex = Math.floor((sorted[i + 1] - startPageId) / pagingSize);
|
|
6476
|
+
if (paginationIndex !== nextPaginationIndex) {
|
|
6477
|
+
clusters.push(sorted.subarray(start, i + 1));
|
|
6478
|
+
start = i + 1;
|
|
6522
6479
|
}
|
|
6523
6480
|
}
|
|
6524
|
-
clusters.push(sorted.subarray(
|
|
6481
|
+
clusters.push(sorted.subarray(start));
|
|
6525
6482
|
return clusters;
|
|
6526
6483
|
}
|
|
6527
6484
|
|
|
@@ -9205,10 +9162,15 @@ var RowTableEngine = class {
|
|
|
9205
9162
|
}
|
|
9206
9163
|
const metadataPage = await this.pfs.getMetadata(tx);
|
|
9207
9164
|
const manager = this.factory.getManagerFromType(MetadataPageManager.CONSTANT.PAGE_TYPE_METADATA);
|
|
9165
|
+
const pageSize = manager.getPageSize(metadataPage);
|
|
9166
|
+
const pageCount = manager.getPageCount(metadataPage);
|
|
9167
|
+
const rowCount = manager.getRowCount(metadataPage);
|
|
9168
|
+
const usage = pageCount / Math.pow(2, 32);
|
|
9208
9169
|
return {
|
|
9209
|
-
pageSize
|
|
9210
|
-
pageCount
|
|
9211
|
-
rowCount
|
|
9170
|
+
pageSize,
|
|
9171
|
+
pageCount,
|
|
9172
|
+
rowCount,
|
|
9173
|
+
usage
|
|
9212
9174
|
};
|
|
9213
9175
|
}
|
|
9214
9176
|
/**
|
|
@@ -9479,19 +9441,34 @@ var RowTableEngine = class {
|
|
|
9479
9441
|
* @returns Array of raw data of the rows in the same order as input PKs
|
|
9480
9442
|
*/
|
|
9481
9443
|
async selectMany(pks, tx) {
|
|
9444
|
+
const collections = await this.collectItemsByPage(pks, tx);
|
|
9445
|
+
return this.fetchRowsByRids(collections, pks.length, tx);
|
|
9446
|
+
}
|
|
9447
|
+
/**
|
|
9448
|
+
* Collects items by page ID to minimize I/O.
|
|
9449
|
+
* @param pks Array of PKs to look up
|
|
9450
|
+
* @param tx Transaction
|
|
9451
|
+
* @returns Map of page ID to array of {pk, slotIndex, index} pairs
|
|
9452
|
+
*/
|
|
9453
|
+
async collectItemsByPage(pks, tx) {
|
|
9482
9454
|
if (pks.length === 0) {
|
|
9483
|
-
return
|
|
9455
|
+
return /* @__PURE__ */ new Map();
|
|
9484
9456
|
}
|
|
9485
9457
|
const pkIndexMap = /* @__PURE__ */ new Map();
|
|
9486
9458
|
for (let i = 0, len = pks.length; i < len; i++) {
|
|
9487
9459
|
pkIndexMap.set(pks[i], i);
|
|
9488
9460
|
}
|
|
9489
|
-
const validCount = pks.length;
|
|
9490
|
-
const pkArray = new Float64Array(validCount).fill(0);
|
|
9491
|
-
const ridArray = new Float64Array(validCount).fill(0);
|
|
9492
|
-
const indexArray = new Float64Array(validCount).fill(0);
|
|
9493
9461
|
const btx = await this.getBPTreeTransaction(tx);
|
|
9494
|
-
const clusters =
|
|
9462
|
+
const clusters = clusterNumbersByPagination(pks, this.order, 1);
|
|
9463
|
+
const collections = /* @__PURE__ */ new Map();
|
|
9464
|
+
const insertToCollections = (pk, rid, index) => {
|
|
9465
|
+
const slotIndex = rid % 65536;
|
|
9466
|
+
const pageId = Math.floor(rid / 65536);
|
|
9467
|
+
if (!collections.has(pageId)) {
|
|
9468
|
+
collections.set(pageId, []);
|
|
9469
|
+
}
|
|
9470
|
+
collections.get(pageId).push({ pk, slotIndex, index });
|
|
9471
|
+
};
|
|
9495
9472
|
for (let i = 0, len = clusters.length; i < len; i++) {
|
|
9496
9473
|
const cluster = clusters[i];
|
|
9497
9474
|
const minPk = cluster[0];
|
|
@@ -9502,9 +9479,7 @@ var RowTableEngine = class {
|
|
|
9502
9479
|
const rid = keys.values().next().value;
|
|
9503
9480
|
const index = pkIndexMap.get(minPk);
|
|
9504
9481
|
if (index !== void 0) {
|
|
9505
|
-
|
|
9506
|
-
ridArray[index] = rid;
|
|
9507
|
-
indexArray[index] = index;
|
|
9482
|
+
insertToCollections(minPk, rid, index);
|
|
9508
9483
|
}
|
|
9509
9484
|
}
|
|
9510
9485
|
continue;
|
|
@@ -9513,13 +9488,11 @@ var RowTableEngine = class {
|
|
|
9513
9488
|
for await (const [rid, pk] of stream) {
|
|
9514
9489
|
const index = pkIndexMap.get(pk);
|
|
9515
9490
|
if (index !== void 0) {
|
|
9516
|
-
|
|
9517
|
-
ridArray[index] = rid;
|
|
9518
|
-
indexArray[index] = index;
|
|
9491
|
+
insertToCollections(pk, rid, index);
|
|
9519
9492
|
}
|
|
9520
9493
|
}
|
|
9521
9494
|
}
|
|
9522
|
-
return
|
|
9495
|
+
return collections;
|
|
9523
9496
|
}
|
|
9524
9497
|
/**
|
|
9525
9498
|
* Fetches multiple rows by their RID and PK combinations, grouping by page ID to minimize I/O.
|
|
@@ -9527,25 +9500,12 @@ var RowTableEngine = class {
|
|
|
9527
9500
|
* @param tx Transaction
|
|
9528
9501
|
* @returns Array of row data in the same order as input PKs
|
|
9529
9502
|
*/
|
|
9530
|
-
async fetchRowsByRids(
|
|
9531
|
-
const result = new Array(
|
|
9532
|
-
if (
|
|
9533
|
-
const
|
|
9534
|
-
for (let i = 0; i < validCount; i++) {
|
|
9535
|
-
const pk = pkArray[i];
|
|
9536
|
-
const rid = ridArray[i];
|
|
9537
|
-
const index = indexArray[i];
|
|
9538
|
-
if (pk === 0 && rid === 0 && index === 0) continue;
|
|
9539
|
-
const slotIndex = rid % 65536;
|
|
9540
|
-
const pageId = Math.floor(rid / 65536);
|
|
9541
|
-
if (!pageGroupMap.has(pageId)) {
|
|
9542
|
-
pageGroupMap.set(pageId, []);
|
|
9543
|
-
}
|
|
9544
|
-
pageGroupMap.get(pageId).push({ pk, slotIndex, index });
|
|
9545
|
-
}
|
|
9546
|
-
const sortedPageIds = Array.from(pageGroupMap.keys()).sort((a, b) => a - b);
|
|
9503
|
+
async fetchRowsByRids(collections, itemsCount, tx) {
|
|
9504
|
+
const result = new Array(itemsCount).fill(null);
|
|
9505
|
+
if (itemsCount === 0) return result;
|
|
9506
|
+
const sortedPageIds = Array.from(collections.keys()).sort((a, b) => a - b);
|
|
9547
9507
|
await Promise.all(sortedPageIds.map(async (pageId) => {
|
|
9548
|
-
const items =
|
|
9508
|
+
const items = collections.get(pageId);
|
|
9549
9509
|
const page = await this.pfs.get(pageId, tx);
|
|
9550
9510
|
if (!this.factory.isDataPage(page)) {
|
|
9551
9511
|
throw new Error(`Page ${pageId} is not a data page`);
|
|
@@ -10302,9 +10262,11 @@ var DataplyAPI = class {
|
|
|
10302
10262
|
if (!this.initialized) {
|
|
10303
10263
|
throw new Error("Dataply instance is not initialized");
|
|
10304
10264
|
}
|
|
10305
|
-
return this.
|
|
10306
|
-
|
|
10307
|
-
|
|
10265
|
+
return this.runWithDefaultWrite(() => {
|
|
10266
|
+
return this.hook.trigger("close", void 0, async () => {
|
|
10267
|
+
await this.pfs.close();
|
|
10268
|
+
import_node_fs3.default.closeSync(this.fileHandle);
|
|
10269
|
+
});
|
|
10308
10270
|
});
|
|
10309
10271
|
}
|
|
10310
10272
|
};
|
|
@@ -131,6 +131,17 @@ export declare class RowTableEngine {
|
|
|
131
131
|
* @returns Array of raw data of the rows in the same order as input PKs
|
|
132
132
|
*/
|
|
133
133
|
selectMany(pks: number[] | Float64Array, tx: Transaction): Promise<(Uint8Array | null)[]>;
|
|
134
|
+
/**
|
|
135
|
+
* Collects items by page ID to minimize I/O.
|
|
136
|
+
* @param pks Array of PKs to look up
|
|
137
|
+
* @param tx Transaction
|
|
138
|
+
* @returns Map of page ID to array of {pk, slotIndex, index} pairs
|
|
139
|
+
*/
|
|
140
|
+
collectItemsByPage(pks: number[] | Float64Array, tx: Transaction): Promise<Map<number, {
|
|
141
|
+
pk: number;
|
|
142
|
+
slotIndex: number;
|
|
143
|
+
index: number;
|
|
144
|
+
}[]>>;
|
|
134
145
|
/**
|
|
135
146
|
* Fetches multiple rows by their RID and PK combinations, grouping by page ID to minimize I/O.
|
|
136
147
|
* @param pkRidPairs Array of {pk, rid} pairs
|
|
@@ -37,6 +37,11 @@ export interface DataplyMetadata {
|
|
|
37
37
|
* The total number of data rows in the dataply.
|
|
38
38
|
*/
|
|
39
39
|
rowCount: number;
|
|
40
|
+
/**
|
|
41
|
+
* The usage of the dataply. It is calculated based on the remaining page capacity.
|
|
42
|
+
* The value is between 0 and 1.
|
|
43
|
+
*/
|
|
44
|
+
usage: number;
|
|
40
45
|
}
|
|
41
46
|
export type DataPage = Uint8Array & {
|
|
42
47
|
__pageType: 'data';
|
|
@@ -8,5 +8,6 @@ export declare function getMinMaxValue(array: SupportedNumberArray): [number, nu
|
|
|
8
8
|
* @param maxGap Optional fixed gap threshold. If not provided, it is calculated automatically.
|
|
9
9
|
* @returns Array of clusters
|
|
10
10
|
*/
|
|
11
|
-
export declare function
|
|
11
|
+
export declare function clusterNumbersByGap(numbers: number[] | Float64Array, maxGap?: number): Float64Array[];
|
|
12
|
+
export declare function clusterNumbersByPagination(numbers: number[] | Float64Array, pagingSize: number, startPageId?: number): Float64Array[];
|
|
12
13
|
export {};
|