dataply 0.0.24 → 0.0.25-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js
CHANGED
|
@@ -6463,65 +6463,22 @@ function crc32(buf) {
|
|
|
6463
6463
|
}
|
|
6464
6464
|
|
|
6465
6465
|
// src/utils/array.ts
|
|
6466
|
-
function
|
|
6467
|
-
const gLen = sortedGaps.length;
|
|
6468
|
-
if (gLen === 0) return 0;
|
|
6469
|
-
const median = sortedGaps[Math.floor(gLen * 0.5)];
|
|
6470
|
-
const q1 = sortedGaps[Math.floor(gLen * 0.25)];
|
|
6471
|
-
const q3 = sortedGaps[Math.floor(gLen * 0.75)];
|
|
6472
|
-
const iqr = q3 - q1;
|
|
6473
|
-
const logN = Math.max(1, Math.log10(n));
|
|
6474
|
-
if (iqr > 0) {
|
|
6475
|
-
const threshold2 = q3 + iqr * 1.5 * logN;
|
|
6476
|
-
const minJump = Math.max(median * 5, 20);
|
|
6477
|
-
return Math.max(threshold2, minJump);
|
|
6478
|
-
}
|
|
6479
|
-
const baseGap = median > 0 ? median : 1;
|
|
6480
|
-
const p90 = sortedGaps[Math.floor(gLen * 0.9)];
|
|
6481
|
-
if (p90 > baseGap) {
|
|
6482
|
-
const threshold2 = baseGap + (p90 - baseGap) * 0.5 * logN;
|
|
6483
|
-
return Math.max(threshold2, baseGap * 5, 20);
|
|
6484
|
-
}
|
|
6485
|
-
let mean = 0;
|
|
6486
|
-
for (let i = 0; i < gLen; i++) mean += sortedGaps[i];
|
|
6487
|
-
mean /= gLen;
|
|
6488
|
-
let variance = 0;
|
|
6489
|
-
for (let i = 0; i < gLen; i++) {
|
|
6490
|
-
const d = sortedGaps[i] - mean;
|
|
6491
|
-
variance += d * d;
|
|
6492
|
-
}
|
|
6493
|
-
const stddev = Math.sqrt(variance / gLen);
|
|
6494
|
-
if (stddev === 0) {
|
|
6495
|
-
return baseGap * 2;
|
|
6496
|
-
}
|
|
6497
|
-
const threshold = mean + stddev * logN;
|
|
6498
|
-
return Math.max(threshold, baseGap * 5, 20);
|
|
6499
|
-
}
|
|
6500
|
-
function clusterNumbers(numbers, maxGap) {
|
|
6466
|
+
function clusterNumbersByPagination(numbers, pagingSize, startPageId = 0) {
|
|
6501
6467
|
const n = numbers.length;
|
|
6502
6468
|
if (n === 0) return [];
|
|
6503
6469
|
if (n === 1) return [new Float64Array([numbers[0]])];
|
|
6504
6470
|
const sorted = (numbers instanceof Float64Array ? numbers.slice() : Float64Array.from(numbers)).sort();
|
|
6505
|
-
const gaps = new Float64Array(n - 1);
|
|
6506
|
-
for (let i = 0, len = n - 1; i < len; i++) {
|
|
6507
|
-
gaps[i] = sorted[i + 1] - sorted[i];
|
|
6508
|
-
}
|
|
6509
|
-
const sortedGaps = gaps.slice().sort();
|
|
6510
|
-
let threshold;
|
|
6511
|
-
if (maxGap !== void 0) {
|
|
6512
|
-
threshold = maxGap;
|
|
6513
|
-
} else {
|
|
6514
|
-
threshold = calcThreshold(sortedGaps, n);
|
|
6515
|
-
}
|
|
6516
6471
|
const clusters = [];
|
|
6517
|
-
let
|
|
6472
|
+
let start = 0;
|
|
6518
6473
|
for (let i = 0, len = n - 1; i < len; i++) {
|
|
6519
|
-
|
|
6520
|
-
|
|
6521
|
-
|
|
6474
|
+
const paginationIndex = Math.floor((sorted[i] - startPageId) / pagingSize);
|
|
6475
|
+
const nextPaginationIndex = Math.floor((sorted[i + 1] - startPageId) / pagingSize);
|
|
6476
|
+
if (paginationIndex !== nextPaginationIndex) {
|
|
6477
|
+
clusters.push(sorted.subarray(start, i + 1));
|
|
6478
|
+
start = i + 1;
|
|
6522
6479
|
}
|
|
6523
6480
|
}
|
|
6524
|
-
clusters.push(sorted.subarray(
|
|
6481
|
+
clusters.push(sorted.subarray(start));
|
|
6525
6482
|
return clusters;
|
|
6526
6483
|
}
|
|
6527
6484
|
|
|
@@ -9479,19 +9436,34 @@ var RowTableEngine = class {
|
|
|
9479
9436
|
* @returns Array of raw data of the rows in the same order as input PKs
|
|
9480
9437
|
*/
|
|
9481
9438
|
async selectMany(pks, tx) {
|
|
9439
|
+
const collections = await this.collectItemsByPage(pks, tx);
|
|
9440
|
+
return this.fetchRowsByRids(collections, pks.length, tx);
|
|
9441
|
+
}
|
|
9442
|
+
/**
|
|
9443
|
+
* Collects items by page ID to minimize I/O.
|
|
9444
|
+
* @param pks Array of PKs to look up
|
|
9445
|
+
* @param tx Transaction
|
|
9446
|
+
* @returns Map of page ID to array of {pk, slotIndex, index} pairs
|
|
9447
|
+
*/
|
|
9448
|
+
async collectItemsByPage(pks, tx) {
|
|
9482
9449
|
if (pks.length === 0) {
|
|
9483
|
-
return
|
|
9450
|
+
return /* @__PURE__ */ new Map();
|
|
9484
9451
|
}
|
|
9485
9452
|
const pkIndexMap = /* @__PURE__ */ new Map();
|
|
9486
9453
|
for (let i = 0, len = pks.length; i < len; i++) {
|
|
9487
9454
|
pkIndexMap.set(pks[i], i);
|
|
9488
9455
|
}
|
|
9489
|
-
const validCount = pks.length;
|
|
9490
|
-
const pkArray = new Float64Array(validCount).fill(0);
|
|
9491
|
-
const ridArray = new Float64Array(validCount).fill(0);
|
|
9492
|
-
const indexArray = new Float64Array(validCount).fill(0);
|
|
9493
9456
|
const btx = await this.getBPTreeTransaction(tx);
|
|
9494
|
-
const clusters =
|
|
9457
|
+
const clusters = clusterNumbersByPagination(pks, this.order, 1);
|
|
9458
|
+
const collections = /* @__PURE__ */ new Map();
|
|
9459
|
+
const insertToCollections = (pk, rid, index) => {
|
|
9460
|
+
const slotIndex = rid % 65536;
|
|
9461
|
+
const pageId = Math.floor(rid / 65536);
|
|
9462
|
+
if (!collections.has(pageId)) {
|
|
9463
|
+
collections.set(pageId, []);
|
|
9464
|
+
}
|
|
9465
|
+
collections.get(pageId).push({ pk, slotIndex, index });
|
|
9466
|
+
};
|
|
9495
9467
|
for (let i = 0, len = clusters.length; i < len; i++) {
|
|
9496
9468
|
const cluster = clusters[i];
|
|
9497
9469
|
const minPk = cluster[0];
|
|
@@ -9502,9 +9474,7 @@ var RowTableEngine = class {
|
|
|
9502
9474
|
const rid = keys.values().next().value;
|
|
9503
9475
|
const index = pkIndexMap.get(minPk);
|
|
9504
9476
|
if (index !== void 0) {
|
|
9505
|
-
|
|
9506
|
-
ridArray[index] = rid;
|
|
9507
|
-
indexArray[index] = index;
|
|
9477
|
+
insertToCollections(minPk, rid, index);
|
|
9508
9478
|
}
|
|
9509
9479
|
}
|
|
9510
9480
|
continue;
|
|
@@ -9513,13 +9483,11 @@ var RowTableEngine = class {
|
|
|
9513
9483
|
for await (const [rid, pk] of stream) {
|
|
9514
9484
|
const index = pkIndexMap.get(pk);
|
|
9515
9485
|
if (index !== void 0) {
|
|
9516
|
-
|
|
9517
|
-
ridArray[index] = rid;
|
|
9518
|
-
indexArray[index] = index;
|
|
9486
|
+
insertToCollections(pk, rid, index);
|
|
9519
9487
|
}
|
|
9520
9488
|
}
|
|
9521
9489
|
}
|
|
9522
|
-
return
|
|
9490
|
+
return collections;
|
|
9523
9491
|
}
|
|
9524
9492
|
/**
|
|
9525
9493
|
* Fetches multiple rows by their RID and PK combinations, grouping by page ID to minimize I/O.
|
|
@@ -9527,25 +9495,12 @@ var RowTableEngine = class {
|
|
|
9527
9495
|
* @param tx Transaction
|
|
9528
9496
|
* @returns Array of row data in the same order as input PKs
|
|
9529
9497
|
*/
|
|
9530
|
-
async fetchRowsByRids(
|
|
9531
|
-
const result = new Array(
|
|
9532
|
-
if (
|
|
9533
|
-
const
|
|
9534
|
-
for (let i = 0; i < validCount; i++) {
|
|
9535
|
-
const pk = pkArray[i];
|
|
9536
|
-
const rid = ridArray[i];
|
|
9537
|
-
const index = indexArray[i];
|
|
9538
|
-
if (pk === 0 && rid === 0 && index === 0) continue;
|
|
9539
|
-
const slotIndex = rid % 65536;
|
|
9540
|
-
const pageId = Math.floor(rid / 65536);
|
|
9541
|
-
if (!pageGroupMap.has(pageId)) {
|
|
9542
|
-
pageGroupMap.set(pageId, []);
|
|
9543
|
-
}
|
|
9544
|
-
pageGroupMap.get(pageId).push({ pk, slotIndex, index });
|
|
9545
|
-
}
|
|
9546
|
-
const sortedPageIds = Array.from(pageGroupMap.keys()).sort((a, b) => a - b);
|
|
9498
|
+
async fetchRowsByRids(collections, itemsCount, tx) {
|
|
9499
|
+
const result = new Array(itemsCount).fill(null);
|
|
9500
|
+
if (itemsCount === 0) return result;
|
|
9501
|
+
const sortedPageIds = Array.from(collections.keys()).sort((a, b) => a - b);
|
|
9547
9502
|
await Promise.all(sortedPageIds.map(async (pageId) => {
|
|
9548
|
-
const items =
|
|
9503
|
+
const items = collections.get(pageId);
|
|
9549
9504
|
const page = await this.pfs.get(pageId, tx);
|
|
9550
9505
|
if (!this.factory.isDataPage(page)) {
|
|
9551
9506
|
throw new Error(`Page ${pageId} is not a data page`);
|
|
@@ -131,6 +131,17 @@ export declare class RowTableEngine {
|
|
|
131
131
|
* @returns Array of raw data of the rows in the same order as input PKs
|
|
132
132
|
*/
|
|
133
133
|
selectMany(pks: number[] | Float64Array, tx: Transaction): Promise<(Uint8Array | null)[]>;
|
|
134
|
+
/**
|
|
135
|
+
* Collects items by page ID to minimize I/O.
|
|
136
|
+
* @param pks Array of PKs to look up
|
|
137
|
+
* @param tx Transaction
|
|
138
|
+
* @returns Map of page ID to array of {pk, slotIndex, index} pairs
|
|
139
|
+
*/
|
|
140
|
+
collectItemsByPage(pks: number[] | Float64Array, tx: Transaction): Promise<Map<number, {
|
|
141
|
+
pk: number;
|
|
142
|
+
slotIndex: number;
|
|
143
|
+
index: number;
|
|
144
|
+
}[]>>;
|
|
134
145
|
/**
|
|
135
146
|
* Fetches multiple rows by their RID and PK combinations, grouping by page ID to minimize I/O.
|
|
136
147
|
* @param pkRidPairs Array of {pk, rid} pairs
|
|
@@ -8,5 +8,6 @@ export declare function getMinMaxValue(array: SupportedNumberArray): [number, nu
|
|
|
8
8
|
* @param maxGap Optional fixed gap threshold. If not provided, it is calculated automatically.
|
|
9
9
|
* @returns Array of clusters
|
|
10
10
|
*/
|
|
11
|
-
export declare function
|
|
11
|
+
export declare function clusterNumbersByGap(numbers: number[] | Float64Array, maxGap?: number): Float64Array[];
|
|
12
|
+
export declare function clusterNumbersByPagination(numbers: number[] | Float64Array, pagingSize: number, startPageId?: number): Float64Array[];
|
|
12
13
|
export {};
|