dataply 0.0.24 → 0.0.25-alpha.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -6463,65 +6463,22 @@ function crc32(buf) {
6463
6463
  }
6464
6464
 
6465
6465
  // src/utils/array.ts
6466
- function calcThreshold(sortedGaps, n) {
6467
- const gLen = sortedGaps.length;
6468
- if (gLen === 0) return 0;
6469
- const median = sortedGaps[Math.floor(gLen * 0.5)];
6470
- const q1 = sortedGaps[Math.floor(gLen * 0.25)];
6471
- const q3 = sortedGaps[Math.floor(gLen * 0.75)];
6472
- const iqr = q3 - q1;
6473
- const logN = Math.max(1, Math.log10(n));
6474
- if (iqr > 0) {
6475
- const threshold2 = q3 + iqr * 1.5 * logN;
6476
- const minJump = Math.max(median * 5, 20);
6477
- return Math.max(threshold2, minJump);
6478
- }
6479
- const baseGap = median > 0 ? median : 1;
6480
- const p90 = sortedGaps[Math.floor(gLen * 0.9)];
6481
- if (p90 > baseGap) {
6482
- const threshold2 = baseGap + (p90 - baseGap) * 0.5 * logN;
6483
- return Math.max(threshold2, baseGap * 5, 20);
6484
- }
6485
- let mean = 0;
6486
- for (let i = 0; i < gLen; i++) mean += sortedGaps[i];
6487
- mean /= gLen;
6488
- let variance = 0;
6489
- for (let i = 0; i < gLen; i++) {
6490
- const d = sortedGaps[i] - mean;
6491
- variance += d * d;
6492
- }
6493
- const stddev = Math.sqrt(variance / gLen);
6494
- if (stddev === 0) {
6495
- return baseGap * 2;
6496
- }
6497
- const threshold = mean + stddev * logN;
6498
- return Math.max(threshold, baseGap * 5, 20);
6499
- }
6500
- function clusterNumbers(numbers, maxGap) {
6466
+ function clusterNumbersByPagination(numbers, pagingSize, startPageId = 0) {
6501
6467
  const n = numbers.length;
6502
6468
  if (n === 0) return [];
6503
6469
  if (n === 1) return [new Float64Array([numbers[0]])];
6504
6470
  const sorted = (numbers instanceof Float64Array ? numbers.slice() : Float64Array.from(numbers)).sort();
6505
- const gaps = new Float64Array(n - 1);
6506
- for (let i = 0, len = n - 1; i < len; i++) {
6507
- gaps[i] = sorted[i + 1] - sorted[i];
6508
- }
6509
- const sortedGaps = gaps.slice().sort();
6510
- let threshold;
6511
- if (maxGap !== void 0) {
6512
- threshold = maxGap;
6513
- } else {
6514
- threshold = calcThreshold(sortedGaps, n);
6515
- }
6516
6471
  const clusters = [];
6517
- let clusterStart = 0;
6472
+ let start = 0;
6518
6473
  for (let i = 0, len = n - 1; i < len; i++) {
6519
- if (gaps[i] > threshold) {
6520
- clusters.push(sorted.subarray(clusterStart, i + 1));
6521
- clusterStart = i + 1;
6474
+ const paginationIndex = Math.floor((sorted[i] - startPageId) / pagingSize);
6475
+ const nextPaginationIndex = Math.floor((sorted[i + 1] - startPageId) / pagingSize);
6476
+ if (paginationIndex !== nextPaginationIndex) {
6477
+ clusters.push(sorted.subarray(start, i + 1));
6478
+ start = i + 1;
6522
6479
  }
6523
6480
  }
6524
- clusters.push(sorted.subarray(clusterStart));
6481
+ clusters.push(sorted.subarray(start));
6525
6482
  return clusters;
6526
6483
  }
6527
6484
 
@@ -9479,19 +9436,34 @@ var RowTableEngine = class {
9479
9436
  * @returns Array of raw data of the rows in the same order as input PKs
9480
9437
  */
9481
9438
  async selectMany(pks, tx) {
9439
+ const collections = await this.collectItemsByPage(pks, tx);
9440
+ return this.fetchRowsByRids(collections, pks.length, tx);
9441
+ }
9442
+ /**
9443
+ * Collects items by page ID to minimize I/O.
9444
+ * @param pks Array of PKs to look up
9445
+ * @param tx Transaction
9446
+ * @returns Map of page ID to array of {pk, slotIndex, index} pairs
9447
+ */
9448
+ async collectItemsByPage(pks, tx) {
9482
9449
  if (pks.length === 0) {
9483
- return [];
9450
+ return /* @__PURE__ */ new Map();
9484
9451
  }
9485
9452
  const pkIndexMap = /* @__PURE__ */ new Map();
9486
9453
  for (let i = 0, len = pks.length; i < len; i++) {
9487
9454
  pkIndexMap.set(pks[i], i);
9488
9455
  }
9489
- const validCount = pks.length;
9490
- const pkArray = new Float64Array(validCount).fill(0);
9491
- const ridArray = new Float64Array(validCount).fill(0);
9492
- const indexArray = new Float64Array(validCount).fill(0);
9493
9456
  const btx = await this.getBPTreeTransaction(tx);
9494
- const clusters = clusterNumbers(pks, this.order / 2);
9457
+ const clusters = clusterNumbersByPagination(pks, this.order, 1);
9458
+ const collections = /* @__PURE__ */ new Map();
9459
+ const insertToCollections = (pk, rid, index) => {
9460
+ const slotIndex = rid % 65536;
9461
+ const pageId = Math.floor(rid / 65536);
9462
+ if (!collections.has(pageId)) {
9463
+ collections.set(pageId, []);
9464
+ }
9465
+ collections.get(pageId).push({ pk, slotIndex, index });
9466
+ };
9495
9467
  for (let i = 0, len = clusters.length; i < len; i++) {
9496
9468
  const cluster = clusters[i];
9497
9469
  const minPk = cluster[0];
@@ -9502,9 +9474,7 @@ var RowTableEngine = class {
9502
9474
  const rid = keys.values().next().value;
9503
9475
  const index = pkIndexMap.get(minPk);
9504
9476
  if (index !== void 0) {
9505
- pkArray[index] = minPk;
9506
- ridArray[index] = rid;
9507
- indexArray[index] = index;
9477
+ insertToCollections(minPk, rid, index);
9508
9478
  }
9509
9479
  }
9510
9480
  continue;
@@ -9513,13 +9483,11 @@ var RowTableEngine = class {
9513
9483
  for await (const [rid, pk] of stream) {
9514
9484
  const index = pkIndexMap.get(pk);
9515
9485
  if (index !== void 0) {
9516
- pkArray[index] = pk;
9517
- ridArray[index] = rid;
9518
- indexArray[index] = index;
9486
+ insertToCollections(pk, rid, index);
9519
9487
  }
9520
9488
  }
9521
9489
  }
9522
- return this.fetchRowsByRids(validCount, pkArray, ridArray, indexArray, tx);
9490
+ return collections;
9523
9491
  }
9524
9492
  /**
9525
9493
  * Fetches multiple rows by their RID and PK combinations, grouping by page ID to minimize I/O.
@@ -9527,25 +9495,12 @@ var RowTableEngine = class {
9527
9495
  * @param tx Transaction
9528
9496
  * @returns Array of row data in the same order as input PKs
9529
9497
  */
9530
- async fetchRowsByRids(validCount, pkArray, ridArray, indexArray, tx) {
9531
- const result = new Array(validCount).fill(null);
9532
- if (validCount === 0) return result;
9533
- const pageGroupMap = /* @__PURE__ */ new Map();
9534
- for (let i = 0; i < validCount; i++) {
9535
- const pk = pkArray[i];
9536
- const rid = ridArray[i];
9537
- const index = indexArray[i];
9538
- if (pk === 0 && rid === 0 && index === 0) continue;
9539
- const slotIndex = rid % 65536;
9540
- const pageId = Math.floor(rid / 65536);
9541
- if (!pageGroupMap.has(pageId)) {
9542
- pageGroupMap.set(pageId, []);
9543
- }
9544
- pageGroupMap.get(pageId).push({ pk, slotIndex, index });
9545
- }
9546
- const sortedPageIds = Array.from(pageGroupMap.keys()).sort((a, b) => a - b);
9498
+ async fetchRowsByRids(collections, itemsCount, tx) {
9499
+ const result = new Array(itemsCount).fill(null);
9500
+ if (itemsCount === 0) return result;
9501
+ const sortedPageIds = Array.from(collections.keys()).sort((a, b) => a - b);
9547
9502
  await Promise.all(sortedPageIds.map(async (pageId) => {
9548
- const items = pageGroupMap.get(pageId);
9503
+ const items = collections.get(pageId);
9549
9504
  const page = await this.pfs.get(pageId, tx);
9550
9505
  if (!this.factory.isDataPage(page)) {
9551
9506
  throw new Error(`Page ${pageId} is not a data page`);
@@ -131,6 +131,17 @@ export declare class RowTableEngine {
131
131
  * @returns Array of raw data of the rows in the same order as input PKs
132
132
  */
133
133
  selectMany(pks: number[] | Float64Array, tx: Transaction): Promise<(Uint8Array | null)[]>;
134
+ /**
135
+ * Collects items by page ID to minimize I/O.
136
+ * @param pks Array of PKs to look up
137
+ * @param tx Transaction
138
+ * @returns Map of page ID to array of {pk, slotIndex, index} pairs
139
+ */
140
+ collectItemsByPage(pks: number[] | Float64Array, tx: Transaction): Promise<Map<number, {
141
+ pk: number;
142
+ slotIndex: number;
143
+ index: number;
144
+ }[]>>;
134
145
  /**
135
146
  * Fetches multiple rows by their RID and PK combinations, grouping by page ID to minimize I/O.
136
147
  * @param pkRidPairs Array of {pk, rid} pairs
@@ -8,5 +8,6 @@ export declare function getMinMaxValue(array: SupportedNumberArray): [number, nu
8
8
  * @param maxGap Optional fixed gap threshold. If not provided, it is calculated automatically.
9
9
  * @returns Array of clusters
10
10
  */
11
- export declare function clusterNumbers(numbers: number[] | Float64Array, maxGap?: number): Float64Array[];
11
+ export declare function clusterNumbersByGap(numbers: number[] | Float64Array, maxGap?: number): Float64Array[];
12
+ export declare function clusterNumbersByPagination(numbers: number[] | Float64Array, pagingSize: number, startPageId?: number): Float64Array[];
12
13
  export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dataply",
3
- "version": "0.0.24",
3
+ "version": "0.0.25-alpha.0",
4
4
  "description": "A lightweight storage engine for Node.js with support for MVCC, WAL.",
5
5
  "license": "MIT",
6
6
  "author": "izure <admin@izure.org>",