dataply 0.0.24 → 0.0.25-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -6463,65 +6463,22 @@ function crc32(buf) {
6463
6463
  }
6464
6464
 
6465
6465
  // src/utils/array.ts
6466
- function calcThreshold(sortedGaps, n) {
6467
- const gLen = sortedGaps.length;
6468
- if (gLen === 0) return 0;
6469
- const median = sortedGaps[Math.floor(gLen * 0.5)];
6470
- const q1 = sortedGaps[Math.floor(gLen * 0.25)];
6471
- const q3 = sortedGaps[Math.floor(gLen * 0.75)];
6472
- const iqr = q3 - q1;
6473
- const logN = Math.max(1, Math.log10(n));
6474
- if (iqr > 0) {
6475
- const threshold2 = q3 + iqr * 1.5 * logN;
6476
- const minJump = Math.max(median * 5, 20);
6477
- return Math.max(threshold2, minJump);
6478
- }
6479
- const baseGap = median > 0 ? median : 1;
6480
- const p90 = sortedGaps[Math.floor(gLen * 0.9)];
6481
- if (p90 > baseGap) {
6482
- const threshold2 = baseGap + (p90 - baseGap) * 0.5 * logN;
6483
- return Math.max(threshold2, baseGap * 5, 20);
6484
- }
6485
- let mean = 0;
6486
- for (let i = 0; i < gLen; i++) mean += sortedGaps[i];
6487
- mean /= gLen;
6488
- let variance = 0;
6489
- for (let i = 0; i < gLen; i++) {
6490
- const d = sortedGaps[i] - mean;
6491
- variance += d * d;
6492
- }
6493
- const stddev = Math.sqrt(variance / gLen);
6494
- if (stddev === 0) {
6495
- return baseGap * 2;
6496
- }
6497
- const threshold = mean + stddev * logN;
6498
- return Math.max(threshold, baseGap * 5, 20);
6499
- }
6500
- function clusterNumbers(numbers, maxGap) {
6466
+ function clusterNumbersByPagination(numbers, pagingSize, startPageId = 0) {
6501
6467
  const n = numbers.length;
6502
6468
  if (n === 0) return [];
6503
6469
  if (n === 1) return [new Float64Array([numbers[0]])];
6504
6470
  const sorted = (numbers instanceof Float64Array ? numbers.slice() : Float64Array.from(numbers)).sort();
6505
- const gaps = new Float64Array(n - 1);
6506
- for (let i = 0, len = n - 1; i < len; i++) {
6507
- gaps[i] = sorted[i + 1] - sorted[i];
6508
- }
6509
- const sortedGaps = gaps.slice().sort();
6510
- let threshold;
6511
- if (maxGap !== void 0) {
6512
- threshold = maxGap;
6513
- } else {
6514
- threshold = calcThreshold(sortedGaps, n);
6515
- }
6516
6471
  const clusters = [];
6517
- let clusterStart = 0;
6472
+ let start = 0;
6518
6473
  for (let i = 0, len = n - 1; i < len; i++) {
6519
- if (gaps[i] > threshold) {
6520
- clusters.push(sorted.subarray(clusterStart, i + 1));
6521
- clusterStart = i + 1;
6474
+ const paginationIndex = Math.floor((sorted[i] - startPageId) / pagingSize);
6475
+ const nextPaginationIndex = Math.floor((sorted[i + 1] - startPageId) / pagingSize);
6476
+ if (paginationIndex !== nextPaginationIndex) {
6477
+ clusters.push(sorted.subarray(start, i + 1));
6478
+ start = i + 1;
6522
6479
  }
6523
6480
  }
6524
- clusters.push(sorted.subarray(clusterStart));
6481
+ clusters.push(sorted.subarray(start));
6525
6482
  return clusters;
6526
6483
  }
6527
6484
 
@@ -9205,10 +9162,15 @@ var RowTableEngine = class {
9205
9162
  }
9206
9163
  const metadataPage = await this.pfs.getMetadata(tx);
9207
9164
  const manager = this.factory.getManagerFromType(MetadataPageManager.CONSTANT.PAGE_TYPE_METADATA);
9165
+ const pageSize = manager.getPageSize(metadataPage);
9166
+ const pageCount = manager.getPageCount(metadataPage);
9167
+ const rowCount = manager.getRowCount(metadataPage);
9168
+ const usage = pageCount / Math.pow(2, 32);
9208
9169
  return {
9209
- pageSize: manager.getPageSize(metadataPage),
9210
- pageCount: manager.getPageCount(metadataPage),
9211
- rowCount: manager.getRowCount(metadataPage)
9170
+ pageSize,
9171
+ pageCount,
9172
+ rowCount,
9173
+ usage
9212
9174
  };
9213
9175
  }
9214
9176
  /**
@@ -9479,19 +9441,34 @@ var RowTableEngine = class {
9479
9441
  * @returns Array of raw data of the rows in the same order as input PKs
9480
9442
  */
9481
9443
  async selectMany(pks, tx) {
9444
+ const collections = await this.collectItemsByPage(pks, tx);
9445
+ return this.fetchRowsByRids(collections, pks.length, tx);
9446
+ }
9447
+ /**
9448
+ * Collects items by page ID to minimize I/O.
9449
+ * @param pks Array of PKs to look up
9450
+ * @param tx Transaction
9451
+ * @returns Map of page ID to array of {pk, slotIndex, index} pairs
9452
+ */
9453
+ async collectItemsByPage(pks, tx) {
9482
9454
  if (pks.length === 0) {
9483
- return [];
9455
+ return /* @__PURE__ */ new Map();
9484
9456
  }
9485
9457
  const pkIndexMap = /* @__PURE__ */ new Map();
9486
9458
  for (let i = 0, len = pks.length; i < len; i++) {
9487
9459
  pkIndexMap.set(pks[i], i);
9488
9460
  }
9489
- const validCount = pks.length;
9490
- const pkArray = new Float64Array(validCount).fill(0);
9491
- const ridArray = new Float64Array(validCount).fill(0);
9492
- const indexArray = new Float64Array(validCount).fill(0);
9493
9461
  const btx = await this.getBPTreeTransaction(tx);
9494
- const clusters = clusterNumbers(pks, this.order / 2);
9462
+ const clusters = clusterNumbersByPagination(pks, this.order, 1);
9463
+ const collections = /* @__PURE__ */ new Map();
9464
+ const insertToCollections = (pk, rid, index) => {
9465
+ const slotIndex = rid % 65536;
9466
+ const pageId = Math.floor(rid / 65536);
9467
+ if (!collections.has(pageId)) {
9468
+ collections.set(pageId, []);
9469
+ }
9470
+ collections.get(pageId).push({ pk, slotIndex, index });
9471
+ };
9495
9472
  for (let i = 0, len = clusters.length; i < len; i++) {
9496
9473
  const cluster = clusters[i];
9497
9474
  const minPk = cluster[0];
@@ -9502,9 +9479,7 @@ var RowTableEngine = class {
9502
9479
  const rid = keys.values().next().value;
9503
9480
  const index = pkIndexMap.get(minPk);
9504
9481
  if (index !== void 0) {
9505
- pkArray[index] = minPk;
9506
- ridArray[index] = rid;
9507
- indexArray[index] = index;
9482
+ insertToCollections(minPk, rid, index);
9508
9483
  }
9509
9484
  }
9510
9485
  continue;
@@ -9513,13 +9488,11 @@ var RowTableEngine = class {
9513
9488
  for await (const [rid, pk] of stream) {
9514
9489
  const index = pkIndexMap.get(pk);
9515
9490
  if (index !== void 0) {
9516
- pkArray[index] = pk;
9517
- ridArray[index] = rid;
9518
- indexArray[index] = index;
9491
+ insertToCollections(pk, rid, index);
9519
9492
  }
9520
9493
  }
9521
9494
  }
9522
- return this.fetchRowsByRids(validCount, pkArray, ridArray, indexArray, tx);
9495
+ return collections;
9523
9496
  }
9524
9497
  /**
9525
9498
  * Fetches multiple rows by their RID and PK combinations, grouping by page ID to minimize I/O.
@@ -9527,25 +9500,12 @@ var RowTableEngine = class {
9527
9500
  * @param tx Transaction
9528
9501
  * @returns Array of row data in the same order as input PKs
9529
9502
  */
9530
- async fetchRowsByRids(validCount, pkArray, ridArray, indexArray, tx) {
9531
- const result = new Array(validCount).fill(null);
9532
- if (validCount === 0) return result;
9533
- const pageGroupMap = /* @__PURE__ */ new Map();
9534
- for (let i = 0; i < validCount; i++) {
9535
- const pk = pkArray[i];
9536
- const rid = ridArray[i];
9537
- const index = indexArray[i];
9538
- if (pk === 0 && rid === 0 && index === 0) continue;
9539
- const slotIndex = rid % 65536;
9540
- const pageId = Math.floor(rid / 65536);
9541
- if (!pageGroupMap.has(pageId)) {
9542
- pageGroupMap.set(pageId, []);
9543
- }
9544
- pageGroupMap.get(pageId).push({ pk, slotIndex, index });
9545
- }
9546
- const sortedPageIds = Array.from(pageGroupMap.keys()).sort((a, b) => a - b);
9503
+ async fetchRowsByRids(collections, itemsCount, tx) {
9504
+ const result = new Array(itemsCount).fill(null);
9505
+ if (itemsCount === 0) return result;
9506
+ const sortedPageIds = Array.from(collections.keys()).sort((a, b) => a - b);
9547
9507
  await Promise.all(sortedPageIds.map(async (pageId) => {
9548
- const items = pageGroupMap.get(pageId);
9508
+ const items = collections.get(pageId);
9549
9509
  const page = await this.pfs.get(pageId, tx);
9550
9510
  if (!this.factory.isDataPage(page)) {
9551
9511
  throw new Error(`Page ${pageId} is not a data page`);
@@ -10302,9 +10262,11 @@ var DataplyAPI = class {
10302
10262
  if (!this.initialized) {
10303
10263
  throw new Error("Dataply instance is not initialized");
10304
10264
  }
10305
- return this.hook.trigger("close", void 0, async () => {
10306
- await this.pfs.close();
10307
- import_node_fs3.default.closeSync(this.fileHandle);
10265
+ return this.runWithDefaultWrite(() => {
10266
+ return this.hook.trigger("close", void 0, async () => {
10267
+ await this.pfs.close();
10268
+ import_node_fs3.default.closeSync(this.fileHandle);
10269
+ });
10308
10270
  });
10309
10271
  }
10310
10272
  };
@@ -131,6 +131,17 @@ export declare class RowTableEngine {
131
131
  * @returns Array of raw data of the rows in the same order as input PKs
132
132
  */
133
133
  selectMany(pks: number[] | Float64Array, tx: Transaction): Promise<(Uint8Array | null)[]>;
134
+ /**
135
+ * Collects items by page ID to minimize I/O.
136
+ * @param pks Array of PKs to look up
137
+ * @param tx Transaction
138
+ * @returns Map of page ID to array of {pk, slotIndex, index} pairs
139
+ */
140
+ collectItemsByPage(pks: number[] | Float64Array, tx: Transaction): Promise<Map<number, {
141
+ pk: number;
142
+ slotIndex: number;
143
+ index: number;
144
+ }[]>>;
134
145
  /**
135
146
  * Fetches multiple rows by their RID and PK combinations, grouping by page ID to minimize I/O.
136
147
  * @param pkRidPairs Array of {pk, rid} pairs
@@ -37,6 +37,11 @@ export interface DataplyMetadata {
37
37
  * The total number of data rows in the dataply.
38
38
  */
39
39
  rowCount: number;
40
+ /**
41
+ * The usage of the dataply. It is calculated based on the remaining page capacity.
42
+ * The value is between 0 and 1.
43
+ */
44
+ usage: number;
40
45
  }
41
46
  export type DataPage = Uint8Array & {
42
47
  __pageType: 'data';
@@ -8,5 +8,6 @@ export declare function getMinMaxValue(array: SupportedNumberArray): [number, nu
8
8
  * @param maxGap Optional fixed gap threshold. If not provided, it is calculated automatically.
9
9
  * @returns Array of clusters
10
10
  */
11
- export declare function clusterNumbers(numbers: number[] | Float64Array, maxGap?: number): Float64Array[];
11
+ export declare function clusterNumbersByGap(numbers: number[] | Float64Array, maxGap?: number): Float64Array[];
12
+ export declare function clusterNumbersByPagination(numbers: number[] | Float64Array, pagingSize: number, startPageId?: number): Float64Array[];
12
13
  export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dataply",
3
- "version": "0.0.24",
3
+ "version": "0.0.25-alpha.1",
4
4
  "description": "A lightweight storage engine for Node.js with support for MVCC, WAL.",
5
5
  "license": "MIT",
6
6
  "author": "izure <admin@izure.org>",