@harperfast/harper-pro 5.0.17 → 5.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/core/resources/RecordEncoder.ts +15 -12
  2. package/core/resources/RocksTransactionLogStore.ts +47 -22
  3. package/core/resources/Table.ts +98 -32
  4. package/core/resources/auditStore.ts +87 -6
  5. package/core/resources/databases.ts +67 -7
  6. package/dist/cloneNode/cloneNode.js +13 -8
  7. package/dist/cloneNode/cloneNode.js.map +1 -1
  8. package/dist/core/resources/RecordEncoder.js +1 -1
  9. package/dist/core/resources/RecordEncoder.js.map +1 -1
  10. package/dist/core/resources/RocksTransactionLogStore.js +80 -21
  11. package/dist/core/resources/RocksTransactionLogStore.js.map +1 -1
  12. package/dist/core/resources/Table.js +96 -35
  13. package/dist/core/resources/Table.js.map +1 -1
  14. package/dist/core/resources/auditStore.js +83 -6
  15. package/dist/core/resources/auditStore.js.map +1 -1
  16. package/dist/core/resources/databases.js +68 -5
  17. package/dist/core/resources/databases.js.map +1 -1
  18. package/dist/replication/replicationConnection.js +63 -18
  19. package/dist/replication/replicationConnection.js.map +1 -1
  20. package/npm-shrinkwrap.json +2 -2
  21. package/package.json +1 -1
  22. package/replication/replicationConnection.ts +66 -20
  23. package/studio/web/assets/{index-DhLu-DHX.js → index-BIjBsaWw.js} +5 -5
  24. package/studio/web/assets/{index-DhLu-DHX.js.map → index-BIjBsaWw.js.map} +1 -1
  25. package/studio/web/assets/{index.lazy-DBjOisCz.js → index.lazy-DN6bSQzR.js} +2 -2
  26. package/studio/web/assets/{index.lazy-DBjOisCz.js.map → index.lazy-DN6bSQzR.js.map} +1 -1
  27. package/studio/web/assets/{profile-DSL-499E.js → profile-Dyrp-ZIJ.js} +2 -2
  28. package/studio/web/assets/{profile-DSL-499E.js.map → profile-Dyrp-ZIJ.js.map} +1 -1
  29. package/studio/web/assets/{status-BRW5QtzY.js → status-BrfTnnpt.js} +2 -2
  30. package/studio/web/assets/{status-BRW5QtzY.js.map → status-BrfTnnpt.js.map} +1 -1
  31. package/studio/web/index.html +1 -1
@@ -195,20 +195,23 @@ export class RecordEncoder extends Encoder {
195
195
  const superGetStructures = this.getStructures;
196
196
  this.saveStructures = function (structures, isCompatible): boolean | undefined {
197
197
  if (this.isRocksDB) {
198
- return this.rootStore.transactionSync((txn) => {
199
- const sharedStructuresKey = [Symbol.for('structures'), this.name];
200
- const existingStructuresBuffer = txn.getBinarySync(sharedStructuresKey);
201
- const existingStructures = existingStructuresBuffer ? this.decode(existingStructuresBuffer) : undefined;
202
- if (typeof isCompatible == 'function') {
203
- if (!isCompatible(existingStructures)) {
198
+ return this.rootStore.transactionSync(
199
+ (txn) => {
200
+ const sharedStructuresKey = [Symbol.for('structures'), this.name];
201
+ const existingStructuresBuffer = txn.getBinarySync(sharedStructuresKey);
202
+ const existingStructures = existingStructuresBuffer ? this.decode(existingStructuresBuffer) : undefined;
203
+ if (typeof isCompatible == 'function') {
204
+ if (!isCompatible(existingStructures)) {
205
+ return false;
206
+ }
207
+ } else if (existingStructures && existingStructures.length !== isCompatible) {
204
208
  return false;
205
209
  }
206
- } else if (existingStructures && existingStructures.length !== isCompatible) {
207
- return false;
208
- }
209
- txn.putSync(sharedStructuresKey, structures);
210
- this.structureUpdate = structures;
211
- });
210
+ txn.putSync(sharedStructuresKey, structures);
211
+ this.structureUpdate = structures;
212
+ },
213
+ { retryOnBusy: true }
214
+ );
212
215
  } else {
213
216
  const result = superSaveStructures.call(this, structures, isCompatible);
214
217
  this.structureUpdate = structures;
@@ -5,6 +5,7 @@ import { Decoder, readAuditEntry, ENTRY_DATAVIEW, AuditRecord, createAuditEntry
5
5
  import { isMainThread } from 'node:worker_threads';
6
6
  import { EventEmitter } from 'node:events';
7
7
  import { asBinary } from 'lmdb';
8
+ import * as harperLogger from '../utility/logging/harper_logger.ts';
8
9
 
9
10
  if (!process.env.HARPER_NO_FLUSH_ON_EXIT && isMainThread) {
10
11
  // we want to be able to test log replay
@@ -288,29 +289,53 @@ export class RocksTransactionLogStore extends EventEmitter {
288
289
  iterable.iterate = () => aggregateIterator;
289
290
  }
290
291
  const mappedAggregateIterable = iterable.map(({ timestamp, data, endTxn }: TransactionEntry) => {
291
- const decoder = new Decoder(data.buffer, data.byteOffset, data.byteLength);
292
- data.dataView = decoder;
293
- // This represents the data that shouldn't be transferred for replication
294
- let structureVersion = decoder.getUint32(0);
295
- let position = 4;
296
- let previousResidencyId: number;
297
- let previousVersion: number;
298
- if (structureVersion & HAS_PREVIOUS_RESIDENCY_ID) {
299
- previousResidencyId = decoder.getUint32(position);
300
- position += 4;
301
- }
302
- if (structureVersion & HAS_PREVIOUS_VERSION) {
303
- // does previous residency id and version actually require separate flags?
304
- previousVersion = decoder.getFloat64(position);
305
- position += 8;
292
+ // Per-entry try/catch: a corrupt rocks prelude (first 4-16 bytes) would otherwise
293
+ // throw a raw `RangeError: Offset is outside the bounds of the DataView` out
294
+ // through `iterable.map`, escape the for-of consumer, and land as an
295
+ // uncaughtException on a later tick — stalling outgoing replication at the
296
+ // failing offset on every catch-up attempt. On error, yield a sentinel record
297
+ // with the timestamp preserved so iteration advances past the bad entry;
298
+ // downstream consumers already skip records with no `tableId`/`type`.
299
+ try {
300
+ const decoder = new Decoder(data.buffer, data.byteOffset, data.byteLength);
301
+ (data as any).dataView = decoder;
302
+ // This represents the data that shouldn't be transferred for replication
303
+ let structureVersion = decoder.getUint32(0);
304
+ let position = 4;
305
+ let previousResidencyId: number;
306
+ let previousVersion: number;
307
+ if (structureVersion & HAS_PREVIOUS_RESIDENCY_ID) {
308
+ previousResidencyId = decoder.getUint32(position);
309
+ position += 4;
310
+ }
311
+ if (structureVersion & HAS_PREVIOUS_VERSION) {
312
+ // does previous residency id and version actually require separate flags?
313
+ previousVersion = decoder.getFloat64(position);
314
+ position += 8;
315
+ }
316
+ const auditRecord = readAuditEntry(data, position, undefined);
317
+ auditRecord.version = timestamp;
318
+ auditRecord.endTxn = endTxn;
319
+ auditRecord.previousResidencyId = previousResidencyId;
320
+ auditRecord.previousVersion = previousVersion;
321
+ auditRecord.structureVersion = structureVersion & 0x00ffffff;
322
+ return auditRecord;
323
+ } catch (error) {
324
+ harperLogger.error('Failed to decode rocks transaction log entry; skipping', error, {
325
+ timestamp,
326
+ byteLength: data?.byteLength,
327
+ });
328
+ return {
329
+ version: timestamp,
330
+ endTxn,
331
+ type: undefined,
332
+ tableId: undefined,
333
+ recordId: undefined,
334
+ getValue: () => undefined,
335
+ getBinaryValue: () => undefined,
336
+ getBinaryRecordId: () => undefined,
337
+ } as unknown as AuditRecord;
306
338
  }
307
- const auditRecord = readAuditEntry(data, position, undefined, true);
308
- auditRecord.version = timestamp;
309
- auditRecord.endTxn = endTxn;
310
- auditRecord.previousResidencyId = previousResidencyId;
311
- auditRecord.previousVersion = previousVersion;
312
- auditRecord.structureVersion = structureVersion & 0x00ffffff;
313
- return auditRecord;
314
339
  });
315
340
  // Add methods to the mapped iterable if we have an aggregate iterator
316
341
  if (aggregateIterator?.addLog) {
@@ -6,7 +6,7 @@
6
6
 
7
7
  import { CONFIG_PARAMS, OPERATIONS_ENUM, SYSTEM_TABLE_NAMES, SYSTEM_SCHEMA_NAME } from '../utility/hdbTerms.ts';
8
8
  import { type Database } from 'lmdb';
9
- import { getIndexedValues } from '../utility/lmdb/commonUtility.js';
9
+ import { getIndexedValues, getNextMonotonicTime } from '../utility/lmdb/commonUtility.js';
10
10
  import { getThisNodeId, exportIdMapping } from './nodeIdMapping.ts';
11
11
  import lodash from 'lodash';
12
12
  import { ExtendedIterable, SKIP } from '@harperfast/extended-iterable';
@@ -99,6 +99,7 @@ export const EVICTED = 8; // note that 2 is reserved for timestamps
99
99
  const TEST_WRITE_KEY_BUFFER = Buffer.allocUnsafeSlow(8192);
100
100
  const MAX_KEY_BYTES = 1978;
101
101
  const EVENT_HIGH_WATER_MARK = 100;
102
+ const REPLAY_YIELD_INTERVAL = 100; // yield to the event loop every N records during subscription replay
102
103
  const FULL_PERMISSIONS = {
103
104
  read: true,
104
105
  insert: true,
@@ -805,23 +806,23 @@ export function makeTable(options) {
805
806
  /**
806
807
  * Set TTL expiration for records in this table. On retrieval, record timestamps are checked for expiration.
807
808
  * This also informs the scheduling for record eviction.
808
- * @param expirationTime Time in seconds until records expire (are stale)
809
- * @param evictionTime Time in seconds until records are evicted (removed)
809
+ * @param opts Time in seconds until records expire, or an options object with `expiration`, `eviction`,
810
+ * and `scanInterval` (all in seconds, all optional). Number form preserves any previously configured
811
+ * eviction/scanInterval; object form replaces all three.
810
812
  */
811
- static setTTLExpiration(expiration: number | { expiration: number; eviction?: number; scanInterval?: number }) {
812
- // we set up a timer to remove expired entries. we only want the timer/reaper to run in one thread,
813
- // so we use the first one
814
- if (typeof expiration === 'number') {
815
- expirationMs = expiration * 1000;
816
- if (!evictionMs) evictionMs = 0; // by default, no extra time for eviction
817
- } else if (expiration && typeof expiration === 'object') {
818
- // an object with expiration times/options specified
819
- expirationMs = expiration.expiration * 1000;
820
- evictionMs = (expiration.eviction || 0) * 1000;
821
- cleanupInterval = expiration.scanInterval * 1000;
822
- } else throw new Error('Invalid expiration value type');
813
+ static setTTLExpiration(opts: number | { expiration?: number; eviction?: number; scanInterval?: number }) {
814
+ if (opts == null || (typeof opts !== 'number' && typeof opts !== 'object'))
815
+ throw new Error('Invalid expiration value type');
816
+ if (typeof opts === 'number') {
817
+ expirationMs = opts * 1000;
818
+ } else {
819
+ // `??` so an explicit 0 is treated as the user's chosen value, not as "missing"
820
+ expirationMs = (opts.expiration ?? 0) * 1000;
821
+ evictionMs = (opts.eviction ?? 0) * 1000;
822
+ cleanupInterval = (opts.scanInterval ?? 0) * 1000;
823
+ }
823
824
  if (expirationMs < 0) throw new Error('Expiration can not be negative');
824
- // default to one quarter of the total eviction time, and make sure it fits into a 32-bit signed integer
825
+ // default to one quarter of the total expiration+eviction window
825
826
  cleanupInterval = cleanupInterval || (expirationMs + evictionMs) / 4;
826
827
  scheduleCleanup();
827
828
  }
@@ -2641,12 +2642,21 @@ export function makeTable(options) {
2641
2642
  }
2642
2643
  if (!request) request = {};
2643
2644
  const getFullRecord = !request.rawEvents;
2644
- let pendingRealTimeQueue = []; // while we are servicing a loop for older messages, we have to queue up real-time messages and deliver them in order
2645
+ // While the count, !omitCurrent, and non-collection branches replay older messages, real-time
2646
+ // messages from the listener accumulate here and are drained at the end of the IIFE so they
2647
+ // arrive after the replayed history, in order. The startTime branch sets this to null and
2648
+ // uses dropDuringReplay instead — its snapshot:false cursor picks up the live tail directly.
2649
+ let pendingRealTimeQueue: any[] | null = [];
2650
+ // Set during the startTime audit-log replay. The cursor iterates the audit log forward with
2651
+ // snapshot:false, which catches any commits that land during yield points; dropping in the
2652
+ // listener avoids duplicate delivery.
2653
+ let dropDuringReplay = false;
2645
2654
  const thisId = requestTargetToId(request) ?? null; // treat undefined and null as the root
2646
2655
  const subscription = addSubscription(
2647
2656
  TableResource,
2648
2657
  thisId,
2649
2658
  function (id: Id, auditRecord: any, localTime: number, beginTxn: boolean) {
2659
+ if (dropDuringReplay) return;
2650
2660
  try {
2651
2661
  let type = auditRecord.type;
2652
2662
  let value;
@@ -2689,6 +2699,11 @@ export function makeTable(options) {
2689
2699
  request.startTime || 0,
2690
2700
  request
2691
2701
  );
2702
+ // Attach the request.listener BEFORE invoking the IIFE so that sync sends from the
2703
+ // IIFE's prologue go directly to the listener via emit('data') instead of accumulating
2704
+ // in subscription.queue. Without this, the IIFE can fill the queue past
2705
+ // EVENT_HIGH_WATER_MARK and hit waitForDrain before the consumer's listener exists.
2706
+ if (request.listener) subscription!.on('data', request.listener);
2692
2707
  const result = (async () => {
2693
2708
  const isCollection = request.isCollection ?? thisId == null;
2694
2709
  if (isCollection) {
@@ -2699,17 +2714,27 @@ export function makeTable(options) {
2699
2714
  let count = request.previousCount;
2700
2715
  if (count > 1000) count = 1000; // don't allow too many, we have to hold these in memory
2701
2716
  let startTime = request.startTime;
2717
+ let recordsSinceYield = 0;
2718
+
2702
2719
  if (isCollection) {
2703
2720
  // a collection should retrieve all descendant ids
2704
2721
  if (startTime) {
2705
2722
  if (count)
2706
2723
  throw new ClientError('startTime and previousCount can not be combined for a table level subscription');
2707
- // start time specified, get the audit history for this time range
2724
+ // start time specified, get the audit history for this time range. We drop real-time
2725
+ // messages during this loop because the snapshot:false cursor will pick them up itself.
2726
+ pendingRealTimeQueue = null;
2727
+ dropDuringReplay = true;
2728
+
2708
2729
  for (const auditRecord of auditStore.getRange({
2709
2730
  start: startTime,
2710
2731
  exclusiveStart: true,
2711
2732
  snapshot: false, // no need for a snapshot, audits don't change
2712
2733
  })) {
2734
+ if (++recordsSinceYield >= REPLAY_YIELD_INTERVAL) {
2735
+ recordsSinceYield = 0;
2736
+ await rest();
2737
+ }
2713
2738
  if (auditRecord.tableId !== tableId) continue;
2714
2739
  const id = auditRecord.recordId;
2715
2740
  if (thisId == null || isDescendantId(thisId, id)) {
@@ -2727,14 +2752,33 @@ export function makeTable(options) {
2727
2752
  if ((await subscription.waitForDrain()) === false) return;
2728
2753
  }
2729
2754
  }
2730
- // TODO: Would like to do this asynchronously, but would need to catch up on anything published during iteration
2731
- //await rest(); // yield for fairness
2732
- subscription.startTime = auditRecord.localTime; // update so we don't double send
2755
+ subscription!.startTime = auditRecord.localTime ?? auditRecord.version; // update so we don't double send
2733
2756
  }
2757
+ // No catch-up sweep needed. With snapshot:false (lmdb), notifyFromTransactionData
2758
+ // calls resetReadTxn before iterating, which bumps renewId; on the cursor's next
2759
+ // .next() it renews to a fresh txn whose snapshot is at least as recent. With
2760
+ // rocksdb, the audit-log iterator re-reads `_lastCommittedPosition` on each next()
2761
+ // (live tail). Either way, at loop exit subscription.startTime is at or past
2762
+ // lastTxnTime, and the gate in notifyFromTransactionData handles the handoff
2763
+ // once dropDuringReplay flips back.
2764
+ dropDuringReplay = false;
2734
2765
  } else if (count) {
2766
+ // Raise the listener's gate up front so that any in-flight 'committed' callbacks
2767
+ // for records the cursor will capture in `history` get gated out of
2768
+ // pendingRealTimeQueue rather than queued and re-emitted as duplicates after
2769
+ // history is sent. getNextMonotonicTime() returns a strictly-greater value than
2770
+ // any audit record's localTime issued so far — it's the same source Harper uses
2771
+ // to assign localTimes — so this gates exactly the records the cursor's
2772
+ // snapshot:true view can see. Anything committed strictly after this point will
2773
+ // pass the gate and reach the queue.
2774
+ subscription!.startTime = getNextMonotonicTime();
2735
2775
  const history = [];
2736
2776
  // we are collecting the history in reverse order to get the right count, then reversing to send
2737
2777
  for (const auditRecord of auditStore.getRange({ start: 'z', end: false, reverse: true })) {
2778
+ if (++recordsSinceYield >= REPLAY_YIELD_INTERVAL) {
2779
+ recordsSinceYield = 0;
2780
+ await rest();
2781
+ }
2738
2782
  try {
2739
2783
  if (auditRecord.tableId !== tableId) continue;
2740
2784
  const id = auditRecord.recordId;
@@ -2752,20 +2796,34 @@ export function makeTable(options) {
2752
2796
  } catch (error) {
2753
2797
  logger.error?.('Error getting history entry', auditRecord.localTime, error);
2754
2798
  }
2755
- // TODO: Would like to do this asynchronously, but would need to catch up on anything published during iteration
2756
- //await rest(); // yield for fairness
2757
2799
  }
2758
2800
  for (let i = history.length; i > 0; ) {
2759
2801
  send(history[--i]);
2760
2802
  }
2761
- if (history[0]) subscription.startTime = history[0].localTime; // update so don't double send
2762
2803
  } else if (!request.omitCurrent) {
2804
+ // Raise the listener's gate up front so that any in-flight 'committed' callbacks
2805
+ // for pre-subscribe commits (which haven't yet advanced lastTxnTime when subscribe
2806
+ // is called) get gated out of the queue. Otherwise the listener fires for them
2807
+ // during cursor yields and emits stale events the cursor either covered (current
2808
+ // state) or correctly skipped (e.g., deletes via `if (!value) continue`).
2809
+ // getNextMonotonicTime() is the same source Harper uses to assign audit record
2810
+ // localTimes, so the gate cuts at a precise instant in the same time domain.
2811
+ subscription!.startTime = getNextMonotonicTime();
2812
+
2813
+ // Retained-message semantics: subscriber may legitimately receive a record twice
2814
+ // if a post-subscribe write hits a key the cursor also visits. This is
2815
+ // idempotent for "current state then live updates" — both deliveries land at
2816
+ // the same final state. We don't dedupe.
2763
2817
  for (const { key: id, value, version, localTime, size } of primaryStore.getRange({
2764
2818
  start: thisId ?? false,
2765
2819
  end: thisId == null ? undefined : [thisId, MAXIMUM_KEY],
2766
2820
  versions: true,
2767
2821
  snapshot: false, // no need for a snapshot, just want the latest data
2768
2822
  })) {
2823
+ if (++recordsSinceYield >= REPLAY_YIELD_INTERVAL) {
2824
+ recordsSinceYield = 0;
2825
+ await rest();
2826
+ }
2769
2827
  if (!value) continue;
2770
2828
  send({ id, localTime, value, version, type: 'put', size });
2771
2829
  if (subscription.queue?.length > EVENT_HIGH_WATER_MARK) {
@@ -2791,13 +2849,19 @@ export function makeTable(options) {
2791
2849
  }
2792
2850
  logger.trace?.('Subscription from', startTime, 'from', thisId, localTime);
2793
2851
  if (startTime < localTime) {
2794
- // start time specified, get the audit history for this record
2852
+ // start time specified, get the audit history for this record. Set startTime up
2853
+ // front so the listener gate skips any in-flight 'committed' for this version
2854
+ // during the yields below — otherwise that event would be queued and drained as a
2855
+ // duplicate of the entry send.
2856
+ subscription!.startTime = localTime ?? entry?.version;
2795
2857
  const history = [];
2796
2858
  let nextTime = localTime;
2797
2859
  let nodeId = entry?.nodeId;
2798
2860
  do {
2799
- //TODO: Would like to do this asynchronously, but we will need to run catch after this to ensure we didn't miss anything
2800
- //await auditStore.prefetch([key]); // do it asynchronously for better fairness/concurrency and avoid page faults
2861
+ if (++recordsSinceYield >= REPLAY_YIELD_INTERVAL) {
2862
+ recordsSinceYield = 0;
2863
+ await rest();
2864
+ }
2801
2865
  const auditRecord = auditStore.getSync(nextTime, tableId, thisId, nodeId);
2802
2866
  if (auditRecord) {
2803
2867
  if (startTime < nextTime) {
@@ -2819,7 +2883,6 @@ export function makeTable(options) {
2819
2883
  for (let i = history.length; i > 0; ) {
2820
2884
  send(history[--i]);
2821
2885
  }
2822
- subscription.startTime = localTime; // make sure we don't re-broadcast the current version that we already sent
2823
2886
  }
2824
2887
  if (!request.omitCurrent && entry?.value) {
2825
2888
  // if retain and it exists, send the current value first
@@ -2831,10 +2894,12 @@ export function makeTable(options) {
2831
2894
  }
2832
2895
  }
2833
2896
  // now send any queued messages
2834
- for (const event of pendingRealTimeQueue) {
2835
- send(event);
2897
+ if (pendingRealTimeQueue) {
2898
+ for (const event of pendingRealTimeQueue) {
2899
+ send(event);
2900
+ }
2901
+ pendingRealTimeQueue = null;
2836
2902
  }
2837
- pendingRealTimeQueue = null;
2838
2903
  })();
2839
2904
  result.catch((error) => {
2840
2905
  harperLogger.error?.('Error in real-time subscription:', error);
@@ -2846,7 +2911,6 @@ export function makeTable(options) {
2846
2911
  }
2847
2912
  subscription.send(event);
2848
2913
  }
2849
- if (request.listener) subscription.on('data', request.listener);
2850
2914
  return subscription;
2851
2915
  }
2852
2916
 
@@ -4245,6 +4309,8 @@ export function makeTable(options) {
4245
4309
  Boolean(invalidated),
4246
4310
  auditRecord
4247
4311
  );
4312
+ // arm the eviction scanner, mirroring the .put() path
4313
+ if (sourceContext.expiresAt) scheduleCleanup();
4248
4314
  } else if (existingEntry) {
4249
4315
  logger.trace?.(
4250
4316
  `Deleting resolved record from source with id: ${id}, timestamp: ${new Date(txnTime).toISOString()}`
@@ -50,6 +50,7 @@ export type AuditRecord = {
50
50
  previousAdditionalAuditRefs?: Array<{ version: number; nodeId: number }>;
51
51
  endTxn?: boolean;
52
52
  structureVersion?: number;
53
+ getBinaryRecordId?: any;
53
54
  };
54
55
 
55
56
  const ENTRY_HEADER = Buffer.alloc(2816); // this is sized to be large enough for the maximum key size (1976) plus large usernames. We may want to consider some limits on usernames to ensure this all fits
@@ -73,6 +74,16 @@ export const transactionKeyEncoder = {
73
74
  if (buffer[start] === 66) {
74
75
  const dataView =
75
76
  buffer.dataView || (buffer.dataView = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength));
77
+ // Without this bounds check, a truncated key buffer escapes as RangeError up
78
+ // through lmdb-js's iterator and lands as an uncaughtException on a later tick,
79
+ // stalling outgoing replication for the affected (peer, db) pair.
80
+ if (start + 8 > buffer.byteLength) {
81
+ harperLogger.warn('Audit key buffer too short for float64 read; returning NaN sentinel', {
82
+ start,
83
+ byteLength: buffer.byteLength,
84
+ });
85
+ return NaN;
86
+ }
76
87
  return dataView.getFloat64(start);
77
88
  } else {
78
89
  return readKey(buffer, start, end);
@@ -439,6 +450,15 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
439
450
  const nodeId = decoder.readInt();
440
451
  const tableId = decoder.readInt();
441
452
  let length = decoder.readInt();
453
+ // A corrupt length field (e.g., a 0xff-prefixed uint32) would otherwise push
454
+ // decoder.position hundreds of megabytes past the buffer; the next readFloat64
455
+ // then throws with the bogus position in the message. Failing fast here keeps
456
+ // the throw inside this try/catch so we surface a sentinel instead.
457
+ if (length < 0 || decoder.position + length > buffer.byteLength) {
458
+ throw new RangeError(
459
+ `Audit entry recordId length ${length} exceeds remaining buffer (position ${decoder.position}, byteLength ${buffer.byteLength})`
460
+ );
461
+ }
442
462
  const recordIdStart = decoder.position;
443
463
  const recordIdEnd = (decoder.position += length);
444
464
  // TODO: Once we support multiple format versions, we can conditionally read the version (and the previousResidencyId)
@@ -469,6 +489,11 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
469
489
  }
470
490
  }
471
491
  length = decoder.readInt();
492
+ if (length < 0 || decoder.position + length > buffer.byteLength) {
493
+ throw new RangeError(
494
+ `Audit entry username length ${length} exceeds remaining buffer (position ${decoder.position}, byteLength ${buffer.byteLength})`
495
+ );
496
+ }
472
497
  const usernameStart = decoder.position;
473
498
  const usernameEnd = (decoder.position += length);
474
499
  let value: any;
@@ -477,8 +502,17 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
477
502
  tableId,
478
503
  nodeId,
479
504
  get recordId() {
480
- // use a subarray to protect against the underlying buffer being modified
481
- return readKey(buffer.subarray(0, recordIdEnd), recordIdStart, recordIdEnd);
505
+ // The recordId is decoded lazily and lives outside readAuditEntry's try/catch,
506
+ // so a corrupt recordId region would otherwise escape as an uncaught RangeError
507
+ // on property access. Catch and return undefined; callers already treat missing
508
+ // recordId as a skip-eligible entry.
509
+ try {
510
+ // use a subarray to protect against the underlying buffer being modified
511
+ return readKey(buffer.subarray(0, recordIdEnd), recordIdStart, recordIdEnd);
512
+ } catch (error) {
513
+ harperLogger.warn('Failed to decode audit recordId; treating as corrupt', error);
514
+ return undefined;
515
+ }
482
516
  },
483
517
  getBinaryRecordId() {
484
518
  return buffer.subarray(recordIdStart, recordIdEnd);
@@ -486,9 +520,14 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
486
520
  version,
487
521
  previousVersion,
488
522
  get user() {
489
- return usernameEnd > usernameStart
490
- ? readKey(buffer.subarray(0, usernameEnd), usernameStart, usernameEnd)
491
- : undefined;
523
+ try {
524
+ return usernameEnd > usernameStart
525
+ ? readKey(buffer.subarray(0, usernameEnd), usernameStart, usernameEnd)
526
+ : undefined;
527
+ } catch (error) {
528
+ harperLogger.warn('Failed to decode audit username; treating as corrupt', error);
529
+ return undefined;
530
+ }
492
531
  },
493
532
  get encoded() {
494
533
  return start ? buffer.subarray(start, end) : buffer;
@@ -523,10 +562,52 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
523
562
  };
524
563
  } catch (error) {
525
564
  harperLogger.error('Reading audit entry error', error, buffer);
526
- return {};
565
+ return createCorruptAuditSentinel(buffer, start, end);
527
566
  }
528
567
  }
529
568
 
569
+ /**
570
+ * Build a structurally complete audit record for an entry that failed to decode. The fields
571
+ * mirror the happy-path shape so downstream consumers that access (e.g.) `getValue` or the
572
+ * `recordId` getter don't blow up with a `TypeError: not a function` / `undefined.is(...)`
573
+ * after the header decode already failed. Consumers identify these by the undefined
574
+ * `tableId`/`type` (the same signal lmdb has produced from this catch since before this
575
+ * change) and skip them — `classifyAuditEntryForReplay` calls them out as `corrupt-header`,
576
+ * and the dispatch loops in Table.ts / transactionBroadcast.ts filter via tableId guards.
577
+ */
578
+ function createCorruptAuditSentinel(buffer: Uint8Array, start: number, end: number | undefined): AuditRecord {
579
+ return {
580
+ type: undefined,
581
+ tableId: undefined,
582
+ nodeId: undefined,
583
+ recordId: undefined,
584
+ version: undefined,
585
+ previousVersion: undefined,
586
+ user: undefined,
587
+ extendedType: undefined,
588
+ residencyId: undefined,
589
+ previousResidencyId: undefined,
590
+ expiresAt: undefined,
591
+ originatingOperation: undefined,
592
+ previousAdditionalAuditRefs: undefined,
593
+ get encoded() {
594
+ return start ? buffer.subarray(start, end) : buffer;
595
+ },
596
+ get size() {
597
+ return start !== undefined && end !== undefined ? end - start : buffer.byteLength;
598
+ },
599
+ getBinaryRecordId() {
600
+ return undefined;
601
+ },
602
+ getValue() {
603
+ return undefined;
604
+ },
605
+ getBinaryValue() {
606
+ return undefined;
607
+ },
608
+ } as any;
609
+ }
610
+
530
611
  export class Decoder extends DataView<ArrayBufferLike> {
531
612
  position = 0;
532
613
  readInt() {
@@ -1063,6 +1063,7 @@ export function table<TableResourceType>(tableDefinition: TableDefinition): Tabl
1063
1063
  const dbi = openIndex(dbiKey, rootStore, attribute);
1064
1064
  if (
1065
1065
  changed ||
1066
+ attributeDescriptor.indexingFailed ||
1066
1067
  (attributeDescriptor.indexingPID && attributeDescriptor.indexingPID !== process.pid) ||
1067
1068
  attributeDescriptor.restartNumber < workerData?.restartNumber
1068
1069
  ) {
@@ -1071,6 +1072,7 @@ export function table<TableResourceType>(tableDefinition: TableDefinition): Tabl
1071
1072
  attributeDescriptor = attributesDbi.getSync(dbiKey);
1072
1073
  if (
1073
1074
  changed ||
1075
+ attributeDescriptor.indexingFailed ||
1074
1076
  (attributeDescriptor.indexingPID && attributeDescriptor.indexingPID !== process.pid) ||
1075
1077
  attributeDescriptor.restartNumber < workerData?.restartNumber
1076
1078
  ) {
@@ -1084,14 +1086,20 @@ export function table<TableResourceType>(tableDefinition: TableDefinition): Tabl
1084
1086
  if (hasExistingData) {
1085
1087
  attribute.lastIndexedKey = attributeDescriptor?.lastIndexedKey ?? undefined;
1086
1088
  attribute.indexingPID = process.pid;
1089
+ delete attribute.indexingFailed; // clear failure flag for the new run
1087
1090
  dbi.isIndexing = true;
1088
- Object.defineProperty(attribute, 'dbi', { value: dbi });
1091
+ Object.defineProperty(attribute, 'dbi', { value: dbi, configurable: true, enumerable: false });
1089
1092
  // we only set indexing nulls to true if new or reindexing, we can't have partial indexing of null
1090
1093
  attributesToIndex.push(attribute);
1091
1094
  }
1092
1095
  }
1093
1096
  attributesDbi.put(dbiKey, attribute);
1094
1097
  }
1098
+ // If a migration is in progress (indexingPID set), any newly opened dbi must also
1099
+ // reflect isIndexing = true. A resetDatabases() during an active runIndexing creates
1100
+ // a new dbi object; without this, queries could use the new dbi (isIndexing = false)
1101
+ // and return incomplete results while the backfill is still running.
1102
+ if (attributeDescriptor?.indexingPID) dbi.isIndexing = true;
1095
1103
  if (attributeDescriptor?.indexNulls && attribute.indexNulls === undefined) attribute.indexNulls = true;
1096
1104
  dbi.indexNulls = attribute.indexNulls;
1097
1105
  indices[attribute.name] = dbi;
@@ -1162,6 +1170,7 @@ async function runIndexing(Table, attributes, indicesToRemove) {
1162
1170
  lastResolution = index.drop();
1163
1171
  }
1164
1172
  let interrupted;
1173
+ let hadIndexingErrors = false;
1165
1174
  const attributeErrorReported = {};
1166
1175
  let indexed = 0;
1167
1176
  const attributesLength = attributes.length;
@@ -1215,6 +1224,7 @@ async function runIndexing(Table, attributes, indicesToRemove) {
1215
1224
  }
1216
1225
  }
1217
1226
  } catch (error) {
1227
+ hadIndexingErrors = true;
1218
1228
  if (!attributeErrorReported[property]) {
1219
1229
  // just report an indexing error once per attribute so we don't spam the logs
1220
1230
  attributeErrorReported[property] = true;
@@ -1227,6 +1237,7 @@ async function runIndexing(Table, attributes, indicesToRemove) {
1227
1237
  () => outstanding--,
1228
1238
  (error) => {
1229
1239
  outstanding--;
1240
+ hadIndexingErrors = true;
1230
1241
  logger.error(error);
1231
1242
  }
1232
1243
  );
@@ -1244,20 +1255,69 @@ async function runIndexing(Table, attributes, indicesToRemove) {
1244
1255
  if (outstanding > MAX_OUTSTANDING_INDEXING) await lastResolution;
1245
1256
  else if (outstanding > MIN_OUTSTANDING_INDEXING) await new Promise((resolve) => setImmediate(resolve)); // yield event turn, don't want to use all computation
1246
1257
  }
1258
+ }
1259
+ // Await the last pending put. If it rejects, that is also an indexing error.
1260
+ // Note: the when() calls above already attach rejection handlers to each record's
1261
+ // last-put promise; this try-catch specifically handles the case where lastResolution
1262
+ // itself rejects (i.e. the very last put in the loop failed) which would otherwise
1263
+ // throw past the hadIndexingErrors check to the outer catch. The broader issue of
1264
+ // unhandled rejections from non-last puts in multi-value attributes is pre-existing
1265
+ // and out of scope for this fix.
1266
+ try {
1267
+ await lastResolution;
1268
+ } catch (error) {
1269
+ hadIndexingErrors = true;
1270
+ logger.error(error);
1271
+ }
1272
+ // Yield one more event turn so any queued when() error callbacks (which fire as
1273
+ // microtasks when their tracked promise settles) have a chance to set hadIndexingErrors
1274
+ // before we decide whether to mark indexing as complete.
1275
+ await new Promise((resolve) => setImmediate(resolve));
1276
+ if (hadIndexingErrors) {
1277
+ // Some records failed to index. Persist the failure marker in the descriptor so
1278
+ // the next call to table() (including after a restart with a fresh PID) re-triggers
1279
+ // the backfill from the last checkpoint. Do NOT clear indexingPID or isIndexing —
1280
+ // leave the index in its incomplete state so queries return 503 "not indexed yet"
1281
+ // rather than silently returning partial results. This is the key fix for the
1282
+ // serent-canopy issue #135 fingerprint: a completed migration with transient errors
1283
+ // (e.g. ERR_BUSY from RocksDB under load) leaving gaps while appearing successful.
1284
+ for (const attribute of attributes) {
1285
+ attribute.indexingFailed = true;
1286
+ // Preserve lastIndexedKey so the retry resumes from the last checkpoint.
1287
+ lastResolution = Table.dbisDB.put(attribute.key, attribute);
1288
+ // Keep isIndexing = true on both the attribute.dbi and the currently-active dbi
1289
+ // in Table.indices (which may differ if resetDatabases() ran during this pass).
1290
+ attribute.dbi.isIndexing = true;
1291
+ const activeDbi = Table.indices[attribute.name];
1292
+ if (activeDbi) activeDbi.isIndexing = true;
1293
+ }
1294
+ await lastResolution;
1295
+ logger.warn(
1296
+ `Indexing of ${Table.tableName} encountered errors on some records - index will remain incomplete. ` +
1297
+ `On next restart the migration will be retried from the last checkpoint (indexingFailed=true). ` +
1298
+ `Affected attributes: ${attributes.map((a) => a.name).join(', ')}`
1299
+ );
1300
+ } else {
1247
1301
  // update the attributes to indicate that we are finished
1248
1302
  for (const attribute of attributes) {
1249
1303
  delete attribute.lastIndexedKey;
1250
1304
  delete attribute.indexingPID;
1305
+ delete attribute.indexingFailed;
1251
1306
  attribute.dbi.isIndexing = false;
1307
+ // Also clear isIndexing on the currently-active dbi in Table.indices, which may
1308
+ // differ from attribute.dbi if a resetDatabases() call during this migration
1309
+ // opened a new dbi and registered it there.
1310
+ const activeDbi = Table.indices[attribute.name];
1311
+ if (activeDbi) activeDbi.isIndexing = false;
1252
1312
  lastResolution = Table.dbisDB.put(attribute.key, attribute);
1253
1313
  }
1314
+ await lastResolution;
1315
+ // now notify all the threads that we are done and the index is ready to use
1316
+ await signalling.signalSchemaChange(
1317
+ new SchemaEventMsg(process.pid, 'indexing-finished', Table.databaseName, Table.tableName)
1318
+ );
1319
+ logger.info(`Finished indexing ${Table.tableName} attributes`, attributes);
1254
1320
  }
1255
- await lastResolution;
1256
- // now notify all the threads that we are done and the index is ready to use
1257
- await signalling.signalSchemaChange(
1258
- new SchemaEventMsg(process.pid, 'indexing-finished', Table.databaseName, Table.tableName)
1259
- );
1260
- logger.info(`Finished indexing ${Table.tableName} attributes`, attributes);
1261
1321
  } catch (error) {
1262
1322
  logger.error('Error in indexing', error);
1263
1323
  }