harper 5.0.17 → 5.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import { Decoder, readAuditEntry, ENTRY_DATAVIEW, AuditRecord, createAuditEntry
5
5
  import { isMainThread } from 'node:worker_threads';
6
6
  import { EventEmitter } from 'node:events';
7
7
  import { asBinary } from 'lmdb';
8
+ import * as harperLogger from '../utility/logging/harper_logger.ts';
8
9
 
9
10
  if (!process.env.HARPER_NO_FLUSH_ON_EXIT && isMainThread) {
10
11
  // we want to be able to test log replay
@@ -288,6 +289,7 @@ export class RocksTransactionLogStore extends EventEmitter {
288
289
  iterable.iterate = () => aggregateIterator;
289
290
  }
290
291
  const mappedAggregateIterable = iterable.map(({ timestamp, data, endTxn }: TransactionEntry) => {
292
+ <<<<<<< HEAD
291
293
  const decoder = new Decoder(data.buffer, data.byteOffset, data.byteLength);
292
294
  data.dataView = decoder;
293
295
  // This represents the data that shouldn't be transferred for replication
@@ -311,6 +313,55 @@ export class RocksTransactionLogStore extends EventEmitter {
311
313
  auditRecord.previousVersion = previousVersion;
312
314
  auditRecord.structureVersion = structureVersion & 0x00ffffff;
313
315
  return auditRecord;
316
+ =======
317
+ // Per-entry try/catch: a corrupt rocks prelude (first 4-16 bytes) would otherwise
318
+ // throw a raw `RangeError: Offset is outside the bounds of the DataView` out
319
+ // through `iterable.map`, escape the for-of consumer, and land as an
320
+ // uncaughtException on a later tick — stalling outgoing replication at the
321
+ // failing offset on every catch-up attempt. On error, yield a sentinel record
322
+ // with the timestamp preserved so iteration advances past the bad entry;
323
+ // downstream consumers already skip records with no `tableId`/`type`.
324
+ try {
325
+ const decoder = new Decoder(data.buffer, data.byteOffset, data.byteLength);
326
+ (data as any).dataView = decoder;
327
+ // This represents the data that shouldn't be transferred for replication
328
+ let structureVersion = decoder.getUint32(0);
329
+ let position = 4;
330
+ let previousResidencyId: number;
331
+ let previousVersion: number;
332
+ if (structureVersion & HAS_PREVIOUS_RESIDENCY_ID) {
333
+ previousResidencyId = decoder.getUint32(position);
334
+ position += 4;
335
+ }
336
+ if (structureVersion & HAS_PREVIOUS_VERSION) {
337
+ // does previous residency id and version actually require separate flags?
338
+ previousVersion = decoder.getFloat64(position);
339
+ position += 8;
340
+ }
341
+ const auditRecord = readAuditEntry(data, position, undefined);
342
+ auditRecord.version = timestamp;
343
+ auditRecord.endTxn = endTxn;
344
+ auditRecord.previousResidencyId = previousResidencyId;
345
+ auditRecord.previousVersion = previousVersion;
346
+ auditRecord.structureVersion = structureVersion & 0x00ffffff;
347
+ return auditRecord;
348
+ } catch (error) {
349
+ harperLogger.error('Failed to decode rocks transaction log entry; skipping', error, {
350
+ timestamp,
351
+ byteLength: data?.byteLength,
352
+ });
353
+ return {
354
+ version: timestamp,
355
+ endTxn,
356
+ type: undefined,
357
+ tableId: undefined,
358
+ recordId: undefined,
359
+ getValue: () => undefined,
360
+ getBinaryValue: () => undefined,
361
+ getBinaryRecordId: () => undefined,
362
+ } as unknown as AuditRecord;
363
+ }
364
+ >>>>>>> b84fbbd (fix: skip corrupt audit entries during iteration instead of throwing)
314
365
  });
315
366
  // Add methods to the mapped iterable if we have an aggregate iterator
316
367
  if (aggregateIterator?.addLog) {
@@ -805,23 +805,23 @@ export function makeTable(options) {
805
805
  /**
806
806
  * Set TTL expiration for records in this table. On retrieval, record timestamps are checked for expiration.
807
807
  * This also informs the scheduling for record eviction.
808
- * @param expirationTime Time in seconds until records expire (are stale)
809
- * @param evictionTime Time in seconds until records are evicted (removed)
808
+ * @param opts Time in seconds until records expire, or an options object with `expiration`, `eviction`,
809
+ * and `scanInterval` (all in seconds, all optional). Number form preserves any previously configured
810
+ * eviction/scanInterval; object form replaces all three.
810
811
  */
811
- static setTTLExpiration(expiration: number | { expiration: number; eviction?: number; scanInterval?: number }) {
812
- // we set up a timer to remove expired entries. we only want the timer/reaper to run in one thread,
813
- // so we use the first one
814
- if (typeof expiration === 'number') {
815
- expirationMs = expiration * 1000;
816
- if (!evictionMs) evictionMs = 0; // by default, no extra time for eviction
817
- } else if (expiration && typeof expiration === 'object') {
818
- // an object with expiration times/options specified
819
- expirationMs = expiration.expiration * 1000;
820
- evictionMs = (expiration.eviction || 0) * 1000;
821
- cleanupInterval = expiration.scanInterval * 1000;
822
- } else throw new Error('Invalid expiration value type');
812
+ static setTTLExpiration(opts: number | { expiration?: number; eviction?: number; scanInterval?: number }) {
813
+ if (opts == null || (typeof opts !== 'number' && typeof opts !== 'object'))
814
+ throw new Error('Invalid expiration value type');
815
+ if (typeof opts === 'number') {
816
+ expirationMs = opts * 1000;
817
+ } else {
818
+ // `??` so an explicit 0 is treated as the user's chosen value, not as "missing"
819
+ expirationMs = (opts.expiration ?? 0) * 1000;
820
+ evictionMs = (opts.eviction ?? 0) * 1000;
821
+ cleanupInterval = (opts.scanInterval ?? 0) * 1000;
822
+ }
823
823
  if (expirationMs < 0) throw new Error('Expiration can not be negative');
824
- // default to one quarter of the total eviction time, and make sure it fits into a 32-bit signed integer
824
+ // default to one quarter of the total expiration+eviction window
825
825
  cleanupInterval = cleanupInterval || (expirationMs + evictionMs) / 4;
826
826
  scheduleCleanup();
827
827
  }
@@ -4245,6 +4245,8 @@ export function makeTable(options) {
4245
4245
  Boolean(invalidated),
4246
4246
  auditRecord
4247
4247
  );
4248
+ // arm the eviction scanner, mirroring the .put() path
4249
+ if (sourceContext.expiresAt) scheduleCleanup();
4248
4250
  } else if (existingEntry) {
4249
4251
  logger.trace?.(
4250
4252
  `Deleting resolved record from source with id: ${id}, timestamp: ${new Date(txnTime).toISOString()}`
@@ -49,7 +49,15 @@ export type AuditRecord = {
49
49
  previousNodeId?: number;
50
50
  previousAdditionalAuditRefs?: Array<{ version: number; nodeId: number }>;
51
51
  endTxn?: boolean;
52
+ <<<<<<< HEAD
52
53
  structureVersion?: number;
54
+ =======
55
+ getBinaryRecordId?: any;
56
+ <<<<<<< HEAD
57
+ corrupt?: boolean;
58
+ >>>>>>> b84fbbd (fix: skip corrupt audit entries during iteration instead of throwing)
59
+ =======
60
+ >>>>>>> 6b6192c (test: cover lmdb keyEncoder and rocks-prelude paths; drop unused corrupt flag)
53
61
  };
54
62
 
55
63
  const ENTRY_HEADER = Buffer.alloc(2816); // this is sized to be large enough for the maximum key size (1976) plus large usernames. We may want to consider some limits on usernames to ensure this all fits
@@ -73,6 +81,16 @@ export const transactionKeyEncoder = {
73
81
  if (buffer[start] === 66) {
74
82
  const dataView =
75
83
  buffer.dataView || (buffer.dataView = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength));
84
+ // Without this bounds check, a truncated key buffer escapes as RangeError up
85
+ // through lmdb-js's iterator and lands as an uncaughtException on a later tick,
86
+ // stalling outgoing replication for the affected (peer, db) pair.
87
+ if (start + 8 > buffer.byteLength) {
88
+ harperLogger.warn('Audit key buffer too short for float64 read; returning NaN sentinel', {
89
+ start,
90
+ byteLength: buffer.byteLength,
91
+ });
92
+ return NaN;
93
+ }
76
94
  return dataView.getFloat64(start);
77
95
  } else {
78
96
  return readKey(buffer, start, end);
@@ -439,6 +457,15 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
439
457
  const nodeId = decoder.readInt();
440
458
  const tableId = decoder.readInt();
441
459
  let length = decoder.readInt();
460
+ // A corrupt length field (e.g., a 0xff-prefixed uint32) would otherwise push
461
+ // decoder.position hundreds of megabytes past the buffer; the next readFloat64
462
+ // then throws with the bogus position in the message. Failing fast here keeps
463
+ // the throw inside this try/catch so we surface a sentinel instead.
464
+ if (length < 0 || decoder.position + length > buffer.byteLength) {
465
+ throw new RangeError(
466
+ `Audit entry recordId length ${length} exceeds remaining buffer (position ${decoder.position}, byteLength ${buffer.byteLength})`
467
+ );
468
+ }
442
469
  const recordIdStart = decoder.position;
443
470
  const recordIdEnd = (decoder.position += length);
444
471
  // TODO: Once we support multiple format versions, we can conditionally read the version (and the previousResidencyId)
@@ -469,6 +496,11 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
469
496
  }
470
497
  }
471
498
  length = decoder.readInt();
499
+ if (length < 0 || decoder.position + length > buffer.byteLength) {
500
+ throw new RangeError(
501
+ `Audit entry username length ${length} exceeds remaining buffer (position ${decoder.position}, byteLength ${buffer.byteLength})`
502
+ );
503
+ }
472
504
  const usernameStart = decoder.position;
473
505
  const usernameEnd = (decoder.position += length);
474
506
  let value: any;
@@ -477,8 +509,17 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
477
509
  tableId,
478
510
  nodeId,
479
511
  get recordId() {
480
- // use a subarray to protect against the underlying buffer being modified
481
- return readKey(buffer.subarray(0, recordIdEnd), recordIdStart, recordIdEnd);
512
+ // The recordId is decoded lazily and lives outside readAuditEntry's try/catch,
513
+ // so a corrupt recordId region would otherwise escape as an uncaught RangeError
514
+ // on property access. Catch and return undefined; callers already treat missing
515
+ // recordId as a skip-eligible entry.
516
+ try {
517
+ // use a subarray to protect against the underlying buffer being modified
518
+ return readKey(buffer.subarray(0, recordIdEnd), recordIdStart, recordIdEnd);
519
+ } catch (error) {
520
+ harperLogger.warn('Failed to decode audit recordId; treating as corrupt', error);
521
+ return undefined;
522
+ }
482
523
  },
483
524
  getBinaryRecordId() {
484
525
  return buffer.subarray(recordIdStart, recordIdEnd);
@@ -486,9 +527,14 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
486
527
  version,
487
528
  previousVersion,
488
529
  get user() {
489
- return usernameEnd > usernameStart
490
- ? readKey(buffer.subarray(0, usernameEnd), usernameStart, usernameEnd)
491
- : undefined;
530
+ try {
531
+ return usernameEnd > usernameStart
532
+ ? readKey(buffer.subarray(0, usernameEnd), usernameStart, usernameEnd)
533
+ : undefined;
534
+ } catch (error) {
535
+ harperLogger.warn('Failed to decode audit username; treating as corrupt', error);
536
+ return undefined;
537
+ }
492
538
  },
493
539
  get encoded() {
494
540
  return start ? buffer.subarray(start, end) : buffer;
@@ -523,10 +569,56 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
523
569
  };
524
570
  } catch (error) {
525
571
  harperLogger.error('Reading audit entry error', error, buffer);
572
+ <<<<<<< HEAD
526
573
  return {};
574
+ =======
575
+ return createCorruptAuditSentinel(buffer, start, end);
576
+ >>>>>>> b84fbbd (fix: skip corrupt audit entries during iteration instead of throwing)
527
577
  }
528
578
  }
529
579
 
580
/**
 * Build a structurally complete placeholder audit record for an entry that failed to decode.
 *
 * Every data field is present but `undefined`, and the accessor methods (`getValue`,
 * `getBinaryValue`, `getBinaryRecordId`) exist and return `undefined`, so code that touches
 * those members on a failed entry does not hit a `TypeError`. Consumers are expected to
 * recognise the placeholder by its undefined `tableId`/`type` and skip it
 * (NOTE(review): the skipping consumers live outside this view — confirm against callers).
 *
 * @param buffer The raw bytes that could not be decoded.
 * @param start Offset of the entry within `buffer`; a falsy value means "from the beginning".
 * @param end Exclusive end offset of the entry, or `undefined` for "to the end of `buffer`".
 * @returns A placeholder record whose `encoded` getter exposes the raw bytes and whose `size`
 * getter reports the entry's byte count.
 */
function createCorruptAuditSentinel(buffer: Uint8Array, start: number, end: number | undefined): AuditRecord {
	const sentinel = {
		type: undefined,
		tableId: undefined,
		nodeId: undefined,
		recordId: undefined,
		version: undefined,
		previousVersion: undefined,
		user: undefined,
		extendedType: undefined,
		residencyId: undefined,
		previousResidencyId: undefined,
		expiresAt: undefined,
		originatingOperation: undefined,
		previousAdditionalAuditRefs: undefined,
		get encoded() {
			return start ? buffer.subarray(start, end) : buffer;
		},
		get size() {
			if (start !== undefined && end !== undefined) return end - start;
			// Open-ended entry: count only the bytes from `start` onward so the value agrees
			// with what `encoded` exposes, instead of reporting the whole backing buffer.
			return Math.max(0, buffer.byteLength - (start || 0));
		},
		getBinaryRecordId() {
			return undefined;
		},
		getValue() {
			return undefined;
		},
		getBinaryValue() {
			return undefined;
		},
	};
	// The placeholder carries accessors beyond the declared optional fields, hence the cast.
	return sentinel as any;
}
621
+
530
622
  export class Decoder extends DataView<ArrayBufferLike> {
531
623
  position = 0;
532
624
  readInt() {
@@ -1063,6 +1063,7 @@ export function table<TableResourceType>(tableDefinition: TableDefinition): Tabl
1063
1063
  const dbi = openIndex(dbiKey, rootStore, attribute);
1064
1064
  if (
1065
1065
  changed ||
1066
+ attributeDescriptor.indexingFailed ||
1066
1067
  (attributeDescriptor.indexingPID && attributeDescriptor.indexingPID !== process.pid) ||
1067
1068
  attributeDescriptor.restartNumber < workerData?.restartNumber
1068
1069
  ) {
@@ -1071,6 +1072,7 @@ export function table<TableResourceType>(tableDefinition: TableDefinition): Tabl
1071
1072
  attributeDescriptor = attributesDbi.getSync(dbiKey);
1072
1073
  if (
1073
1074
  changed ||
1075
+ attributeDescriptor.indexingFailed ||
1074
1076
  (attributeDescriptor.indexingPID && attributeDescriptor.indexingPID !== process.pid) ||
1075
1077
  attributeDescriptor.restartNumber < workerData?.restartNumber
1076
1078
  ) {
@@ -1084,14 +1086,20 @@ export function table<TableResourceType>(tableDefinition: TableDefinition): Tabl
1084
1086
  if (hasExistingData) {
1085
1087
  attribute.lastIndexedKey = attributeDescriptor?.lastIndexedKey ?? undefined;
1086
1088
  attribute.indexingPID = process.pid;
1089
+ delete attribute.indexingFailed; // clear failure flag for the new run
1087
1090
  dbi.isIndexing = true;
1088
- Object.defineProperty(attribute, 'dbi', { value: dbi });
1091
+ Object.defineProperty(attribute, 'dbi', { value: dbi, configurable: true, enumerable: false });
1089
1092
  // we only set indexing nulls to true if new or reindexing, we can't have partial indexing of null
1090
1093
  attributesToIndex.push(attribute);
1091
1094
  }
1092
1095
  }
1093
1096
  attributesDbi.put(dbiKey, attribute);
1094
1097
  }
1098
+ // If a migration is in progress (indexingPID set), any newly opened dbi must also
1099
+ // reflect isIndexing = true. A resetDatabases() during an active runIndexing creates
1100
+ // a new dbi object; without this, queries could use the new dbi (isIndexing = false)
1101
+ // and return incomplete results while the backfill is still running.
1102
+ if (attributeDescriptor?.indexingPID) dbi.isIndexing = true;
1095
1103
  if (attributeDescriptor?.indexNulls && attribute.indexNulls === undefined) attribute.indexNulls = true;
1096
1104
  dbi.indexNulls = attribute.indexNulls;
1097
1105
  indices[attribute.name] = dbi;
@@ -1162,6 +1170,7 @@ async function runIndexing(Table, attributes, indicesToRemove) {
1162
1170
  lastResolution = index.drop();
1163
1171
  }
1164
1172
  let interrupted;
1173
+ let hadIndexingErrors = false;
1165
1174
  const attributeErrorReported = {};
1166
1175
  let indexed = 0;
1167
1176
  const attributesLength = attributes.length;
@@ -1215,6 +1224,7 @@ async function runIndexing(Table, attributes, indicesToRemove) {
1215
1224
  }
1216
1225
  }
1217
1226
  } catch (error) {
1227
+ hadIndexingErrors = true;
1218
1228
  if (!attributeErrorReported[property]) {
1219
1229
  // just report an indexing error once per attribute so we don't spam the logs
1220
1230
  attributeErrorReported[property] = true;
@@ -1227,6 +1237,7 @@ async function runIndexing(Table, attributes, indicesToRemove) {
1227
1237
  () => outstanding--,
1228
1238
  (error) => {
1229
1239
  outstanding--;
1240
+ hadIndexingErrors = true;
1230
1241
  logger.error(error);
1231
1242
  }
1232
1243
  );
@@ -1244,20 +1255,69 @@ async function runIndexing(Table, attributes, indicesToRemove) {
1244
1255
  if (outstanding > MAX_OUTSTANDING_INDEXING) await lastResolution;
1245
1256
  else if (outstanding > MIN_OUTSTANDING_INDEXING) await new Promise((resolve) => setImmediate(resolve)); // yield event turn, don't want to use all computation
1246
1257
  }
1258
+ }
1259
+ // Await the last pending put. If it rejects, that is also an indexing error.
1260
+ // Note: the when() calls above already attach rejection handlers to each record's
1261
+ // last-put promise; this try-catch specifically handles the case where lastResolution
1262
+ // itself rejects (i.e. the very last put in the loop failed) which would otherwise
1263
+ // throw past the hadIndexingErrors check to the outer catch. The broader issue of
1264
+ // unhandled rejections from non-last puts in multi-value attributes is pre-existing
1265
+ // and out of scope for this fix.
1266
+ try {
1267
+ await lastResolution;
1268
+ } catch (error) {
1269
+ hadIndexingErrors = true;
1270
+ logger.error(error);
1271
+ }
1272
+ // Yield one more event turn so any queued when() error callbacks (which fire as
1273
+ // microtasks when their tracked promise settles) have a chance to set hadIndexingErrors
1274
+ // before we decide whether to mark indexing as complete.
1275
+ await new Promise((resolve) => setImmediate(resolve));
1276
+ if (hadIndexingErrors) {
1277
+ // Some records failed to index. Persist the failure marker in the descriptor so
1278
+ // the next call to table() (including after a restart with a fresh PID) re-triggers
1279
+ // the backfill from the last checkpoint. Do NOT clear indexingPID or isIndexing —
1280
+ // leave the index in its incomplete state so queries return 503 "not indexed yet"
1281
+ // rather than silently returning partial results. This is the key fix for the
1282
+ // serent-canopy issue #135 fingerprint: a completed migration with transient errors
1283
+ // (e.g. ERR_BUSY from RocksDB under load) leaving gaps while appearing successful.
1284
+ for (const attribute of attributes) {
1285
+ attribute.indexingFailed = true;
1286
+ // Preserve lastIndexedKey so the retry resumes from the last checkpoint.
1287
+ lastResolution = Table.dbisDB.put(attribute.key, attribute);
1288
+ // Keep isIndexing = true on both the attribute.dbi and the currently-active dbi
1289
+ // in Table.indices (which may differ if resetDatabases() ran during this pass).
1290
+ attribute.dbi.isIndexing = true;
1291
+ const activeDbi = Table.indices[attribute.name];
1292
+ if (activeDbi) activeDbi.isIndexing = true;
1293
+ }
1294
+ await lastResolution;
1295
+ logger.warn(
1296
+ `Indexing of ${Table.tableName} encountered errors on some records - index will remain incomplete. ` +
1297
+ `On next restart the migration will be retried from the last checkpoint (indexingFailed=true). ` +
1298
+ `Affected attributes: ${attributes.map((a) => a.name).join(', ')}`
1299
+ );
1300
+ } else {
1247
1301
  // update the attributes to indicate that we are finished
1248
1302
  for (const attribute of attributes) {
1249
1303
  delete attribute.lastIndexedKey;
1250
1304
  delete attribute.indexingPID;
1305
+ delete attribute.indexingFailed;
1251
1306
  attribute.dbi.isIndexing = false;
1307
+ // Also clear isIndexing on the currently-active dbi in Table.indices, which may
1308
+ // differ from attribute.dbi if a resetDatabases() call during this migration
1309
+ // opened a new dbi and registered it there.
1310
+ const activeDbi = Table.indices[attribute.name];
1311
+ if (activeDbi) activeDbi.isIndexing = false;
1252
1312
  lastResolution = Table.dbisDB.put(attribute.key, attribute);
1253
1313
  }
1314
+ await lastResolution;
1315
+ // now notify all the threads that we are done and the index is ready to use
1316
+ await signalling.signalSchemaChange(
1317
+ new SchemaEventMsg(process.pid, 'indexing-finished', Table.databaseName, Table.tableName)
1318
+ );
1319
+ logger.info(`Finished indexing ${Table.tableName} attributes`, attributes);
1254
1320
  }
1255
- await lastResolution;
1256
- // now notify all the threads that we are done and the index is ready to use
1257
- await signalling.signalSchemaChange(
1258
- new SchemaEventMsg(process.pid, 'indexing-finished', Table.databaseName, Table.tableName)
1259
- );
1260
- logger.info(`Finished indexing ${Table.tableName} attributes`, attributes);
1261
1321
  } catch (error) {
1262
1322
  logger.error('Error in indexing', error);
1263
1323
  }