npm - harper - Versions diffs - 5.0.17 → 5.0.18 - Mend

harper 5.0.17 → 5.0.18

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/resources/RocksTransactionLogStore.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import { Decoder, readAuditEntry, ENTRY_DATAVIEW, AuditRecord, createAuditEntry
 import { isMainThread } from 'node:worker_threads';
 import { EventEmitter } from 'node:events';
 import { asBinary } from 'lmdb';
+import * as harperLogger from '../utility/logging/harper_logger.ts';
 if (!process.env.HARPER_NO_FLUSH_ON_EXIT && isMainThread) {
 	// we want to be able to test log replay
@@ -288,6 +289,7 @@ export class RocksTransactionLogStore extends EventEmitter {
 			iterable.iterate = () => aggregateIterator;
 		}
 		const mappedAggregateIterable = iterable.map(({ timestamp, data, endTxn }: TransactionEntry) => {
+<<<<<<< HEAD
 			const decoder = new Decoder(data.buffer, data.byteOffset, data.byteLength);
 			data.dataView = decoder;
 			// This represents the data that shouldn't be transferred for replication
@@ -311,6 +313,55 @@ export class RocksTransactionLogStore extends EventEmitter {
 			auditRecord.previousVersion = previousVersion;
 			auditRecord.structureVersion = structureVersion & 0x00ffffff;
 			return auditRecord;
+=======
+			// Per-entry try/catch: a corrupt rocks prelude (first 4-16 bytes) would otherwise
+			// throw a raw `RangeError: Offset is outside the bounds of the DataView` out
+			// through `iterable.map`, escape the for-of consumer, and land as an
+			// uncaughtException on a later tick — stalling outgoing replication at the
+			// failing offset on every catch-up attempt. On error, yield a sentinel record
+			// with the timestamp preserved so iteration advances past the bad entry;
+			// downstream consumers already skip records with no `tableId`/`type`.
+			try {
+				const decoder = new Decoder(data.buffer, data.byteOffset, data.byteLength);
+				(data as any).dataView = decoder;
+				// This represents the data that shouldn't be transferred for replication
+				let structureVersion = decoder.getUint32(0);
+				let position = 4;
+				let previousResidencyId: number;
+				let previousVersion: number;
+				if (structureVersion & HAS_PREVIOUS_RESIDENCY_ID) {
+					previousResidencyId = decoder.getUint32(position);
+					position += 4;
+				}
+				if (structureVersion & HAS_PREVIOUS_VERSION) {
+					// does previous residency id and version actually require separate flags?
+					previousVersion = decoder.getFloat64(position);
+					position += 8;
+				}
+				const auditRecord = readAuditEntry(data, position, undefined);
+				auditRecord.version = timestamp;
+				auditRecord.endTxn = endTxn;
+				auditRecord.previousResidencyId = previousResidencyId;
+				auditRecord.previousVersion = previousVersion;
+				auditRecord.structureVersion = structureVersion & 0x00ffffff;
+				return auditRecord;
+			} catch (error) {
+				harperLogger.error('Failed to decode rocks transaction log entry; skipping', error, {
+					timestamp,
+					byteLength: data?.byteLength,
+				});
+				return {
+					version: timestamp,
+					endTxn,
+					type: undefined,
+					tableId: undefined,
+					recordId: undefined,
+					getValue: () => undefined,
+					getBinaryValue: () => undefined,
+					getBinaryRecordId: () => undefined,
+				} as unknown as AuditRecord;
+			}
+>>>>>>> b84fbbd (fix: skip corrupt audit entries during iteration instead of throwing)
 		});
 		// Add methods to the mapped iterable if we have an aggregate iterator
 		if (aggregateIterator?.addLog) {

package/resources/Table.ts CHANGED Viewed

@@ -805,23 +805,23 @@ export function makeTable(options) {
 		/**
 		 * Set TTL expiration for records in this table. On retrieval, record timestamps are checked for expiration.
 		 * This also informs the scheduling for record eviction.
-		 * @param expirationTime Time in seconds until records expire (are stale)
-		 * @param evictionTime Time in seconds until records are evicted (removed)
+		 * @param opts Time in seconds until records expire, or an options object with `expiration`, `eviction`,
+		 * and `scanInterval` (all in seconds, all optional). Number form preserves any previously configured
+		 * eviction/scanInterval; object form replaces all three.
 		 */
-		static setTTLExpiration(expiration: number | { expiration: number; eviction?: number; scanInterval?: number }) {
-			// we set up a timer to remove expired entries. we only want the timer/reaper to run in one thread,
-			// so we use the first one
-			if (typeof expiration === 'number') {
-				expirationMs = expiration * 1000;
-				if (!evictionMs) evictionMs = 0; // by default, no extra time for eviction
-			} else if (expiration && typeof expiration === 'object') {
-				// an object with expiration times/options specified
-				expirationMs = expiration.expiration * 1000;
-				evictionMs = (expiration.eviction || 0) * 1000;
-				cleanupInterval = expiration.scanInterval * 1000;
-			} else throw new Error('Invalid expiration value type');
+		static setTTLExpiration(opts: number | { expiration?: number; eviction?: number; scanInterval?: number }) {
+			if (opts == null || (typeof opts !== 'number' && typeof opts !== 'object'))
+				throw new Error('Invalid expiration value type');
+			if (typeof opts === 'number') {
+				expirationMs = opts * 1000;
+			} else {
+				// `??` so an explicit 0 is treated as the user's chosen value, not as "missing"
+				expirationMs = (opts.expiration ?? 0) * 1000;
+				evictionMs = (opts.eviction ?? 0) * 1000;
+				cleanupInterval = (opts.scanInterval ?? 0) * 1000;
+			}
 			if (expirationMs < 0) throw new Error('Expiration can not be negative');
-			// default to one quarter of the total eviction time, and make sure it fits into a 32-bit signed integer
+			// default to one quarter of the total expiration+eviction window
 			cleanupInterval = cleanupInterval || (expirationMs + evictionMs) / 4;
 			scheduleCleanup();
 		}
@@ -4245,6 +4245,8 @@ export function makeTable(options) {
 									Boolean(invalidated),
 									auditRecord
 								);
+								// arm the eviction scanner, mirroring the .put() path
+								if (sourceContext.expiresAt) scheduleCleanup();
 							} else if (existingEntry) {
 								logger.trace?.(
 									`Deleting resolved record from source with id: ${id}, timestamp: ${new Date(txnTime).toISOString()}`

package/resources/auditStore.ts CHANGED Viewed

@@ -49,7 +49,15 @@ export type AuditRecord = {
 	previousNodeId?: number;
 	previousAdditionalAuditRefs?: Array<{ version: number; nodeId: number }>;
 	endTxn?: boolean;
+<<<<<<< HEAD
 	structureVersion?: number;
+=======
+	getBinaryRecordId?: any;
+<<<<<<< HEAD
+	corrupt?: boolean;
+>>>>>>> b84fbbd (fix: skip corrupt audit entries during iteration instead of throwing)
+=======
+>>>>>>> 6b6192c (test: cover lmdb keyEncoder and rocks-prelude paths; drop unused corrupt flag)
 };
 const ENTRY_HEADER = Buffer.alloc(2816); // this is sized to be large enough for the maximum key size (1976) plus large usernames. We may want to consider some limits on usernames to ensure this all fits
@@ -73,6 +81,16 @@ export const transactionKeyEncoder = {
 		if (buffer[start] === 66) {
 			const dataView =
 				buffer.dataView || (buffer.dataView = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength));
+			// Without this bounds check, a truncated key buffer escapes as RangeError up
+			// through lmdb-js's iterator and lands as an uncaughtException on a later tick,
+			// stalling outgoing replication for the affected (peer, db) pair.
+			if (start + 8 > buffer.byteLength) {
+				harperLogger.warn('Audit key buffer too short for float64 read; returning NaN sentinel', {
+					start,
+					byteLength: buffer.byteLength,
+				});
+				return NaN;
+			}
 			return dataView.getFloat64(start);
 		} else {
 			return readKey(buffer, start, end);
@@ -439,6 +457,15 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
 		const nodeId = decoder.readInt();
 		const tableId = decoder.readInt();
 		let length = decoder.readInt();
+		// A corrupt length field (e.g., a 0xff-prefixed uint32) would otherwise push
+		// decoder.position hundreds of megabytes past the buffer; the next readFloat64
+		// then throws with the bogus position in the message. Failing fast here keeps
+		// the throw inside this try/catch so we surface a sentinel instead.
+		if (length < 0 || decoder.position + length > buffer.byteLength) {
+			throw new RangeError(
+				`Audit entry recordId length ${length} exceeds remaining buffer (position ${decoder.position}, byteLength ${buffer.byteLength})`
+			);
+		}
 		const recordIdStart = decoder.position;
 		const recordIdEnd = (decoder.position += length);
 		// TODO: Once we support multiple format versions, we can conditionally read the version (and the previousResidencyId)
@@ -469,6 +496,11 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
 			}
 		}
 		length = decoder.readInt();
+		if (length < 0 || decoder.position + length > buffer.byteLength) {
+			throw new RangeError(
+				`Audit entry username length ${length} exceeds remaining buffer (position ${decoder.position}, byteLength ${buffer.byteLength})`
+			);
+		}
 		const usernameStart = decoder.position;
 		const usernameEnd = (decoder.position += length);
 		let value: any;
@@ -477,8 +509,17 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
 			tableId,
 			nodeId,
 			get recordId() {
-				// use a subarray to protect against the underlying buffer being modified
-				return readKey(buffer.subarray(0, recordIdEnd), recordIdStart, recordIdEnd);
+				// The recordId is decoded lazily and lives outside readAuditEntry's try/catch,
+				// so a corrupt recordId region would otherwise escape as an uncaught RangeError
+				// on property access. Catch and return undefined; callers already treat missing
+				// recordId as a skip-eligible entry.
+				try {
+					// use a subarray to protect against the underlying buffer being modified
+					return readKey(buffer.subarray(0, recordIdEnd), recordIdStart, recordIdEnd);
+				} catch (error) {
+					harperLogger.warn('Failed to decode audit recordId; treating as corrupt', error);
+					return undefined;
+				}
 			},
 			getBinaryRecordId() {
 				return buffer.subarray(recordIdStart, recordIdEnd);
@@ -486,9 +527,14 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
 			version,
 			previousVersion,
 			get user() {
-				return usernameEnd > usernameStart
-					? readKey(buffer.subarray(0, usernameEnd), usernameStart, usernameEnd)
-					: undefined;
+				try {
+					return usernameEnd > usernameStart
+						? readKey(buffer.subarray(0, usernameEnd), usernameStart, usernameEnd)
+						: undefined;
+				} catch (error) {
+					harperLogger.warn('Failed to decode audit username; treating as corrupt', error);
+					return undefined;
+				}
 			},
 			get encoded() {
 				return start ? buffer.subarray(start, end) : buffer;
@@ -523,10 +569,56 @@ export function readAuditEntry(buffer: Uint8Array, start = 0, end = undefined):
 		};
 	} catch (error) {
 		harperLogger.error('Reading audit entry error', error, buffer);
+<<<<<<< HEAD
 		return {};
+=======
+		return createCorruptAuditSentinel(buffer, start, end);
+>>>>>>> b84fbbd (fix: skip corrupt audit entries during iteration instead of throwing)
 	}
 }
+/**
+ * Build a structurally complete audit record for an entry that failed to decode. The fields
+ * mirror the happy-path shape so downstream consumers that access (e.g.) `getValue` or the
+ * `recordId` getter don't blow up with a `TypeError: not a function` / `undefined.is(...)`
+ * after the header decode already failed. Consumers identify these by the undefined
+ * `tableId`/`type` (the same signal lmdb has produced from this catch since before this
+ * change) and skip them — `classifyAuditEntryForReplay` calls them out as `corrupt-header`,
+ * and the dispatch loops in Table.ts / transactionBroadcast.ts filter via tableId guards.
+ */
+function createCorruptAuditSentinel(buffer: Uint8Array, start: number, end: number | undefined): AuditRecord {
+	return {
+		type: undefined,
+		tableId: undefined,
+		nodeId: undefined,
+		recordId: undefined,
+		version: undefined,
+		previousVersion: undefined,
+		user: undefined,
+		extendedType: undefined,
+		residencyId: undefined,
+		previousResidencyId: undefined,
+		expiresAt: undefined,
+		originatingOperation: undefined,
+		previousAdditionalAuditRefs: undefined,
+		get encoded() {
+			return start ? buffer.subarray(start, end) : buffer;
+		},
+		get size() {
+			return start !== undefined && end !== undefined ? end - start : buffer.byteLength;
+		},
+		getBinaryRecordId() {
+			return undefined;
+		},
+		getValue() {
+			return undefined;
+		},
+		getBinaryValue() {
+			return undefined;
+		},
+	} as any;
+}
 export class Decoder extends DataView<ArrayBufferLike> {
 	position = 0;
 	readInt() {

package/resources/databases.ts CHANGED Viewed

@@ -1063,6 +1063,7 @@ export function table<TableResourceType>(tableDefinition: TableDefinition): Tabl
 				const dbi = openIndex(dbiKey, rootStore, attribute);
 				if (
 					changed ||
+					attributeDescriptor.indexingFailed ||
 					(attributeDescriptor.indexingPID && attributeDescriptor.indexingPID !== process.pid) ||
 					attributeDescriptor.restartNumber < workerData?.restartNumber
 				) {
@@ -1071,6 +1072,7 @@ export function table<TableResourceType>(tableDefinition: TableDefinition): Tabl
 					attributeDescriptor = attributesDbi.getSync(dbiKey);
 					if (
 						changed ||
+						attributeDescriptor.indexingFailed ||
 						(attributeDescriptor.indexingPID && attributeDescriptor.indexingPID !== process.pid) ||
 						attributeDescriptor.restartNumber < workerData?.restartNumber
 					) {
@@ -1084,14 +1086,20 @@ export function table<TableResourceType>(tableDefinition: TableDefinition): Tabl
 						if (hasExistingData) {
 							attribute.lastIndexedKey = attributeDescriptor?.lastIndexedKey ?? undefined;
 							attribute.indexingPID = process.pid;
+							delete attribute.indexingFailed; // clear failure flag for the new run
 							dbi.isIndexing = true;
-							Object.defineProperty(attribute, 'dbi', { value: dbi });
+							Object.defineProperty(attribute, 'dbi', { value: dbi, configurable: true, enumerable: false });
 							// we only set indexing nulls to true if new or reindexing, we can't have partial indexing of null
 							attributesToIndex.push(attribute);
 						}
 					}
 					attributesDbi.put(dbiKey, attribute);
 				}
+				// If a migration is in progress (indexingPID set), any newly opened dbi must also
+				// reflect isIndexing = true. A resetDatabases() during an active runIndexing creates
+				// a new dbi object; without this, queries could use the new dbi (isIndexing = false)
+				// and return incomplete results while the backfill is still running.
+				if (attributeDescriptor?.indexingPID) dbi.isIndexing = true;
 				if (attributeDescriptor?.indexNulls && attribute.indexNulls === undefined) attribute.indexNulls = true;
 				dbi.indexNulls = attribute.indexNulls;
 				indices[attribute.name] = dbi;
@@ -1162,6 +1170,7 @@ async function runIndexing(Table, attributes, indicesToRemove) {
 			lastResolution = index.drop();
 		}
 		let interrupted;
+		let hadIndexingErrors = false;
 		const attributeErrorReported = {};
 		let indexed = 0;
 		const attributesLength = attributes.length;
@@ -1215,6 +1224,7 @@ async function runIndexing(Table, attributes, indicesToRemove) {
 							}
 						}
 					} catch (error) {
+						hadIndexingErrors = true;
 						if (!attributeErrorReported[property]) {
 							// just report an indexing error once per attribute so we don't spam the logs
 							attributeErrorReported[property] = true;
@@ -1227,6 +1237,7 @@ async function runIndexing(Table, attributes, indicesToRemove) {
 					() => outstanding--,
 					(error) => {
 						outstanding--;
+						hadIndexingErrors = true;
 						logger.error(error);
 					}
 				);
@@ -1244,20 +1255,69 @@ async function runIndexing(Table, attributes, indicesToRemove) {
 				if (outstanding > MAX_OUTSTANDING_INDEXING) await lastResolution;
 				else if (outstanding > MIN_OUTSTANDING_INDEXING) await new Promise((resolve) => setImmediate(resolve)); // yield event turn, don't want to use all computation
 			}
+		}
+		// Await the last pending put. If it rejects, that is also an indexing error.
+		// Note: the when() calls above already attach rejection handlers to each record's
+		// last-put promise; this try-catch specifically handles the case where lastResolution
+		// itself rejects (i.e. the very last put in the loop failed) which would otherwise
+		// throw past the hadIndexingErrors check to the outer catch. The broader issue of
+		// unhandled rejections from non-last puts in multi-value attributes is pre-existing
+		// and out of scope for this fix.
+		try {
+			await lastResolution;
+		} catch (error) {
+			hadIndexingErrors = true;
+			logger.error(error);
+		}
+		// Yield one more event turn so any queued when() error callbacks (which fire as
+		// microtasks when their tracked promise settles) have a chance to set hadIndexingErrors
+		// before we decide whether to mark indexing as complete.
+		await new Promise((resolve) => setImmediate(resolve));
+		if (hadIndexingErrors) {
+			// Some records failed to index. Persist the failure marker in the descriptor so
+			// the next call to table() (including after a restart with a fresh PID) re-triggers
+			// the backfill from the last checkpoint. Do NOT clear indexingPID or isIndexing —
+			// leave the index in its incomplete state so queries return 503 "not indexed yet"
+			// rather than silently returning partial results. This is the key fix for the
+			// serent-canopy issue #135 fingerprint: a completed migration with transient errors
+			// (e.g. ERR_BUSY from RocksDB under load) leaving gaps while appearing successful.
+			for (const attribute of attributes) {
+				attribute.indexingFailed = true;
+				// Preserve lastIndexedKey so the retry resumes from the last checkpoint.
+				lastResolution = Table.dbisDB.put(attribute.key, attribute);
+				// Keep isIndexing = true on both the attribute.dbi and the currently-active dbi
+				// in Table.indices (which may differ if resetDatabases() ran during this pass).
+				attribute.dbi.isIndexing = true;
+				const activeDbi = Table.indices[attribute.name];
+				if (activeDbi) activeDbi.isIndexing = true;
+			}
+			await lastResolution;
+			logger.warn(
+				`Indexing of ${Table.tableName} encountered errors on some records - index will remain incomplete. ` +
+					`On next restart the migration will be retried from the last checkpoint (indexingFailed=true). ` +
+					`Affected attributes: ${attributes.map((a) => a.name).join(', ')}`
+			);
+		} else {
 			// update the attributes to indicate that we are finished
 			for (const attribute of attributes) {
 				delete attribute.lastIndexedKey;
 				delete attribute.indexingPID;
+				delete attribute.indexingFailed;
 				attribute.dbi.isIndexing = false;
+				// Also clear isIndexing on the currently-active dbi in Table.indices, which may
+				// differ from attribute.dbi if a resetDatabases() call during this migration
+				// opened a new dbi and registered it there.
+				const activeDbi = Table.indices[attribute.name];
+				if (activeDbi) activeDbi.isIndexing = false;
 				lastResolution = Table.dbisDB.put(attribute.key, attribute);
 			}
+			await lastResolution;
+			// now notify all the threads that we are done and the index is ready to use
+			await signalling.signalSchemaChange(
+				new SchemaEventMsg(process.pid, 'indexing-finished', Table.databaseName, Table.tableName)
+			);
+			logger.info(`Finished indexing ${Table.tableName} attributes`, attributes);
 		}
-		await lastResolution;
-		// now notify all the threads that we are done and the index is ready to use
-		await signalling.signalSchemaChange(
-			new SchemaEventMsg(process.pid, 'indexing-finished', Table.databaseName, Table.tableName)
-		);
-		logger.info(`Finished indexing ${Table.tableName} attributes`, attributes);
 	} catch (error) {
 		logger.error('Error in indexing', error);
 	}