@equationalapplications/core-llm-wiki 3.1.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -302,6 +302,39 @@ wikiMemory.clearVectorCache();
302
302
 
303
303
  The cache is also automatically invalidated on any mutation (`runLibrarian`, `runHeal`, `runPrune`, `runReembed`, `ingestDocument`, `importDump`, `forget`).
304
304
 
305
+ ## Security
306
+
307
+ `@equationalapplications/core-llm-wiki` enforces multiple security layers:
308
+
309
+ ### VectorRanker Adapter Security
310
+
311
+ If implementing a custom `VectorRanker`:
312
+
313
+ - **SQL Injection**: ALWAYS use parameterized queries for `entityId`, `factId`, `candidateIds`. Never concatenate into SQL strings.
314
+ - **Entity Isolation**: Filter by `entityId` in all queries to prevent cross-tenant data leaks.
315
+ - **Credential Scrubbing**: Strip API keys, tokens, connection strings from thrown errors before surfacing to host.
316
+ - **Resource Limits**: Cap `limit` and `candidateIds.length` to prevent DoS. Do NOT retain `vector` references beyond the callback scope — a retained reference keeps the vector from being garbage-collected.
317
+
318
+ See [SECURITY.md](../../SECURITY.md) for complete adapter security guidance and code examples.
319
+
320
+ ### Host Application Security
321
+
322
+ When using `VectorRanker`:
323
+
324
+ - **Error Sanitization**: `sanitizeRankerErrors: true` (default) scrubs ranker errors before they are exposed via `error.cause`.
325
+ - **Fallback Policy**: Choose `vectorRankerFallback` based on availability vs consistency requirements:
326
+ - `'js-cosine'` (default): Best availability
327
+ - `'keyword'`: Fast fallback without semantic ranking
328
+ - `'empty'`: Strict consistency (no facts on failure)
329
+ - `'throw'`: Fail-fast error propagation
330
+ - **Deletion Hook Contract**: `forget()` / `runPrune()` reject when the hook times out or fails. This prevents GDPR violations in which deleted vectors remain retrievable. Handle rejections by retrying or by queuing the deletion for reconciliation.
331
+ - **Timeout Tuning**: Set `deletionHookTimeoutMs` (in milliseconds) per deployment; the default is 30000 (30 s). Suggested values: 5000 (5 s) for interactive UX, 60000 (60 s) for background jobs.
332
+
333
+ Core WikiMemory provides:
334
+ - **Defensive Copies**: Query/embedding vectors copied before ranker/hook calls
335
+ - **Input Validation**: `sourceRef`/`sourceHash` normalized; embedding dimensions validated
336
+ - **Parameterized Queries**: All SQL uses bind parameters
337
+
305
338
  ## Usage
306
339
 
307
340
  ```typescript
package/dist/index.d.mts CHANGED
@@ -60,7 +60,15 @@ interface WikiFact {
60
60
  body: string;
61
61
  tags: string[];
62
62
  confidence: 'certain' | 'inferred' | 'tentative';
63
- source_type: 'user_stated' | 'agent_inferred' | 'user_confirmed' | 'user_document';
63
+ /**
64
+ * Source type of this fact.
65
+ * - 'immutable_document': From ingestDocument(), cannot be modified by system (librarian/heal).
66
+ * Only removable via forget() or replaced via re-ingest.
67
+ * - 'librarian_inferred': Created by runLibrarian() from events, or by runHeal() when synthesizing new inferred facts.
68
+ * - 'user_stated': Direct user statement.
69
+ * - 'user_confirmed': User-confirmed fact.
70
+ */
71
+ source_type: 'user_stated' | 'librarian_inferred' | 'user_confirmed' | 'immutable_document';
64
72
  source_hash: string | null;
65
73
  source_ref: string | null;
66
74
  created_at: number;
@@ -145,6 +153,11 @@ interface VectorRankerSemanticResult {
145
153
  */
146
154
  interface VectorRankerRankArgs {
147
155
  entityId: string;
156
+ /**
157
+ * Query embedding. Treat as readonly — core provides a defensive copy,
158
+ * but adapters MUST NOT mutate this array. Mutation can corrupt
159
+ * WikiMemory's internal vector cache and JS-cosine fallback path.
160
+ */
148
161
  queryVec: Float32Array | number[];
149
162
  /**
150
163
  * When set (MiniSearch pre-filter path): ranker MUST only produce results for ids in this set.
@@ -172,6 +185,13 @@ interface VectorRanker {
172
185
  /**
173
186
  * Called after a fact's embedding is successfully persisted to embedding_blob (or cleared).
174
187
  * Hosts use this to keep sqlite-vec / external indexes consistent with SQLite as source of truth.
188
+ *
189
+ * On deletion paths (forget, prune, hard-delete), core awaits this hook to ensure ANN cleanup
190
+ * completes before the deletion call resolves (GDPR compliance). Hook failures or timeouts on
191
+ * those paths reject the deletion call.
192
+ *
193
+ * Treat `vector` as readonly — core provides a defensive copy, but adapters MUST NOT mutate.
194
+ *
175
195
  * Optional: if omitted, hosts MUST document "index rebuilt separately" and accept stale ANN until rebuild.
176
196
  */
177
197
  onEmbeddingPersisted?(event: {
@@ -226,6 +246,29 @@ interface WikiOptions {
226
246
  * Ignored when vectorRankerFallback is 'throw'. Default false.
227
247
  */
228
248
  propagateRankerFailureToRetrievalFallback?: boolean;
249
+ /**
250
+ * When true (default), sanitize ranker errors before exposing via error.cause
251
+ * to prevent credential leakage in host telemetry. Disable only when you
252
+ * control the ranker implementation.
253
+ *
254
+ * Sanitization replaces error message/stack with a generic message preserving
255
+ * only the error type (constructor name).
256
+ */
257
+ sanitizeRankerErrors?: boolean;
258
+ /**
259
+ * Timeout (ms) for onEmbeddingPersisted hook on GDPR deletion paths
260
+ * (forget, _doPrune). Hook must complete within this window or the
261
+ * deletion operation rejects. Default 30000.
262
+ * Lower for interactive deletes; raise for slow remote ANN backends.
263
+ */
264
+ deletionHookTimeoutMs?: number;
265
+ /**
266
+ * Escape hatch: skip onEmbeddingPersisted on deletion paths entirely.
267
+ * Use ONLY when the ANN backend is permanently decommissioned. Vectors
268
+ * orphaned in the (unreachable) external index are accepted as a tradeoff.
269
+ * NOT GDPR-safe for live indexes. Default false.
270
+ */
271
+ forceDeleteIgnoreRankerHook?: boolean;
229
272
  }
230
273
  interface MemoryBundle {
231
274
  facts: WikiFact[];
@@ -278,6 +321,15 @@ declare class WikiBusyError extends Error {
278
321
  readonly entityId: string;
279
322
  constructor(operation: WikiBusyOperation, entityId: string);
280
323
  }
324
+ declare class PrunePartialFailureError extends Error {
325
+ readonly deleted: number;
326
+ readonly failedAt: string;
327
+ readonly remaining: number;
328
+ readonly deletedTasks: number;
329
+ readonly deletedEvents: number;
330
+ readonly cause: Error;
331
+ constructor(deleted: number, failedAt: string, remaining: number, cause: Error, deletedTasks?: number, deletedEvents?: number);
332
+ }
281
333
 
282
334
  declare class WikiMemory {
283
335
  private db;
@@ -314,7 +366,16 @@ declare class WikiMemory {
314
366
  private _librarianKey;
315
367
  private _healKey;
316
368
  private _warnCrossEntityCollision;
369
+ /** Maps pre-rename enum strings from older dumps to current source_type values. */
370
+ private _normalizeImportedSourceType;
371
+ private assertNoLegacySourceTypes;
317
372
  private _notifyEmbeddingPersisted;
373
+ /**
374
+ * GDPR-critical variant: awaits the hook with a timeout and rethrows failures.
375
+ * Use ONLY on deletion paths. forget() calls after soft-delete UPDATE; runPrune()
376
+ * calls before hard DELETE. For best-effort sync, use _notifyEmbeddingPersisted.
377
+ */
378
+ private _notifyEmbeddingPersistedOrThrow;
318
379
  constructor(db: SQLiteAdapter, options: WikiOptions);
319
380
  setup(): Promise<void>;
320
381
  hasChanged(entityId: string, sourceRef: string, sourceHash: string): Promise<boolean>;
@@ -351,6 +412,12 @@ declare class WikiMemory {
351
412
  * Negative return means "a ranks ahead of b" for descending score order.
352
413
  */
353
414
  private _compareScoredRows;
415
+ /**
416
+ * Strip potentially sensitive data from ranker errors before exposing to host callbacks.
417
+ * Preserves error type for debugging but removes message/stack that may contain credentials.
418
+ * Recursively sanitizes one level of .cause; deeper chains collapse to type only.
419
+ */
420
+ private _sanitizeRankerError;
354
421
  /**
355
422
  * Score candidate rows using in-process JS cosine similarity.
356
423
  * Applies hybrid blending (if weight set) and tie-break sorting before returning.
@@ -414,6 +481,8 @@ declare function formatContext(bundle: MemoryBundle, options?: FormatContextOpti
414
481
 
415
482
  declare function formatMemoryDump(dump: MemoryDump): FormattedMemoryDump;
416
483
 
484
+ declare function parseEmbedding(blob: Uint8Array | null | undefined, text: string | null | undefined): Float32Array | null;
485
+
417
486
  declare function createWiki(db: SQLiteAdapter, options: WikiOptions): WikiMemory;
418
487
 
419
- export { type EntityStatus, type ExtractedFact, type ExtractedTask, type FormatContextOptions, type FormattedMemoryDump, type LLMProvider, type MemoryBundle, type MemoryDump, type ReadOptions, type SQLiteAdapter, type VectorRanker, type VectorRankerFallback, type VectorRankerRankArgs, type VectorRankerSemanticResult, WikiBusyError, type WikiBusyOperation, type WikiCheckpoint, type WikiConfig, type WikiEvent, type WikiFact, WikiMemory, type WikiOptions, type WikiTask, createWiki, formatContext, formatMemoryDump };
488
+ export { type EntityStatus, type ExtractedFact, type ExtractedTask, type FormatContextOptions, type FormattedMemoryDump, type LLMProvider, type MemoryBundle, type MemoryDump, PrunePartialFailureError, type ReadOptions, type SQLiteAdapter, type VectorRanker, type VectorRankerFallback, type VectorRankerRankArgs, type VectorRankerSemanticResult, WikiBusyError, type WikiBusyOperation, type WikiCheckpoint, type WikiConfig, type WikiEvent, type WikiFact, WikiMemory, type WikiOptions, type WikiTask, createWiki, formatContext, formatMemoryDump, parseEmbedding };
package/dist/index.d.ts CHANGED
@@ -60,7 +60,15 @@ interface WikiFact {
60
60
  body: string;
61
61
  tags: string[];
62
62
  confidence: 'certain' | 'inferred' | 'tentative';
63
- source_type: 'user_stated' | 'agent_inferred' | 'user_confirmed' | 'user_document';
63
+ /**
64
+ * Source type of this fact.
65
+ * - 'immutable_document': From ingestDocument(), cannot be modified by system (librarian/heal).
66
+ * Only removable via forget() or replaced via re-ingest.
67
+ * - 'librarian_inferred': Created by runLibrarian() from events, or by runHeal() when synthesizing new inferred facts.
68
+ * - 'user_stated': Direct user statement.
69
+ * - 'user_confirmed': User-confirmed fact.
70
+ */
71
+ source_type: 'user_stated' | 'librarian_inferred' | 'user_confirmed' | 'immutable_document';
64
72
  source_hash: string | null;
65
73
  source_ref: string | null;
66
74
  created_at: number;
@@ -145,6 +153,11 @@ interface VectorRankerSemanticResult {
145
153
  */
146
154
  interface VectorRankerRankArgs {
147
155
  entityId: string;
156
+ /**
157
+ * Query embedding. Treat as readonly — core provides a defensive copy,
158
+ * but adapters MUST NOT mutate this array. Mutation can corrupt
159
+ * WikiMemory's internal vector cache and JS-cosine fallback path.
160
+ */
148
161
  queryVec: Float32Array | number[];
149
162
  /**
150
163
  * When set (MiniSearch pre-filter path): ranker MUST only produce results for ids in this set.
@@ -172,6 +185,13 @@ interface VectorRanker {
172
185
  /**
173
186
  * Called after a fact's embedding is successfully persisted to embedding_blob (or cleared).
174
187
  * Hosts use this to keep sqlite-vec / external indexes consistent with SQLite as source of truth.
188
+ *
189
+ * On deletion paths (forget, prune, hard-delete), core awaits this hook to ensure ANN cleanup
190
+ * completes before the deletion call resolves (GDPR compliance). Hook failures or timeouts on
191
+ * those paths reject the deletion call.
192
+ *
193
+ * Treat `vector` as readonly — core provides a defensive copy, but adapters MUST NOT mutate.
194
+ *
175
195
  * Optional: if omitted, hosts MUST document "index rebuilt separately" and accept stale ANN until rebuild.
176
196
  */
177
197
  onEmbeddingPersisted?(event: {
@@ -226,6 +246,29 @@ interface WikiOptions {
226
246
  * Ignored when vectorRankerFallback is 'throw'. Default false.
227
247
  */
228
248
  propagateRankerFailureToRetrievalFallback?: boolean;
249
+ /**
250
+ * When true (default), sanitize ranker errors before exposing via error.cause
251
+ * to prevent credential leakage in host telemetry. Disable only when you
252
+ * control the ranker implementation.
253
+ *
254
+ * Sanitization replaces error message/stack with a generic message preserving
255
+ * only the error type (constructor name).
256
+ */
257
+ sanitizeRankerErrors?: boolean;
258
+ /**
259
+ * Timeout (ms) for onEmbeddingPersisted hook on GDPR deletion paths
260
+ * (forget, _doPrune). Hook must complete within this window or the
261
+ * deletion operation rejects. Default 30000.
262
+ * Lower for interactive deletes; raise for slow remote ANN backends.
263
+ */
264
+ deletionHookTimeoutMs?: number;
265
+ /**
266
+ * Escape hatch: skip onEmbeddingPersisted on deletion paths entirely.
267
+ * Use ONLY when the ANN backend is permanently decommissioned. Vectors
268
+ * orphaned in the (unreachable) external index are accepted as a tradeoff.
269
+ * NOT GDPR-safe for live indexes. Default false.
270
+ */
271
+ forceDeleteIgnoreRankerHook?: boolean;
229
272
  }
230
273
  interface MemoryBundle {
231
274
  facts: WikiFact[];
@@ -278,6 +321,15 @@ declare class WikiBusyError extends Error {
278
321
  readonly entityId: string;
279
322
  constructor(operation: WikiBusyOperation, entityId: string);
280
323
  }
324
+ declare class PrunePartialFailureError extends Error {
325
+ readonly deleted: number;
326
+ readonly failedAt: string;
327
+ readonly remaining: number;
328
+ readonly deletedTasks: number;
329
+ readonly deletedEvents: number;
330
+ readonly cause: Error;
331
+ constructor(deleted: number, failedAt: string, remaining: number, cause: Error, deletedTasks?: number, deletedEvents?: number);
332
+ }
281
333
 
282
334
  declare class WikiMemory {
283
335
  private db;
@@ -314,7 +366,16 @@ declare class WikiMemory {
314
366
  private _librarianKey;
315
367
  private _healKey;
316
368
  private _warnCrossEntityCollision;
369
+ /** Maps pre-rename enum strings from older dumps to current source_type values. */
370
+ private _normalizeImportedSourceType;
371
+ private assertNoLegacySourceTypes;
317
372
  private _notifyEmbeddingPersisted;
373
+ /**
374
+ * GDPR-critical variant: awaits the hook with a timeout and rethrows failures.
375
+ * Use ONLY on deletion paths. forget() calls after soft-delete UPDATE; runPrune()
376
+ * calls before hard DELETE. For best-effort sync, use _notifyEmbeddingPersisted.
377
+ */
378
+ private _notifyEmbeddingPersistedOrThrow;
318
379
  constructor(db: SQLiteAdapter, options: WikiOptions);
319
380
  setup(): Promise<void>;
320
381
  hasChanged(entityId: string, sourceRef: string, sourceHash: string): Promise<boolean>;
@@ -351,6 +412,12 @@ declare class WikiMemory {
351
412
  * Negative return means "a ranks ahead of b" for descending score order.
352
413
  */
353
414
  private _compareScoredRows;
415
+ /**
416
+ * Strip potentially sensitive data from ranker errors before exposing to host callbacks.
417
+ * Preserves error type for debugging but removes message/stack that may contain credentials.
418
+ * Recursively sanitizes one level of .cause; deeper chains collapse to type only.
419
+ */
420
+ private _sanitizeRankerError;
354
421
  /**
355
422
  * Score candidate rows using in-process JS cosine similarity.
356
423
  * Applies hybrid blending (if weight set) and tie-break sorting before returning.
@@ -414,6 +481,8 @@ declare function formatContext(bundle: MemoryBundle, options?: FormatContextOpti
414
481
 
415
482
  declare function formatMemoryDump(dump: MemoryDump): FormattedMemoryDump;
416
483
 
484
+ declare function parseEmbedding(blob: Uint8Array | null | undefined, text: string | null | undefined): Float32Array | null;
485
+
417
486
  declare function createWiki(db: SQLiteAdapter, options: WikiOptions): WikiMemory;
418
487
 
419
- export { type EntityStatus, type ExtractedFact, type ExtractedTask, type FormatContextOptions, type FormattedMemoryDump, type LLMProvider, type MemoryBundle, type MemoryDump, type ReadOptions, type SQLiteAdapter, type VectorRanker, type VectorRankerFallback, type VectorRankerRankArgs, type VectorRankerSemanticResult, WikiBusyError, type WikiBusyOperation, type WikiCheckpoint, type WikiConfig, type WikiEvent, type WikiFact, WikiMemory, type WikiOptions, type WikiTask, createWiki, formatContext, formatMemoryDump };
488
+ export { type EntityStatus, type ExtractedFact, type ExtractedTask, type FormatContextOptions, type FormattedMemoryDump, type LLMProvider, type MemoryBundle, type MemoryDump, PrunePartialFailureError, type ReadOptions, type SQLiteAdapter, type VectorRanker, type VectorRankerFallback, type VectorRankerRankArgs, type VectorRankerSemanticResult, WikiBusyError, type WikiBusyOperation, type WikiCheckpoint, type WikiConfig, type WikiEvent, type WikiFact, WikiMemory, type WikiOptions, type WikiTask, createWiki, formatContext, formatMemoryDump, parseEmbedding };