@equationalapplications/core-llm-wiki 3.1.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -0
- package/dist/index.d.mts +71 -2
- package/dist/index.d.ts +71 -2
- package/dist/index.js +240 -50
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +239 -51
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -302,6 +302,39 @@ wikiMemory.clearVectorCache();
|
|
|
302
302
|
|
|
303
303
|
The cache is also automatically invalidated on any mutation (`runLibrarian`, `runHeal`, `runPrune`, `runReembed`, `ingestDocument`, `importDump`, `forget`).
|
|
304
304
|
|
|
305
|
+
## Security
|
|
306
|
+
|
|
307
|
+
`@equationalapplications/core-llm-wiki` enforces multiple security layers:
|
|
308
|
+
|
|
309
|
+
### VectorRanker Adapter Security
|
|
310
|
+
|
|
311
|
+
If implementing a custom `VectorRanker`:
|
|
312
|
+
|
|
313
|
+
- **SQL Injection**: ALWAYS use parameterized queries for `entityId`, `factId`, `candidateIds`. Never concatenate into SQL strings.
|
|
314
|
+
- **Entity Isolation**: Filter by `entityId` in all queries to prevent cross-tenant data leaks.
|
|
315
|
+
- **Credential Scrubbing**: Strip API keys, tokens, connection strings from thrown errors before surfacing to host.
|
|
316
|
+
- **Resource Limits**: Cap `limit` and `candidateIds.length` to prevent DoS. Do NOT retain `vector` references beyond the callback scope — holding on to them prevents the arrays from being garbage-collected.
|
|
317
|
+
|
|
318
|
+
See [SECURITY.md](../../SECURITY.md) for complete adapter security guidance and code examples.
|
|
319
|
+
|
|
320
|
+
### Host Application Security
|
|
321
|
+
|
|
322
|
+
When using `VectorRanker`:
|
|
323
|
+
|
|
324
|
+
- **Error Sanitization**: `sanitizeRankerErrors: true` (default) scrubs ranker errors before mirroring via `error.cause`.
|
|
325
|
+
- **Fallback Policy**: Choose `vectorRankerFallback` based on availability vs consistency requirements:
|
|
326
|
+
- `'js-cosine'` (default): Best availability
|
|
327
|
+
- `'keyword'`: Fast fallback without semantic ranking
|
|
328
|
+
- `'empty'`: Strict consistency (no facts on failure)
|
|
329
|
+
- `'throw'`: Fail-fast error propagation
|
|
330
|
+
- **Deletion Hook Contract**: `forget()` / `runPrune()` reject when the hook times out or fails. This prevents GDPR violations in which deleted vectors remain retrievable. Handle failures by retrying or by queueing the deletion for reconciliation.
|
|
331
|
+
- **Timeout Tuning**: Set `deletionHookTimeoutMs` (in milliseconds) per deployment; the default is `30000` (30 s). As a starting point, use about `5000` (5 s) for interactive UX and `60000` (60 s) for background jobs.
|
|
332
|
+
|
|
333
|
+
Core WikiMemory provides:
|
|
334
|
+
- **Defensive Copies**: Query/embedding vectors copied before ranker/hook calls
|
|
335
|
+
- **Input Validation**: `sourceRef`/`sourceHash` normalized; embedding dimensions validated
|
|
336
|
+
- **Parameterized Queries**: All SQL uses bind parameters
|
|
337
|
+
|
|
305
338
|
## Usage
|
|
306
339
|
|
|
307
340
|
```typescript
|
package/dist/index.d.mts
CHANGED
|
@@ -60,7 +60,15 @@ interface WikiFact {
|
|
|
60
60
|
body: string;
|
|
61
61
|
tags: string[];
|
|
62
62
|
confidence: 'certain' | 'inferred' | 'tentative';
|
|
63
|
-
|
|
63
|
+
/**
|
|
64
|
+
* Source type of this fact.
|
|
65
|
+
* - 'immutable_document': From ingestDocument(), cannot be modified by system (librarian/heal).
|
|
66
|
+
* Only removable via forget() or replaced via re-ingest.
|
|
67
|
+
* - 'librarian_inferred': Created by runLibrarian() from events, or by runHeal() when synthesizing new inferred facts.
|
|
68
|
+
* - 'user_stated': Direct user statement.
|
|
69
|
+
* - 'user_confirmed': User-confirmed fact.
|
|
70
|
+
*/
|
|
71
|
+
source_type: 'user_stated' | 'librarian_inferred' | 'user_confirmed' | 'immutable_document';
|
|
64
72
|
source_hash: string | null;
|
|
65
73
|
source_ref: string | null;
|
|
66
74
|
created_at: number;
|
|
@@ -145,6 +153,11 @@ interface VectorRankerSemanticResult {
|
|
|
145
153
|
*/
|
|
146
154
|
interface VectorRankerRankArgs {
|
|
147
155
|
entityId: string;
|
|
156
|
+
/**
|
|
157
|
+
* Query embedding. Treat as readonly — core provides a defensive copy,
|
|
158
|
+
* but adapters MUST NOT mutate this array. Mutation can corrupt
|
|
159
|
+
* WikiMemory's internal vector cache and JS-cosine fallback path.
|
|
160
|
+
*/
|
|
148
161
|
queryVec: Float32Array | number[];
|
|
149
162
|
/**
|
|
150
163
|
* When set (MiniSearch pre-filter path): ranker MUST only produce results for ids in this set.
|
|
@@ -172,6 +185,13 @@ interface VectorRanker {
|
|
|
172
185
|
/**
|
|
173
186
|
* Called after a fact's embedding is successfully persisted to embedding_blob (or cleared).
|
|
174
187
|
* Hosts use this to keep sqlite-vec / external indexes consistent with SQLite as source of truth.
|
|
188
|
+
*
|
|
189
|
+
* On deletion paths (forget, prune, hard-delete), core awaits this hook to ensure ANN cleanup
|
|
190
|
+
* completes before the deletion call resolves (GDPR compliance). Hook failures or timeouts on
|
|
191
|
+
* those paths reject the deletion call.
|
|
192
|
+
*
|
|
193
|
+
* Treat `vector` as readonly — core provides a defensive copy, but adapters MUST NOT mutate.
|
|
194
|
+
*
|
|
175
195
|
* Optional: if omitted, hosts MUST document "index rebuilt separately" and accept stale ANN until rebuild.
|
|
176
196
|
*/
|
|
177
197
|
onEmbeddingPersisted?(event: {
|
|
@@ -226,6 +246,29 @@ interface WikiOptions {
|
|
|
226
246
|
* Ignored when vectorRankerFallback is 'throw'. Default false.
|
|
227
247
|
*/
|
|
228
248
|
propagateRankerFailureToRetrievalFallback?: boolean;
|
|
249
|
+
/**
|
|
250
|
+
* When true (default), sanitize ranker errors before exposing via error.cause
|
|
251
|
+
* to prevent credential leakage in host telemetry. Disable only when you
|
|
252
|
+
* control the ranker implementation.
|
|
253
|
+
*
|
|
254
|
+
* Sanitization replaces error message/stack with a generic message preserving
|
|
255
|
+
* only the error type (constructor name).
|
|
256
|
+
*/
|
|
257
|
+
sanitizeRankerErrors?: boolean;
|
|
258
|
+
/**
|
|
259
|
+
* Timeout (ms) for onEmbeddingPersisted hook on GDPR deletion paths
|
|
260
|
+
* (forget, _doPrune). Hook must complete within this window or the
|
|
261
|
+
* deletion operation rejects. Default 30000.
|
|
262
|
+
* Lower for interactive deletes; raise for slow remote ANN backends.
|
|
263
|
+
*/
|
|
264
|
+
deletionHookTimeoutMs?: number;
|
|
265
|
+
/**
|
|
266
|
+
* Escape hatch: skip onEmbeddingPersisted on deletion paths entirely.
|
|
267
|
+
* Use ONLY when the ANN backend is permanently decommissioned. Vectors
|
|
268
|
+
* orphaned in the (unreachable) external index are accepted as a tradeoff.
|
|
269
|
+
* NOT GDPR-safe for live indexes. Default false.
|
|
270
|
+
*/
|
|
271
|
+
forceDeleteIgnoreRankerHook?: boolean;
|
|
229
272
|
}
|
|
230
273
|
interface MemoryBundle {
|
|
231
274
|
facts: WikiFact[];
|
|
@@ -278,6 +321,15 @@ declare class WikiBusyError extends Error {
|
|
|
278
321
|
readonly entityId: string;
|
|
279
322
|
constructor(operation: WikiBusyOperation, entityId: string);
|
|
280
323
|
}
|
|
324
|
+
declare class PrunePartialFailureError extends Error {
|
|
325
|
+
readonly deleted: number;
|
|
326
|
+
readonly failedAt: string;
|
|
327
|
+
readonly remaining: number;
|
|
328
|
+
readonly deletedTasks: number;
|
|
329
|
+
readonly deletedEvents: number;
|
|
330
|
+
readonly cause: Error;
|
|
331
|
+
constructor(deleted: number, failedAt: string, remaining: number, cause: Error, deletedTasks?: number, deletedEvents?: number);
|
|
332
|
+
}
|
|
281
333
|
|
|
282
334
|
declare class WikiMemory {
|
|
283
335
|
private db;
|
|
@@ -314,7 +366,16 @@ declare class WikiMemory {
|
|
|
314
366
|
private _librarianKey;
|
|
315
367
|
private _healKey;
|
|
316
368
|
private _warnCrossEntityCollision;
|
|
369
|
+
/** Maps pre-rename enum strings from older dumps to current source_type values. */
|
|
370
|
+
private _normalizeImportedSourceType;
|
|
371
|
+
private assertNoLegacySourceTypes;
|
|
317
372
|
private _notifyEmbeddingPersisted;
|
|
373
|
+
/**
|
|
374
|
+
* GDPR-critical variant: awaits the hook with a timeout and rethrows failures.
|
|
375
|
+
* Use ONLY on deletion paths. forget() calls after soft-delete UPDATE; runPrune()
|
|
376
|
+
* calls before hard DELETE. For best-effort sync, use _notifyEmbeddingPersisted.
|
|
377
|
+
*/
|
|
378
|
+
private _notifyEmbeddingPersistedOrThrow;
|
|
318
379
|
constructor(db: SQLiteAdapter, options: WikiOptions);
|
|
319
380
|
setup(): Promise<void>;
|
|
320
381
|
hasChanged(entityId: string, sourceRef: string, sourceHash: string): Promise<boolean>;
|
|
@@ -351,6 +412,12 @@ declare class WikiMemory {
|
|
|
351
412
|
* Negative return means "a ranks ahead of b" for descending score order.
|
|
352
413
|
*/
|
|
353
414
|
private _compareScoredRows;
|
|
415
|
+
/**
|
|
416
|
+
* Strip potentially sensitive data from ranker errors before exposing to host callbacks.
|
|
417
|
+
* Preserves error type for debugging but removes message/stack that may contain credentials.
|
|
418
|
+
* Recursively sanitizes one level of .cause; deeper chains collapse to type only.
|
|
419
|
+
*/
|
|
420
|
+
private _sanitizeRankerError;
|
|
354
421
|
/**
|
|
355
422
|
* Score candidate rows using in-process JS cosine similarity.
|
|
356
423
|
* Applies hybrid blending (if weight set) and tie-break sorting before returning.
|
|
@@ -414,6 +481,8 @@ declare function formatContext(bundle: MemoryBundle, options?: FormatContextOpti
|
|
|
414
481
|
|
|
415
482
|
declare function formatMemoryDump(dump: MemoryDump): FormattedMemoryDump;
|
|
416
483
|
|
|
484
|
+
declare function parseEmbedding(blob: Uint8Array | null | undefined, text: string | null | undefined): Float32Array | null;
|
|
485
|
+
|
|
417
486
|
declare function createWiki(db: SQLiteAdapter, options: WikiOptions): WikiMemory;
|
|
418
487
|
|
|
419
|
-
export { type EntityStatus, type ExtractedFact, type ExtractedTask, type FormatContextOptions, type FormattedMemoryDump, type LLMProvider, type MemoryBundle, type MemoryDump, type ReadOptions, type SQLiteAdapter, type VectorRanker, type VectorRankerFallback, type VectorRankerRankArgs, type VectorRankerSemanticResult, WikiBusyError, type WikiBusyOperation, type WikiCheckpoint, type WikiConfig, type WikiEvent, type WikiFact, WikiMemory, type WikiOptions, type WikiTask, createWiki, formatContext, formatMemoryDump };
|
|
488
|
+
export { type EntityStatus, type ExtractedFact, type ExtractedTask, type FormatContextOptions, type FormattedMemoryDump, type LLMProvider, type MemoryBundle, type MemoryDump, PrunePartialFailureError, type ReadOptions, type SQLiteAdapter, type VectorRanker, type VectorRankerFallback, type VectorRankerRankArgs, type VectorRankerSemanticResult, WikiBusyError, type WikiBusyOperation, type WikiCheckpoint, type WikiConfig, type WikiEvent, type WikiFact, WikiMemory, type WikiOptions, type WikiTask, createWiki, formatContext, formatMemoryDump, parseEmbedding };
|
package/dist/index.d.ts
CHANGED
|
@@ -60,7 +60,15 @@ interface WikiFact {
|
|
|
60
60
|
body: string;
|
|
61
61
|
tags: string[];
|
|
62
62
|
confidence: 'certain' | 'inferred' | 'tentative';
|
|
63
|
-
|
|
63
|
+
/**
|
|
64
|
+
* Source type of this fact.
|
|
65
|
+
* - 'immutable_document': From ingestDocument(), cannot be modified by system (librarian/heal).
|
|
66
|
+
* Only removable via forget() or replaced via re-ingest.
|
|
67
|
+
* - 'librarian_inferred': Created by runLibrarian() from events, or by runHeal() when synthesizing new inferred facts.
|
|
68
|
+
* - 'user_stated': Direct user statement.
|
|
69
|
+
* - 'user_confirmed': User-confirmed fact.
|
|
70
|
+
*/
|
|
71
|
+
source_type: 'user_stated' | 'librarian_inferred' | 'user_confirmed' | 'immutable_document';
|
|
64
72
|
source_hash: string | null;
|
|
65
73
|
source_ref: string | null;
|
|
66
74
|
created_at: number;
|
|
@@ -145,6 +153,11 @@ interface VectorRankerSemanticResult {
|
|
|
145
153
|
*/
|
|
146
154
|
interface VectorRankerRankArgs {
|
|
147
155
|
entityId: string;
|
|
156
|
+
/**
|
|
157
|
+
* Query embedding. Treat as readonly — core provides a defensive copy,
|
|
158
|
+
* but adapters MUST NOT mutate this array. Mutation can corrupt
|
|
159
|
+
* WikiMemory's internal vector cache and JS-cosine fallback path.
|
|
160
|
+
*/
|
|
148
161
|
queryVec: Float32Array | number[];
|
|
149
162
|
/**
|
|
150
163
|
* When set (MiniSearch pre-filter path): ranker MUST only produce results for ids in this set.
|
|
@@ -172,6 +185,13 @@ interface VectorRanker {
|
|
|
172
185
|
/**
|
|
173
186
|
* Called after a fact's embedding is successfully persisted to embedding_blob (or cleared).
|
|
174
187
|
* Hosts use this to keep sqlite-vec / external indexes consistent with SQLite as source of truth.
|
|
188
|
+
*
|
|
189
|
+
* On deletion paths (forget, prune, hard-delete), core awaits this hook to ensure ANN cleanup
|
|
190
|
+
* completes before the deletion call resolves (GDPR compliance). Hook failures or timeouts on
|
|
191
|
+
* those paths reject the deletion call.
|
|
192
|
+
*
|
|
193
|
+
* Treat `vector` as readonly — core provides a defensive copy, but adapters MUST NOT mutate.
|
|
194
|
+
*
|
|
175
195
|
* Optional: if omitted, hosts MUST document "index rebuilt separately" and accept stale ANN until rebuild.
|
|
176
196
|
*/
|
|
177
197
|
onEmbeddingPersisted?(event: {
|
|
@@ -226,6 +246,29 @@ interface WikiOptions {
|
|
|
226
246
|
* Ignored when vectorRankerFallback is 'throw'. Default false.
|
|
227
247
|
*/
|
|
228
248
|
propagateRankerFailureToRetrievalFallback?: boolean;
|
|
249
|
+
/**
|
|
250
|
+
* When true (default), sanitize ranker errors before exposing via error.cause
|
|
251
|
+
* to prevent credential leakage in host telemetry. Disable only when you
|
|
252
|
+
* control the ranker implementation.
|
|
253
|
+
*
|
|
254
|
+
* Sanitization replaces error message/stack with a generic message preserving
|
|
255
|
+
* only the error type (constructor name).
|
|
256
|
+
*/
|
|
257
|
+
sanitizeRankerErrors?: boolean;
|
|
258
|
+
/**
|
|
259
|
+
* Timeout (ms) for onEmbeddingPersisted hook on GDPR deletion paths
|
|
260
|
+
* (forget, _doPrune). Hook must complete within this window or the
|
|
261
|
+
* deletion operation rejects. Default 30000.
|
|
262
|
+
* Lower for interactive deletes; raise for slow remote ANN backends.
|
|
263
|
+
*/
|
|
264
|
+
deletionHookTimeoutMs?: number;
|
|
265
|
+
/**
|
|
266
|
+
* Escape hatch: skip onEmbeddingPersisted on deletion paths entirely.
|
|
267
|
+
* Use ONLY when the ANN backend is permanently decommissioned. Vectors
|
|
268
|
+
* orphaned in the (unreachable) external index are accepted as a tradeoff.
|
|
269
|
+
* NOT GDPR-safe for live indexes. Default false.
|
|
270
|
+
*/
|
|
271
|
+
forceDeleteIgnoreRankerHook?: boolean;
|
|
229
272
|
}
|
|
230
273
|
interface MemoryBundle {
|
|
231
274
|
facts: WikiFact[];
|
|
@@ -278,6 +321,15 @@ declare class WikiBusyError extends Error {
|
|
|
278
321
|
readonly entityId: string;
|
|
279
322
|
constructor(operation: WikiBusyOperation, entityId: string);
|
|
280
323
|
}
|
|
324
|
+
declare class PrunePartialFailureError extends Error {
|
|
325
|
+
readonly deleted: number;
|
|
326
|
+
readonly failedAt: string;
|
|
327
|
+
readonly remaining: number;
|
|
328
|
+
readonly deletedTasks: number;
|
|
329
|
+
readonly deletedEvents: number;
|
|
330
|
+
readonly cause: Error;
|
|
331
|
+
constructor(deleted: number, failedAt: string, remaining: number, cause: Error, deletedTasks?: number, deletedEvents?: number);
|
|
332
|
+
}
|
|
281
333
|
|
|
282
334
|
declare class WikiMemory {
|
|
283
335
|
private db;
|
|
@@ -314,7 +366,16 @@ declare class WikiMemory {
|
|
|
314
366
|
private _librarianKey;
|
|
315
367
|
private _healKey;
|
|
316
368
|
private _warnCrossEntityCollision;
|
|
369
|
+
/** Maps pre-rename enum strings from older dumps to current source_type values. */
|
|
370
|
+
private _normalizeImportedSourceType;
|
|
371
|
+
private assertNoLegacySourceTypes;
|
|
317
372
|
private _notifyEmbeddingPersisted;
|
|
373
|
+
/**
|
|
374
|
+
* GDPR-critical variant: awaits the hook with a timeout and rethrows failures.
|
|
375
|
+
* Use ONLY on deletion paths. forget() calls after soft-delete UPDATE; runPrune()
|
|
376
|
+
* calls before hard DELETE. For best-effort sync, use _notifyEmbeddingPersisted.
|
|
377
|
+
*/
|
|
378
|
+
private _notifyEmbeddingPersistedOrThrow;
|
|
318
379
|
constructor(db: SQLiteAdapter, options: WikiOptions);
|
|
319
380
|
setup(): Promise<void>;
|
|
320
381
|
hasChanged(entityId: string, sourceRef: string, sourceHash: string): Promise<boolean>;
|
|
@@ -351,6 +412,12 @@ declare class WikiMemory {
|
|
|
351
412
|
* Negative return means "a ranks ahead of b" for descending score order.
|
|
352
413
|
*/
|
|
353
414
|
private _compareScoredRows;
|
|
415
|
+
/**
|
|
416
|
+
* Strip potentially sensitive data from ranker errors before exposing to host callbacks.
|
|
417
|
+
* Preserves error type for debugging but removes message/stack that may contain credentials.
|
|
418
|
+
* Recursively sanitizes one level of .cause; deeper chains collapse to type only.
|
|
419
|
+
*/
|
|
420
|
+
private _sanitizeRankerError;
|
|
354
421
|
/**
|
|
355
422
|
* Score candidate rows using in-process JS cosine similarity.
|
|
356
423
|
* Applies hybrid blending (if weight set) and tie-break sorting before returning.
|
|
@@ -414,6 +481,8 @@ declare function formatContext(bundle: MemoryBundle, options?: FormatContextOpti
|
|
|
414
481
|
|
|
415
482
|
declare function formatMemoryDump(dump: MemoryDump): FormattedMemoryDump;
|
|
416
483
|
|
|
484
|
+
declare function parseEmbedding(blob: Uint8Array | null | undefined, text: string | null | undefined): Float32Array | null;
|
|
485
|
+
|
|
417
486
|
declare function createWiki(db: SQLiteAdapter, options: WikiOptions): WikiMemory;
|
|
418
487
|
|
|
419
|
-
export { type EntityStatus, type ExtractedFact, type ExtractedTask, type FormatContextOptions, type FormattedMemoryDump, type LLMProvider, type MemoryBundle, type MemoryDump, type ReadOptions, type SQLiteAdapter, type VectorRanker, type VectorRankerFallback, type VectorRankerRankArgs, type VectorRankerSemanticResult, WikiBusyError, type WikiBusyOperation, type WikiCheckpoint, type WikiConfig, type WikiEvent, type WikiFact, WikiMemory, type WikiOptions, type WikiTask, createWiki, formatContext, formatMemoryDump };
|
|
488
|
+
export { type EntityStatus, type ExtractedFact, type ExtractedTask, type FormatContextOptions, type FormattedMemoryDump, type LLMProvider, type MemoryBundle, type MemoryDump, PrunePartialFailureError, type ReadOptions, type SQLiteAdapter, type VectorRanker, type VectorRankerFallback, type VectorRankerRankArgs, type VectorRankerSemanticResult, WikiBusyError, type WikiBusyOperation, type WikiCheckpoint, type WikiConfig, type WikiEvent, type WikiFact, WikiMemory, type WikiOptions, type WikiTask, createWiki, formatContext, formatMemoryDump, parseEmbedding };
|