@equationalapplications/core-llm-wiki 2.6.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,10 +2,15 @@
2
2
 
3
3
  Pure TypeScript business logic for LLM Wiki Memory.
4
4
 
5
+ > Inspired by [Andrej Karpathy's LLM Wiki memory spec](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f).
6
+
5
7
  ## Features
6
8
 
7
9
  - **Platform-agnostic** — Zero runtime dependencies; works with any SQLite driver via the `SQLiteAdapter` interface
8
- - **Full-featured memory** — Facts, tasks, events, semantic search, maintenance jobs
10
+ - **Semantic search** — Vector embeddings via your LLM's `embed` function, ranked by cosine similarity
11
+ - **Keyword fallback** — MiniSearch in-memory index for offline/degraded scenarios when embeddings are unavailable
12
+ - **Retrieval tuning** — Per-call overrides for `maxResults`, `preFilterLimit`, and `hybridWeight` blend
13
+ - **Full-featured memory** — Facts, tasks, events, maintenance jobs (librarian, heal, reembed, prune)
9
14
  - **Type-safe** — Built with TypeScript, full type exports
10
15
 
11
16
  ## Installation
@@ -14,6 +19,289 @@ Pure TypeScript business logic for LLM Wiki Memory.
14
19
  npm install @equationalapplications/core-llm-wiki
15
20
  ```
16
21
 
22
+ ## Semantic Search with Embeddings
23
+
24
+ Provide an `embed` function in `llmProvider` to enable vector-based retrieval:
25
+
26
+ ```typescript
27
+ import { WikiMemory } from '@equationalapplications/core-llm-wiki';
28
+
29
+ const wikiMemory = new WikiMemory(db, {
30
+ llmProvider: {
31
+ generateText: async ({ systemPrompt, userPrompt }) => {
32
+ // Your LLM call for extracting facts, tasks
33
+ return 'Model output';
34
+ },
35
+ embed: async (text: string) => {
36
+ // Your embedding service (e.g., OpenAI, Cohere, local)
37
+ const response = await fetch('https://your-app.example.com/api/embed', {
38
+ method: 'POST',
39
+ body: JSON.stringify({ text }),
40
+ });
41
+ const { embedding } = await response.json();
42
+ return embedding; // number[]
43
+ },
44
+ },
45
+ });
46
+
47
+ await wikiMemory.setup();
48
+
49
+ // Query with semantic matching
50
+ const memory = await wikiMemory.read('user-123', 'What should I do this weekend?');
51
+ // Returns facts semantically similar to the query, not lexical matches
52
+ // E.g., the fact "Saturday hiking trip" ranks high even though it has no lexical overlap with the query
53
+ ```
54
+
55
+ When `embed` is unavailable, `read()` silently falls back to MiniSearch keyword search. If an embedding attempt throws, `read()` falls back and calls `onRetrievalFallback` if provided:
56
+
57
+ ```typescript
58
+ const wikiMemory = new WikiMemory(db, {
59
+ llmProvider: {
60
+ generateText: async () => { /* ... */ },
61
+ embed: undefined, // or throws on network error
62
+ },
63
+ onRetrievalFallback: (error) => {
64
+ console.warn('Embedding retrieval unavailable, using keyword search:', error);
65
+ },
66
+ });
67
+
68
+ // read() returns MiniSearch results, onRetrievalFallback not called (embed absent is expected)
69
+ // read() returns MiniSearch results, onRetrievalFallback called (embed threw)
70
+ ```
71
+
72
+ ## Configuration
73
+
74
+ All `WikiConfig` fields are optional:
75
+
76
+ ```typescript
77
+ const wikiMemory = new WikiMemory(db, {
78
+ llmProvider: { /* ... */ },
79
+ config: {
80
+ tablePrefix: 'llm_wiki_', // default: 'llm_wiki_'
81
+ maxResults: 10, // default: 10
82
+ autoLibrarianThreshold: 20, // default: 20 — events before librarian auto-runs
83
+ autoHealThreshold: 100, // default: 100 — events before heal auto-runs
84
+ maxChunkLength: 12000, // default: 12000 (char count per ingestDocument chunk)
85
+ chunkOverlap: 400, // default: 400 (overlap between chunks in characters)
86
+ chunkConcurrency: 1, // default: 1 (parallel LLM calls per ingestDocument)
87
+ pruneRetainSoftDeletedFor: 7, // default: 7 (days before hard-deleting soft-deleted facts)
88
+ pruneEventsAfter: 30, // default: 30 (days before hard-deleting old events)
89
+ orphanAfterDays: 30, // default: 30 (days before runHeal flags sourceless facts; null to disable)
90
+ staleInferredAfterDays: 60, // default: 60 (days before runHeal downgrades inferred facts; null to disable)
91
+ preFilterLimit: 50, // default: undefined — MiniSearch pre-filter before cosine scan; recommended for >500 facts
92
+ hybridWeight: 0.7, // default: undefined — blend semantic (1.0) ↔ keyword (0.0); pure semantic when unset
93
+ },
94
+ });
95
+ ```
96
+
97
+ ## Retrieval Tuning
98
+
99
+ Optimize `read()` performance and blend retrieval strategies:
100
+
101
+ ```typescript
102
+ const config = {
103
+ // Limit cosine similarity scoring to top-K MiniSearch keyword candidates
104
+ preFilterLimit: 50,
105
+
106
+ // Blend semantic and keyword scores (0.0 = pure keyword, 1.0 = pure semantic)
107
+ hybridWeight: 0.7,
108
+
109
+ // Max results returned per read
110
+ maxResults: 10,
111
+ };
112
+
113
+ const wikiMemory = new WikiMemory(db, {
114
+ config,
115
+ llmProvider: { /* ... */ },
116
+ });
117
+
118
+ // Per-call overrides (runtime controls for search dashboards, etc.)
119
+ const memory = await wikiMemory.read('user-123', 'my preferences', {
120
+ maxResults: 5,
121
+ preFilterLimit: 20,
122
+ hybridWeight: 0.5,
123
+ });
124
+ ```
125
+
126
+ **Hybrid scoring blends:**
127
+ - `hybridWeight: 1.0` → all-semantic blend with semantic scores clamped to non-negative range (no keyword component)
128
+ - `hybridWeight: 0.5` → balanced semantic + keyword (50/50 blend)
129
+ - `hybridWeight: 0.0` → pure keyword ranking, skips `embed()` entirely (no LLM API cost)
130
+
131
+ True cosine-range pure semantic ranking (including negative cosine values) is used when `hybridWeight` is left `undefined`.
132
+
133
+ **Pre-filtering optimization:**
134
+ When `preFilterLimit: 50` is set with 1000 facts, cosine similarity is computed only for the top 50 MiniSearch keyword matches, reducing the scoring cost from O(N) to O(K) (here N = 1000, K = 50).
135
+
136
+ ## Pluggable Vector Retrieval
137
+
138
+ When your entity corpus grows, in-process cosine similarity scoring becomes a bottleneck. The optional **`VectorRanker`** interface lets you delegate semantic ranking to **sqlite-vec**, **sqlite-vss**, or an external vector database while `WikiMemory` handles embedding validation, hybrid scoring, and tier-2 row hydration.
139
+
140
+ ### `VectorRanker` purpose
141
+
142
+ `VectorRanker` provides an optional injection point for approximate nearest-neighbor (ANN) ranking:
143
+
144
+ ```typescript
145
+ export interface VectorRanker {
146
+ /**
147
+ * Return semantic scores for facts in scope, sorted by similarity.
148
+ * - `entityId`: restricts results to one entity
149
+ * - `queryVec`: the embedded query (Float32Array or number[])
150
+ * - `candidateIds` (optional): when set, rank only within this set (MiniSearch pre-filter mode)
151
+ * - `limit`: requested top-K count
152
+ */
153
+ rankBySimilarity(args: VectorRankerRankArgs): Promise<VectorRankerSemanticResult[]>;
154
+
155
+ /**
156
+ * Optional hook called after embedding persistence (upsert, reembed, delete).
157
+ * Implementations use this to keep external indexes (sqlite-vec, remote ANN) in sync.
158
+ */
159
+ onEmbeddingPersisted?(event: {
160
+ entityId: string;
161
+ factId: string;
162
+ vector: Float32Array | null; // null = embedding removed
163
+ }): void | Promise<void>;
164
+ }
165
+ ```
166
+
167
+ **When no ranker is configured**, `WikiMemory` uses built-in JS cosine similarity — the same behavior as before `VectorRanker` was introduced. When a ranker is supplied and the embedding preconditions are met (`embed` available, dimensions match, no mismatches), `WikiMemory` delegates scoring to the ranker and blends results with keyword scores.
168
+
169
+ ### Example: sqlite-vec adapter
170
+
171
+ ```typescript
172
+ import { WikiMemory } from '@equationalapplications/core-llm-wiki';
173
+ import type { VectorRanker, VectorRankerRankArgs, VectorRankerSemanticResult } from '@equationalapplications/core-llm-wiki';
174
+
175
+ // Minimal sqlite-vec adapter (pseudo-code)
176
+ const sqliteVecRanker: VectorRanker = {
177
+ async rankBySimilarity(args: VectorRankerRankArgs): Promise<VectorRankerSemanticResult[]> {
178
+ const { entityId, queryVec, candidateIds, limit } = args;
179
+
180
+ // Build KNN query using sqlite-vec's distance functions.
181
+ // sqlite-vec returns cosine distance (0 = identical, 2 = opposite) ascending.
182
+ // Invert to semanticScore: higher = more similar, matching VectorRanker contract.
183
+ let sql = `SELECT id, (1.0 - distance) AS semanticScore FROM vec_facts
184
+ WHERE entity_id = ? AND deleted_at IS NULL`;
185
+ const params: any[] = [entityId];
186
+
187
+ // Apply pre-filter if provided
188
+ if (candidateIds) {
189
+ sql += ` AND id IN (${candidateIds.map(() => '?').join(',')})`;
190
+ params.push(...candidateIds);
191
+ }
192
+
193
+ // KNN search (example syntax; adjust for your sqlite-vec version)
194
+ sql += ` ORDER BY vec MATCH vec_neighbor(?) LIMIT ?`;
195
+ params.push(queryVec, limit);
196
+
197
+ const rows = await db.getAllAsync<{ id: string; semanticScore: number }>(sql, params);
198
+ return rows; // sorted descending by semanticScore (closest distance → highest similarity)
199
+ },
200
+
201
+ async onEmbeddingPersisted(event) {
202
+ const { entityId, factId, vector } = event;
203
+ if (vector) {
204
+ // Upsert into sqlite-vec table
205
+ await db.runAsync(
206
+ `INSERT OR REPLACE INTO vec_facts (id, entity_id, vec) VALUES (?, ?, ?)`,
207
+ [factId, entityId, vector]
208
+ );
209
+ } else {
210
+ // Delete when embedding is removed
211
+ await db.runAsync(`DELETE FROM vec_facts WHERE id = ?`, [factId]);
212
+ }
213
+ },
214
+ };
215
+
216
+ const wikiMemory = new WikiMemory(db, {
217
+ llmProvider: { /* ... */ },
218
+ vectorRanker: sqliteVecRanker,
219
+ });
220
+
221
+ // read() now uses sqlite-vec for scoring instead of JS cosine
222
+ const memory = await wikiMemory.read('user-123', 'my preferences');
223
+ ```
224
+
225
+ ### Fallback policies
226
+
227
+ When `rankBySimilarity` rejects (e.g., ANN service outage, misconfiguration), `WikiMemory` applies a recovery policy:
228
+
229
+ ```typescript
230
+ export type VectorRankerFallback =
231
+ | 'js-cosine' // (default) Score candidates in-process with JS cosine — same as no ranker
232
+ | 'keyword' // Skip semantic ranking; return keyword-only results
233
+ | 'empty' // Semantic facts list empty for this read; tasks/events still included
234
+ | 'throw'; // Reject read() with the ranker error
235
+
236
+ const wikiMemory = new WikiMemory(db, {
237
+ llmProvider: { /* ... */ },
238
+ vectorRanker: sqliteVecRanker,
239
+ vectorRankerFallback: 'js-cosine', // default
240
+ onVectorRankerFallback: (info) => {
241
+ console.warn(
242
+ `Ranker failed (policy: ${info.policy}); error:`,
243
+ info.error
244
+ );
245
+ },
246
+ });
247
+ ```
248
+
249
+ - **`'js-cosine'` (default):** Seamless degradation; same behavior as if no ranker was configured.
250
+ - **`'keyword'`:** Useful when semantic ranking is optional; keyword search proceeds normally.
251
+ - **`'empty'`:** Return no facts for this query (but tasks/events still load); useful for strict consistency.
252
+ - **`'throw'`:** Propagate the error and fail the read.
253
+
254
+ ### `onEmbeddingPersisted` eventual consistency
255
+
256
+ If `vectorRanker.onEmbeddingPersisted` returns a pending Promise, the hook **may resolve asynchronously**. This supports ANN indexes that rebuild on a schedule (e.g., sqlite-vec triggers on transaction commit) or external services with eventual consistency.
257
+
258
+ **Best practice:**
259
+ - If your adapter has **synchronous guarantees** (in-process sqlite-vec, same transaction), await the promise.
260
+ - If your adapter is **eventually consistent** (remote ANN, async rebuild), document the expected lag and note that queries may miss recently added facts until the index refreshes.
261
+ - The **SQLite blob remains the source of truth**; `WikiMemory` always writes embeddings to `embedding_blob` first before calling the hook.
262
+
263
+ ### Hybrid scoring with ranker
264
+
265
+ When both `vectorRanker` and `hybridWeight` are configured, `WikiMemory` still applies hybrid blending after the ranker returns scores:
266
+
267
+ ```typescript
268
+ const wikiMemory = new WikiMemory(db, {
269
+ config: {
270
+ hybridWeight: 0.7, // 70% semantic, 30% keyword
271
+ },
272
+ vectorRanker: sqliteVecRanker,
273
+ });
274
+
275
+ // ranker returns semanticScore; WikiMemory blends with MiniSearch keyword score
276
+ const memory = await wikiMemory.read('user-123', 'my preferences', {
277
+ hybridWeight: 0.5, // per-call override to 50/50 blend
278
+ });
279
+ ```
280
+
281
+ Note on semantics:
282
+ - Leave `hybridWeight` undefined for true pure-semantic cosine-range scoring.
283
+ - Set `hybridWeight: 1` for an all-semantic variant that clamps negative semantic scores to 0.
284
+
285
+ For details on hybrid scoring formulas and trade-offs, see [Retrieval Tuning](#retrieval-tuning) above.
286
+
287
+ ### Spec and issue reference
288
+
289
+ - **Full spec:** [`docs/superpowers/specs/2026-05-07-pluggable-vector-retrieval.md`](https://github.com/equationalapplications/expo-llm-wiki/blob/main/docs/superpowers/specs/2026-05-07-pluggable-vector-retrieval.md)
290
+ - **GitHub issue:** [#15](https://github.com/equationalapplications/expo-llm-wiki/issues/15)
291
+
292
+ ## Vector Cache
293
+
294
+ Parsed embedding vectors from full-scan `read()` calls are cached in memory, keyed by entity ID (max 16 entities, max 500 vectors per entity). This avoids redundant `Float32Array` parsing on repeated queries for the same entity. When the 16-entity limit is reached, the oldest-inserted entity is evicted to make room; if an entity exceeds 500 facts, its vectors are not cached at all for that read.
295
+
296
+ After heavy read workloads or on memory-constrained runtimes, you can release the entire cache explicitly:
297
+
298
+ ```typescript
299
+ // Release all cached embedding vectors
300
+ wikiMemory.clearVectorCache();
301
+ ```
302
+
303
+ The cache is also automatically invalidated on any mutation (`runLibrarian`, `runHeal`, `runPrune`, `runReembed`, `ingestDocument`, `importDump`, `forget`).
304
+
17
305
  ## Usage
18
306
 
19
307
  ```typescript
@@ -128,6 +416,46 @@ const adapter: SQLiteAdapter = {
128
416
  };
129
417
  ```
130
418
 
419
+ ## How It Works
420
+
421
+ ```mermaid
422
+ flowchart TD
423
+ A["read(entityId, query)"] --> B{hybridWeight = 0?}
424
+ B -->|Yes| C["MiniSearch only<br/>(skip embed)"]
425
+ B -->|No| D{embed available?}
426
+ D -->|No| C
427
+ D -->|Yes| F["Embed query"]
428
+ F -->|throws| E["onRetrievalFallback<br/>callback"]
429
+ E --> C
430
+ F -->|succeeds| G{preFilterLimit<br/>active?}
431
+ G -->|Yes| H["MiniSearch pre-filter<br/>top K candidates"]
432
+ H --> I["Phase 1: Cosine score<br/>top K candidates"]
433
+ G -->|No| J["Phase 1: Cosine score<br/>all facts"]
434
+ J --> K["Cache vectors<br/>in-memory<br/>(full scan only)"]
435
+ K --> L{hybridWeight<br/>unset?}
436
+ I --> L
437
+ L -->|Yes| M["Pure semantic<br/>ranking"]
438
+ L -->|No| N["Hybrid blend:<br/>semantic + keyword<br/>via MiniSearch"]
439
+ M --> O["Phase 2: Fetch full rows<br/>top maxResults"]
440
+ N --> O
441
+ C --> P["MiniSearch ranking"]
442
+ P --> O
443
+ O --> R["Track access"]
444
+ R --> Q["Return MemoryBundle"]
445
+ ```
446
+
447
+ The flowchart shows:
448
+ 1. **Fast-path** when `hybridWeight = 0` (pure keyword, no embed cost)
449
+ 2. **Fallback chain** when embed unavailable (MiniSearch silently) or throws (`onRetrievalFallback` callback, then MiniSearch)
450
+ 3. **Pre-filtering** to limit cosine scoring to top-K keyword matches (O(N) → O(K))
451
+ 4. **Two-phase SELECT**: phase 1 scores all/filtered facts with minimal columns, phase 2 fetches full rows for winners
452
+ 5. **Hybrid scoring** to blend semantic and keyword rankings
453
+ 6. **Vector caching** on full scans only; reads with `preFilterLimit` active skip cache population
454
+
131
455
  ## License
132
456
 
133
457
  MIT
458
+
459
+ ---
460
+
461
+ Made with ❤️ by Equational Applications LLC. [https://equationalapplications.com/](https://equationalapplications.com/)
package/dist/index.d.mts CHANGED
@@ -28,6 +28,30 @@ interface WikiConfig {
28
28
  maxChunkLength?: number;
29
29
  chunkOverlap?: number;
30
30
  chunkConcurrency?: number;
31
+ /**
32
+ * Max MiniSearch candidates passed to cosine scoring.
33
+ * When set, MiniSearch pre-filters before the cosine scan.
34
+ * Only applies when embed is provided and succeeds.
35
+ * Default: undefined (full scan).
36
+ */
37
+ preFilterLimit?: number;
38
+ /**
39
+ * Hybrid blend weight (0.0–1.0).
40
+ * 0.0 = pure keyword (skips embed() entirely).
41
+ * 1.0 = pure semantic.
42
+ * Values outside [0,1] are clamped. Ignored when embed is absent or throws.
43
+ * Default: undefined (pure semantic when embed provided).
44
+ */
45
+ hybridWeight?: number;
46
+ }
47
+ interface ReadOptions {
48
+ maxResults?: number;
49
+ /**
50
+ * undefined → use WikiConfig.preFilterLimit (or no pre-filter if also unset).
51
+ * null → explicitly disable a config-level preFilterLimit for this call.
52
+ */
53
+ preFilterLimit?: number | null;
54
+ hybridWeight?: number;
31
55
  }
32
56
  interface WikiFact {
33
57
  id: string;
@@ -41,6 +65,18 @@ interface WikiFact {
41
65
  source_ref: string | null;
42
66
  created_at: number;
43
67
  updated_at: number;
68
+ /**
69
+ * Raw Float32Array bytes for the fact's embedding vector.
70
+ * Set when the fact was fetched via exportDump() with blob preservation.
71
+ * Accepted in importDump() as a real Uint8Array (in-memory round-trip),
72
+ * a Node.js Buffer JSON shape `{ type: 'Buffer', data: number[] }`,
73
+ * or a numeric-keyed plain object `{ 0: byte, 1: byte, ... }` produced
74
+ * by JSON.stringify(Uint8Array).
75
+ */
76
+ embedding_blob?: Uint8Array | {
77
+ type: 'Buffer';
78
+ data: number[];
79
+ } | Record<string, number>;
44
80
  last_accessed_at: number | null;
45
81
  access_count: number;
46
82
  deleted_at: number | null;
@@ -96,20 +132,100 @@ interface LLMProvider {
96
132
  */
97
133
  embed?: (text: string) => Promise<number[]>;
98
134
  }
135
+ /**
136
+ * Result of semantic ranking for a single fact.
137
+ */
138
+ interface VectorRankerSemanticResult {
139
+ id: string;
140
+ /** Cosine similarity in [-1, 1] when exact; implementations MAY document other monotonic scales. */
141
+ semanticScore: number;
142
+ }
143
+ /**
144
+ * Arguments passed to VectorRanker.rankBySimilarity.
145
+ */
146
+ interface VectorRankerRankArgs {
147
+ entityId: string;
148
+ queryVec: Float32Array | number[];
149
+ /**
150
+ * When set (MiniSearch pre-filter path): ranker MUST only produce results for ids in this set.
151
+ * When omitted (full-entity semantic path): ranker scopes by entityId per its backing store contract.
152
+ */
153
+ candidateIds?: readonly string[];
154
+ /**
155
+ * Upper bound on how many distinct fact ids should receive a semanticScore in this call.
156
+ * WikiMemory derives this from maxResults / candidate cardinality / documented oversampling policy.
157
+ */
158
+ limit: number;
159
+ }
160
+ /**
161
+ * Optional backend for semantic candidate scoring / top-k retrieval.
162
+ * When omitted, WikiMemory scores rows with embedding_blob / embedding TEXT in JS (cosine).
163
+ */
164
+ interface VectorRanker {
165
+ /**
166
+ * Return semantic scores for facts in scope, sorted descending by semanticScore (stable tie-breaking
167
+ * not required — WikiMemory reapplies existing tie-breakers after blending).
168
+ * Implementations SHOULD omit facts with no usable vector; callers treat missing ids like today's
169
+ * "no embedding" rows (pure semantic: -2; hybrid: keyword-only portion).
170
+ */
171
+ rankBySimilarity(args: VectorRankerRankArgs): Promise<VectorRankerSemanticResult[]>;
172
+ /**
173
+ * Called after a fact's embedding is successfully persisted to embedding_blob (or cleared).
174
+ * Hosts use this to keep sqlite-vec / external indexes consistent with SQLite as source of truth.
175
+ * Optional: if omitted, hosts MUST document "index rebuilt separately" and accept stale ANN until rebuild.
176
+ */
177
+ onEmbeddingPersisted?(event: {
178
+ entityId: string;
179
+ factId: string;
180
+ vector: Float32Array | null;
181
+ }): void | Promise<void>;
182
+ }
183
+ /**
184
+ * Fallback policy when rankBySimilarity rejects.
185
+ */
186
+ type VectorRankerFallback = 'js-cosine' | 'keyword' | 'empty' | 'throw';
99
187
  interface WikiOptions {
100
188
  config?: WikiConfig;
101
189
  llmProvider: LLMProvider;
102
190
  /**
103
- * Called when embedding-based retrieval is unavailable during `read()` and
104
- * MiniSearch keyword search is used instead. This can happen when:
105
- * - `embed()` throws (e.g. network error, model unavailable)
106
- * - `embed()` returns a vector with non-finite values (NaN / Infinity)
191
+ * Called when embedding-based retrieval is degraded or unavailable during `read()`.
192
+ * This can happen when:
193
+ * - `embed()` throws (e.g. network error, model unavailable) → falls back to keyword search
194
+ * - `embed()` returns a vector with non-finite values (NaN / Infinity) → falls back to keyword search
107
195
  * - The query vector's dimension doesn't match stored embeddings (model switch;
108
- * resolve by calling `runReembed()`)
196
+ * resolve by calling `runReembed()`) → falls back to keyword search
197
+ * - `vectorRanker` returns IDs that don't belong to the requested entity or don't exist
198
+ * (ranker integrity issue; returned rows will be filtered out, reducing result count) →
199
+ * may still use semantic ranking, but with degraded quality
109
200
  *
110
- * `read()` still returns keyword-search results — this is a notification, not an error path.
201
+ * `read()` returns results (keyword fallback or degraded semantic) — this is a notification, not an error path.
111
202
  */
112
203
  onRetrievalFallback?: (error: Error) => void;
204
+ /**
205
+ * Optional backend for semantic candidate scoring / top-k retrieval.
206
+ * When omitted, WikiMemory scores rows with embedding_blob / embedding TEXT in JS (cosine).
207
+ */
208
+ vectorRanker?: VectorRanker;
209
+ /**
210
+ * Fallback policy applied when rankBySimilarity rejects. Default `'js-cosine'`.
211
+ * Ignored when vectorRanker is undefined.
212
+ */
213
+ vectorRankerFallback?: VectorRankerFallback;
214
+ /**
215
+ * Called only when rankBySimilarity rejects (after embeddings path succeeded).
216
+ * Invoked before applying vectorRankerFallback when that policy recovers or before rejecting when policy is 'throw'.
217
+ */
218
+ onVectorRankerFallback?: (info: {
219
+ error: Error;
220
+ /** Effective policy core will apply for this read (same as WikiOptions.vectorRankerFallback, default js-cosine). */
221
+ policy: VectorRankerFallback;
222
+ }) => void;
223
+ /**
224
+ * When true: after rankBySimilarity failure, once the recoverable fallback has finished
225
+ * and read() will resolve, invoke onRetrievalFallback — after onVectorRankerFallback if set.
226
+ * Ignored when vectorRankerFallback is 'throw'. Default false.
227
+ */
228
+ propagateRankerFailureToRetrievalFallback?: boolean;
113
229
  }
114
230
  interface MemoryBundle {
115
231
  facts: WikiFact[];
@@ -145,10 +261,22 @@ interface EntityStatus {
145
261
  librarian: boolean;
146
262
  heal: boolean;
147
263
  }
264
+ /**
265
+ * All operations that can appear in a {@link WikiBusyError}.
266
+ *
267
+ * @remarks **Breaking change from v2.x** — the union previously only contained
268
+ * `'ingest' | 'librarian' | 'heal' | 'prune' | 'reembed'`. The values `'import'`
269
+ * and `'forget'` were added in v3.0. Exhaustive `switch` / narrowing on this type
270
+ * must be updated (or given a `default` arm) to compile without errors.
271
+ */
272
+ type WikiBusyOperation = 'ingest' | 'librarian' | 'heal' | 'prune' | 'reembed' | 'import' | 'forget';
273
+ /**
274
+ * Thrown when a background mutator is already running for the requested entity.
275
+ */
148
276
  declare class WikiBusyError extends Error {
149
- readonly operation: 'ingest' | 'librarian' | 'heal' | 'prune' | 'reembed';
277
+ readonly operation: WikiBusyOperation;
150
278
  readonly entityId: string;
151
- constructor(operation: 'ingest' | 'librarian' | 'heal' | 'prune' | 'reembed', entityId: string);
279
+ constructor(operation: WikiBusyOperation, entityId: string);
152
280
  }
153
281
 
154
282
  declare class WikiMemory {
@@ -159,6 +287,19 @@ declare class WikiMemory {
159
287
  private activeIngestJobs;
160
288
  private miniSearch;
161
289
  private miniSearchEntryIdsByEntity;
290
+ /**
291
+ * Maximum number of entities whose parsed embedding vectors are held in
292
+ * memory. This cap is intentionally conservative so the cache remains safe
293
+ * on memory-constrained runtimes (e.g., mobile/Expo).
294
+ */
295
+ private static readonly MAX_VECTOR_CACHE_ENTITIES;
296
+ /**
297
+ * Maximum number of fact vectors cached per entity. Keep this high enough to
298
+ * preserve the parsed-embedding reuse optimization for common mid-sized
299
+ * entities while still maintaining a bounded memory footprint.
300
+ */
301
+ private static readonly MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
302
+ private vectorCache;
162
303
  private normalizeMiniSearchRow;
163
304
  private rebuildMiniSearchIndex;
164
305
  private storeEmbeddingDimension;
@@ -173,13 +314,19 @@ declare class WikiMemory {
173
314
  private _librarianKey;
174
315
  private _healKey;
175
316
  private _warnCrossEntityCollision;
317
+ private _notifyEmbeddingPersisted;
176
318
  constructor(db: SQLiteAdapter, options: WikiOptions);
177
319
  setup(): Promise<void>;
178
320
  hasChanged(entityId: string, sourceRef: string, sourceHash: string): Promise<boolean>;
179
321
  private _pruneKey;
180
322
  private _reembedKey;
181
323
  private _globalReembedKey;
324
+ private _importKey;
325
+ private _globalImportKey;
326
+ private _forgetKey;
182
327
  private _isReembedActive;
328
+ private _isImportActiveFor;
329
+ private _isForgetActiveFor;
183
330
  /** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
184
331
  private _isAnyMaintenanceActiveWithSuffix;
185
332
  /** Returns true if any ingest job is active for the given entity. */
@@ -194,7 +341,27 @@ declare class WikiMemory {
194
341
  tasks: number;
195
342
  events: number;
196
343
  }>;
197
- read(entityId: string, query: string): Promise<MemoryBundle>;
344
+ read(entityId: string, query: string, options?: ReadOptions): Promise<MemoryBundle>;
345
+ /**
346
+ * Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
347
+ */
348
+ private _tieBreakSort;
349
+ /**
350
+ * Comparator for score + deterministic tie-break fields.
351
+ * Negative return means "a ranks ahead of b" for descending score order.
352
+ */
353
+ private _compareScoredRows;
354
+ /**
355
+ * Score candidate rows using in-process JS cosine similarity.
356
+ * Applies hybrid blending (if weight set) and tie-break sorting before returning.
357
+ */
358
+ private _rankWithJsCosine;
359
+ /**
360
+ * Delegate semantic ranking to the injected VectorRanker.
361
+ * Caller should pass an oversampledLimit to preserve recall after re-ranking.
362
+ * Returns scored results ready for hybrid blending and tie-break sorting.
363
+ */
364
+ private _rankWithVectorRanker;
198
365
  getMemoryBundle(entityId: string): Promise<MemoryBundle>;
199
366
  write(entityId: string, event: Omit<WikiEvent, 'id' | 'entity_id' | 'created_at'>): Promise<void>;
200
367
  private runLibrarianThenMaybeHeal;
@@ -202,16 +369,22 @@ declare class WikiMemory {
202
369
  private _doRunHeal;
203
370
  runLibrarian(entityId: string): Promise<void>;
204
371
  runHeal(entityId: string): Promise<void>;
205
- runReembed(entityId?: string): Promise<{
372
+ runReembed(entityId?: string, opts?: {
373
+ force?: boolean;
374
+ skipExisting?: boolean;
375
+ }): Promise<{
206
376
  embedded: number;
207
377
  skipped: number;
378
+ failed: number;
208
379
  }>;
209
380
  getEntityStatus(entityId: string): EntityStatus;
381
+ clearVectorCache(): void;
210
382
  private _getFullBundle;
211
383
  exportDump(entityIds?: string[]): Promise<MemoryDump>;
212
384
  importDump(dump: MemoryDump, opts?: {
213
385
  merge?: boolean;
214
386
  }): Promise<void>;
387
+ private _doImportEntity;
215
388
  forget(entityId: string, params: {
216
389
  entryId?: string;
217
390
  taskId?: string;
@@ -243,4 +416,4 @@ declare function formatMemoryDump(dump: MemoryDump): FormattedMemoryDump;
243
416
 
244
417
  declare function createWiki(db: SQLiteAdapter, options: WikiOptions): WikiMemory;
245
418
 
246
- export { type EntityStatus, type ExtractedFact, type ExtractedTask, type FormatContextOptions, type FormattedMemoryDump, type LLMProvider, type MemoryBundle, type MemoryDump, type SQLiteAdapter, WikiBusyError, type WikiCheckpoint, type WikiConfig, type WikiEvent, type WikiFact, WikiMemory, type WikiOptions, type WikiTask, createWiki, formatContext, formatMemoryDump };
419
+ export { type EntityStatus, type ExtractedFact, type ExtractedTask, type FormatContextOptions, type FormattedMemoryDump, type LLMProvider, type MemoryBundle, type MemoryDump, type ReadOptions, type SQLiteAdapter, type VectorRanker, type VectorRankerFallback, type VectorRankerRankArgs, type VectorRankerSemanticResult, WikiBusyError, type WikiBusyOperation, type WikiCheckpoint, type WikiConfig, type WikiEvent, type WikiFact, WikiMemory, type WikiOptions, type WikiTask, createWiki, formatContext, formatMemoryDump };