@equationalapplications/core-llm-wiki 2.6.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +329 -1
- package/dist/index.d.mts +184 -11
- package/dist/index.d.ts +184 -11
- package/dist/index.js +1134 -222
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1134 -222
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -28,6 +28,30 @@ interface WikiConfig {
|
|
|
28
28
|
maxChunkLength?: number;
|
|
29
29
|
chunkOverlap?: number;
|
|
30
30
|
chunkConcurrency?: number;
|
|
31
|
+
/**
|
|
32
|
+
* Max MiniSearch candidates passed to cosine scoring.
|
|
33
|
+
* When set, MiniSearch pre-filters before the cosine scan.
|
|
34
|
+
* Only applies when embed is provided and succeeds.
|
|
35
|
+
* Default: undefined (full scan).
|
|
36
|
+
*/
|
|
37
|
+
preFilterLimit?: number;
|
|
38
|
+
/**
|
|
39
|
+
* Hybrid blend weight (0.0–1.0).
|
|
40
|
+
* 0.0 = pure keyword (skips embed() entirely).
|
|
41
|
+
* 1.0 = pure semantic.
|
|
42
|
+
* Values outside [0,1] are clamped. Ignored when embed is absent or throws.
|
|
43
|
+
* Default: undefined (pure semantic when embed provided).
|
|
44
|
+
*/
|
|
45
|
+
hybridWeight?: number;
|
|
46
|
+
}
|
|
47
|
+
interface ReadOptions {
|
|
48
|
+
maxResults?: number;
|
|
49
|
+
/**
|
|
50
|
+
* undefined → use WikiConfig.preFilterLimit (or no pre-filter if also unset).
|
|
51
|
+
* null → explicitly disable a config-level preFilterLimit for this call.
|
|
52
|
+
*/
|
|
53
|
+
preFilterLimit?: number | null;
|
|
54
|
+
hybridWeight?: number;
|
|
31
55
|
}
|
|
32
56
|
interface WikiFact {
|
|
33
57
|
id: string;
|
|
@@ -41,6 +65,18 @@ interface WikiFact {
|
|
|
41
65
|
source_ref: string | null;
|
|
42
66
|
created_at: number;
|
|
43
67
|
updated_at: number;
|
|
68
|
+
/**
|
|
69
|
+
* Raw Float32Array bytes for the fact's embedding vector.
|
|
70
|
+
* Set when the fact was fetched via exportDump() with blob preservation.
|
|
71
|
+
* Accepted in importDump() as a real Uint8Array (in-memory round-trip),
|
|
72
|
+
* a Node.js Buffer JSON shape `{ type: 'Buffer', data: number[] }`,
|
|
73
|
+
* or a numeric-keyed plain object `{ 0: byte, 1: byte, ... }` produced
|
|
74
|
+
* by JSON.stringify(Uint8Array).
|
|
75
|
+
*/
|
|
76
|
+
embedding_blob?: Uint8Array | {
|
|
77
|
+
type: 'Buffer';
|
|
78
|
+
data: number[];
|
|
79
|
+
} | Record<string, number>;
|
|
44
80
|
last_accessed_at: number | null;
|
|
45
81
|
access_count: number;
|
|
46
82
|
deleted_at: number | null;
|
|
@@ -96,20 +132,100 @@ interface LLMProvider {
|
|
|
96
132
|
*/
|
|
97
133
|
embed?: (text: string) => Promise<number[]>;
|
|
98
134
|
}
|
|
135
|
+
/**
|
|
136
|
+
* Result of semantic ranking for a single fact.
|
|
137
|
+
*/
|
|
138
|
+
interface VectorRankerSemanticResult {
|
|
139
|
+
id: string;
|
|
140
|
+
/** Cosine similarity in [-1, 1] when exact; implementations MAY document other monotonic scales. */
|
|
141
|
+
semanticScore: number;
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* Arguments passed to VectorRanker.rankBySimilarity.
|
|
145
|
+
*/
|
|
146
|
+
interface VectorRankerRankArgs {
|
|
147
|
+
entityId: string;
|
|
148
|
+
queryVec: Float32Array | number[];
|
|
149
|
+
/**
|
|
150
|
+
* When set (MiniSearch pre-filter path): ranker MUST only produce results for ids in this set.
|
|
151
|
+
* When omitted (full-entity semantic path): ranker scopes by entityId per its backing store contract.
|
|
152
|
+
*/
|
|
153
|
+
candidateIds?: readonly string[];
|
|
154
|
+
/**
|
|
155
|
+
* Upper bound on how many distinct fact ids should receive a semanticScore in this call.
|
|
156
|
+
* WikiMemory derives this from maxResults / candidate cardinality / documented oversampling policy.
|
|
157
|
+
*/
|
|
158
|
+
limit: number;
|
|
159
|
+
}
|
|
160
|
+
/**
|
|
161
|
+
* Optional backend for semantic candidate scoring / top-k retrieval.
|
|
162
|
+
* When omitted, WikiMemory scores rows with embedding_blob / embedding TEXT in JS (cosine).
|
|
163
|
+
*/
|
|
164
|
+
interface VectorRanker {
|
|
165
|
+
/**
|
|
166
|
+
* Return semantic scores for facts in scope, sorted descending by semanticScore (stable tie-breaking
|
|
167
|
+
* not required — WikiMemory reapplies existing tie-breakers after blending).
|
|
168
|
+
* Implementations SHOULD omit facts with no usable vector; callers treat missing ids like today's
|
|
169
|
+
* "no embedding" rows (pure semantic: -2; hybrid: keyword-only portion).
|
|
170
|
+
*/
|
|
171
|
+
rankBySimilarity(args: VectorRankerRankArgs): Promise<VectorRankerSemanticResult[]>;
|
|
172
|
+
/**
|
|
173
|
+
* Called after a fact's embedding is successfully persisted to embedding_blob (or cleared).
|
|
174
|
+
* Hosts use this to keep sqlite-vec / external indexes consistent with SQLite as source of truth.
|
|
175
|
+
* Optional: if omitted, hosts MUST document "index rebuilt separately" and accept stale ANN until rebuild.
|
|
176
|
+
*/
|
|
177
|
+
onEmbeddingPersisted?(event: {
|
|
178
|
+
entityId: string;
|
|
179
|
+
factId: string;
|
|
180
|
+
vector: Float32Array | null;
|
|
181
|
+
}): void | Promise<void>;
|
|
182
|
+
}
|
|
183
|
+
/**
|
|
184
|
+
* Fallback policy when rankBySimilarity rejects.
|
|
185
|
+
*/
|
|
186
|
+
type VectorRankerFallback = 'js-cosine' | 'keyword' | 'empty' | 'throw';
|
|
99
187
|
interface WikiOptions {
|
|
100
188
|
config?: WikiConfig;
|
|
101
189
|
llmProvider: LLMProvider;
|
|
102
190
|
/**
|
|
103
|
-
* Called when embedding-based retrieval is unavailable during `read()`
|
|
104
|
-
*
|
|
105
|
-
* - `embed()` throws (e.g. network error, model unavailable)
|
|
106
|
-
* - `embed()` returns a vector with non-finite values (NaN / Infinity)
|
|
191
|
+
* Called when embedding-based retrieval is degraded or unavailable during `read()`.
|
|
192
|
+
* This can happen when:
|
|
193
|
+
* - `embed()` throws (e.g. network error, model unavailable) → falls back to keyword search
|
|
194
|
+
* - `embed()` returns a vector with non-finite values (NaN / Infinity) → falls back to keyword search
|
|
107
195
|
* - The query vector's dimension doesn't match stored embeddings (model switch;
|
|
108
|
-
* resolve by calling `runReembed()`)
|
|
196
|
+
* resolve by calling `runReembed()`) → falls back to keyword search
|
|
197
|
+
* - `vectorRanker` returns IDs that don't belong to the requested entity or don't exist
|
|
198
|
+
* (ranker integrity issue; returned rows will be filtered out, reducing result count) →
|
|
199
|
+
* may still use semantic ranking, but with degraded quality
|
|
109
200
|
*
|
|
110
|
-
* `read()`
|
|
201
|
+
* `read()` returns results (keyword fallback or degraded semantic) — this is a notification, not an error path.
|
|
111
202
|
*/
|
|
112
203
|
onRetrievalFallback?: (error: Error) => void;
|
|
204
|
+
/**
|
|
205
|
+
* Optional backend for semantic candidate scoring / top-k retrieval.
|
|
206
|
+
* When omitted, WikiMemory scores rows with embedding_blob / embedding TEXT in JS (cosine).
|
|
207
|
+
*/
|
|
208
|
+
vectorRanker?: VectorRanker;
|
|
209
|
+
/**
|
|
210
|
+
* When rankBySimilarity throws. Default `'js-cosine'`.
|
|
211
|
+
* Ignored when vectorRanker is undefined.
|
|
212
|
+
*/
|
|
213
|
+
vectorRankerFallback?: VectorRankerFallback;
|
|
214
|
+
/**
|
|
215
|
+
* Called only when rankBySimilarity rejects (after embeddings path succeeded).
|
|
216
|
+
* Invoked before applying vectorRankerFallback when that policy recovers or before rejecting when policy is 'throw'.
|
|
217
|
+
*/
|
|
218
|
+
onVectorRankerFallback?: (info: {
|
|
219
|
+
error: Error;
|
|
220
|
+
/** Effective policy core will apply for this read (same as WikiOptions.vectorRankerFallback, default js-cosine). */
|
|
221
|
+
policy: VectorRankerFallback;
|
|
222
|
+
}) => void;
|
|
223
|
+
/**
|
|
224
|
+
* When true: after rankBySimilarity failure, once the recoverable fallback has finished
|
|
225
|
+
* and read() will resolve, invoke onRetrievalFallback — after onVectorRankerFallback if set.
|
|
226
|
+
* Ignored when vectorRankerFallback is 'throw'. Default false.
|
|
227
|
+
*/
|
|
228
|
+
propagateRankerFailureToRetrievalFallback?: boolean;
|
|
113
229
|
}
|
|
114
230
|
interface MemoryBundle {
|
|
115
231
|
facts: WikiFact[];
|
|
@@ -145,10 +261,22 @@ interface EntityStatus {
|
|
|
145
261
|
librarian: boolean;
|
|
146
262
|
heal: boolean;
|
|
147
263
|
}
|
|
264
|
+
/**
|
|
265
|
+
* All operations that can appear in a {@link WikiBusyError}.
|
|
266
|
+
*
|
|
267
|
+
* @remarks **Breaking change from v2.x** — the union previously only contained
|
|
268
|
+
* `'ingest' | 'librarian' | 'heal' | 'prune' | 'reembed'`. The values `'import'`
|
|
269
|
+
* and `'forget'` were added in v3.0. Exhaustive `switch` / narrowing on this type
|
|
270
|
+
* must be updated (or given a `default` arm) to compile without errors.
|
|
271
|
+
*/
|
|
272
|
+
type WikiBusyOperation = 'ingest' | 'librarian' | 'heal' | 'prune' | 'reembed' | 'import' | 'forget';
|
|
273
|
+
/**
|
|
274
|
+
* Thrown when a background mutator is already running for the requested entity.
|
|
275
|
+
*/
|
|
148
276
|
declare class WikiBusyError extends Error {
|
|
149
|
-
readonly operation: 'ingest' | 'librarian' | 'heal' | 'prune' | 'reembed';
|
|
277
|
+
readonly operation: WikiBusyOperation;
|
|
150
278
|
readonly entityId: string;
|
|
151
|
-
constructor(operation: 'ingest' | 'librarian' | 'heal' | 'prune' | 'reembed', entityId: string);
|
|
279
|
+
constructor(operation: WikiBusyOperation, entityId: string);
|
|
152
280
|
}
|
|
153
281
|
|
|
154
282
|
declare class WikiMemory {
|
|
@@ -159,6 +287,19 @@ declare class WikiMemory {
|
|
|
159
287
|
private activeIngestJobs;
|
|
160
288
|
private miniSearch;
|
|
161
289
|
private miniSearchEntryIdsByEntity;
|
|
290
|
+
/**
|
|
291
|
+
* Maximum number of entities whose parsed embedding vectors are held in
|
|
292
|
+
* memory. This cap is intentionally conservative so the cache remains safe
|
|
293
|
+
* on memory-constrained runtimes (e.g., mobile/Expo).
|
|
294
|
+
*/
|
|
295
|
+
private static readonly MAX_VECTOR_CACHE_ENTITIES;
|
|
296
|
+
/**
|
|
297
|
+
* Maximum number of fact vectors cached per entity. Keep this high enough to
|
|
298
|
+
* preserve the parsed-embedding reuse optimization for common mid-sized
|
|
299
|
+
* entities while still maintaining a bounded memory footprint.
|
|
300
|
+
*/
|
|
301
|
+
private static readonly MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
302
|
+
private vectorCache;
|
|
162
303
|
private normalizeMiniSearchRow;
|
|
163
304
|
private rebuildMiniSearchIndex;
|
|
164
305
|
private storeEmbeddingDimension;
|
|
@@ -173,13 +314,19 @@ declare class WikiMemory {
|
|
|
173
314
|
private _librarianKey;
|
|
174
315
|
private _healKey;
|
|
175
316
|
private _warnCrossEntityCollision;
|
|
317
|
+
private _notifyEmbeddingPersisted;
|
|
176
318
|
constructor(db: SQLiteAdapter, options: WikiOptions);
|
|
177
319
|
setup(): Promise<void>;
|
|
178
320
|
hasChanged(entityId: string, sourceRef: string, sourceHash: string): Promise<boolean>;
|
|
179
321
|
private _pruneKey;
|
|
180
322
|
private _reembedKey;
|
|
181
323
|
private _globalReembedKey;
|
|
324
|
+
private _importKey;
|
|
325
|
+
private _globalImportKey;
|
|
326
|
+
private _forgetKey;
|
|
182
327
|
private _isReembedActive;
|
|
328
|
+
private _isImportActiveFor;
|
|
329
|
+
private _isForgetActiveFor;
|
|
183
330
|
/** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
|
|
184
331
|
private _isAnyMaintenanceActiveWithSuffix;
|
|
185
332
|
/** Returns true if any ingest job is active for the given entity. */
|
|
@@ -194,7 +341,27 @@ declare class WikiMemory {
|
|
|
194
341
|
tasks: number;
|
|
195
342
|
events: number;
|
|
196
343
|
}>;
|
|
197
|
-
read(entityId: string, query: string): Promise<MemoryBundle>;
|
|
344
|
+
read(entityId: string, query: string, options?: ReadOptions): Promise<MemoryBundle>;
|
|
345
|
+
/**
|
|
346
|
+
* Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
|
|
347
|
+
*/
|
|
348
|
+
private _tieBreakSort;
|
|
349
|
+
/**
|
|
350
|
+
* Comparator for score + deterministic tie-break fields.
|
|
351
|
+
* Negative return means "a ranks ahead of b" for descending score order.
|
|
352
|
+
*/
|
|
353
|
+
private _compareScoredRows;
|
|
354
|
+
/**
|
|
355
|
+
* Score candidate rows using in-process JS cosine similarity.
|
|
356
|
+
* Applies hybrid blending (if weight set) and tie-break sorting before returning.
|
|
357
|
+
*/
|
|
358
|
+
private _rankWithJsCosine;
|
|
359
|
+
/**
|
|
360
|
+
* Delegate semantic ranking to the injected VectorRanker.
|
|
361
|
+
* Caller should pass an oversampledLimit to preserve recall after re-ranking.
|
|
362
|
+
* Returns scored results ready for hybrid blending and tie-break sorting.
|
|
363
|
+
*/
|
|
364
|
+
private _rankWithVectorRanker;
|
|
198
365
|
getMemoryBundle(entityId: string): Promise<MemoryBundle>;
|
|
199
366
|
write(entityId: string, event: Omit<WikiEvent, 'id' | 'entity_id' | 'created_at'>): Promise<void>;
|
|
200
367
|
private runLibrarianThenMaybeHeal;
|
|
@@ -202,16 +369,22 @@ declare class WikiMemory {
|
|
|
202
369
|
private _doRunHeal;
|
|
203
370
|
runLibrarian(entityId: string): Promise<void>;
|
|
204
371
|
runHeal(entityId: string): Promise<void>;
|
|
205
|
-
runReembed(entityId?: string): Promise<{
|
|
372
|
+
runReembed(entityId?: string, opts?: {
|
|
373
|
+
force?: boolean;
|
|
374
|
+
skipExisting?: boolean;
|
|
375
|
+
}): Promise<{
|
|
206
376
|
embedded: number;
|
|
207
377
|
skipped: number;
|
|
378
|
+
failed: number;
|
|
208
379
|
}>;
|
|
209
380
|
getEntityStatus(entityId: string): EntityStatus;
|
|
381
|
+
clearVectorCache(): void;
|
|
210
382
|
private _getFullBundle;
|
|
211
383
|
exportDump(entityIds?: string[]): Promise<MemoryDump>;
|
|
212
384
|
importDump(dump: MemoryDump, opts?: {
|
|
213
385
|
merge?: boolean;
|
|
214
386
|
}): Promise<void>;
|
|
387
|
+
private _doImportEntity;
|
|
215
388
|
forget(entityId: string, params: {
|
|
216
389
|
entryId?: string;
|
|
217
390
|
taskId?: string;
|
|
@@ -243,4 +416,4 @@ declare function formatMemoryDump(dump: MemoryDump): FormattedMemoryDump;
|
|
|
243
416
|
|
|
244
417
|
declare function createWiki(db: SQLiteAdapter, options: WikiOptions): WikiMemory;
|
|
245
418
|
|
|
246
|
-
export { type EntityStatus, type ExtractedFact, type ExtractedTask, type FormatContextOptions, type FormattedMemoryDump, type LLMProvider, type MemoryBundle, type MemoryDump, type SQLiteAdapter, WikiBusyError, type WikiCheckpoint, type WikiConfig, type WikiEvent, type WikiFact, WikiMemory, type WikiOptions, type WikiTask, createWiki, formatContext, formatMemoryDump };
|
|
419
|
+
export { type EntityStatus, type ExtractedFact, type ExtractedTask, type FormatContextOptions, type FormattedMemoryDump, type LLMProvider, type MemoryBundle, type MemoryDump, type ReadOptions, type SQLiteAdapter, type VectorRanker, type VectorRankerFallback, type VectorRankerRankArgs, type VectorRankerSemanticResult, WikiBusyError, type WikiBusyOperation, type WikiCheckpoint, type WikiConfig, type WikiEvent, type WikiFact, WikiMemory, type WikiOptions, type WikiTask, createWiki, formatContext, formatMemoryDump };
|