@equationalapplications/core-llm-wiki 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +46 -12
- package/dist/index.d.ts +46 -12
- package/dist/index.js +447 -138
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +443 -138
- package/dist/index.mjs.map +1 -1
- package/package.json +4 -1
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import MiniSearch from 'minisearch';
|
|
2
|
+
|
|
1
3
|
// src/db/schema.ts
|
|
2
4
|
async function setupDatabase(db, prefix) {
|
|
3
5
|
await db.execAsync(`
|
|
@@ -15,7 +17,8 @@ async function setupDatabase(db, prefix) {
|
|
|
15
17
|
updated_at INTEGER NOT NULL,
|
|
16
18
|
last_accessed_at INTEGER,
|
|
17
19
|
access_count INTEGER NOT NULL DEFAULT 0,
|
|
18
|
-
deleted_at INTEGER
|
|
20
|
+
deleted_at INTEGER,
|
|
21
|
+
embedding TEXT
|
|
19
22
|
);
|
|
20
23
|
|
|
21
24
|
CREATE INDEX IF NOT EXISTS ${prefix}entries_entity_idx ON ${prefix}entries(entity_id);
|
|
@@ -23,34 +26,6 @@ async function setupDatabase(db, prefix) {
|
|
|
23
26
|
CREATE INDEX IF NOT EXISTS ${prefix}entries_source_hash_idx ON ${prefix}entries(entity_id, source_hash) WHERE source_hash IS NOT NULL;
|
|
24
27
|
CREATE INDEX IF NOT EXISTS ${prefix}entries_updated_idx ON ${prefix}entries(updated_at DESC);
|
|
25
28
|
|
|
26
|
-
-- FTS5 Virtual Table for full-text search
|
|
27
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS ${prefix}entries_fts USING fts5(
|
|
28
|
-
title,
|
|
29
|
-
body,
|
|
30
|
-
tags,
|
|
31
|
-
content='${prefix}entries',
|
|
32
|
-
content_rowid='rowid',
|
|
33
|
-
tokenize='porter unicode61'
|
|
34
|
-
);
|
|
35
|
-
|
|
36
|
-
-- Triggers to keep FTS5 in sync with entries
|
|
37
|
-
CREATE TRIGGER IF NOT EXISTS ${prefix}entries_ai AFTER INSERT ON ${prefix}entries BEGIN
|
|
38
|
-
INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
|
|
39
|
-
VALUES (new.rowid, new.title, new.body, new.tags);
|
|
40
|
-
END;
|
|
41
|
-
|
|
42
|
-
CREATE TRIGGER IF NOT EXISTS ${prefix}entries_ad AFTER DELETE ON ${prefix}entries BEGIN
|
|
43
|
-
INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
|
|
44
|
-
VALUES ('delete', old.rowid, old.title, old.body, old.tags);
|
|
45
|
-
END;
|
|
46
|
-
|
|
47
|
-
CREATE TRIGGER IF NOT EXISTS ${prefix}entries_au AFTER UPDATE ON ${prefix}entries BEGIN
|
|
48
|
-
INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
|
|
49
|
-
VALUES ('delete', old.rowid, old.title, old.body, old.tags);
|
|
50
|
-
INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
|
|
51
|
-
VALUES (new.rowid, new.title, new.body, new.tags);
|
|
52
|
-
END;
|
|
53
|
-
|
|
54
29
|
CREATE TABLE IF NOT EXISTS ${prefix}tasks (
|
|
55
30
|
id TEXT PRIMARY KEY,
|
|
56
31
|
entity_id TEXT NOT NULL,
|
|
@@ -93,7 +68,13 @@ async function setupDatabase(db, prefix) {
|
|
|
93
68
|
var MIGRATIONS = [
|
|
94
69
|
{
|
|
95
70
|
version: 1,
|
|
96
|
-
description: "Rebuild FTS5 with porter unicode61 tokenizer",
|
|
71
|
+
description: "Rebuild FTS5 with porter unicode61 tokenizer (superseded by v2)",
|
|
72
|
+
run: async (_db, _prefix) => {
|
|
73
|
+
}
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
version: 2,
|
|
77
|
+
description: "Remove FTS5; add embedding column for semantic retrieval",
|
|
97
78
|
run: async (db, prefix) => {
|
|
98
79
|
await db.withTransactionAsync(async () => {
|
|
99
80
|
await db.execAsync(`
|
|
@@ -101,32 +82,14 @@ var MIGRATIONS = [
|
|
|
101
82
|
DROP TRIGGER IF EXISTS ${prefix}entries_ad;
|
|
102
83
|
DROP TRIGGER IF EXISTS ${prefix}entries_au;
|
|
103
84
|
DROP TABLE IF EXISTS ${prefix}entries_fts;
|
|
104
|
-
CREATE VIRTUAL TABLE ${prefix}entries_fts USING fts5(
|
|
105
|
-
title,
|
|
106
|
-
body,
|
|
107
|
-
tags,
|
|
108
|
-
content='${prefix}entries',
|
|
109
|
-
content_rowid='rowid',
|
|
110
|
-
tokenize='porter unicode61'
|
|
111
|
-
);
|
|
112
|
-
INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
|
|
113
|
-
SELECT rowid, title, body, tags FROM ${prefix}entries;
|
|
114
|
-
CREATE TRIGGER ${prefix}entries_ai AFTER INSERT ON ${prefix}entries BEGIN
|
|
115
|
-
INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
|
|
116
|
-
VALUES (new.rowid, new.title, new.body, new.tags);
|
|
117
|
-
END;
|
|
118
|
-
CREATE TRIGGER ${prefix}entries_ad AFTER DELETE ON ${prefix}entries BEGIN
|
|
119
|
-
INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
|
|
120
|
-
VALUES ('delete', old.rowid, old.title, old.body, old.tags);
|
|
121
|
-
END;
|
|
122
|
-
CREATE TRIGGER ${prefix}entries_au AFTER UPDATE ON ${prefix}entries BEGIN
|
|
123
|
-
INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
|
|
124
|
-
VALUES ('delete', old.rowid, old.title, old.body, old.tags);
|
|
125
|
-
INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
|
|
126
|
-
VALUES (new.rowid, new.title, new.body, new.tags);
|
|
127
|
-
END;
|
|
128
85
|
`);
|
|
129
86
|
});
|
|
87
|
+
const cols = await db.getAllAsync(
|
|
88
|
+
`PRAGMA table_info(${prefix}entries)`
|
|
89
|
+
);
|
|
90
|
+
if (!cols.some((c) => c.name === "embedding")) {
|
|
91
|
+
await db.execAsync(`ALTER TABLE ${prefix}entries ADD COLUMN embedding TEXT`);
|
|
92
|
+
}
|
|
130
93
|
}
|
|
131
94
|
}
|
|
132
95
|
];
|
|
@@ -172,6 +135,19 @@ Return ONLY a valid JSON object matching this schema:
|
|
|
172
135
|
}
|
|
173
136
|
Extract verbatim factual content. Do not return markdown, just raw JSON.`;
|
|
174
137
|
|
|
138
|
+
// src/utils/cosine.ts
|
|
139
|
+
/**
 * Cosine similarity of two numeric vectors.
 *
 * Only the first `min(a.length, b.length)` components are compared, so callers
 * that need strict dimensional agreement must check lengths themselves.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} A value in [-1, 1]; 0 when either vector has zero
 *   magnitude, when there is nothing to compare, or when the computation is
 *   not finite (NaN/Infinity components or overflow).
 */
function cosineSimilarity(a, b) {
  const len = Math.min(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < len; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  if (denom === 0) {
    return 0;
  }
  const similarity = dot / denom;
  // NaN here would make any sort comparator built on this score inconsistent.
  if (!Number.isFinite(similarity)) {
    return 0;
  }
  // Floating-point rounding can land a hair outside the mathematical range.
  return Math.min(1, Math.max(-1, similarity));
}
|
|
150
|
+
|
|
175
151
|
// src/WikiMemory.ts
|
|
176
152
|
function parseJsonResponse(text) {
|
|
177
153
|
const firstBrace = text.indexOf("{");
|
|
@@ -380,10 +356,146 @@ var WikiMemory = class {
|
|
|
380
356
|
constructor(db, options) {
|
|
381
357
|
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
382
358
|
this.activeIngestJobs = /* @__PURE__ */ new Set();
|
|
359
|
+
this.miniSearch = new MiniSearch({
|
|
360
|
+
fields: ["title", "body", "tags"],
|
|
361
|
+
storeFields: ["entity_id"],
|
|
362
|
+
searchOptions: {
|
|
363
|
+
boost: { title: 2 },
|
|
364
|
+
fuzzy: 0.2,
|
|
365
|
+
prefix: true
|
|
366
|
+
}
|
|
367
|
+
});
|
|
368
|
+
this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
|
|
383
369
|
this.db = db;
|
|
384
370
|
this.options = options;
|
|
385
371
|
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
386
372
|
}
|
|
373
|
+
normalizeMiniSearchRow(row) {
|
|
374
|
+
return {
|
|
375
|
+
id: row.id,
|
|
376
|
+
entity_id: row.entity_id,
|
|
377
|
+
title: row.title,
|
|
378
|
+
body: row.body,
|
|
379
|
+
tags: (() => {
|
|
380
|
+
try {
|
|
381
|
+
const parsed = JSON.parse(row.tags);
|
|
382
|
+
return Array.isArray(parsed) ? parsed.join(" ") : row.tags;
|
|
383
|
+
} catch {
|
|
384
|
+
return row.tags;
|
|
385
|
+
}
|
|
386
|
+
})()
|
|
387
|
+
};
|
|
388
|
+
}
|
|
389
|
+
async rebuildMiniSearchIndex(entityId) {
|
|
390
|
+
if (entityId) {
|
|
391
|
+
const rows2 = await this.db.getAllAsync(
|
|
392
|
+
`SELECT id, entity_id, title, body, tags FROM ${this.prefix}entries WHERE deleted_at IS NULL AND entity_id = ?`,
|
|
393
|
+
[entityId]
|
|
394
|
+
);
|
|
395
|
+
const previousIds = this.miniSearchEntryIdsByEntity.get(entityId);
|
|
396
|
+
if (previousIds) {
|
|
397
|
+
for (const id of previousIds) {
|
|
398
|
+
this.miniSearch.discard(id);
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
const documents2 = rows2.map((row) => this.normalizeMiniSearchRow(row));
|
|
402
|
+
if (documents2.length > 0) {
|
|
403
|
+
this.miniSearch.addAll(documents2);
|
|
404
|
+
}
|
|
405
|
+
this.miniSearchEntryIdsByEntity.set(entityId, new Set(documents2.map((document) => document.id)));
|
|
406
|
+
return;
|
|
407
|
+
}
|
|
408
|
+
const rows = await this.db.getAllAsync(`SELECT id, entity_id, title, body, tags FROM ${this.prefix}entries WHERE deleted_at IS NULL`);
|
|
409
|
+
this.miniSearch.removeAll();
|
|
410
|
+
this.miniSearchEntryIdsByEntity.clear();
|
|
411
|
+
const documents = rows.map((row) => this.normalizeMiniSearchRow(row));
|
|
412
|
+
if (documents.length > 0) {
|
|
413
|
+
this.miniSearch.addAll(documents);
|
|
414
|
+
}
|
|
415
|
+
for (const document of documents) {
|
|
416
|
+
const ids = this.miniSearchEntryIdsByEntity.get(document.entity_id) ?? /* @__PURE__ */ new Set();
|
|
417
|
+
ids.add(document.id);
|
|
418
|
+
this.miniSearchEntryIdsByEntity.set(document.entity_id, ids);
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
async storeEmbeddingDimension(dim) {
|
|
422
|
+
const existing = await this.db.getFirstAsync(
|
|
423
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
|
|
424
|
+
);
|
|
425
|
+
if (existing) {
|
|
426
|
+
const storedDim = parseInt(existing.value, 10);
|
|
427
|
+
if (storedDim !== dim) {
|
|
428
|
+
console.warn(
|
|
429
|
+
`[WikiMemory] Embedding dimension mismatch: stored ${storedDim}, got ${dim}. Call runReembed() to rebuild embeddings with the new model.`
|
|
430
|
+
);
|
|
431
|
+
await this.db.runAsync(
|
|
432
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
433
|
+
[String(dim)]
|
|
434
|
+
);
|
|
435
|
+
} else {
|
|
436
|
+
await this.db.runAsync(
|
|
437
|
+
`DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
438
|
+
);
|
|
439
|
+
}
|
|
440
|
+
} else {
|
|
441
|
+
await this.db.runAsync(
|
|
442
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
|
|
443
|
+
[String(dim)]
|
|
444
|
+
);
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
/**
|
|
448
|
+
* After a successful runReembed(), promote the pending `embedding_dimension_mismatch`
|
|
449
|
+
* value to the canonical `embedding_dimension` key and clear the mismatch flag.
|
|
450
|
+
* This ensures future read() calls use embedding-based retrieval rather than staying
|
|
451
|
+
* stuck on the MiniSearch fallback.
|
|
452
|
+
*/
|
|
453
|
+
async _reconcileEmbeddingDimension() {
|
|
454
|
+
const mismatch = await this.db.getFirstAsync(
|
|
455
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
456
|
+
);
|
|
457
|
+
if (mismatch) {
|
|
458
|
+
await this.db.runAsync(
|
|
459
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
|
|
460
|
+
[mismatch.value]
|
|
461
|
+
);
|
|
462
|
+
await this.db.runAsync(
|
|
463
|
+
`DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
464
|
+
);
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
async embedFact(fact) {
|
|
468
|
+
const embedFn = this.options.llmProvider.embed;
|
|
469
|
+
if (!embedFn) return false;
|
|
470
|
+
let tagsStr;
|
|
471
|
+
if (Array.isArray(fact.tags)) {
|
|
472
|
+
tagsStr = fact.tags.join(" ");
|
|
473
|
+
} else {
|
|
474
|
+
try {
|
|
475
|
+
const parsed = JSON.parse(fact.tags);
|
|
476
|
+
tagsStr = Array.isArray(parsed) ? parsed.join(" ") : fact.tags;
|
|
477
|
+
} catch {
|
|
478
|
+
tagsStr = fact.tags;
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();
|
|
482
|
+
try {
|
|
483
|
+
const vector = await embedFn(text);
|
|
484
|
+
if (vector.length === 0 || !vector.every((v) => typeof v === "number" && isFinite(v))) {
|
|
485
|
+
console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
|
|
486
|
+
return false;
|
|
487
|
+
}
|
|
488
|
+
await this.storeEmbeddingDimension(vector.length);
|
|
489
|
+
await this.db.runAsync(
|
|
490
|
+
`UPDATE ${this.prefix}entries SET embedding = ? WHERE id = ?`,
|
|
491
|
+
[JSON.stringify(vector), fact.id]
|
|
492
|
+
);
|
|
493
|
+
return true;
|
|
494
|
+
} catch (err) {
|
|
495
|
+
console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
|
|
496
|
+
return false;
|
|
497
|
+
}
|
|
498
|
+
}
|
|
387
499
|
_librarianKey(entityId) {
|
|
388
500
|
return `${this.prefix}:${entityId}:librarian`;
|
|
389
501
|
}
|
|
@@ -465,6 +577,7 @@ var WikiMemory = class {
|
|
|
465
577
|
}
|
|
466
578
|
}
|
|
467
579
|
});
|
|
580
|
+
await this.rebuildMiniSearchIndex();
|
|
468
581
|
}
|
|
469
582
|
async hasChanged(entityId, sourceRef, sourceHash) {
|
|
470
583
|
const normalizedRef = normalizeSourceRef(sourceRef);
|
|
@@ -489,6 +602,31 @@ var WikiMemory = class {
|
|
|
489
602
|
_pruneKey(entityId) {
|
|
490
603
|
return `${this.prefix}:${entityId}:prune`;
|
|
491
604
|
}
|
|
605
|
+
_reembedKey(entityId) {
|
|
606
|
+
return `${this.prefix}:${entityId}:reembed`;
|
|
607
|
+
}
|
|
608
|
+
_globalReembedKey() {
|
|
609
|
+
return `${this.prefix}:reembed`;
|
|
610
|
+
}
|
|
611
|
+
_isReembedActive(entityId) {
|
|
612
|
+
return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
|
|
613
|
+
}
|
|
614
|
+
/** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
|
|
615
|
+
_isAnyMaintenanceActiveWithSuffix(suffix) {
|
|
616
|
+
const entityKeyPrefix = `${this.prefix}:`;
|
|
617
|
+
for (const k of this.activeMaintenanceJobs) {
|
|
618
|
+
if (k.startsWith(entityKeyPrefix) && k.endsWith(suffix)) return true;
|
|
619
|
+
}
|
|
620
|
+
return false;
|
|
621
|
+
}
|
|
622
|
+
/** Returns true if any ingest job is active for the given entity. */
|
|
623
|
+
_isIngestActiveFor(entityId) {
|
|
624
|
+
const entityKeyPrefix = `${this.prefix}:${entityId}:`;
|
|
625
|
+
for (const k of this.activeIngestJobs) {
|
|
626
|
+
if (k.startsWith(entityKeyPrefix)) return true;
|
|
627
|
+
}
|
|
628
|
+
return false;
|
|
629
|
+
}
|
|
492
630
|
_validatePruneDuration(value, name) {
|
|
493
631
|
if (value !== null && value !== void 0 && (typeof value !== "number" || !isFinite(value) || value < 0)) {
|
|
494
632
|
throw new Error(`Invalid ${name}: must be a non-negative finite number or null`);
|
|
@@ -511,6 +649,8 @@ var WikiMemory = class {
|
|
|
511
649
|
blockingOperation = "librarian";
|
|
512
650
|
} else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
513
651
|
blockingOperation = "heal";
|
|
652
|
+
} else if (this._isReembedActive(entityId)) {
|
|
653
|
+
blockingOperation = "reembed";
|
|
514
654
|
} else if (isIngestRunning) {
|
|
515
655
|
blockingOperation = "ingest";
|
|
516
656
|
}
|
|
@@ -556,91 +696,145 @@ var WikiMemory = class {
|
|
|
556
696
|
await this.db.execAsync(`PRAGMA wal_checkpoint(TRUNCATE)`);
|
|
557
697
|
await this.db.execAsync(`VACUUM`);
|
|
558
698
|
}
|
|
699
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
559
700
|
return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
|
|
560
701
|
} finally {
|
|
561
702
|
this.activeMaintenanceJobs.delete(pruneKey);
|
|
562
703
|
}
|
|
563
704
|
}
|
|
564
|
-
|
|
565
|
-
const
|
|
566
|
-
const
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
705
|
+
async read(entityId, query) {
|
|
706
|
+
const maxResults = this.options.config?.maxResults ?? this.options.config?.maxFtsResults ?? 10;
|
|
707
|
+
const embedFn = this.options.llmProvider.embed;
|
|
708
|
+
const trimmedQuery = query.trim();
|
|
709
|
+
let facts = [];
|
|
710
|
+
if (trimmedQuery) {
|
|
711
|
+
let usedEmbed = false;
|
|
712
|
+
if (embedFn) {
|
|
713
|
+
try {
|
|
714
|
+
const queryVec = await embedFn(trimmedQuery);
|
|
715
|
+
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
716
|
+
throw new Error(
|
|
717
|
+
"embed() returned an empty or non-finite vector. Falling back to keyword search."
|
|
718
|
+
);
|
|
719
|
+
}
|
|
720
|
+
const storedDimRow = await this.db.getFirstAsync(
|
|
721
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
|
|
722
|
+
);
|
|
723
|
+
if (storedDimRow) {
|
|
724
|
+
const storedDim = parseInt(storedDimRow.value, 10);
|
|
725
|
+
if (storedDim !== queryVec.length) {
|
|
726
|
+
throw new Error(
|
|
727
|
+
`Embedding dimension mismatch: stored ${storedDim}, query has ${queryVec.length}. Call runReembed() to rebuild embeddings with the new model.`
|
|
728
|
+
);
|
|
729
|
+
}
|
|
730
|
+
}
|
|
731
|
+
const scoreRows = await this.db.getAllAsync(
|
|
732
|
+
`SELECT id, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
733
|
+
[entityId]
|
|
734
|
+
);
|
|
735
|
+
const scored = scoreRows.map((row) => {
|
|
736
|
+
let score = 0;
|
|
737
|
+
if (row.embedding) {
|
|
738
|
+
try {
|
|
739
|
+
const parsed = JSON.parse(row.embedding);
|
|
740
|
+
if (Array.isArray(parsed) && parsed.length === queryVec.length && parsed.every((v) => typeof v === "number" && isFinite(v))) {
|
|
741
|
+
score = cosineSimilarity(queryVec, parsed);
|
|
742
|
+
}
|
|
743
|
+
} catch {
|
|
744
|
+
}
|
|
745
|
+
}
|
|
746
|
+
return { row, score };
|
|
747
|
+
});
|
|
748
|
+
scored.sort((a, b) => {
|
|
749
|
+
const scoreDiff = b.score - a.score;
|
|
750
|
+
if (scoreDiff !== 0) {
|
|
751
|
+
return scoreDiff;
|
|
752
|
+
}
|
|
753
|
+
const updatedAtDiff = (b.row.updated_at ?? 0) - (a.row.updated_at ?? 0);
|
|
754
|
+
if (updatedAtDiff !== 0) {
|
|
755
|
+
return updatedAtDiff;
|
|
756
|
+
}
|
|
757
|
+
const accessCountDiff = (b.row.access_count ?? 0) - (a.row.access_count ?? 0);
|
|
758
|
+
if (accessCountDiff !== 0) {
|
|
759
|
+
return accessCountDiff;
|
|
588
760
|
}
|
|
761
|
+
return a.row.id.localeCompare(b.row.id);
|
|
762
|
+
});
|
|
763
|
+
const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
|
|
764
|
+
if (topIds.length > 0) {
|
|
765
|
+
const placeholders = topIds.map(() => "?").join(",");
|
|
766
|
+
const fullRows = await this.db.getAllAsync(
|
|
767
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
768
|
+
topIds
|
|
769
|
+
);
|
|
770
|
+
const byId = new Map(fullRows.map((r) => [r.id, r]));
|
|
771
|
+
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
589
772
|
}
|
|
773
|
+
usedEmbed = true;
|
|
774
|
+
} catch (err) {
|
|
775
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
776
|
+
this.options.onRetrievalFallback?.(error);
|
|
590
777
|
}
|
|
591
778
|
}
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
779
|
+
if (!usedEmbed) {
|
|
780
|
+
const results = this.miniSearch.search(trimmedQuery, {
|
|
781
|
+
filter: (r) => r.entity_id === entityId,
|
|
782
|
+
combineWith: "OR"
|
|
783
|
+
});
|
|
784
|
+
const topIds = results.slice(0, maxResults).map((r) => r.id);
|
|
785
|
+
if (topIds.length > 0) {
|
|
786
|
+
const placeholders = topIds.map(() => "?").join(",");
|
|
787
|
+
const rows = await this.db.getAllAsync(
|
|
788
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
789
|
+
topIds
|
|
790
|
+
);
|
|
791
|
+
const byId = new Map(rows.map((r) => [r.id, r]));
|
|
792
|
+
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
if (facts.length > 0) {
|
|
796
|
+
const ids = facts.map((f) => f.id);
|
|
797
|
+
const placeholders = ids.map(() => "?").join(",");
|
|
798
|
+
const now = Date.now();
|
|
799
|
+
await this.db.runAsync(
|
|
800
|
+
`UPDATE ${this.prefix}entries
|
|
801
|
+
SET access_count = access_count + 1, last_accessed_at = ?
|
|
802
|
+
WHERE id IN (${placeholders})`,
|
|
803
|
+
[now, ...ids]
|
|
804
|
+
);
|
|
805
|
+
}
|
|
609
806
|
} else {
|
|
610
|
-
|
|
611
|
-
SELECT * FROM ${this.prefix}entries
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags
|
|
642
|
-
}));
|
|
643
|
-
return { facts, tasks, events: events.reverse() };
|
|
807
|
+
facts = await this.db.getAllAsync(
|
|
808
|
+
`SELECT * FROM ${this.prefix}entries
|
|
809
|
+
WHERE entity_id = ? AND deleted_at IS NULL
|
|
810
|
+
ORDER BY updated_at DESC
|
|
811
|
+
LIMIT ?`,
|
|
812
|
+
[entityId, maxResults]
|
|
813
|
+
);
|
|
814
|
+
}
|
|
815
|
+
const [tasks, events] = await Promise.all([
|
|
816
|
+
this.db.getAllAsync(
|
|
817
|
+
`SELECT * FROM ${this.prefix}tasks
|
|
818
|
+
WHERE entity_id = ? AND status IN ('pending', 'in_progress') AND deleted_at IS NULL
|
|
819
|
+
ORDER BY priority DESC, created_at ASC`,
|
|
820
|
+
[entityId]
|
|
821
|
+
),
|
|
822
|
+
this.db.getAllAsync(
|
|
823
|
+
`SELECT * FROM ${this.prefix}events
|
|
824
|
+
WHERE entity_id = ?
|
|
825
|
+
ORDER BY created_at DESC
|
|
826
|
+
LIMIT 10`,
|
|
827
|
+
[entityId]
|
|
828
|
+
)
|
|
829
|
+
]);
|
|
830
|
+
const parsedFacts = facts.map((f) => {
|
|
831
|
+
const { embedding: _embedding, ...rest } = f;
|
|
832
|
+
return {
|
|
833
|
+
...rest,
|
|
834
|
+
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
835
|
+
};
|
|
836
|
+
});
|
|
837
|
+
return { facts: parsedFacts, tasks, events: events.reverse() };
|
|
644
838
|
}
|
|
645
839
|
async getMemoryBundle(entityId) {
|
|
646
840
|
return this._getFullBundle(entityId, { maxEvents: 10 });
|
|
@@ -713,10 +907,13 @@ var WikiMemory = class {
|
|
|
713
907
|
ORDER BY updated_at DESC
|
|
714
908
|
LIMIT 100
|
|
715
909
|
`, [entityId]);
|
|
716
|
-
const currentFacts = currentFactsRows.map((f) =>
|
|
717
|
-
...f
|
|
718
|
-
|
|
719
|
-
|
|
910
|
+
const currentFacts = currentFactsRows.map((f) => {
|
|
911
|
+
const { embedding: _embedding, ...rest } = f;
|
|
912
|
+
return {
|
|
913
|
+
...rest,
|
|
914
|
+
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
915
|
+
};
|
|
916
|
+
});
|
|
720
917
|
const userPrompt = `Events:
|
|
721
918
|
${JSON.stringify(events.reverse(), null, 2)}
|
|
722
919
|
|
|
@@ -732,6 +929,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
732
929
|
const validFacts = facts.map(validateFact).filter((f) => f !== null);
|
|
733
930
|
const validTasks = tasks.map(validateTask).filter((t) => t !== null);
|
|
734
931
|
const now = Date.now();
|
|
932
|
+
const insertedFacts = [];
|
|
735
933
|
await this.db.withTransactionAsync(async () => {
|
|
736
934
|
for (const fact of validFacts) {
|
|
737
935
|
const newTokens = titleTokens(fact.title);
|
|
@@ -754,6 +952,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
754
952
|
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
|
|
755
953
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
756
954
|
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
|
|
955
|
+
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
757
956
|
}
|
|
758
957
|
for (const task of validTasks) {
|
|
759
958
|
const id = generateId("task_");
|
|
@@ -763,6 +962,10 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
763
962
|
`, [id, entityId, task.description, "pending", task.priority, now, now]);
|
|
764
963
|
}
|
|
765
964
|
});
|
|
965
|
+
for (const fact of insertedFacts) {
|
|
966
|
+
await this.embedFact(fact);
|
|
967
|
+
}
|
|
968
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
766
969
|
}
|
|
767
970
|
async _doRunHeal(entityId) {
|
|
768
971
|
const now = Date.now();
|
|
@@ -799,7 +1002,10 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
799
1002
|
const healCandidates = allFactsRows.filter((f) => f.source_type !== "user_document");
|
|
800
1003
|
const documentAnchors = allFactsRows.filter((f) => f.source_type === "user_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
|
|
801
1004
|
const userPrompt = `Heal Candidates:
|
|
802
|
-
${JSON.stringify(healCandidates.map((f) =>
|
|
1005
|
+
${JSON.stringify(healCandidates.map((f) => {
|
|
1006
|
+
const { embedding: _embedding, ...rest } = f;
|
|
1007
|
+
return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
|
|
1008
|
+
}), null, 2)}
|
|
803
1009
|
|
|
804
1010
|
Document Anchors (DO NOT MODIFY OR DELETE):
|
|
805
1011
|
${JSON.stringify(documentAnchors, null, 2)}
|
|
@@ -823,6 +1029,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
823
1029
|
const safeDowngraded = downgraded.filter((id) => mutableIds.has(id));
|
|
824
1030
|
const safeDeleted = deleted.filter((id) => mutableIds.has(id));
|
|
825
1031
|
const validNewFacts = newFacts.map(validateFact).filter((f) => f !== null);
|
|
1032
|
+
const insertedFacts = [];
|
|
826
1033
|
await this.db.withTransactionAsync(async () => {
|
|
827
1034
|
for (const id of safeDowngraded) {
|
|
828
1035
|
await this.db.runAsync(`UPDATE ${this.prefix}entries SET confidence = 'tentative', updated_at = ? WHERE id = ? AND entity_id = ?`, [now, id, entityId]);
|
|
@@ -836,8 +1043,13 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
836
1043
|
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
|
|
837
1044
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
838
1045
|
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
|
|
1046
|
+
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
839
1047
|
}
|
|
840
1048
|
});
|
|
1049
|
+
for (const fact of insertedFacts) {
|
|
1050
|
+
await this.embedFact(fact);
|
|
1051
|
+
}
|
|
1052
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
841
1053
|
}
|
|
842
1054
|
async runLibrarian(entityId) {
|
|
843
1055
|
const jobKey = this._librarianKey(entityId);
|
|
@@ -847,6 +1059,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
847
1059
|
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
848
1060
|
throw new WikiBusyError("prune", entityId);
|
|
849
1061
|
}
|
|
1062
|
+
if (this._isReembedActive(entityId)) {
|
|
1063
|
+
throw new WikiBusyError("reembed", entityId);
|
|
1064
|
+
}
|
|
850
1065
|
this.activeMaintenanceJobs.add(jobKey);
|
|
851
1066
|
try {
|
|
852
1067
|
await this._doRunLibrarian(entityId);
|
|
@@ -862,6 +1077,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
862
1077
|
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
863
1078
|
throw new WikiBusyError("prune", entityId);
|
|
864
1079
|
}
|
|
1080
|
+
if (this._isReembedActive(entityId)) {
|
|
1081
|
+
throw new WikiBusyError("reembed", entityId);
|
|
1082
|
+
}
|
|
865
1083
|
this.activeMaintenanceJobs.add(jobKey);
|
|
866
1084
|
try {
|
|
867
1085
|
await this._doRunHeal(entityId);
|
|
@@ -869,6 +1087,69 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
869
1087
|
this.activeMaintenanceJobs.delete(jobKey);
|
|
870
1088
|
}
|
|
871
1089
|
}
|
|
1090
|
+
async runReembed(entityId) {
|
|
1091
|
+
const embedFn = this.options.llmProvider.embed;
|
|
1092
|
+
if (!embedFn) return { embedded: 0, skipped: 0 };
|
|
1093
|
+
const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
|
|
1094
|
+
if (this.activeMaintenanceJobs.has(reembedKey)) {
|
|
1095
|
+
throw new WikiBusyError("reembed", entityId ?? "*");
|
|
1096
|
+
}
|
|
1097
|
+
if (entityId) {
|
|
1098
|
+
if (this.activeMaintenanceJobs.has(this._globalReembedKey())) {
|
|
1099
|
+
throw new WikiBusyError("reembed", entityId);
|
|
1100
|
+
}
|
|
1101
|
+
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1102
|
+
throw new WikiBusyError("prune", entityId);
|
|
1103
|
+
}
|
|
1104
|
+
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
1105
|
+
throw new WikiBusyError("librarian", entityId);
|
|
1106
|
+
}
|
|
1107
|
+
if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
1108
|
+
throw new WikiBusyError("heal", entityId);
|
|
1109
|
+
}
|
|
1110
|
+
if (this._isIngestActiveFor(entityId)) {
|
|
1111
|
+
throw new WikiBusyError("ingest", entityId);
|
|
1112
|
+
}
|
|
1113
|
+
} else {
|
|
1114
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
|
|
1115
|
+
throw new WikiBusyError("reembed", "*");
|
|
1116
|
+
}
|
|
1117
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":prune")) {
|
|
1118
|
+
throw new WikiBusyError("prune", "*");
|
|
1119
|
+
}
|
|
1120
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":librarian")) {
|
|
1121
|
+
throw new WikiBusyError("librarian", "*");
|
|
1122
|
+
}
|
|
1123
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":heal")) {
|
|
1124
|
+
throw new WikiBusyError("heal", "*");
|
|
1125
|
+
}
|
|
1126
|
+
if (this.activeIngestJobs.size > 0) {
|
|
1127
|
+
throw new WikiBusyError("ingest", "*");
|
|
1128
|
+
}
|
|
1129
|
+
}
|
|
1130
|
+
this.activeMaintenanceJobs.add(reembedKey);
|
|
1131
|
+
try {
|
|
1132
|
+
const where = entityId ? `entity_id = ? AND deleted_at IS NULL` : `deleted_at IS NULL`;
|
|
1133
|
+
const params = entityId ? [entityId] : [];
|
|
1134
|
+
const rows = await this.db.getAllAsync(
|
|
1135
|
+
`SELECT * FROM ${this.prefix}entries WHERE ${where}`,
|
|
1136
|
+
params
|
|
1137
|
+
);
|
|
1138
|
+
let embedded = 0;
|
|
1139
|
+
let skipped = 0;
|
|
1140
|
+
for (const row of rows) {
|
|
1141
|
+
const success = await this.embedFact(row);
|
|
1142
|
+
if (success) embedded++;
|
|
1143
|
+
else skipped++;
|
|
1144
|
+
}
|
|
1145
|
+
if (embedded > 0) {
|
|
1146
|
+
await this._reconcileEmbeddingDimension();
|
|
1147
|
+
}
|
|
1148
|
+
return { embedded, skipped };
|
|
1149
|
+
} finally {
|
|
1150
|
+
this.activeMaintenanceJobs.delete(reembedKey);
|
|
1151
|
+
}
|
|
1152
|
+
}
|
|
872
1153
|
getEntityStatus(entityId) {
|
|
873
1154
|
const ingestPrefix = `${this.prefix}:${entityId}:`;
|
|
874
1155
|
let ingesting = false;
|
|
@@ -899,10 +1180,13 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
899
1180
|
),
|
|
900
1181
|
this.db.getAllAsync(eventsQuery, eventsParams)
|
|
901
1182
|
]);
|
|
902
|
-
const facts = factsRaw.map((f) =>
|
|
903
|
-
...f
|
|
904
|
-
|
|
905
|
-
|
|
1183
|
+
const facts = factsRaw.map((f) => {
|
|
1184
|
+
const { embedding: _embedding, ...rest } = f;
|
|
1185
|
+
return {
|
|
1186
|
+
...rest,
|
|
1187
|
+
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
1188
|
+
};
|
|
1189
|
+
});
|
|
906
1190
|
const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
|
|
907
1191
|
return { facts, tasks, events };
|
|
908
1192
|
}
|
|
@@ -1041,7 +1325,18 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1041
1325
|
);
|
|
1042
1326
|
}
|
|
1043
1327
|
});
|
|
1328
|
+
for (const fact of bundle.facts) {
|
|
1329
|
+
if (!fact.deleted_at) {
|
|
1330
|
+
await this.embedFact({
|
|
1331
|
+
id: fact.id,
|
|
1332
|
+
title: fact.title,
|
|
1333
|
+
body: fact.body,
|
|
1334
|
+
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
1335
|
+
});
|
|
1336
|
+
}
|
|
1337
|
+
}
|
|
1044
1338
|
}
|
|
1339
|
+
await this.rebuildMiniSearchIndex();
|
|
1045
1340
|
}
|
|
1046
1341
|
async forget(entityId, params) {
|
|
1047
1342
|
const now = Date.now();
|
|
@@ -1090,6 +1385,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1090
1385
|
if (taskResult) deletedTasks += taskResult.changes;
|
|
1091
1386
|
if (refResult) deletedEntries += refResult.changes;
|
|
1092
1387
|
}
|
|
1388
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1093
1389
|
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
1094
1390
|
}
|
|
1095
1391
|
async ingestDocument(entityId, params) {
|
|
@@ -1115,6 +1411,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1115
1411
|
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1116
1412
|
throw new WikiBusyError("prune", entityId);
|
|
1117
1413
|
}
|
|
1414
|
+
if (this._isReembedActive(entityId)) {
|
|
1415
|
+
throw new WikiBusyError("reembed", entityId);
|
|
1416
|
+
}
|
|
1118
1417
|
this.activeIngestJobs.add(jobKey);
|
|
1119
1418
|
try {
|
|
1120
1419
|
const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
|
|
@@ -1146,6 +1445,7 @@ ${chunk}`;
|
|
|
1146
1445
|
}
|
|
1147
1446
|
}
|
|
1148
1447
|
const now = Date.now();
|
|
1448
|
+
const insertedFacts = [];
|
|
1149
1449
|
await this.db.withTransactionAsync(async () => {
|
|
1150
1450
|
await this.db.runAsync(
|
|
1151
1451
|
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
@@ -1158,8 +1458,13 @@ ${chunk}`;
|
|
|
1158
1458
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1159
1459
|
[id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]
|
|
1160
1460
|
);
|
|
1461
|
+
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1161
1462
|
}
|
|
1162
1463
|
});
|
|
1464
|
+
for (const fact of insertedFacts) {
|
|
1465
|
+
await this.embedFact(fact);
|
|
1466
|
+
}
|
|
1467
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1163
1468
|
return { truncated, chunks: chunks.length };
|
|
1164
1469
|
} finally {
|
|
1165
1470
|
this.activeIngestJobs.delete(jobKey);
|