@equationalapplications/core-llm-wiki 2.4.0 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +46 -12
- package/dist/index.d.ts +46 -12
- package/dist/index.js +447 -138
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +443 -138
- package/dist/index.mjs.map +1 -1
- package/package.json +4 -1
package/dist/index.js
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
var MiniSearch = require('minisearch');
|
|
4
|
+
|
|
5
|
+
/**
 * Bundler interop shim for `require()` results.
 *
 * A value flagged with `__esModule` is returned unchanged; any other value
 * (including falsy ones) is wrapped so it is reachable as `.default`.
 */
function _interopDefault (e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
|
|
6
|
+
|
|
7
|
+
var MiniSearch__default = /*#__PURE__*/_interopDefault(MiniSearch);
|
|
8
|
+
|
|
3
9
|
// src/db/schema.ts
|
|
4
10
|
async function setupDatabase(db, prefix) {
|
|
5
11
|
await db.execAsync(`
|
|
@@ -17,7 +23,8 @@ async function setupDatabase(db, prefix) {
|
|
|
17
23
|
updated_at INTEGER NOT NULL,
|
|
18
24
|
last_accessed_at INTEGER,
|
|
19
25
|
access_count INTEGER NOT NULL DEFAULT 0,
|
|
20
|
-
deleted_at INTEGER
|
|
26
|
+
deleted_at INTEGER,
|
|
27
|
+
embedding TEXT
|
|
21
28
|
);
|
|
22
29
|
|
|
23
30
|
CREATE INDEX IF NOT EXISTS ${prefix}entries_entity_idx ON ${prefix}entries(entity_id);
|
|
@@ -25,34 +32,6 @@ async function setupDatabase(db, prefix) {
|
|
|
25
32
|
CREATE INDEX IF NOT EXISTS ${prefix}entries_source_hash_idx ON ${prefix}entries(entity_id, source_hash) WHERE source_hash IS NOT NULL;
|
|
26
33
|
CREATE INDEX IF NOT EXISTS ${prefix}entries_updated_idx ON ${prefix}entries(updated_at DESC);
|
|
27
34
|
|
|
28
|
-
-- FTS5 Virtual Table for full-text search
|
|
29
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS ${prefix}entries_fts USING fts5(
|
|
30
|
-
title,
|
|
31
|
-
body,
|
|
32
|
-
tags,
|
|
33
|
-
content='${prefix}entries',
|
|
34
|
-
content_rowid='rowid',
|
|
35
|
-
tokenize='porter unicode61'
|
|
36
|
-
);
|
|
37
|
-
|
|
38
|
-
-- Triggers to keep FTS5 in sync with entries
|
|
39
|
-
CREATE TRIGGER IF NOT EXISTS ${prefix}entries_ai AFTER INSERT ON ${prefix}entries BEGIN
|
|
40
|
-
INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
|
|
41
|
-
VALUES (new.rowid, new.title, new.body, new.tags);
|
|
42
|
-
END;
|
|
43
|
-
|
|
44
|
-
CREATE TRIGGER IF NOT EXISTS ${prefix}entries_ad AFTER DELETE ON ${prefix}entries BEGIN
|
|
45
|
-
INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
|
|
46
|
-
VALUES ('delete', old.rowid, old.title, old.body, old.tags);
|
|
47
|
-
END;
|
|
48
|
-
|
|
49
|
-
CREATE TRIGGER IF NOT EXISTS ${prefix}entries_au AFTER UPDATE ON ${prefix}entries BEGIN
|
|
50
|
-
INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
|
|
51
|
-
VALUES ('delete', old.rowid, old.title, old.body, old.tags);
|
|
52
|
-
INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
|
|
53
|
-
VALUES (new.rowid, new.title, new.body, new.tags);
|
|
54
|
-
END;
|
|
55
|
-
|
|
56
35
|
CREATE TABLE IF NOT EXISTS ${prefix}tasks (
|
|
57
36
|
id TEXT PRIMARY KEY,
|
|
58
37
|
entity_id TEXT NOT NULL,
|
|
@@ -95,7 +74,13 @@ async function setupDatabase(db, prefix) {
|
|
|
95
74
|
var MIGRATIONS = [
|
|
96
75
|
{
|
|
97
76
|
version: 1,
|
|
98
|
-
description: "Rebuild FTS5 with porter unicode61 tokenizer",
|
|
77
|
+
description: "Rebuild FTS5 with porter unicode61 tokenizer (superseded by v2)",
|
|
78
|
+
run: async (_db, _prefix) => {
|
|
79
|
+
}
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
version: 2,
|
|
83
|
+
description: "Remove FTS5; add embedding column for semantic retrieval",
|
|
99
84
|
run: async (db, prefix) => {
|
|
100
85
|
await db.withTransactionAsync(async () => {
|
|
101
86
|
await db.execAsync(`
|
|
@@ -103,32 +88,14 @@ var MIGRATIONS = [
|
|
|
103
88
|
DROP TRIGGER IF EXISTS ${prefix}entries_ad;
|
|
104
89
|
DROP TRIGGER IF EXISTS ${prefix}entries_au;
|
|
105
90
|
DROP TABLE IF EXISTS ${prefix}entries_fts;
|
|
106
|
-
CREATE VIRTUAL TABLE ${prefix}entries_fts USING fts5(
|
|
107
|
-
title,
|
|
108
|
-
body,
|
|
109
|
-
tags,
|
|
110
|
-
content='${prefix}entries',
|
|
111
|
-
content_rowid='rowid',
|
|
112
|
-
tokenize='porter unicode61'
|
|
113
|
-
);
|
|
114
|
-
INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
|
|
115
|
-
SELECT rowid, title, body, tags FROM ${prefix}entries;
|
|
116
|
-
CREATE TRIGGER ${prefix}entries_ai AFTER INSERT ON ${prefix}entries BEGIN
|
|
117
|
-
INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
|
|
118
|
-
VALUES (new.rowid, new.title, new.body, new.tags);
|
|
119
|
-
END;
|
|
120
|
-
CREATE TRIGGER ${prefix}entries_ad AFTER DELETE ON ${prefix}entries BEGIN
|
|
121
|
-
INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
|
|
122
|
-
VALUES ('delete', old.rowid, old.title, old.body, old.tags);
|
|
123
|
-
END;
|
|
124
|
-
CREATE TRIGGER ${prefix}entries_au AFTER UPDATE ON ${prefix}entries BEGIN
|
|
125
|
-
INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
|
|
126
|
-
VALUES ('delete', old.rowid, old.title, old.body, old.tags);
|
|
127
|
-
INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
|
|
128
|
-
VALUES (new.rowid, new.title, new.body, new.tags);
|
|
129
|
-
END;
|
|
130
91
|
`);
|
|
131
92
|
});
|
|
93
|
+
const cols = await db.getAllAsync(
|
|
94
|
+
`PRAGMA table_info(${prefix}entries)`
|
|
95
|
+
);
|
|
96
|
+
if (!cols.some((c) => c.name === "embedding")) {
|
|
97
|
+
await db.execAsync(`ALTER TABLE ${prefix}entries ADD COLUMN embedding TEXT`);
|
|
98
|
+
}
|
|
132
99
|
}
|
|
133
100
|
}
|
|
134
101
|
];
|
|
@@ -174,6 +141,19 @@ Return ONLY a valid JSON object matching this schema:
|
|
|
174
141
|
}
|
|
175
142
|
Extract verbatim factual content. Do not return markdown, just raw JSON.`;
|
|
176
143
|
|
|
144
|
+
// src/utils/cosine.ts
|
|
145
|
+
/**
 * Cosine similarity between two numeric vectors.
 *
 * Only the first `min(a.length, b.length)` components are compared, so a
 * length mismatch is tolerated rather than rejected.
 *
 * @param {ArrayLike<number>} a First vector.
 * @param {ArrayLike<number>} b Second vector.
 * @returns {number} The similarity, or 0 when either vector has zero
 *   magnitude or the result is not a finite number.
 */
function cosineSimilarity(a, b) {
  const len = Math.min(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < len; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  if (denom === 0) {
    return 0;
  }
  const similarity = dot / denom;
  // NaN/Infinity components (or overflow) would otherwise leak a NaN score,
  // which makes any comparator that sorts by score inconsistent.
  return Number.isFinite(similarity) ? similarity : 0;
}
|
|
156
|
+
|
|
177
157
|
// src/WikiMemory.ts
|
|
178
158
|
function parseJsonResponse(text) {
|
|
179
159
|
const firstBrace = text.indexOf("{");
|
|
@@ -382,10 +362,146 @@ var WikiMemory = class {
|
|
|
382
362
|
constructor(db, options) {
|
|
383
363
|
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
384
364
|
this.activeIngestJobs = /* @__PURE__ */ new Set();
|
|
365
|
+
this.miniSearch = new MiniSearch__default.default({
|
|
366
|
+
fields: ["title", "body", "tags"],
|
|
367
|
+
storeFields: ["entity_id"],
|
|
368
|
+
searchOptions: {
|
|
369
|
+
boost: { title: 2 },
|
|
370
|
+
fuzzy: 0.2,
|
|
371
|
+
prefix: true
|
|
372
|
+
}
|
|
373
|
+
});
|
|
374
|
+
this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
|
|
385
375
|
this.db = db;
|
|
386
376
|
this.options = options;
|
|
387
377
|
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
388
378
|
}
|
|
379
|
+
normalizeMiniSearchRow(row) {
|
|
380
|
+
return {
|
|
381
|
+
id: row.id,
|
|
382
|
+
entity_id: row.entity_id,
|
|
383
|
+
title: row.title,
|
|
384
|
+
body: row.body,
|
|
385
|
+
tags: (() => {
|
|
386
|
+
try {
|
|
387
|
+
const parsed = JSON.parse(row.tags);
|
|
388
|
+
return Array.isArray(parsed) ? parsed.join(" ") : row.tags;
|
|
389
|
+
} catch {
|
|
390
|
+
return row.tags;
|
|
391
|
+
}
|
|
392
|
+
})()
|
|
393
|
+
};
|
|
394
|
+
}
|
|
395
|
+
async rebuildMiniSearchIndex(entityId) {
|
|
396
|
+
if (entityId) {
|
|
397
|
+
const rows2 = await this.db.getAllAsync(
|
|
398
|
+
`SELECT id, entity_id, title, body, tags FROM ${this.prefix}entries WHERE deleted_at IS NULL AND entity_id = ?`,
|
|
399
|
+
[entityId]
|
|
400
|
+
);
|
|
401
|
+
const previousIds = this.miniSearchEntryIdsByEntity.get(entityId);
|
|
402
|
+
if (previousIds) {
|
|
403
|
+
for (const id of previousIds) {
|
|
404
|
+
this.miniSearch.discard(id);
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
const documents2 = rows2.map((row) => this.normalizeMiniSearchRow(row));
|
|
408
|
+
if (documents2.length > 0) {
|
|
409
|
+
this.miniSearch.addAll(documents2);
|
|
410
|
+
}
|
|
411
|
+
this.miniSearchEntryIdsByEntity.set(entityId, new Set(documents2.map((document) => document.id)));
|
|
412
|
+
return;
|
|
413
|
+
}
|
|
414
|
+
const rows = await this.db.getAllAsync(`SELECT id, entity_id, title, body, tags FROM ${this.prefix}entries WHERE deleted_at IS NULL`);
|
|
415
|
+
this.miniSearch.removeAll();
|
|
416
|
+
this.miniSearchEntryIdsByEntity.clear();
|
|
417
|
+
const documents = rows.map((row) => this.normalizeMiniSearchRow(row));
|
|
418
|
+
if (documents.length > 0) {
|
|
419
|
+
this.miniSearch.addAll(documents);
|
|
420
|
+
}
|
|
421
|
+
for (const document of documents) {
|
|
422
|
+
const ids = this.miniSearchEntryIdsByEntity.get(document.entity_id) ?? /* @__PURE__ */ new Set();
|
|
423
|
+
ids.add(document.id);
|
|
424
|
+
this.miniSearchEntryIdsByEntity.set(document.entity_id, ids);
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
async storeEmbeddingDimension(dim) {
|
|
428
|
+
const existing = await this.db.getFirstAsync(
|
|
429
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
|
|
430
|
+
);
|
|
431
|
+
if (existing) {
|
|
432
|
+
const storedDim = parseInt(existing.value, 10);
|
|
433
|
+
if (storedDim !== dim) {
|
|
434
|
+
console.warn(
|
|
435
|
+
`[WikiMemory] Embedding dimension mismatch: stored ${storedDim}, got ${dim}. Call runReembed() to rebuild embeddings with the new model.`
|
|
436
|
+
);
|
|
437
|
+
await this.db.runAsync(
|
|
438
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
439
|
+
[String(dim)]
|
|
440
|
+
);
|
|
441
|
+
} else {
|
|
442
|
+
await this.db.runAsync(
|
|
443
|
+
`DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
444
|
+
);
|
|
445
|
+
}
|
|
446
|
+
} else {
|
|
447
|
+
await this.db.runAsync(
|
|
448
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
|
|
449
|
+
[String(dim)]
|
|
450
|
+
);
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
/**
|
|
454
|
+
* After a successful runReembed(), promote the pending `embedding_dimension_mismatch`
|
|
455
|
+
* value to the canonical `embedding_dimension` key and clear the mismatch flag.
|
|
456
|
+
* This ensures future read() calls use embedding-based retrieval rather than staying
|
|
457
|
+
* stuck on the MiniSearch fallback.
|
|
458
|
+
*/
|
|
459
|
+
async _reconcileEmbeddingDimension() {
|
|
460
|
+
const mismatch = await this.db.getFirstAsync(
|
|
461
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
462
|
+
);
|
|
463
|
+
if (mismatch) {
|
|
464
|
+
await this.db.runAsync(
|
|
465
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
|
|
466
|
+
[mismatch.value]
|
|
467
|
+
);
|
|
468
|
+
await this.db.runAsync(
|
|
469
|
+
`DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
470
|
+
);
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
async embedFact(fact) {
|
|
474
|
+
const embedFn = this.options.llmProvider.embed;
|
|
475
|
+
if (!embedFn) return false;
|
|
476
|
+
let tagsStr;
|
|
477
|
+
if (Array.isArray(fact.tags)) {
|
|
478
|
+
tagsStr = fact.tags.join(" ");
|
|
479
|
+
} else {
|
|
480
|
+
try {
|
|
481
|
+
const parsed = JSON.parse(fact.tags);
|
|
482
|
+
tagsStr = Array.isArray(parsed) ? parsed.join(" ") : fact.tags;
|
|
483
|
+
} catch {
|
|
484
|
+
tagsStr = fact.tags;
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();
|
|
488
|
+
try {
|
|
489
|
+
const vector = await embedFn(text);
|
|
490
|
+
if (vector.length === 0 || !vector.every((v) => typeof v === "number" && isFinite(v))) {
|
|
491
|
+
console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
|
|
492
|
+
return false;
|
|
493
|
+
}
|
|
494
|
+
await this.storeEmbeddingDimension(vector.length);
|
|
495
|
+
await this.db.runAsync(
|
|
496
|
+
`UPDATE ${this.prefix}entries SET embedding = ? WHERE id = ?`,
|
|
497
|
+
[JSON.stringify(vector), fact.id]
|
|
498
|
+
);
|
|
499
|
+
return true;
|
|
500
|
+
} catch (err) {
|
|
501
|
+
console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
|
|
502
|
+
return false;
|
|
503
|
+
}
|
|
504
|
+
}
|
|
389
505
|
_librarianKey(entityId) {
|
|
390
506
|
return `${this.prefix}:${entityId}:librarian`;
|
|
391
507
|
}
|
|
@@ -467,6 +583,7 @@ var WikiMemory = class {
|
|
|
467
583
|
}
|
|
468
584
|
}
|
|
469
585
|
});
|
|
586
|
+
await this.rebuildMiniSearchIndex();
|
|
470
587
|
}
|
|
471
588
|
async hasChanged(entityId, sourceRef, sourceHash) {
|
|
472
589
|
const normalizedRef = normalizeSourceRef(sourceRef);
|
|
@@ -491,6 +608,31 @@ var WikiMemory = class {
|
|
|
491
608
|
_pruneKey(entityId) {
|
|
492
609
|
return `${this.prefix}:${entityId}:prune`;
|
|
493
610
|
}
|
|
611
|
+
_reembedKey(entityId) {
|
|
612
|
+
return `${this.prefix}:${entityId}:reembed`;
|
|
613
|
+
}
|
|
614
|
+
_globalReembedKey() {
|
|
615
|
+
return `${this.prefix}:reembed`;
|
|
616
|
+
}
|
|
617
|
+
_isReembedActive(entityId) {
|
|
618
|
+
return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
|
|
619
|
+
}
|
|
620
|
+
/** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
|
|
621
|
+
_isAnyMaintenanceActiveWithSuffix(suffix) {
|
|
622
|
+
const entityKeyPrefix = `${this.prefix}:`;
|
|
623
|
+
for (const k of this.activeMaintenanceJobs) {
|
|
624
|
+
if (k.startsWith(entityKeyPrefix) && k.endsWith(suffix)) return true;
|
|
625
|
+
}
|
|
626
|
+
return false;
|
|
627
|
+
}
|
|
628
|
+
/** Returns true if any ingest job is active for the given entity. */
|
|
629
|
+
_isIngestActiveFor(entityId) {
|
|
630
|
+
const entityKeyPrefix = `${this.prefix}:${entityId}:`;
|
|
631
|
+
for (const k of this.activeIngestJobs) {
|
|
632
|
+
if (k.startsWith(entityKeyPrefix)) return true;
|
|
633
|
+
}
|
|
634
|
+
return false;
|
|
635
|
+
}
|
|
494
636
|
_validatePruneDuration(value, name) {
|
|
495
637
|
if (value !== null && value !== void 0 && (typeof value !== "number" || !isFinite(value) || value < 0)) {
|
|
496
638
|
throw new Error(`Invalid ${name}: must be a non-negative finite number or null`);
|
|
@@ -513,6 +655,8 @@ var WikiMemory = class {
|
|
|
513
655
|
blockingOperation = "librarian";
|
|
514
656
|
} else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
515
657
|
blockingOperation = "heal";
|
|
658
|
+
} else if (this._isReembedActive(entityId)) {
|
|
659
|
+
blockingOperation = "reembed";
|
|
516
660
|
} else if (isIngestRunning) {
|
|
517
661
|
blockingOperation = "ingest";
|
|
518
662
|
}
|
|
@@ -558,91 +702,145 @@ var WikiMemory = class {
|
|
|
558
702
|
await this.db.execAsync(`PRAGMA wal_checkpoint(TRUNCATE)`);
|
|
559
703
|
await this.db.execAsync(`VACUUM`);
|
|
560
704
|
}
|
|
705
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
561
706
|
return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
|
|
562
707
|
} finally {
|
|
563
708
|
this.activeMaintenanceJobs.delete(pruneKey);
|
|
564
709
|
}
|
|
565
710
|
}
|
|
566
|
-
|
|
567
|
-
const
|
|
568
|
-
const
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
711
|
+
async read(entityId, query) {
|
|
712
|
+
const maxResults = this.options.config?.maxResults ?? this.options.config?.maxFtsResults ?? 10;
|
|
713
|
+
const embedFn = this.options.llmProvider.embed;
|
|
714
|
+
const trimmedQuery = query.trim();
|
|
715
|
+
let facts = [];
|
|
716
|
+
if (trimmedQuery) {
|
|
717
|
+
let usedEmbed = false;
|
|
718
|
+
if (embedFn) {
|
|
719
|
+
try {
|
|
720
|
+
const queryVec = await embedFn(trimmedQuery);
|
|
721
|
+
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
722
|
+
throw new Error(
|
|
723
|
+
"embed() returned an empty or non-finite vector. Falling back to keyword search."
|
|
724
|
+
);
|
|
725
|
+
}
|
|
726
|
+
const storedDimRow = await this.db.getFirstAsync(
|
|
727
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
|
|
728
|
+
);
|
|
729
|
+
if (storedDimRow) {
|
|
730
|
+
const storedDim = parseInt(storedDimRow.value, 10);
|
|
731
|
+
if (storedDim !== queryVec.length) {
|
|
732
|
+
throw new Error(
|
|
733
|
+
`Embedding dimension mismatch: stored ${storedDim}, query has ${queryVec.length}. Call runReembed() to rebuild embeddings with the new model.`
|
|
734
|
+
);
|
|
590
735
|
}
|
|
591
736
|
}
|
|
737
|
+
const scoreRows = await this.db.getAllAsync(
|
|
738
|
+
`SELECT id, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
739
|
+
[entityId]
|
|
740
|
+
);
|
|
741
|
+
const scored = scoreRows.map((row) => {
|
|
742
|
+
let score = 0;
|
|
743
|
+
if (row.embedding) {
|
|
744
|
+
try {
|
|
745
|
+
const parsed = JSON.parse(row.embedding);
|
|
746
|
+
if (Array.isArray(parsed) && parsed.length === queryVec.length && parsed.every((v) => typeof v === "number" && isFinite(v))) {
|
|
747
|
+
score = cosineSimilarity(queryVec, parsed);
|
|
748
|
+
}
|
|
749
|
+
} catch {
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
return { row, score };
|
|
753
|
+
});
|
|
754
|
+
scored.sort((a, b) => {
|
|
755
|
+
const scoreDiff = b.score - a.score;
|
|
756
|
+
if (scoreDiff !== 0) {
|
|
757
|
+
return scoreDiff;
|
|
758
|
+
}
|
|
759
|
+
const updatedAtDiff = (b.row.updated_at ?? 0) - (a.row.updated_at ?? 0);
|
|
760
|
+
if (updatedAtDiff !== 0) {
|
|
761
|
+
return updatedAtDiff;
|
|
762
|
+
}
|
|
763
|
+
const accessCountDiff = (b.row.access_count ?? 0) - (a.row.access_count ?? 0);
|
|
764
|
+
if (accessCountDiff !== 0) {
|
|
765
|
+
return accessCountDiff;
|
|
766
|
+
}
|
|
767
|
+
return a.row.id.localeCompare(b.row.id);
|
|
768
|
+
});
|
|
769
|
+
const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
|
|
770
|
+
if (topIds.length > 0) {
|
|
771
|
+
const placeholders = topIds.map(() => "?").join(",");
|
|
772
|
+
const fullRows = await this.db.getAllAsync(
|
|
773
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
774
|
+
topIds
|
|
775
|
+
);
|
|
776
|
+
const byId = new Map(fullRows.map((r) => [r.id, r]));
|
|
777
|
+
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
778
|
+
}
|
|
779
|
+
usedEmbed = true;
|
|
780
|
+
} catch (err) {
|
|
781
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
782
|
+
this.options.onRetrievalFallback?.(error);
|
|
592
783
|
}
|
|
593
784
|
}
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
785
|
+
if (!usedEmbed) {
|
|
786
|
+
const results = this.miniSearch.search(trimmedQuery, {
|
|
787
|
+
filter: (r) => r.entity_id === entityId,
|
|
788
|
+
combineWith: "OR"
|
|
789
|
+
});
|
|
790
|
+
const topIds = results.slice(0, maxResults).map((r) => r.id);
|
|
791
|
+
if (topIds.length > 0) {
|
|
792
|
+
const placeholders = topIds.map(() => "?").join(",");
|
|
793
|
+
const rows = await this.db.getAllAsync(
|
|
794
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
795
|
+
topIds
|
|
796
|
+
);
|
|
797
|
+
const byId = new Map(rows.map((r) => [r.id, r]));
|
|
798
|
+
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
799
|
+
}
|
|
800
|
+
}
|
|
801
|
+
if (facts.length > 0) {
|
|
802
|
+
const ids = facts.map((f) => f.id);
|
|
803
|
+
const placeholders = ids.map(() => "?").join(",");
|
|
804
|
+
const now = Date.now();
|
|
805
|
+
await this.db.runAsync(
|
|
806
|
+
`UPDATE ${this.prefix}entries
|
|
807
|
+
SET access_count = access_count + 1, last_accessed_at = ?
|
|
808
|
+
WHERE id IN (${placeholders})`,
|
|
809
|
+
[now, ...ids]
|
|
810
|
+
);
|
|
811
|
+
}
|
|
611
812
|
} else {
|
|
612
|
-
|
|
613
|
-
SELECT * FROM ${this.prefix}entries
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags
|
|
644
|
-
}));
|
|
645
|
-
return { facts, tasks, events: events.reverse() };
|
|
813
|
+
facts = await this.db.getAllAsync(
|
|
814
|
+
`SELECT * FROM ${this.prefix}entries
|
|
815
|
+
WHERE entity_id = ? AND deleted_at IS NULL
|
|
816
|
+
ORDER BY updated_at DESC
|
|
817
|
+
LIMIT ?`,
|
|
818
|
+
[entityId, maxResults]
|
|
819
|
+
);
|
|
820
|
+
}
|
|
821
|
+
const [tasks, events] = await Promise.all([
|
|
822
|
+
this.db.getAllAsync(
|
|
823
|
+
`SELECT * FROM ${this.prefix}tasks
|
|
824
|
+
WHERE entity_id = ? AND status IN ('pending', 'in_progress') AND deleted_at IS NULL
|
|
825
|
+
ORDER BY priority DESC, created_at ASC`,
|
|
826
|
+
[entityId]
|
|
827
|
+
),
|
|
828
|
+
this.db.getAllAsync(
|
|
829
|
+
`SELECT * FROM ${this.prefix}events
|
|
830
|
+
WHERE entity_id = ?
|
|
831
|
+
ORDER BY created_at DESC
|
|
832
|
+
LIMIT 10`,
|
|
833
|
+
[entityId]
|
|
834
|
+
)
|
|
835
|
+
]);
|
|
836
|
+
const parsedFacts = facts.map((f) => {
|
|
837
|
+
const { embedding: _embedding, ...rest } = f;
|
|
838
|
+
return {
|
|
839
|
+
...rest,
|
|
840
|
+
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
841
|
+
};
|
|
842
|
+
});
|
|
843
|
+
return { facts: parsedFacts, tasks, events: events.reverse() };
|
|
646
844
|
}
|
|
647
845
|
async getMemoryBundle(entityId) {
|
|
648
846
|
return this._getFullBundle(entityId, { maxEvents: 10 });
|
|
@@ -715,10 +913,13 @@ var WikiMemory = class {
|
|
|
715
913
|
ORDER BY updated_at DESC
|
|
716
914
|
LIMIT 100
|
|
717
915
|
`, [entityId]);
|
|
718
|
-
const currentFacts = currentFactsRows.map((f) =>
|
|
719
|
-
...f
|
|
720
|
-
|
|
721
|
-
|
|
916
|
+
const currentFacts = currentFactsRows.map((f) => {
|
|
917
|
+
const { embedding: _embedding, ...rest } = f;
|
|
918
|
+
return {
|
|
919
|
+
...rest,
|
|
920
|
+
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
921
|
+
};
|
|
922
|
+
});
|
|
722
923
|
const userPrompt = `Events:
|
|
723
924
|
${JSON.stringify(events.reverse(), null, 2)}
|
|
724
925
|
|
|
@@ -734,6 +935,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
734
935
|
const validFacts = facts.map(validateFact).filter((f) => f !== null);
|
|
735
936
|
const validTasks = tasks.map(validateTask).filter((t) => t !== null);
|
|
736
937
|
const now = Date.now();
|
|
938
|
+
const insertedFacts = [];
|
|
737
939
|
await this.db.withTransactionAsync(async () => {
|
|
738
940
|
for (const fact of validFacts) {
|
|
739
941
|
const newTokens = titleTokens(fact.title);
|
|
@@ -756,6 +958,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
756
958
|
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
|
|
757
959
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
758
960
|
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
|
|
961
|
+
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
759
962
|
}
|
|
760
963
|
for (const task of validTasks) {
|
|
761
964
|
const id = generateId("task_");
|
|
@@ -765,6 +968,10 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
765
968
|
`, [id, entityId, task.description, "pending", task.priority, now, now]);
|
|
766
969
|
}
|
|
767
970
|
});
|
|
971
|
+
for (const fact of insertedFacts) {
|
|
972
|
+
await this.embedFact(fact);
|
|
973
|
+
}
|
|
974
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
768
975
|
}
|
|
769
976
|
async _doRunHeal(entityId) {
|
|
770
977
|
const now = Date.now();
|
|
@@ -801,7 +1008,10 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
801
1008
|
const healCandidates = allFactsRows.filter((f) => f.source_type !== "user_document");
|
|
802
1009
|
const documentAnchors = allFactsRows.filter((f) => f.source_type === "user_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
|
|
803
1010
|
const userPrompt = `Heal Candidates:
|
|
804
|
-
${JSON.stringify(healCandidates.map((f) =>
|
|
1011
|
+
${JSON.stringify(healCandidates.map((f) => {
|
|
1012
|
+
const { embedding: _embedding, ...rest } = f;
|
|
1013
|
+
return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
|
|
1014
|
+
}), null, 2)}
|
|
805
1015
|
|
|
806
1016
|
Document Anchors (DO NOT MODIFY OR DELETE):
|
|
807
1017
|
${JSON.stringify(documentAnchors, null, 2)}
|
|
@@ -825,6 +1035,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
825
1035
|
const safeDowngraded = downgraded.filter((id) => mutableIds.has(id));
|
|
826
1036
|
const safeDeleted = deleted.filter((id) => mutableIds.has(id));
|
|
827
1037
|
const validNewFacts = newFacts.map(validateFact).filter((f) => f !== null);
|
|
1038
|
+
const insertedFacts = [];
|
|
828
1039
|
await this.db.withTransactionAsync(async () => {
|
|
829
1040
|
for (const id of safeDowngraded) {
|
|
830
1041
|
await this.db.runAsync(`UPDATE ${this.prefix}entries SET confidence = 'tentative', updated_at = ? WHERE id = ? AND entity_id = ?`, [now, id, entityId]);
|
|
@@ -838,8 +1049,13 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
838
1049
|
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
|
|
839
1050
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
840
1051
|
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
|
|
1052
|
+
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
841
1053
|
}
|
|
842
1054
|
});
|
|
1055
|
+
for (const fact of insertedFacts) {
|
|
1056
|
+
await this.embedFact(fact);
|
|
1057
|
+
}
|
|
1058
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
843
1059
|
}
|
|
844
1060
|
async runLibrarian(entityId) {
|
|
845
1061
|
const jobKey = this._librarianKey(entityId);
|
|
@@ -849,6 +1065,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
849
1065
|
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
850
1066
|
throw new WikiBusyError("prune", entityId);
|
|
851
1067
|
}
|
|
1068
|
+
if (this._isReembedActive(entityId)) {
|
|
1069
|
+
throw new WikiBusyError("reembed", entityId);
|
|
1070
|
+
}
|
|
852
1071
|
this.activeMaintenanceJobs.add(jobKey);
|
|
853
1072
|
try {
|
|
854
1073
|
await this._doRunLibrarian(entityId);
|
|
@@ -864,6 +1083,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
864
1083
|
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
865
1084
|
throw new WikiBusyError("prune", entityId);
|
|
866
1085
|
}
|
|
1086
|
+
if (this._isReembedActive(entityId)) {
|
|
1087
|
+
throw new WikiBusyError("reembed", entityId);
|
|
1088
|
+
}
|
|
867
1089
|
this.activeMaintenanceJobs.add(jobKey);
|
|
868
1090
|
try {
|
|
869
1091
|
await this._doRunHeal(entityId);
|
|
@@ -871,6 +1093,69 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
871
1093
|
this.activeMaintenanceJobs.delete(jobKey);
|
|
872
1094
|
}
|
|
873
1095
|
}
|
|
1096
|
+
async runReembed(entityId) {
|
|
1097
|
+
const embedFn = this.options.llmProvider.embed;
|
|
1098
|
+
if (!embedFn) return { embedded: 0, skipped: 0 };
|
|
1099
|
+
const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
|
|
1100
|
+
if (this.activeMaintenanceJobs.has(reembedKey)) {
|
|
1101
|
+
throw new WikiBusyError("reembed", entityId ?? "*");
|
|
1102
|
+
}
|
|
1103
|
+
if (entityId) {
|
|
1104
|
+
if (this.activeMaintenanceJobs.has(this._globalReembedKey())) {
|
|
1105
|
+
throw new WikiBusyError("reembed", entityId);
|
|
1106
|
+
}
|
|
1107
|
+
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1108
|
+
throw new WikiBusyError("prune", entityId);
|
|
1109
|
+
}
|
|
1110
|
+
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
1111
|
+
throw new WikiBusyError("librarian", entityId);
|
|
1112
|
+
}
|
|
1113
|
+
if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
1114
|
+
throw new WikiBusyError("heal", entityId);
|
|
1115
|
+
}
|
|
1116
|
+
if (this._isIngestActiveFor(entityId)) {
|
|
1117
|
+
throw new WikiBusyError("ingest", entityId);
|
|
1118
|
+
}
|
|
1119
|
+
} else {
|
|
1120
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
|
|
1121
|
+
throw new WikiBusyError("reembed", "*");
|
|
1122
|
+
}
|
|
1123
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":prune")) {
|
|
1124
|
+
throw new WikiBusyError("prune", "*");
|
|
1125
|
+
}
|
|
1126
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":librarian")) {
|
|
1127
|
+
throw new WikiBusyError("librarian", "*");
|
|
1128
|
+
}
|
|
1129
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":heal")) {
|
|
1130
|
+
throw new WikiBusyError("heal", "*");
|
|
1131
|
+
}
|
|
1132
|
+
if (this.activeIngestJobs.size > 0) {
|
|
1133
|
+
throw new WikiBusyError("ingest", "*");
|
|
1134
|
+
}
|
|
1135
|
+
}
|
|
1136
|
+
this.activeMaintenanceJobs.add(reembedKey);
|
|
1137
|
+
try {
|
|
1138
|
+
const where = entityId ? `entity_id = ? AND deleted_at IS NULL` : `deleted_at IS NULL`;
|
|
1139
|
+
const params = entityId ? [entityId] : [];
|
|
1140
|
+
const rows = await this.db.getAllAsync(
|
|
1141
|
+
`SELECT * FROM ${this.prefix}entries WHERE ${where}`,
|
|
1142
|
+
params
|
|
1143
|
+
);
|
|
1144
|
+
let embedded = 0;
|
|
1145
|
+
let skipped = 0;
|
|
1146
|
+
for (const row of rows) {
|
|
1147
|
+
const success = await this.embedFact(row);
|
|
1148
|
+
if (success) embedded++;
|
|
1149
|
+
else skipped++;
|
|
1150
|
+
}
|
|
1151
|
+
if (embedded > 0) {
|
|
1152
|
+
await this._reconcileEmbeddingDimension();
|
|
1153
|
+
}
|
|
1154
|
+
return { embedded, skipped };
|
|
1155
|
+
} finally {
|
|
1156
|
+
this.activeMaintenanceJobs.delete(reembedKey);
|
|
1157
|
+
}
|
|
1158
|
+
}
|
|
874
1159
|
getEntityStatus(entityId) {
|
|
875
1160
|
const ingestPrefix = `${this.prefix}:${entityId}:`;
|
|
876
1161
|
let ingesting = false;
|
|
@@ -901,10 +1186,13 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
901
1186
|
),
|
|
902
1187
|
this.db.getAllAsync(eventsQuery, eventsParams)
|
|
903
1188
|
]);
|
|
904
|
-
const facts = factsRaw.map((f) =>
|
|
905
|
-
...f
|
|
906
|
-
|
|
907
|
-
|
|
1189
|
+
const facts = factsRaw.map((f) => {
|
|
1190
|
+
const { embedding: _embedding, ...rest } = f;
|
|
1191
|
+
return {
|
|
1192
|
+
...rest,
|
|
1193
|
+
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
1194
|
+
};
|
|
1195
|
+
});
|
|
908
1196
|
const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
|
|
909
1197
|
return { facts, tasks, events };
|
|
910
1198
|
}
|
|
@@ -1043,7 +1331,18 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1043
1331
|
);
|
|
1044
1332
|
}
|
|
1045
1333
|
});
|
|
1334
|
+
for (const fact of bundle.facts) {
|
|
1335
|
+
if (!fact.deleted_at) {
|
|
1336
|
+
await this.embedFact({
|
|
1337
|
+
id: fact.id,
|
|
1338
|
+
title: fact.title,
|
|
1339
|
+
body: fact.body,
|
|
1340
|
+
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
1341
|
+
});
|
|
1342
|
+
}
|
|
1343
|
+
}
|
|
1046
1344
|
}
|
|
1345
|
+
await this.rebuildMiniSearchIndex();
|
|
1047
1346
|
}
|
|
1048
1347
|
async forget(entityId, params) {
|
|
1049
1348
|
const now = Date.now();
|
|
@@ -1092,6 +1391,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1092
1391
|
if (taskResult) deletedTasks += taskResult.changes;
|
|
1093
1392
|
if (refResult) deletedEntries += refResult.changes;
|
|
1094
1393
|
}
|
|
1394
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1095
1395
|
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
1096
1396
|
}
|
|
1097
1397
|
async ingestDocument(entityId, params) {
|
|
@@ -1117,6 +1417,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1117
1417
|
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1118
1418
|
throw new WikiBusyError("prune", entityId);
|
|
1119
1419
|
}
|
|
1420
|
+
if (this._isReembedActive(entityId)) {
|
|
1421
|
+
throw new WikiBusyError("reembed", entityId);
|
|
1422
|
+
}
|
|
1120
1423
|
this.activeIngestJobs.add(jobKey);
|
|
1121
1424
|
try {
|
|
1122
1425
|
const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
|
|
@@ -1148,6 +1451,7 @@ ${chunk}`;
|
|
|
1148
1451
|
}
|
|
1149
1452
|
}
|
|
1150
1453
|
const now = Date.now();
|
|
1454
|
+
const insertedFacts = [];
|
|
1151
1455
|
await this.db.withTransactionAsync(async () => {
|
|
1152
1456
|
await this.db.runAsync(
|
|
1153
1457
|
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
@@ -1160,8 +1464,13 @@ ${chunk}`;
|
|
|
1160
1464
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1161
1465
|
[id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]
|
|
1162
1466
|
);
|
|
1467
|
+
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1163
1468
|
}
|
|
1164
1469
|
});
|
|
1470
|
+
for (const fact of insertedFacts) {
|
|
1471
|
+
await this.embedFact(fact);
|
|
1472
|
+
}
|
|
1473
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1165
1474
|
return { truncated, chunks: chunks.length };
|
|
1166
1475
|
} finally {
|
|
1167
1476
|
this.activeIngestJobs.delete(jobKey);
|