@equationalapplications/core-llm-wiki 2.4.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,3 +1,5 @@
1
+ import MiniSearch from 'minisearch';
2
+
1
3
  // src/db/schema.ts
2
4
  async function setupDatabase(db, prefix) {
3
5
  await db.execAsync(`
@@ -15,7 +17,8 @@ async function setupDatabase(db, prefix) {
15
17
  updated_at INTEGER NOT NULL,
16
18
  last_accessed_at INTEGER,
17
19
  access_count INTEGER NOT NULL DEFAULT 0,
18
- deleted_at INTEGER
20
+ deleted_at INTEGER,
21
+ embedding TEXT
19
22
  );
20
23
 
21
24
  CREATE INDEX IF NOT EXISTS ${prefix}entries_entity_idx ON ${prefix}entries(entity_id);
@@ -23,34 +26,6 @@ async function setupDatabase(db, prefix) {
23
26
  CREATE INDEX IF NOT EXISTS ${prefix}entries_source_hash_idx ON ${prefix}entries(entity_id, source_hash) WHERE source_hash IS NOT NULL;
24
27
  CREATE INDEX IF NOT EXISTS ${prefix}entries_updated_idx ON ${prefix}entries(updated_at DESC);
25
28
 
26
- -- FTS5 Virtual Table for full-text search
27
- CREATE VIRTUAL TABLE IF NOT EXISTS ${prefix}entries_fts USING fts5(
28
- title,
29
- body,
30
- tags,
31
- content='${prefix}entries',
32
- content_rowid='rowid',
33
- tokenize='porter unicode61'
34
- );
35
-
36
- -- Triggers to keep FTS5 in sync with entries
37
- CREATE TRIGGER IF NOT EXISTS ${prefix}entries_ai AFTER INSERT ON ${prefix}entries BEGIN
38
- INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
39
- VALUES (new.rowid, new.title, new.body, new.tags);
40
- END;
41
-
42
- CREATE TRIGGER IF NOT EXISTS ${prefix}entries_ad AFTER DELETE ON ${prefix}entries BEGIN
43
- INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
44
- VALUES ('delete', old.rowid, old.title, old.body, old.tags);
45
- END;
46
-
47
- CREATE TRIGGER IF NOT EXISTS ${prefix}entries_au AFTER UPDATE ON ${prefix}entries BEGIN
48
- INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
49
- VALUES ('delete', old.rowid, old.title, old.body, old.tags);
50
- INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
51
- VALUES (new.rowid, new.title, new.body, new.tags);
52
- END;
53
-
54
29
  CREATE TABLE IF NOT EXISTS ${prefix}tasks (
55
30
  id TEXT PRIMARY KEY,
56
31
  entity_id TEXT NOT NULL,
@@ -93,7 +68,13 @@ async function setupDatabase(db, prefix) {
93
68
  var MIGRATIONS = [
94
69
  {
95
70
  version: 1,
96
- description: "Rebuild FTS5 with porter unicode61 tokenizer",
71
+ description: "Rebuild FTS5 with porter unicode61 tokenizer (superseded by v2)",
72
+ run: async (_db, _prefix) => {
73
+ }
74
+ },
75
+ {
76
+ version: 2,
77
+ description: "Remove FTS5; add embedding column for semantic retrieval",
97
78
  run: async (db, prefix) => {
98
79
  await db.withTransactionAsync(async () => {
99
80
  await db.execAsync(`
@@ -101,32 +82,14 @@ var MIGRATIONS = [
101
82
  DROP TRIGGER IF EXISTS ${prefix}entries_ad;
102
83
  DROP TRIGGER IF EXISTS ${prefix}entries_au;
103
84
  DROP TABLE IF EXISTS ${prefix}entries_fts;
104
- CREATE VIRTUAL TABLE ${prefix}entries_fts USING fts5(
105
- title,
106
- body,
107
- tags,
108
- content='${prefix}entries',
109
- content_rowid='rowid',
110
- tokenize='porter unicode61'
111
- );
112
- INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
113
- SELECT rowid, title, body, tags FROM ${prefix}entries;
114
- CREATE TRIGGER ${prefix}entries_ai AFTER INSERT ON ${prefix}entries BEGIN
115
- INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
116
- VALUES (new.rowid, new.title, new.body, new.tags);
117
- END;
118
- CREATE TRIGGER ${prefix}entries_ad AFTER DELETE ON ${prefix}entries BEGIN
119
- INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
120
- VALUES ('delete', old.rowid, old.title, old.body, old.tags);
121
- END;
122
- CREATE TRIGGER ${prefix}entries_au AFTER UPDATE ON ${prefix}entries BEGIN
123
- INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
124
- VALUES ('delete', old.rowid, old.title, old.body, old.tags);
125
- INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
126
- VALUES (new.rowid, new.title, new.body, new.tags);
127
- END;
128
85
  `);
129
86
  });
87
+ const cols = await db.getAllAsync(
88
+ `PRAGMA table_info(${prefix}entries)`
89
+ );
90
+ if (!cols.some((c) => c.name === "embedding")) {
91
+ await db.execAsync(`ALTER TABLE ${prefix}entries ADD COLUMN embedding TEXT`);
92
+ }
130
93
  }
131
94
  }
132
95
  ];
@@ -172,6 +135,19 @@ Return ONLY a valid JSON object matching this schema:
172
135
  }
173
136
  Extract verbatim factual content. Do not return markdown, just raw JSON.`;
174
137
 
138
+ // src/utils/cosine.ts
139
+ function cosineSimilarity(a, b) {
140
+ let dot = 0, normA = 0, normB = 0;
141
+ const len = Math.min(a.length, b.length);
142
+ for (let i = 0; i < len; i++) {
143
+ dot += a[i] * b[i];
144
+ normA += a[i] * a[i];
145
+ normB += b[i] * b[i];
146
+ }
147
+ const denom = Math.sqrt(normA) * Math.sqrt(normB);
148
+ return denom === 0 ? 0 : dot / denom;
149
+ }
150
+
175
151
  // src/WikiMemory.ts
176
152
  function parseJsonResponse(text) {
177
153
  const firstBrace = text.indexOf("{");
@@ -380,10 +356,146 @@ var WikiMemory = class {
380
356
  constructor(db, options) {
381
357
  this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
382
358
  this.activeIngestJobs = /* @__PURE__ */ new Set();
359
+ this.miniSearch = new MiniSearch({
360
+ fields: ["title", "body", "tags"],
361
+ storeFields: ["entity_id"],
362
+ searchOptions: {
363
+ boost: { title: 2 },
364
+ fuzzy: 0.2,
365
+ prefix: true
366
+ }
367
+ });
368
+ this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
383
369
  this.db = db;
384
370
  this.options = options;
385
371
  this.prefix = options.config?.tablePrefix || "llm_wiki_";
386
372
  }
373
+ normalizeMiniSearchRow(row) {
374
+ return {
375
+ id: row.id,
376
+ entity_id: row.entity_id,
377
+ title: row.title,
378
+ body: row.body,
379
+ tags: (() => {
380
+ try {
381
+ const parsed = JSON.parse(row.tags);
382
+ return Array.isArray(parsed) ? parsed.join(" ") : row.tags;
383
+ } catch {
384
+ return row.tags;
385
+ }
386
+ })()
387
+ };
388
+ }
389
+ async rebuildMiniSearchIndex(entityId) {
390
+ if (entityId) {
391
+ const rows2 = await this.db.getAllAsync(
392
+ `SELECT id, entity_id, title, body, tags FROM ${this.prefix}entries WHERE deleted_at IS NULL AND entity_id = ?`,
393
+ [entityId]
394
+ );
395
+ const previousIds = this.miniSearchEntryIdsByEntity.get(entityId);
396
+ if (previousIds) {
397
+ for (const id of previousIds) {
398
+ this.miniSearch.discard(id);
399
+ }
400
+ }
401
+ const documents2 = rows2.map((row) => this.normalizeMiniSearchRow(row));
402
+ if (documents2.length > 0) {
403
+ this.miniSearch.addAll(documents2);
404
+ }
405
+ this.miniSearchEntryIdsByEntity.set(entityId, new Set(documents2.map((document) => document.id)));
406
+ return;
407
+ }
408
+ const rows = await this.db.getAllAsync(`SELECT id, entity_id, title, body, tags FROM ${this.prefix}entries WHERE deleted_at IS NULL`);
409
+ this.miniSearch.removeAll();
410
+ this.miniSearchEntryIdsByEntity.clear();
411
+ const documents = rows.map((row) => this.normalizeMiniSearchRow(row));
412
+ if (documents.length > 0) {
413
+ this.miniSearch.addAll(documents);
414
+ }
415
+ for (const document of documents) {
416
+ const ids = this.miniSearchEntryIdsByEntity.get(document.entity_id) ?? /* @__PURE__ */ new Set();
417
+ ids.add(document.id);
418
+ this.miniSearchEntryIdsByEntity.set(document.entity_id, ids);
419
+ }
420
+ }
421
+ async storeEmbeddingDimension(dim) {
422
+ const existing = await this.db.getFirstAsync(
423
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
424
+ );
425
+ if (existing) {
426
+ const storedDim = parseInt(existing.value, 10);
427
+ if (storedDim !== dim) {
428
+ console.warn(
429
+ `[WikiMemory] Embedding dimension mismatch: stored ${storedDim}, got ${dim}. Call runReembed() to rebuild embeddings with the new model.`
430
+ );
431
+ await this.db.runAsync(
432
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
433
+ [String(dim)]
434
+ );
435
+ } else {
436
+ await this.db.runAsync(
437
+ `DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
438
+ );
439
+ }
440
+ } else {
441
+ await this.db.runAsync(
442
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
443
+ [String(dim)]
444
+ );
445
+ }
446
+ }
447
+ /**
448
+ * After a successful runReembed(), promote the pending `embedding_dimension_mismatch`
449
+ * value to the canonical `embedding_dimension` key and clear the mismatch flag.
450
+ * This ensures future read() calls use embedding-based retrieval rather than staying
451
+ * stuck on the MiniSearch fallback.
452
+ */
453
+ async _reconcileEmbeddingDimension() {
454
+ const mismatch = await this.db.getFirstAsync(
455
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
456
+ );
457
+ if (mismatch) {
458
+ await this.db.runAsync(
459
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
460
+ [mismatch.value]
461
+ );
462
+ await this.db.runAsync(
463
+ `DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
464
+ );
465
+ }
466
+ }
467
+ async embedFact(fact) {
468
+ const embedFn = this.options.llmProvider.embed;
469
+ if (!embedFn) return false;
470
+ let tagsStr;
471
+ if (Array.isArray(fact.tags)) {
472
+ tagsStr = fact.tags.join(" ");
473
+ } else {
474
+ try {
475
+ const parsed = JSON.parse(fact.tags);
476
+ tagsStr = Array.isArray(parsed) ? parsed.join(" ") : fact.tags;
477
+ } catch {
478
+ tagsStr = fact.tags;
479
+ }
480
+ }
481
+ const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();
482
+ try {
483
+ const vector = await embedFn(text);
484
+ if (vector.length === 0 || !vector.every((v) => typeof v === "number" && isFinite(v))) {
485
+ console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
486
+ return false;
487
+ }
488
+ await this.storeEmbeddingDimension(vector.length);
489
+ await this.db.runAsync(
490
+ `UPDATE ${this.prefix}entries SET embedding = ? WHERE id = ?`,
491
+ [JSON.stringify(vector), fact.id]
492
+ );
493
+ return true;
494
+ } catch (err) {
495
+ console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
496
+ return false;
497
+ }
498
+ }
387
499
  _librarianKey(entityId) {
388
500
  return `${this.prefix}:${entityId}:librarian`;
389
501
  }
@@ -465,6 +577,7 @@ var WikiMemory = class {
465
577
  }
466
578
  }
467
579
  });
580
+ await this.rebuildMiniSearchIndex();
468
581
  }
469
582
  async hasChanged(entityId, sourceRef, sourceHash) {
470
583
  const normalizedRef = normalizeSourceRef(sourceRef);
@@ -489,6 +602,31 @@ var WikiMemory = class {
489
602
  _pruneKey(entityId) {
490
603
  return `${this.prefix}:${entityId}:prune`;
491
604
  }
605
+ _reembedKey(entityId) {
606
+ return `${this.prefix}:${entityId}:reembed`;
607
+ }
608
+ _globalReembedKey() {
609
+ return `${this.prefix}:reembed`;
610
+ }
611
+ _isReembedActive(entityId) {
612
+ return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
613
+ }
614
+ /** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
615
+ _isAnyMaintenanceActiveWithSuffix(suffix) {
616
+ const entityKeyPrefix = `${this.prefix}:`;
617
+ for (const k of this.activeMaintenanceJobs) {
618
+ if (k.startsWith(entityKeyPrefix) && k.endsWith(suffix)) return true;
619
+ }
620
+ return false;
621
+ }
622
+ /** Returns true if any ingest job is active for the given entity. */
623
+ _isIngestActiveFor(entityId) {
624
+ const entityKeyPrefix = `${this.prefix}:${entityId}:`;
625
+ for (const k of this.activeIngestJobs) {
626
+ if (k.startsWith(entityKeyPrefix)) return true;
627
+ }
628
+ return false;
629
+ }
492
630
  _validatePruneDuration(value, name) {
493
631
  if (value !== null && value !== void 0 && (typeof value !== "number" || !isFinite(value) || value < 0)) {
494
632
  throw new Error(`Invalid ${name}: must be a non-negative finite number or null`);
@@ -511,6 +649,8 @@ var WikiMemory = class {
511
649
  blockingOperation = "librarian";
512
650
  } else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
513
651
  blockingOperation = "heal";
652
+ } else if (this._isReembedActive(entityId)) {
653
+ blockingOperation = "reembed";
514
654
  } else if (isIngestRunning) {
515
655
  blockingOperation = "ingest";
516
656
  }
@@ -556,91 +696,145 @@ var WikiMemory = class {
556
696
  await this.db.execAsync(`PRAGMA wal_checkpoint(TRUNCATE)`);
557
697
  await this.db.execAsync(`VACUUM`);
558
698
  }
699
+ await this.rebuildMiniSearchIndex(entityId);
559
700
  return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
560
701
  } finally {
561
702
  this.activeMaintenanceJobs.delete(pruneKey);
562
703
  }
563
704
  }
564
- formatSearchQuery(query) {
565
- const normalizeTokens = (value) => value.toLowerCase().replace(/[^a-z0-9\s]/g, "").split(/\s+/).filter((t) => t.length >= 3);
566
- const baseTokens = normalizeTokens(query);
567
- if (baseTokens.length === 0) return "";
568
- const synonymMap = this.options.config?.synonymMap;
569
- const expanded = [];
570
- const seen = /* @__PURE__ */ new Set();
571
- const pushNormalized = (value) => {
572
- for (const token of normalizeTokens(value)) {
573
- if (expanded.length >= 12) return false;
574
- if (seen.has(token)) continue;
575
- seen.add(token);
576
- expanded.push(token);
577
- }
578
- return true;
579
- };
580
- for (const t of baseTokens) {
581
- if (!pushNormalized(t)) break;
582
- if (synonymMap) {
583
- const synonyms = synonymMap[t];
584
- if (Array.isArray(synonyms)) {
585
- for (const s of synonyms) {
586
- if (typeof s === "string") {
587
- if (!pushNormalized(s)) break;
705
+ async read(entityId, query) {
706
+ const maxResults = this.options.config?.maxResults ?? this.options.config?.maxFtsResults ?? 10;
707
+ const embedFn = this.options.llmProvider.embed;
708
+ const trimmedQuery = query.trim();
709
+ let facts = [];
710
+ if (trimmedQuery) {
711
+ let usedEmbed = false;
712
+ if (embedFn) {
713
+ try {
714
+ const queryVec = await embedFn(trimmedQuery);
715
+ if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
716
+ throw new Error(
717
+ "embed() returned an empty or non-finite vector. Falling back to keyword search."
718
+ );
719
+ }
720
+ const storedDimRow = await this.db.getFirstAsync(
721
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
722
+ );
723
+ if (storedDimRow) {
724
+ const storedDim = parseInt(storedDimRow.value, 10);
725
+ if (storedDim !== queryVec.length) {
726
+ throw new Error(
727
+ `Embedding dimension mismatch: stored ${storedDim}, query has ${queryVec.length}. Call runReembed() to rebuild embeddings with the new model.`
728
+ );
729
+ }
730
+ }
731
+ const scoreRows = await this.db.getAllAsync(
732
+ `SELECT id, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
733
+ [entityId]
734
+ );
735
+ const scored = scoreRows.map((row) => {
736
+ let score = 0;
737
+ if (row.embedding) {
738
+ try {
739
+ const parsed = JSON.parse(row.embedding);
740
+ if (Array.isArray(parsed) && parsed.length === queryVec.length && parsed.every((v) => typeof v === "number" && isFinite(v))) {
741
+ score = cosineSimilarity(queryVec, parsed);
742
+ }
743
+ } catch {
744
+ }
745
+ }
746
+ return { row, score };
747
+ });
748
+ scored.sort((a, b) => {
749
+ const scoreDiff = b.score - a.score;
750
+ if (scoreDiff !== 0) {
751
+ return scoreDiff;
752
+ }
753
+ const updatedAtDiff = (b.row.updated_at ?? 0) - (a.row.updated_at ?? 0);
754
+ if (updatedAtDiff !== 0) {
755
+ return updatedAtDiff;
756
+ }
757
+ const accessCountDiff = (b.row.access_count ?? 0) - (a.row.access_count ?? 0);
758
+ if (accessCountDiff !== 0) {
759
+ return accessCountDiff;
588
760
  }
761
+ return a.row.id.localeCompare(b.row.id);
762
+ });
763
+ const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
764
+ if (topIds.length > 0) {
765
+ const placeholders = topIds.map(() => "?").join(",");
766
+ const fullRows = await this.db.getAllAsync(
767
+ `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
768
+ topIds
769
+ );
770
+ const byId = new Map(fullRows.map((r) => [r.id, r]));
771
+ facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
589
772
  }
773
+ usedEmbed = true;
774
+ } catch (err) {
775
+ const error = err instanceof Error ? err : new Error(String(err));
776
+ this.options.onRetrievalFallback?.(error);
590
777
  }
591
778
  }
592
- }
593
- return expanded.map((t) => `"${t}"*`).join(" OR ");
594
- }
595
- async read(entityId, query) {
596
- const ftsQuery = this.formatSearchQuery(query);
597
- const maxResults = this.options.config?.maxFtsResults || 10;
598
- let factsPromise;
599
- if (ftsQuery) {
600
- factsPromise = this.db.getAllAsync(`
601
- SELECT e.* FROM ${this.prefix}entries e
602
- JOIN ${this.prefix}entries_fts fts ON e.rowid = fts.rowid
603
- WHERE fts.${this.prefix}entries_fts MATCH ?
604
- AND e.entity_id = ?
605
- AND e.deleted_at IS NULL
606
- ORDER BY e.confidence DESC, e.access_count DESC, e.updated_at DESC
607
- LIMIT ?
608
- `, [ftsQuery, entityId, maxResults]);
779
+ if (!usedEmbed) {
780
+ const results = this.miniSearch.search(trimmedQuery, {
781
+ filter: (r) => r.entity_id === entityId,
782
+ combineWith: "OR"
783
+ });
784
+ const topIds = results.slice(0, maxResults).map((r) => r.id);
785
+ if (topIds.length > 0) {
786
+ const placeholders = topIds.map(() => "?").join(",");
787
+ const rows = await this.db.getAllAsync(
788
+ `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
789
+ topIds
790
+ );
791
+ const byId = new Map(rows.map((r) => [r.id, r]));
792
+ facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
793
+ }
794
+ }
795
+ if (facts.length > 0) {
796
+ const ids = facts.map((f) => f.id);
797
+ const placeholders = ids.map(() => "?").join(",");
798
+ const now = Date.now();
799
+ await this.db.runAsync(
800
+ `UPDATE ${this.prefix}entries
801
+ SET access_count = access_count + 1, last_accessed_at = ?
802
+ WHERE id IN (${placeholders})`,
803
+ [now, ...ids]
804
+ );
805
+ }
609
806
  } else {
610
- factsPromise = this.db.getAllAsync(`
611
- SELECT * FROM ${this.prefix}entries
612
- WHERE entity_id = ? AND deleted_at IS NULL
613
- ORDER BY updated_at DESC
614
- LIMIT ?
615
- `, [entityId, maxResults]);
616
- }
617
- const tasksPromise = this.db.getAllAsync(`
618
- SELECT * FROM ${this.prefix}tasks
619
- WHERE entity_id = ? AND status IN ('pending', 'in_progress') AND deleted_at IS NULL
620
- ORDER BY priority DESC, created_at ASC
621
- `, [entityId]);
622
- const eventsPromise = this.db.getAllAsync(`
623
- SELECT * FROM ${this.prefix}events
624
- WHERE entity_id = ?
625
- ORDER BY created_at DESC
626
- LIMIT 10
627
- `, [entityId]);
628
- const [factsRaw, tasks, events] = await Promise.all([factsPromise, tasksPromise, eventsPromise]);
629
- if (ftsQuery && factsRaw.length > 0) {
630
- const ids = factsRaw.map((f) => f.id);
631
- const placeholders = ids.map(() => "?").join(",");
632
- const now = Date.now();
633
- await this.db.runAsync(`
634
- UPDATE ${this.prefix}entries
635
- SET access_count = access_count + 1, last_accessed_at = ?
636
- WHERE id IN (${placeholders})
637
- `, [now, ...ids]);
638
- }
639
- const facts = factsRaw.map((f) => ({
640
- ...f,
641
- tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags
642
- }));
643
- return { facts, tasks, events: events.reverse() };
807
+ facts = await this.db.getAllAsync(
808
+ `SELECT * FROM ${this.prefix}entries
809
+ WHERE entity_id = ? AND deleted_at IS NULL
810
+ ORDER BY updated_at DESC
811
+ LIMIT ?`,
812
+ [entityId, maxResults]
813
+ );
814
+ }
815
+ const [tasks, events] = await Promise.all([
816
+ this.db.getAllAsync(
817
+ `SELECT * FROM ${this.prefix}tasks
818
+ WHERE entity_id = ? AND status IN ('pending', 'in_progress') AND deleted_at IS NULL
819
+ ORDER BY priority DESC, created_at ASC`,
820
+ [entityId]
821
+ ),
822
+ this.db.getAllAsync(
823
+ `SELECT * FROM ${this.prefix}events
824
+ WHERE entity_id = ?
825
+ ORDER BY created_at DESC
826
+ LIMIT 10`,
827
+ [entityId]
828
+ )
829
+ ]);
830
+ const parsedFacts = facts.map((f) => {
831
+ const { embedding: _embedding, ...rest } = f;
832
+ return {
833
+ ...rest,
834
+ tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
835
+ };
836
+ });
837
+ return { facts: parsedFacts, tasks, events: events.reverse() };
644
838
  }
645
839
  async getMemoryBundle(entityId) {
646
840
  return this._getFullBundle(entityId, { maxEvents: 10 });
@@ -713,10 +907,13 @@ var WikiMemory = class {
713
907
  ORDER BY updated_at DESC
714
908
  LIMIT 100
715
909
  `, [entityId]);
716
- const currentFacts = currentFactsRows.map((f) => ({
717
- ...f,
718
- tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags
719
- }));
910
+ const currentFacts = currentFactsRows.map((f) => {
911
+ const { embedding: _embedding, ...rest } = f;
912
+ return {
913
+ ...rest,
914
+ tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
915
+ };
916
+ });
720
917
  const userPrompt = `Events:
721
918
  ${JSON.stringify(events.reverse(), null, 2)}
722
919
 
@@ -732,6 +929,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
732
929
  const validFacts = facts.map(validateFact).filter((f) => f !== null);
733
930
  const validTasks = tasks.map(validateTask).filter((t) => t !== null);
734
931
  const now = Date.now();
932
+ const insertedFacts = [];
735
933
  await this.db.withTransactionAsync(async () => {
736
934
  for (const fact of validFacts) {
737
935
  const newTokens = titleTokens(fact.title);
@@ -754,6 +952,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
754
952
  INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
755
953
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
756
954
  `, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
955
+ insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
757
956
  }
758
957
  for (const task of validTasks) {
759
958
  const id = generateId("task_");
@@ -763,6 +962,10 @@ ${JSON.stringify(currentFacts, null, 2)}`;
763
962
  `, [id, entityId, task.description, "pending", task.priority, now, now]);
764
963
  }
765
964
  });
965
+ for (const fact of insertedFacts) {
966
+ await this.embedFact(fact);
967
+ }
968
+ await this.rebuildMiniSearchIndex(entityId);
766
969
  }
767
970
  async _doRunHeal(entityId) {
768
971
  const now = Date.now();
@@ -799,7 +1002,10 @@ ${JSON.stringify(currentFacts, null, 2)}`;
799
1002
  const healCandidates = allFactsRows.filter((f) => f.source_type !== "user_document");
800
1003
  const documentAnchors = allFactsRows.filter((f) => f.source_type === "user_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
801
1004
  const userPrompt = `Heal Candidates:
802
- ${JSON.stringify(healCandidates.map((f) => ({ ...f, tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags })), null, 2)}
1005
+ ${JSON.stringify(healCandidates.map((f) => {
1006
+ const { embedding: _embedding, ...rest } = f;
1007
+ return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
1008
+ }), null, 2)}
803
1009
 
804
1010
  Document Anchors (DO NOT MODIFY OR DELETE):
805
1011
  ${JSON.stringify(documentAnchors, null, 2)}
@@ -823,6 +1029,7 @@ The following document anchors are provided for contradiction detection only. Do
823
1029
  const safeDowngraded = downgraded.filter((id) => mutableIds.has(id));
824
1030
  const safeDeleted = deleted.filter((id) => mutableIds.has(id));
825
1031
  const validNewFacts = newFacts.map(validateFact).filter((f) => f !== null);
1032
+ const insertedFacts = [];
826
1033
  await this.db.withTransactionAsync(async () => {
827
1034
  for (const id of safeDowngraded) {
828
1035
  await this.db.runAsync(`UPDATE ${this.prefix}entries SET confidence = 'tentative', updated_at = ? WHERE id = ? AND entity_id = ?`, [now, id, entityId]);
@@ -836,8 +1043,13 @@ The following document anchors are provided for contradiction detection only. Do
836
1043
  INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
837
1044
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
838
1045
  `, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
1046
+ insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
839
1047
  }
840
1048
  });
1049
+ for (const fact of insertedFacts) {
1050
+ await this.embedFact(fact);
1051
+ }
1052
+ await this.rebuildMiniSearchIndex(entityId);
841
1053
  }
842
1054
  async runLibrarian(entityId) {
843
1055
  const jobKey = this._librarianKey(entityId);
@@ -847,6 +1059,9 @@ The following document anchors are provided for contradiction detection only. Do
847
1059
  if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
848
1060
  throw new WikiBusyError("prune", entityId);
849
1061
  }
1062
+ if (this._isReembedActive(entityId)) {
1063
+ throw new WikiBusyError("reembed", entityId);
1064
+ }
850
1065
  this.activeMaintenanceJobs.add(jobKey);
851
1066
  try {
852
1067
  await this._doRunLibrarian(entityId);
@@ -862,6 +1077,9 @@ The following document anchors are provided for contradiction detection only. Do
862
1077
  if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
863
1078
  throw new WikiBusyError("prune", entityId);
864
1079
  }
1080
+ if (this._isReembedActive(entityId)) {
1081
+ throw new WikiBusyError("reembed", entityId);
1082
+ }
865
1083
  this.activeMaintenanceJobs.add(jobKey);
866
1084
  try {
867
1085
  await this._doRunHeal(entityId);
@@ -869,6 +1087,69 @@ The following document anchors are provided for contradiction detection only. Do
869
1087
  this.activeMaintenanceJobs.delete(jobKey);
870
1088
  }
871
1089
  }
1090
+ async runReembed(entityId) {
1091
+ const embedFn = this.options.llmProvider.embed;
1092
+ if (!embedFn) return { embedded: 0, skipped: 0 };
1093
+ const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
1094
+ if (this.activeMaintenanceJobs.has(reembedKey)) {
1095
+ throw new WikiBusyError("reembed", entityId ?? "*");
1096
+ }
1097
+ if (entityId) {
1098
+ if (this.activeMaintenanceJobs.has(this._globalReembedKey())) {
1099
+ throw new WikiBusyError("reembed", entityId);
1100
+ }
1101
+ if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
1102
+ throw new WikiBusyError("prune", entityId);
1103
+ }
1104
+ if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
1105
+ throw new WikiBusyError("librarian", entityId);
1106
+ }
1107
+ if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
1108
+ throw new WikiBusyError("heal", entityId);
1109
+ }
1110
+ if (this._isIngestActiveFor(entityId)) {
1111
+ throw new WikiBusyError("ingest", entityId);
1112
+ }
1113
+ } else {
1114
+ if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
1115
+ throw new WikiBusyError("reembed", "*");
1116
+ }
1117
+ if (this._isAnyMaintenanceActiveWithSuffix(":prune")) {
1118
+ throw new WikiBusyError("prune", "*");
1119
+ }
1120
+ if (this._isAnyMaintenanceActiveWithSuffix(":librarian")) {
1121
+ throw new WikiBusyError("librarian", "*");
1122
+ }
1123
+ if (this._isAnyMaintenanceActiveWithSuffix(":heal")) {
1124
+ throw new WikiBusyError("heal", "*");
1125
+ }
1126
+ if (this.activeIngestJobs.size > 0) {
1127
+ throw new WikiBusyError("ingest", "*");
1128
+ }
1129
+ }
1130
+ this.activeMaintenanceJobs.add(reembedKey);
1131
+ try {
1132
+ const where = entityId ? `entity_id = ? AND deleted_at IS NULL` : `deleted_at IS NULL`;
1133
+ const params = entityId ? [entityId] : [];
1134
+ const rows = await this.db.getAllAsync(
1135
+ `SELECT * FROM ${this.prefix}entries WHERE ${where}`,
1136
+ params
1137
+ );
1138
+ let embedded = 0;
1139
+ let skipped = 0;
1140
+ for (const row of rows) {
1141
+ const success = await this.embedFact(row);
1142
+ if (success) embedded++;
1143
+ else skipped++;
1144
+ }
1145
+ if (embedded > 0) {
1146
+ await this._reconcileEmbeddingDimension();
1147
+ }
1148
+ return { embedded, skipped };
1149
+ } finally {
1150
+ this.activeMaintenanceJobs.delete(reembedKey);
1151
+ }
1152
+ }
872
1153
  getEntityStatus(entityId) {
873
1154
  const ingestPrefix = `${this.prefix}:${entityId}:`;
874
1155
  let ingesting = false;
@@ -899,10 +1180,13 @@ The following document anchors are provided for contradiction detection only. Do
899
1180
  ),
900
1181
  this.db.getAllAsync(eventsQuery, eventsParams)
901
1182
  ]);
902
- const facts = factsRaw.map((f) => ({
903
- ...f,
904
- tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags
905
- }));
1183
+ const facts = factsRaw.map((f) => {
1184
+ const { embedding: _embedding, ...rest } = f;
1185
+ return {
1186
+ ...rest,
1187
+ tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
1188
+ };
1189
+ });
906
1190
  const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
907
1191
  return { facts, tasks, events };
908
1192
  }
@@ -1041,7 +1325,18 @@ The following document anchors are provided for contradiction detection only. Do
1041
1325
  );
1042
1326
  }
1043
1327
  });
1328
+ for (const fact of bundle.facts) {
1329
+ if (!fact.deleted_at) {
1330
+ await this.embedFact({
1331
+ id: fact.id,
1332
+ title: fact.title,
1333
+ body: fact.body,
1334
+ tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
1335
+ });
1336
+ }
1337
+ }
1044
1338
  }
1339
+ await this.rebuildMiniSearchIndex();
1045
1340
  }
1046
1341
  async forget(entityId, params) {
1047
1342
  const now = Date.now();
@@ -1090,6 +1385,7 @@ The following document anchors are provided for contradiction detection only. Do
1090
1385
  if (taskResult) deletedTasks += taskResult.changes;
1091
1386
  if (refResult) deletedEntries += refResult.changes;
1092
1387
  }
1388
+ await this.rebuildMiniSearchIndex(entityId);
1093
1389
  return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
1094
1390
  }
1095
1391
  async ingestDocument(entityId, params) {
@@ -1115,6 +1411,9 @@ The following document anchors are provided for contradiction detection only. Do
1115
1411
  if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
1116
1412
  throw new WikiBusyError("prune", entityId);
1117
1413
  }
1414
+ if (this._isReembedActive(entityId)) {
1415
+ throw new WikiBusyError("reembed", entityId);
1416
+ }
1118
1417
  this.activeIngestJobs.add(jobKey);
1119
1418
  try {
1120
1419
  const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
@@ -1146,6 +1445,7 @@ ${chunk}`;
1146
1445
  }
1147
1446
  }
1148
1447
  const now = Date.now();
1448
+ const insertedFacts = [];
1149
1449
  await this.db.withTransactionAsync(async () => {
1150
1450
  await this.db.runAsync(
1151
1451
  `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
@@ -1158,8 +1458,13 @@ ${chunk}`;
1158
1458
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1159
1459
  [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]
1160
1460
  );
1461
+ insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
1161
1462
  }
1162
1463
  });
1464
+ for (const fact of insertedFacts) {
1465
+ await this.embedFact(fact);
1466
+ }
1467
+ await this.rebuildMiniSearchIndex(entityId);
1163
1468
  return { truncated, chunks: chunks.length };
1164
1469
  } finally {
1165
1470
  this.activeIngestJobs.delete(jobKey);