@equationalapplications/core-llm-wiki 2.4.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,5 +1,11 @@
1
1
  'use strict';
2
2
 
3
+ var MiniSearch = require('minisearch');
4
+
5
+ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
6
+
7
+ var MiniSearch__default = /*#__PURE__*/_interopDefault(MiniSearch);
8
+
3
9
  // src/db/schema.ts
4
10
  async function setupDatabase(db, prefix) {
5
11
  await db.execAsync(`
@@ -17,7 +23,8 @@ async function setupDatabase(db, prefix) {
17
23
  updated_at INTEGER NOT NULL,
18
24
  last_accessed_at INTEGER,
19
25
  access_count INTEGER NOT NULL DEFAULT 0,
20
- deleted_at INTEGER
26
+ deleted_at INTEGER,
27
+ embedding TEXT
21
28
  );
22
29
 
23
30
  CREATE INDEX IF NOT EXISTS ${prefix}entries_entity_idx ON ${prefix}entries(entity_id);
@@ -25,34 +32,6 @@ async function setupDatabase(db, prefix) {
25
32
  CREATE INDEX IF NOT EXISTS ${prefix}entries_source_hash_idx ON ${prefix}entries(entity_id, source_hash) WHERE source_hash IS NOT NULL;
26
33
  CREATE INDEX IF NOT EXISTS ${prefix}entries_updated_idx ON ${prefix}entries(updated_at DESC);
27
34
 
28
- -- FTS5 Virtual Table for full-text search
29
- CREATE VIRTUAL TABLE IF NOT EXISTS ${prefix}entries_fts USING fts5(
30
- title,
31
- body,
32
- tags,
33
- content='${prefix}entries',
34
- content_rowid='rowid',
35
- tokenize='porter unicode61'
36
- );
37
-
38
- -- Triggers to keep FTS5 in sync with entries
39
- CREATE TRIGGER IF NOT EXISTS ${prefix}entries_ai AFTER INSERT ON ${prefix}entries BEGIN
40
- INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
41
- VALUES (new.rowid, new.title, new.body, new.tags);
42
- END;
43
-
44
- CREATE TRIGGER IF NOT EXISTS ${prefix}entries_ad AFTER DELETE ON ${prefix}entries BEGIN
45
- INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
46
- VALUES ('delete', old.rowid, old.title, old.body, old.tags);
47
- END;
48
-
49
- CREATE TRIGGER IF NOT EXISTS ${prefix}entries_au AFTER UPDATE ON ${prefix}entries BEGIN
50
- INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
51
- VALUES ('delete', old.rowid, old.title, old.body, old.tags);
52
- INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
53
- VALUES (new.rowid, new.title, new.body, new.tags);
54
- END;
55
-
56
35
  CREATE TABLE IF NOT EXISTS ${prefix}tasks (
57
36
  id TEXT PRIMARY KEY,
58
37
  entity_id TEXT NOT NULL,
@@ -95,7 +74,13 @@ async function setupDatabase(db, prefix) {
95
74
  var MIGRATIONS = [
96
75
  {
97
76
  version: 1,
98
- description: "Rebuild FTS5 with porter unicode61 tokenizer",
77
+ description: "Rebuild FTS5 with porter unicode61 tokenizer (superseded by v2)",
78
+ run: async (_db, _prefix) => {
79
+ }
80
+ },
81
+ {
82
+ version: 2,
83
+ description: "Remove FTS5; add embedding column for semantic retrieval",
99
84
  run: async (db, prefix) => {
100
85
  await db.withTransactionAsync(async () => {
101
86
  await db.execAsync(`
@@ -103,32 +88,14 @@ var MIGRATIONS = [
103
88
  DROP TRIGGER IF EXISTS ${prefix}entries_ad;
104
89
  DROP TRIGGER IF EXISTS ${prefix}entries_au;
105
90
  DROP TABLE IF EXISTS ${prefix}entries_fts;
106
- CREATE VIRTUAL TABLE ${prefix}entries_fts USING fts5(
107
- title,
108
- body,
109
- tags,
110
- content='${prefix}entries',
111
- content_rowid='rowid',
112
- tokenize='porter unicode61'
113
- );
114
- INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
115
- SELECT rowid, title, body, tags FROM ${prefix}entries;
116
- CREATE TRIGGER ${prefix}entries_ai AFTER INSERT ON ${prefix}entries BEGIN
117
- INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
118
- VALUES (new.rowid, new.title, new.body, new.tags);
119
- END;
120
- CREATE TRIGGER ${prefix}entries_ad AFTER DELETE ON ${prefix}entries BEGIN
121
- INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
122
- VALUES ('delete', old.rowid, old.title, old.body, old.tags);
123
- END;
124
- CREATE TRIGGER ${prefix}entries_au AFTER UPDATE ON ${prefix}entries BEGIN
125
- INSERT INTO ${prefix}entries_fts(${prefix}entries_fts, rowid, title, body, tags)
126
- VALUES ('delete', old.rowid, old.title, old.body, old.tags);
127
- INSERT INTO ${prefix}entries_fts(rowid, title, body, tags)
128
- VALUES (new.rowid, new.title, new.body, new.tags);
129
- END;
130
91
  `);
131
92
  });
93
+ const cols = await db.getAllAsync(
94
+ `PRAGMA table_info(${prefix}entries)`
95
+ );
96
+ if (!cols.some((c) => c.name === "embedding")) {
97
+ await db.execAsync(`ALTER TABLE ${prefix}entries ADD COLUMN embedding TEXT`);
98
+ }
132
99
  }
133
100
  }
134
101
  ];
@@ -174,6 +141,19 @@ Return ONLY a valid JSON object matching this schema:
174
141
  }
175
142
  Extract verbatim factual content. Do not return markdown, just raw JSON.`;
176
143
 
144
+ // src/utils/cosine.ts
145
/**
 * Cosine similarity between two numeric vectors.
 *
 * Only the first `min(a.length, b.length)` components are compared.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} A value in [-1, 1]; 0 when either compared prefix has zero
 *   magnitude or when the result is not a finite number.
 */
function cosineSimilarity(a, b) {
  const len = Math.min(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < len; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  if (denom === 0) {
    return 0;
  }
  const similarity = dot / denom;
  // Overflowing or non-finite components produce NaN here; a NaN score would
  // make any sort comparator built on this value inconsistent.
  if (!Number.isFinite(similarity)) {
    return 0;
  }
  // Floating-point rounding can push parallel vectors marginally past +/-1.
  return Math.min(1, Math.max(-1, similarity));
}
156
+
177
157
  // src/WikiMemory.ts
178
158
  function parseJsonResponse(text) {
179
159
  const firstBrace = text.indexOf("{");
@@ -382,10 +362,146 @@ var WikiMemory = class {
382
362
  constructor(db, options) {
383
363
  this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
384
364
  this.activeIngestJobs = /* @__PURE__ */ new Set();
365
+ this.miniSearch = new MiniSearch__default.default({
366
+ fields: ["title", "body", "tags"],
367
+ storeFields: ["entity_id"],
368
+ searchOptions: {
369
+ boost: { title: 2 },
370
+ fuzzy: 0.2,
371
+ prefix: true
372
+ }
373
+ });
374
+ this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
385
375
  this.db = db;
386
376
  this.options = options;
387
377
  this.prefix = options.config?.tablePrefix || "llm_wiki_";
388
378
  }
379
+ normalizeMiniSearchRow(row) {
380
+ return {
381
+ id: row.id,
382
+ entity_id: row.entity_id,
383
+ title: row.title,
384
+ body: row.body,
385
+ tags: (() => {
386
+ try {
387
+ const parsed = JSON.parse(row.tags);
388
+ return Array.isArray(parsed) ? parsed.join(" ") : row.tags;
389
+ } catch {
390
+ return row.tags;
391
+ }
392
+ })()
393
+ };
394
+ }
395
+ async rebuildMiniSearchIndex(entityId) {
396
+ if (entityId) {
397
+ const rows2 = await this.db.getAllAsync(
398
+ `SELECT id, entity_id, title, body, tags FROM ${this.prefix}entries WHERE deleted_at IS NULL AND entity_id = ?`,
399
+ [entityId]
400
+ );
401
+ const previousIds = this.miniSearchEntryIdsByEntity.get(entityId);
402
+ if (previousIds) {
403
+ for (const id of previousIds) {
404
+ this.miniSearch.discard(id);
405
+ }
406
+ }
407
+ const documents2 = rows2.map((row) => this.normalizeMiniSearchRow(row));
408
+ if (documents2.length > 0) {
409
+ this.miniSearch.addAll(documents2);
410
+ }
411
+ this.miniSearchEntryIdsByEntity.set(entityId, new Set(documents2.map((document) => document.id)));
412
+ return;
413
+ }
414
+ const rows = await this.db.getAllAsync(`SELECT id, entity_id, title, body, tags FROM ${this.prefix}entries WHERE deleted_at IS NULL`);
415
+ this.miniSearch.removeAll();
416
+ this.miniSearchEntryIdsByEntity.clear();
417
+ const documents = rows.map((row) => this.normalizeMiniSearchRow(row));
418
+ if (documents.length > 0) {
419
+ this.miniSearch.addAll(documents);
420
+ }
421
+ for (const document of documents) {
422
+ const ids = this.miniSearchEntryIdsByEntity.get(document.entity_id) ?? /* @__PURE__ */ new Set();
423
+ ids.add(document.id);
424
+ this.miniSearchEntryIdsByEntity.set(document.entity_id, ids);
425
+ }
426
+ }
427
+ async storeEmbeddingDimension(dim) {
428
+ const existing = await this.db.getFirstAsync(
429
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
430
+ );
431
+ if (existing) {
432
+ const storedDim = parseInt(existing.value, 10);
433
+ if (storedDim !== dim) {
434
+ console.warn(
435
+ `[WikiMemory] Embedding dimension mismatch: stored ${storedDim}, got ${dim}. Call runReembed() to rebuild embeddings with the new model.`
436
+ );
437
+ await this.db.runAsync(
438
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
439
+ [String(dim)]
440
+ );
441
+ } else {
442
+ await this.db.runAsync(
443
+ `DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
444
+ );
445
+ }
446
+ } else {
447
+ await this.db.runAsync(
448
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
449
+ [String(dim)]
450
+ );
451
+ }
452
+ }
453
+ /**
454
+ * After a successful runReembed(), promote the pending `embedding_dimension_mismatch`
455
+ * value to the canonical `embedding_dimension` key and clear the mismatch flag.
456
+ * This ensures future read() calls use embedding-based retrieval rather than staying
457
+ * stuck on the MiniSearch fallback.
458
+ */
459
+ async _reconcileEmbeddingDimension() {
460
+ const mismatch = await this.db.getFirstAsync(
461
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
462
+ );
463
+ if (mismatch) {
464
+ await this.db.runAsync(
465
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
466
+ [mismatch.value]
467
+ );
468
+ await this.db.runAsync(
469
+ `DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
470
+ );
471
+ }
472
+ }
473
+ async embedFact(fact) {
474
+ const embedFn = this.options.llmProvider.embed;
475
+ if (!embedFn) return false;
476
+ let tagsStr;
477
+ if (Array.isArray(fact.tags)) {
478
+ tagsStr = fact.tags.join(" ");
479
+ } else {
480
+ try {
481
+ const parsed = JSON.parse(fact.tags);
482
+ tagsStr = Array.isArray(parsed) ? parsed.join(" ") : fact.tags;
483
+ } catch {
484
+ tagsStr = fact.tags;
485
+ }
486
+ }
487
+ const text = `${fact.title} ${fact.body} ${tagsStr}`.trim();
488
+ try {
489
+ const vector = await embedFn(text);
490
+ if (vector.length === 0 || !vector.every((v) => typeof v === "number" && isFinite(v))) {
491
+ console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
492
+ return false;
493
+ }
494
+ await this.storeEmbeddingDimension(vector.length);
495
+ await this.db.runAsync(
496
+ `UPDATE ${this.prefix}entries SET embedding = ? WHERE id = ?`,
497
+ [JSON.stringify(vector), fact.id]
498
+ );
499
+ return true;
500
+ } catch (err) {
501
+ console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
502
+ return false;
503
+ }
504
+ }
389
505
  _librarianKey(entityId) {
390
506
  return `${this.prefix}:${entityId}:librarian`;
391
507
  }
@@ -467,6 +583,7 @@ var WikiMemory = class {
467
583
  }
468
584
  }
469
585
  });
586
+ await this.rebuildMiniSearchIndex();
470
587
  }
471
588
  async hasChanged(entityId, sourceRef, sourceHash) {
472
589
  const normalizedRef = normalizeSourceRef(sourceRef);
@@ -491,6 +608,31 @@ var WikiMemory = class {
491
608
  _pruneKey(entityId) {
492
609
  return `${this.prefix}:${entityId}:prune`;
493
610
  }
611
+ _reembedKey(entityId) {
612
+ return `${this.prefix}:${entityId}:reembed`;
613
+ }
614
+ _globalReembedKey() {
615
+ return `${this.prefix}:reembed`;
616
+ }
617
+ _isReembedActive(entityId) {
618
+ return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
619
+ }
620
+ /** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
621
+ _isAnyMaintenanceActiveWithSuffix(suffix) {
622
+ const entityKeyPrefix = `${this.prefix}:`;
623
+ for (const k of this.activeMaintenanceJobs) {
624
+ if (k.startsWith(entityKeyPrefix) && k.endsWith(suffix)) return true;
625
+ }
626
+ return false;
627
+ }
628
+ /** Returns true if any ingest job is active for the given entity. */
629
+ _isIngestActiveFor(entityId) {
630
+ const entityKeyPrefix = `${this.prefix}:${entityId}:`;
631
+ for (const k of this.activeIngestJobs) {
632
+ if (k.startsWith(entityKeyPrefix)) return true;
633
+ }
634
+ return false;
635
+ }
494
636
  _validatePruneDuration(value, name) {
495
637
  if (value !== null && value !== void 0 && (typeof value !== "number" || !isFinite(value) || value < 0)) {
496
638
  throw new Error(`Invalid ${name}: must be a non-negative finite number or null`);
@@ -513,6 +655,8 @@ var WikiMemory = class {
513
655
  blockingOperation = "librarian";
514
656
  } else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
515
657
  blockingOperation = "heal";
658
+ } else if (this._isReembedActive(entityId)) {
659
+ blockingOperation = "reembed";
516
660
  } else if (isIngestRunning) {
517
661
  blockingOperation = "ingest";
518
662
  }
@@ -558,91 +702,145 @@ var WikiMemory = class {
558
702
  await this.db.execAsync(`PRAGMA wal_checkpoint(TRUNCATE)`);
559
703
  await this.db.execAsync(`VACUUM`);
560
704
  }
705
+ await this.rebuildMiniSearchIndex(entityId);
561
706
  return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
562
707
  } finally {
563
708
  this.activeMaintenanceJobs.delete(pruneKey);
564
709
  }
565
710
  }
566
- formatSearchQuery(query) {
567
- const normalizeTokens = (value) => value.toLowerCase().replace(/[^a-z0-9\s]/g, "").split(/\s+/).filter((t) => t.length >= 3);
568
- const baseTokens = normalizeTokens(query);
569
- if (baseTokens.length === 0) return "";
570
- const synonymMap = this.options.config?.synonymMap;
571
- const expanded = [];
572
- const seen = /* @__PURE__ */ new Set();
573
- const pushNormalized = (value) => {
574
- for (const token of normalizeTokens(value)) {
575
- if (expanded.length >= 12) return false;
576
- if (seen.has(token)) continue;
577
- seen.add(token);
578
- expanded.push(token);
579
- }
580
- return true;
581
- };
582
- for (const t of baseTokens) {
583
- if (!pushNormalized(t)) break;
584
- if (synonymMap) {
585
- const synonyms = synonymMap[t];
586
- if (Array.isArray(synonyms)) {
587
- for (const s of synonyms) {
588
- if (typeof s === "string") {
589
- if (!pushNormalized(s)) break;
711
+ async read(entityId, query) {
712
+ const maxResults = this.options.config?.maxResults ?? this.options.config?.maxFtsResults ?? 10;
713
+ const embedFn = this.options.llmProvider.embed;
714
+ const trimmedQuery = query.trim();
715
+ let facts = [];
716
+ if (trimmedQuery) {
717
+ let usedEmbed = false;
718
+ if (embedFn) {
719
+ try {
720
+ const queryVec = await embedFn(trimmedQuery);
721
+ if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
722
+ throw new Error(
723
+ "embed() returned an empty or non-finite vector. Falling back to keyword search."
724
+ );
725
+ }
726
+ const storedDimRow = await this.db.getFirstAsync(
727
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
728
+ );
729
+ if (storedDimRow) {
730
+ const storedDim = parseInt(storedDimRow.value, 10);
731
+ if (storedDim !== queryVec.length) {
732
+ throw new Error(
733
+ `Embedding dimension mismatch: stored ${storedDim}, query has ${queryVec.length}. Call runReembed() to rebuild embeddings with the new model.`
734
+ );
590
735
  }
591
736
  }
737
+ const scoreRows = await this.db.getAllAsync(
738
+ `SELECT id, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
739
+ [entityId]
740
+ );
741
+ const scored = scoreRows.map((row) => {
742
+ let score = 0;
743
+ if (row.embedding) {
744
+ try {
745
+ const parsed = JSON.parse(row.embedding);
746
+ if (Array.isArray(parsed) && parsed.length === queryVec.length && parsed.every((v) => typeof v === "number" && isFinite(v))) {
747
+ score = cosineSimilarity(queryVec, parsed);
748
+ }
749
+ } catch {
750
+ }
751
+ }
752
+ return { row, score };
753
+ });
754
+ scored.sort((a, b) => {
755
+ const scoreDiff = b.score - a.score;
756
+ if (scoreDiff !== 0) {
757
+ return scoreDiff;
758
+ }
759
+ const updatedAtDiff = (b.row.updated_at ?? 0) - (a.row.updated_at ?? 0);
760
+ if (updatedAtDiff !== 0) {
761
+ return updatedAtDiff;
762
+ }
763
+ const accessCountDiff = (b.row.access_count ?? 0) - (a.row.access_count ?? 0);
764
+ if (accessCountDiff !== 0) {
765
+ return accessCountDiff;
766
+ }
767
+ return a.row.id.localeCompare(b.row.id);
768
+ });
769
+ const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
770
+ if (topIds.length > 0) {
771
+ const placeholders = topIds.map(() => "?").join(",");
772
+ const fullRows = await this.db.getAllAsync(
773
+ `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
774
+ topIds
775
+ );
776
+ const byId = new Map(fullRows.map((r) => [r.id, r]));
777
+ facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
778
+ }
779
+ usedEmbed = true;
780
+ } catch (err) {
781
+ const error = err instanceof Error ? err : new Error(String(err));
782
+ this.options.onRetrievalFallback?.(error);
592
783
  }
593
784
  }
594
- }
595
- return expanded.map((t) => `"${t}"*`).join(" OR ");
596
- }
597
- async read(entityId, query) {
598
- const ftsQuery = this.formatSearchQuery(query);
599
- const maxResults = this.options.config?.maxFtsResults || 10;
600
- let factsPromise;
601
- if (ftsQuery) {
602
- factsPromise = this.db.getAllAsync(`
603
- SELECT e.* FROM ${this.prefix}entries e
604
- JOIN ${this.prefix}entries_fts fts ON e.rowid = fts.rowid
605
- WHERE fts.${this.prefix}entries_fts MATCH ?
606
- AND e.entity_id = ?
607
- AND e.deleted_at IS NULL
608
- ORDER BY e.confidence DESC, e.access_count DESC, e.updated_at DESC
609
- LIMIT ?
610
- `, [ftsQuery, entityId, maxResults]);
785
+ if (!usedEmbed) {
786
+ const results = this.miniSearch.search(trimmedQuery, {
787
+ filter: (r) => r.entity_id === entityId,
788
+ combineWith: "OR"
789
+ });
790
+ const topIds = results.slice(0, maxResults).map((r) => r.id);
791
+ if (topIds.length > 0) {
792
+ const placeholders = topIds.map(() => "?").join(",");
793
+ const rows = await this.db.getAllAsync(
794
+ `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
795
+ topIds
796
+ );
797
+ const byId = new Map(rows.map((r) => [r.id, r]));
798
+ facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
799
+ }
800
+ }
801
+ if (facts.length > 0) {
802
+ const ids = facts.map((f) => f.id);
803
+ const placeholders = ids.map(() => "?").join(",");
804
+ const now = Date.now();
805
+ await this.db.runAsync(
806
+ `UPDATE ${this.prefix}entries
807
+ SET access_count = access_count + 1, last_accessed_at = ?
808
+ WHERE id IN (${placeholders})`,
809
+ [now, ...ids]
810
+ );
811
+ }
611
812
  } else {
612
- factsPromise = this.db.getAllAsync(`
613
- SELECT * FROM ${this.prefix}entries
614
- WHERE entity_id = ? AND deleted_at IS NULL
615
- ORDER BY updated_at DESC
616
- LIMIT ?
617
- `, [entityId, maxResults]);
618
- }
619
- const tasksPromise = this.db.getAllAsync(`
620
- SELECT * FROM ${this.prefix}tasks
621
- WHERE entity_id = ? AND status IN ('pending', 'in_progress') AND deleted_at IS NULL
622
- ORDER BY priority DESC, created_at ASC
623
- `, [entityId]);
624
- const eventsPromise = this.db.getAllAsync(`
625
- SELECT * FROM ${this.prefix}events
626
- WHERE entity_id = ?
627
- ORDER BY created_at DESC
628
- LIMIT 10
629
- `, [entityId]);
630
- const [factsRaw, tasks, events] = await Promise.all([factsPromise, tasksPromise, eventsPromise]);
631
- if (ftsQuery && factsRaw.length > 0) {
632
- const ids = factsRaw.map((f) => f.id);
633
- const placeholders = ids.map(() => "?").join(",");
634
- const now = Date.now();
635
- await this.db.runAsync(`
636
- UPDATE ${this.prefix}entries
637
- SET access_count = access_count + 1, last_accessed_at = ?
638
- WHERE id IN (${placeholders})
639
- `, [now, ...ids]);
640
- }
641
- const facts = factsRaw.map((f) => ({
642
- ...f,
643
- tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags
644
- }));
645
- return { facts, tasks, events: events.reverse() };
813
+ facts = await this.db.getAllAsync(
814
+ `SELECT * FROM ${this.prefix}entries
815
+ WHERE entity_id = ? AND deleted_at IS NULL
816
+ ORDER BY updated_at DESC
817
+ LIMIT ?`,
818
+ [entityId, maxResults]
819
+ );
820
+ }
821
+ const [tasks, events] = await Promise.all([
822
+ this.db.getAllAsync(
823
+ `SELECT * FROM ${this.prefix}tasks
824
+ WHERE entity_id = ? AND status IN ('pending', 'in_progress') AND deleted_at IS NULL
825
+ ORDER BY priority DESC, created_at ASC`,
826
+ [entityId]
827
+ ),
828
+ this.db.getAllAsync(
829
+ `SELECT * FROM ${this.prefix}events
830
+ WHERE entity_id = ?
831
+ ORDER BY created_at DESC
832
+ LIMIT 10`,
833
+ [entityId]
834
+ )
835
+ ]);
836
+ const parsedFacts = facts.map((f) => {
837
+ const { embedding: _embedding, ...rest } = f;
838
+ return {
839
+ ...rest,
840
+ tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
841
+ };
842
+ });
843
+ return { facts: parsedFacts, tasks, events: events.reverse() };
646
844
  }
647
845
  async getMemoryBundle(entityId) {
648
846
  return this._getFullBundle(entityId, { maxEvents: 10 });
@@ -715,10 +913,13 @@ var WikiMemory = class {
715
913
  ORDER BY updated_at DESC
716
914
  LIMIT 100
717
915
  `, [entityId]);
718
- const currentFacts = currentFactsRows.map((f) => ({
719
- ...f,
720
- tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags
721
- }));
916
+ const currentFacts = currentFactsRows.map((f) => {
917
+ const { embedding: _embedding, ...rest } = f;
918
+ return {
919
+ ...rest,
920
+ tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
921
+ };
922
+ });
722
923
  const userPrompt = `Events:
723
924
  ${JSON.stringify(events.reverse(), null, 2)}
724
925
 
@@ -734,6 +935,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
734
935
  const validFacts = facts.map(validateFact).filter((f) => f !== null);
735
936
  const validTasks = tasks.map(validateTask).filter((t) => t !== null);
736
937
  const now = Date.now();
938
+ const insertedFacts = [];
737
939
  await this.db.withTransactionAsync(async () => {
738
940
  for (const fact of validFacts) {
739
941
  const newTokens = titleTokens(fact.title);
@@ -756,6 +958,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
756
958
  INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
757
959
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
758
960
  `, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
961
+ insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
759
962
  }
760
963
  for (const task of validTasks) {
761
964
  const id = generateId("task_");
@@ -765,6 +968,10 @@ ${JSON.stringify(currentFacts, null, 2)}`;
765
968
  `, [id, entityId, task.description, "pending", task.priority, now, now]);
766
969
  }
767
970
  });
971
+ for (const fact of insertedFacts) {
972
+ await this.embedFact(fact);
973
+ }
974
+ await this.rebuildMiniSearchIndex(entityId);
768
975
  }
769
976
  async _doRunHeal(entityId) {
770
977
  const now = Date.now();
@@ -801,7 +1008,10 @@ ${JSON.stringify(currentFacts, null, 2)}`;
801
1008
  const healCandidates = allFactsRows.filter((f) => f.source_type !== "user_document");
802
1009
  const documentAnchors = allFactsRows.filter((f) => f.source_type === "user_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
803
1010
  const userPrompt = `Heal Candidates:
804
- ${JSON.stringify(healCandidates.map((f) => ({ ...f, tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags })), null, 2)}
1011
+ ${JSON.stringify(healCandidates.map((f) => {
1012
+ const { embedding: _embedding, ...rest } = f;
1013
+ return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
1014
+ }), null, 2)}
805
1015
 
806
1016
  Document Anchors (DO NOT MODIFY OR DELETE):
807
1017
  ${JSON.stringify(documentAnchors, null, 2)}
@@ -825,6 +1035,7 @@ The following document anchors are provided for contradiction detection only. Do
825
1035
  const safeDowngraded = downgraded.filter((id) => mutableIds.has(id));
826
1036
  const safeDeleted = deleted.filter((id) => mutableIds.has(id));
827
1037
  const validNewFacts = newFacts.map(validateFact).filter((f) => f !== null);
1038
+ const insertedFacts = [];
828
1039
  await this.db.withTransactionAsync(async () => {
829
1040
  for (const id of safeDowngraded) {
830
1041
  await this.db.runAsync(`UPDATE ${this.prefix}entries SET confidence = 'tentative', updated_at = ? WHERE id = ? AND entity_id = ?`, [now, id, entityId]);
@@ -838,8 +1049,13 @@ The following document anchors are provided for contradiction detection only. Do
838
1049
  INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
839
1050
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
840
1051
  `, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
1052
+ insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
841
1053
  }
842
1054
  });
1055
+ for (const fact of insertedFacts) {
1056
+ await this.embedFact(fact);
1057
+ }
1058
+ await this.rebuildMiniSearchIndex(entityId);
843
1059
  }
844
1060
  async runLibrarian(entityId) {
845
1061
  const jobKey = this._librarianKey(entityId);
@@ -849,6 +1065,9 @@ The following document anchors are provided for contradiction detection only. Do
849
1065
  if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
850
1066
  throw new WikiBusyError("prune", entityId);
851
1067
  }
1068
+ if (this._isReembedActive(entityId)) {
1069
+ throw new WikiBusyError("reembed", entityId);
1070
+ }
852
1071
  this.activeMaintenanceJobs.add(jobKey);
853
1072
  try {
854
1073
  await this._doRunLibrarian(entityId);
@@ -864,6 +1083,9 @@ The following document anchors are provided for contradiction detection only. Do
864
1083
  if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
865
1084
  throw new WikiBusyError("prune", entityId);
866
1085
  }
1086
+ if (this._isReembedActive(entityId)) {
1087
+ throw new WikiBusyError("reembed", entityId);
1088
+ }
867
1089
  this.activeMaintenanceJobs.add(jobKey);
868
1090
  try {
869
1091
  await this._doRunHeal(entityId);
@@ -871,6 +1093,69 @@ The following document anchors are provided for contradiction detection only. Do
871
1093
  this.activeMaintenanceJobs.delete(jobKey);
872
1094
  }
873
1095
  }
1096
+ async runReembed(entityId) {
1097
+ const embedFn = this.options.llmProvider.embed;
1098
+ if (!embedFn) return { embedded: 0, skipped: 0 };
1099
+ const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
1100
+ if (this.activeMaintenanceJobs.has(reembedKey)) {
1101
+ throw new WikiBusyError("reembed", entityId ?? "*");
1102
+ }
1103
+ if (entityId) {
1104
+ if (this.activeMaintenanceJobs.has(this._globalReembedKey())) {
1105
+ throw new WikiBusyError("reembed", entityId);
1106
+ }
1107
+ if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
1108
+ throw new WikiBusyError("prune", entityId);
1109
+ }
1110
+ if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
1111
+ throw new WikiBusyError("librarian", entityId);
1112
+ }
1113
+ if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
1114
+ throw new WikiBusyError("heal", entityId);
1115
+ }
1116
+ if (this._isIngestActiveFor(entityId)) {
1117
+ throw new WikiBusyError("ingest", entityId);
1118
+ }
1119
+ } else {
1120
+ if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
1121
+ throw new WikiBusyError("reembed", "*");
1122
+ }
1123
+ if (this._isAnyMaintenanceActiveWithSuffix(":prune")) {
1124
+ throw new WikiBusyError("prune", "*");
1125
+ }
1126
+ if (this._isAnyMaintenanceActiveWithSuffix(":librarian")) {
1127
+ throw new WikiBusyError("librarian", "*");
1128
+ }
1129
+ if (this._isAnyMaintenanceActiveWithSuffix(":heal")) {
1130
+ throw new WikiBusyError("heal", "*");
1131
+ }
1132
+ if (this.activeIngestJobs.size > 0) {
1133
+ throw new WikiBusyError("ingest", "*");
1134
+ }
1135
+ }
1136
+ this.activeMaintenanceJobs.add(reembedKey);
1137
+ try {
1138
+ const where = entityId ? `entity_id = ? AND deleted_at IS NULL` : `deleted_at IS NULL`;
1139
+ const params = entityId ? [entityId] : [];
1140
+ const rows = await this.db.getAllAsync(
1141
+ `SELECT * FROM ${this.prefix}entries WHERE ${where}`,
1142
+ params
1143
+ );
1144
+ let embedded = 0;
1145
+ let skipped = 0;
1146
+ for (const row of rows) {
1147
+ const success = await this.embedFact(row);
1148
+ if (success) embedded++;
1149
+ else skipped++;
1150
+ }
1151
+ if (embedded > 0) {
1152
+ await this._reconcileEmbeddingDimension();
1153
+ }
1154
+ return { embedded, skipped };
1155
+ } finally {
1156
+ this.activeMaintenanceJobs.delete(reembedKey);
1157
+ }
1158
+ }
874
1159
  getEntityStatus(entityId) {
875
1160
  const ingestPrefix = `${this.prefix}:${entityId}:`;
876
1161
  let ingesting = false;
@@ -901,10 +1186,13 @@ The following document anchors are provided for contradiction detection only. Do
901
1186
  ),
902
1187
  this.db.getAllAsync(eventsQuery, eventsParams)
903
1188
  ]);
904
- const facts = factsRaw.map((f) => ({
905
- ...f,
906
- tags: typeof f.tags === "string" ? JSON.parse(f.tags) : f.tags
907
- }));
1189
+ const facts = factsRaw.map((f) => {
1190
+ const { embedding: _embedding, ...rest } = f;
1191
+ return {
1192
+ ...rest,
1193
+ tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
1194
+ };
1195
+ });
908
1196
  const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
909
1197
  return { facts, tasks, events };
910
1198
  }
@@ -1043,7 +1331,18 @@ The following document anchors are provided for contradiction detection only. Do
1043
1331
  );
1044
1332
  }
1045
1333
  });
1334
+ for (const fact of bundle.facts) {
1335
+ if (!fact.deleted_at) {
1336
+ await this.embedFact({
1337
+ id: fact.id,
1338
+ title: fact.title,
1339
+ body: fact.body,
1340
+ tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
1341
+ });
1342
+ }
1343
+ }
1046
1344
  }
1345
+ await this.rebuildMiniSearchIndex();
1047
1346
  }
1048
1347
  async forget(entityId, params) {
1049
1348
  const now = Date.now();
@@ -1092,6 +1391,7 @@ The following document anchors are provided for contradiction detection only. Do
1092
1391
  if (taskResult) deletedTasks += taskResult.changes;
1093
1392
  if (refResult) deletedEntries += refResult.changes;
1094
1393
  }
1394
+ await this.rebuildMiniSearchIndex(entityId);
1095
1395
  return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
1096
1396
  }
1097
1397
  async ingestDocument(entityId, params) {
@@ -1117,6 +1417,9 @@ The following document anchors are provided for contradiction detection only. Do
1117
1417
  if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
1118
1418
  throw new WikiBusyError("prune", entityId);
1119
1419
  }
1420
+ if (this._isReembedActive(entityId)) {
1421
+ throw new WikiBusyError("reembed", entityId);
1422
+ }
1120
1423
  this.activeIngestJobs.add(jobKey);
1121
1424
  try {
1122
1425
  const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
@@ -1148,6 +1451,7 @@ ${chunk}`;
1148
1451
  }
1149
1452
  }
1150
1453
  const now = Date.now();
1454
+ const insertedFacts = [];
1151
1455
  await this.db.withTransactionAsync(async () => {
1152
1456
  await this.db.runAsync(
1153
1457
  `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
@@ -1160,8 +1464,13 @@ ${chunk}`;
1160
1464
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1161
1465
  [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]
1162
1466
  );
1467
+ insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
1163
1468
  }
1164
1469
  });
1470
+ for (const fact of insertedFacts) {
1471
+ await this.embedFact(fact);
1472
+ }
1473
+ await this.rebuildMiniSearchIndex(entityId);
1165
1474
  return { truncated, chunks: chunks.length };
1166
1475
  } finally {
1167
1476
  this.activeIngestJobs.delete(jobKey);