@equationalapplications/core-llm-wiki 2.6.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -24,7 +24,8 @@ async function setupDatabase(db, prefix) {
24
24
  last_accessed_at INTEGER,
25
25
  access_count INTEGER NOT NULL DEFAULT 0,
26
26
  deleted_at INTEGER,
27
- embedding TEXT
27
+ embedding TEXT,
28
+ embedding_blob BLOB
28
29
  );
29
30
 
30
31
  CREATE INDEX IF NOT EXISTS ${prefix}entries_entity_idx ON ${prefix}entries(entity_id);
@@ -97,6 +98,20 @@ var MIGRATIONS = [
97
98
  await db.execAsync(`ALTER TABLE ${prefix}entries ADD COLUMN embedding TEXT`);
98
99
  }
99
100
  }
101
+ },
102
+ {
103
+ version: 3,
104
+ description: "Add embedding_blob BLOB column for Float32Array vector storage",
105
+ run: async (db, prefix) => {
106
+ const cols = await db.getAllAsync(
107
+ `PRAGMA table_info(${prefix}entries)`
108
+ );
109
+ if (!cols.some((c) => c.name === "embedding_blob")) {
110
+ await db.execAsync(
111
+ `ALTER TABLE ${prefix}entries ADD COLUMN embedding_blob BLOB`
112
+ );
113
+ }
114
+ }
100
115
  }
101
116
  ];
102
117
  for (let i = 1; i < MIGRATIONS.length; i++) {
@@ -154,6 +169,34 @@ function cosineSimilarity(a, b) {
154
169
  return denom === 0 ? 0 : dot / denom;
155
170
  }
156
171
 
172
+ // src/utils/embedding.ts
173
+ function parseEmbedding(blob, text) {
174
+ if (blob && blob.byteLength > 0) {
175
+ if (blob.byteLength % 4 !== 0) return null;
176
+ const copy = new ArrayBuffer(blob.byteLength);
177
+ new Uint8Array(copy).set(blob);
178
+ const vector = new Float32Array(copy);
179
+ for (const value of vector) {
180
+ if (!Number.isFinite(value)) return null;
181
+ }
182
+ return vector;
183
+ }
184
+ if (text) {
185
+ try {
186
+ const arr = JSON.parse(text);
187
+ if (!Array.isArray(arr) || !arr.every((v) => typeof v === "number" && isFinite(v))) return null;
188
+ const vector = new Float32Array(arr);
189
+ for (const value of vector) {
190
+ if (!Number.isFinite(value)) return null;
191
+ }
192
+ return vector;
193
+ } catch {
194
+ return null;
195
+ }
196
+ }
197
+ return null;
198
+ }
199
+
157
200
  // src/WikiMemory.ts
158
201
  function parseJsonResponse(text) {
159
202
  const firstBrace = text.indexOf("{");
@@ -358,7 +401,7 @@ function jaccardScore(a, b) {
358
401
  }
359
402
  var FUZZY_THRESHOLD = 0.5;
360
403
  var MIN_TOKENS_TO_QUALIFY = 3;
361
- var WikiMemory = class {
404
+ var _WikiMemory = class _WikiMemory {
362
405
  constructor(db, options) {
363
406
  this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
364
407
  this.activeIngestJobs = /* @__PURE__ */ new Set();
@@ -372,6 +415,7 @@ var WikiMemory = class {
372
415
  }
373
416
  });
374
417
  this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
418
+ this.vectorCache = /* @__PURE__ */ new Map();
375
419
  this.db = db;
376
420
  this.options = options;
377
421
  this.prefix = options.config?.tablePrefix || "llm_wiki_";
@@ -438,10 +482,6 @@ var WikiMemory = class {
438
482
  `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
439
483
  [String(dim)]
440
484
  );
441
- } else {
442
- await this.db.runAsync(
443
- `DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
444
- );
445
485
  }
446
486
  } else {
447
487
  await this.db.runAsync(
@@ -460,7 +500,18 @@ var WikiMemory = class {
460
500
  const mismatch = await this.db.getFirstAsync(
461
501
  `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
462
502
  );
463
- if (mismatch) {
503
+ if (!mismatch) return;
504
+ const newDim = parseInt(mismatch.value, 10);
505
+ const residual = await this.db.getFirstAsync(
506
+ `SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
507
+ WHERE deleted_at IS NULL
508
+ AND (
509
+ (embedding_blob IS NOT NULL AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?)
510
+ OR (embedding_blob IS NULL AND embedding IS NOT NULL)
511
+ )`,
512
+ [newDim]
513
+ );
514
+ if (!residual || residual.cnt === 0) {
464
515
  await this.db.runAsync(
465
516
  `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
466
517
  [mismatch.value]
@@ -491,10 +542,23 @@ var WikiMemory = class {
491
542
  console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
492
543
  return false;
493
544
  }
494
- await this.storeEmbeddingDimension(vector.length);
545
+ const float32Vector = new Float32Array(vector);
546
+ let hasNonFinite = false;
547
+ for (let i = 0; i < float32Vector.length; i++) {
548
+ if (!isFinite(float32Vector[i])) {
549
+ hasNonFinite = true;
550
+ break;
551
+ }
552
+ }
553
+ if (hasNonFinite) {
554
+ console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
555
+ return false;
556
+ }
557
+ await this.storeEmbeddingDimension(float32Vector.length);
558
+ const blob = new Uint8Array(float32Vector.buffer);
495
559
  await this.db.runAsync(
496
- `UPDATE ${this.prefix}entries SET embedding = ? WHERE id = ?`,
497
- [JSON.stringify(vector), fact.id]
560
+ `UPDATE ${this.prefix}entries SET embedding_blob = ?, embedding = NULL WHERE id = ?`,
561
+ [blob, fact.id]
498
562
  );
499
563
  return true;
500
564
  } catch (err) {
@@ -614,9 +678,24 @@ var WikiMemory = class {
614
678
  _globalReembedKey() {
615
679
  return `${this.prefix}:reembed`;
616
680
  }
681
+ _importKey(entityId) {
682
+ return `${this.prefix}:${entityId}:import`;
683
+ }
684
+ _globalImportKey() {
685
+ return `${this.prefix}:import`;
686
+ }
687
+ _forgetKey(entityId) {
688
+ return `${this.prefix}:${entityId}:forget`;
689
+ }
617
690
  _isReembedActive(entityId) {
618
691
  return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
619
692
  }
693
+ _isImportActiveFor(entityId) {
694
+ return this.activeMaintenanceJobs.has(this._importKey(entityId)) || this.activeMaintenanceJobs.has(this._globalImportKey());
695
+ }
696
+ _isForgetActiveFor(entityId) {
697
+ return this.activeMaintenanceJobs.has(this._forgetKey(entityId));
698
+ }
620
699
  /** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
621
700
  _isAnyMaintenanceActiveWithSuffix(suffix) {
622
701
  const entityKeyPrefix = `${this.prefix}:`;
@@ -659,6 +738,10 @@ var WikiMemory = class {
659
738
  blockingOperation = "reembed";
660
739
  } else if (isIngestRunning) {
661
740
  blockingOperation = "ingest";
741
+ } else if (this._isImportActiveFor(entityId)) {
742
+ blockingOperation = "import";
743
+ } else if (this._isForgetActiveFor(entityId)) {
744
+ blockingOperation = "forget";
662
745
  }
663
746
  if (blockingOperation !== null) {
664
747
  throw new WikiBusyError(blockingOperation, entityId);
@@ -703,19 +786,27 @@ var WikiMemory = class {
703
786
  await this.db.execAsync(`VACUUM`);
704
787
  }
705
788
  await this.rebuildMiniSearchIndex(entityId);
789
+ this.vectorCache.delete(entityId);
706
790
  return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
707
791
  } finally {
708
792
  this.activeMaintenanceJobs.delete(pruneKey);
709
793
  }
710
794
  }
711
- async read(entityId, query) {
712
- const maxResults = this.options.config?.maxResults ?? this.options.config?.maxFtsResults ?? 10;
795
+ async read(entityId, query, options) {
796
+ const config = this.options.config;
797
+ const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
798
+ const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
799
+ const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
800
+ const effectivePreFilterLimit = rawPreFilterLimit === void 0 ? void 0 : Number.isFinite(rawPreFilterLimit) ? Math.max(0, Math.trunc(rawPreFilterLimit)) : void 0;
801
+ const hybridWeight = options?.hybridWeight ?? config?.hybridWeight;
802
+ const weight = hybridWeight !== void 0 && !Number.isNaN(hybridWeight) ? Math.max(0, Math.min(1, hybridWeight)) : void 0;
803
+ const skipEmbed = weight === 0;
713
804
  const embedFn = this.options.llmProvider.embed;
714
805
  const trimmedQuery = query.trim();
715
806
  let facts = [];
716
- if (trimmedQuery) {
807
+ if (maxResults === 0) ; else if (trimmedQuery) {
717
808
  let usedEmbed = false;
718
- if (embedFn) {
809
+ if (!skipEmbed && embedFn) {
719
810
  try {
720
811
  const queryVec = await embedFn(trimmedQuery);
721
812
  if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
@@ -734,49 +825,138 @@ var WikiMemory = class {
734
825
  );
735
826
  }
736
827
  }
737
- const scoreRows = await this.db.getAllAsync(
738
- `SELECT id, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
739
- [entityId]
828
+ const mismatchedCount = await this.db.getFirstAsync(
829
+ `SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
830
+ WHERE entity_id = ? AND deleted_at IS NULL
831
+ AND embedding_blob IS NOT NULL
832
+ AND (CAST(length(embedding_blob) AS INTEGER) % 4 = 0)
833
+ AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?`,
834
+ [entityId, queryVec.length]
740
835
  );
741
- const scored = scoreRows.map((row) => {
742
- let score = 0;
743
- if (row.embedding) {
744
- try {
745
- const parsed = JSON.parse(row.embedding);
746
- if (Array.isArray(parsed) && parsed.length === queryVec.length && parsed.every((v) => typeof v === "number" && isFinite(v))) {
747
- score = cosineSimilarity(queryVec, parsed);
836
+ if (mismatchedCount && mismatchedCount.cnt > 0) {
837
+ throw new Error(
838
+ `Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
839
+ );
840
+ }
841
+ let candidateRows;
842
+ let populateCache = true;
843
+ let miniSearchScores;
844
+ if (effectivePreFilterLimit !== void 0) {
845
+ populateCache = false;
846
+ const preResults = this.miniSearch.search(trimmedQuery, {
847
+ filter: (r) => r.entity_id === entityId,
848
+ combineWith: "OR"
849
+ });
850
+ if (preResults.length === 0) {
851
+ candidateRows = null;
852
+ } else {
853
+ const topKResults = preResults.slice(0, effectivePreFilterLimit);
854
+ if (topKResults.length === 0) {
855
+ candidateRows = null;
856
+ } else {
857
+ const topKIds = topKResults.map((r) => r.id);
858
+ const inClauseChunkSize = 500;
859
+ candidateRows = [];
860
+ for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
861
+ const idChunk = topKIds.slice(i, i + inClauseChunkSize);
862
+ const placeholders = idChunk.map(() => "?").join(",");
863
+ const chunkRows = await this.db.getAllAsync(
864
+ `SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
865
+ idChunk
866
+ );
867
+ candidateRows.push(...chunkRows);
868
+ }
869
+ if (weight !== void 0 && weight < 1) {
870
+ const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
871
+ miniSearchScores = new Map(topKResults.map((r) => [r.id, r.score / maxMsScore]));
748
872
  }
749
- } catch {
750
873
  }
751
874
  }
752
- return { row, score };
753
- });
754
- scored.sort((a, b) => {
755
- const scoreDiff = b.score - a.score;
756
- if (scoreDiff !== 0) {
757
- return scoreDiff;
875
+ } else {
876
+ candidateRows = await this.db.getAllAsync(
877
+ `SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
878
+ [entityId]
879
+ );
880
+ if (weight !== void 0 && weight < 1) {
881
+ const msResults = this.miniSearch.search(trimmedQuery, {
882
+ filter: (r) => r.entity_id === entityId,
883
+ combineWith: "OR"
884
+ });
885
+ const maxMsScore = Math.max(1, msResults[0]?.score ?? 1);
886
+ miniSearchScores = new Map(msResults.map((r) => [r.id, r.score / maxMsScore]));
758
887
  }
759
- const updatedAtDiff = (b.row.updated_at ?? 0) - (a.row.updated_at ?? 0);
760
- if (updatedAtDiff !== 0) {
761
- return updatedAtDiff;
888
+ }
889
+ if (candidateRows === null) {
890
+ usedEmbed = true;
891
+ } else {
892
+ let entityCache = this.vectorCache.get(entityId);
893
+ const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
894
+ if (tooLarge && entityCache) {
895
+ this.vectorCache.delete(entityId);
896
+ entityCache = void 0;
762
897
  }
763
- const accessCountDiff = (b.row.access_count ?? 0) - (a.row.access_count ?? 0);
764
- if (accessCountDiff !== 0) {
765
- return accessCountDiff;
898
+ const canCache = populateCache && !tooLarge;
899
+ if (canCache && !entityCache) {
900
+ entityCache = /* @__PURE__ */ new Map();
766
901
  }
767
- return a.row.id.localeCompare(b.row.id);
768
- });
769
- const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
770
- if (topIds.length > 0) {
771
- const placeholders = topIds.map(() => "?").join(",");
772
- const fullRows = await this.db.getAllAsync(
773
- `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
774
- topIds
775
- );
776
- const byId = new Map(fullRows.map((r) => [r.id, r]));
777
- facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
902
+ const scored = candidateRows.map((row) => {
903
+ let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
904
+ if (vector && canCache && entityCache && !entityCache.has(row.id)) {
905
+ entityCache.set(row.id, vector);
906
+ }
907
+ let score = 0;
908
+ if (vector && vector.length === queryVec.length) {
909
+ const cosSim = cosineSimilarity(queryVec, vector);
910
+ if (weight !== void 0) {
911
+ const kwScore = miniSearchScores?.get(row.id) ?? 0;
912
+ score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
913
+ } else {
914
+ score = cosSim;
915
+ }
916
+ } else if (weight !== void 0 && weight < 1) {
917
+ const kwScore = miniSearchScores?.get(row.id) ?? 0;
918
+ score = (1 - weight) * kwScore;
919
+ } else {
920
+ score = -2;
921
+ }
922
+ return { row, score };
923
+ });
924
+ if (canCache && entityCache && entityCache.size > 0) {
925
+ if (!this.vectorCache.has(entityId)) {
926
+ if (this.vectorCache.size >= _WikiMemory.MAX_VECTOR_CACHE_ENTITIES) {
927
+ const oldestKey = this.vectorCache.keys().next().value;
928
+ if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
929
+ }
930
+ this.vectorCache.set(entityId, entityCache);
931
+ }
932
+ }
933
+ scored.sort((a, b) => {
934
+ const scoreDiff = b.score - a.score;
935
+ if (scoreDiff !== 0) return scoreDiff;
936
+ const accessCountDiff = (b.row.access_count ?? 0) - (a.row.access_count ?? 0);
937
+ if (accessCountDiff !== 0) return accessCountDiff;
938
+ const updatedAtDiff = (b.row.updated_at ?? 0) - (a.row.updated_at ?? 0);
939
+ if (updatedAtDiff !== 0) return updatedAtDiff;
940
+ return a.row.id.localeCompare(b.row.id);
941
+ });
942
+ const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
943
+ if (topIds.length > 0) {
944
+ const fullRows = [];
945
+ const phase2ChunkSize = 500;
946
+ for (let i = 0; i < topIds.length; i += phase2ChunkSize) {
947
+ const idChunk = topIds.slice(i, i + phase2ChunkSize);
948
+ const placeholders = idChunk.map(() => "?").join(",");
949
+ const chunkRows = await this.db.getAllAsync(
950
+ `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
951
+ idChunk
952
+ );
953
+ fullRows.push(...chunkRows);
954
+ }
955
+ const byId = new Map(fullRows.map((r) => [r.id, r]));
956
+ facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
957
+ }
958
+ usedEmbed = true;
778
959
  }
779
- usedEmbed = true;
780
960
  } catch (err) {
781
961
  const error = err instanceof Error ? err : new Error(String(err));
782
962
  this.options.onRetrievalFallback?.(error);
@@ -789,25 +969,35 @@ var WikiMemory = class {
789
969
  });
790
970
  const topIds = results.slice(0, maxResults).map((r) => r.id);
791
971
  if (topIds.length > 0) {
792
- const placeholders = topIds.map(() => "?").join(",");
793
- const rows = await this.db.getAllAsync(
794
- `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
795
- topIds
796
- );
797
- const byId = new Map(rows.map((r) => [r.id, r]));
972
+ const kwRows = [];
973
+ const kwChunkSize = 500;
974
+ for (let i = 0; i < topIds.length; i += kwChunkSize) {
975
+ const idChunk = topIds.slice(i, i + kwChunkSize);
976
+ const placeholders = idChunk.map(() => "?").join(",");
977
+ const chunkRows = await this.db.getAllAsync(
978
+ `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
979
+ idChunk
980
+ );
981
+ kwRows.push(...chunkRows);
982
+ }
983
+ const byId = new Map(kwRows.map((r) => [r.id, r]));
798
984
  facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
799
985
  }
800
986
  }
801
987
  if (facts.length > 0) {
802
988
  const ids = facts.map((f) => f.id);
803
- const placeholders = ids.map(() => "?").join(",");
804
989
  const now = Date.now();
805
- await this.db.runAsync(
806
- `UPDATE ${this.prefix}entries
807
- SET access_count = access_count + 1, last_accessed_at = ?
808
- WHERE id IN (${placeholders})`,
809
- [now, ...ids]
810
- );
990
+ const accessChunkSize = 500;
991
+ for (let i = 0; i < ids.length; i += accessChunkSize) {
992
+ const idChunk = ids.slice(i, i + accessChunkSize);
993
+ const placeholders = idChunk.map(() => "?").join(",");
994
+ await this.db.runAsync(
995
+ `UPDATE ${this.prefix}entries
996
+ SET access_count = access_count + 1, last_accessed_at = ?
997
+ WHERE id IN (${placeholders})`,
998
+ [now, ...idChunk]
999
+ );
1000
+ }
811
1001
  }
812
1002
  } else {
813
1003
  facts = await this.db.getAllAsync(
@@ -834,7 +1024,7 @@ var WikiMemory = class {
834
1024
  )
835
1025
  ]);
836
1026
  const parsedFacts = facts.map((f) => {
837
- const { embedding: _embedding, ...rest } = f;
1027
+ const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
838
1028
  return {
839
1029
  ...rest,
840
1030
  tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
@@ -866,7 +1056,7 @@ var WikiMemory = class {
866
1056
  if (memoryCheckpoint > count) memoryCheckpoint = 0;
867
1057
  if (count - memoryCheckpoint >= threshold) {
868
1058
  const jobKey = this._librarianKey(entityId);
869
- if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
1059
+ if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId)) && !this._isReembedActive(entityId) && !this._isImportActiveFor(entityId) && !this._isForgetActiveFor(entityId)) {
870
1060
  this.activeMaintenanceJobs.add(jobKey);
871
1061
  this.runLibrarianThenMaybeHeal(entityId, count).catch(console.error).finally(() => this.activeMaintenanceJobs.delete(jobKey));
872
1062
  }
@@ -914,7 +1104,7 @@ var WikiMemory = class {
914
1104
  LIMIT 100
915
1105
  `, [entityId]);
916
1106
  const currentFacts = currentFactsRows.map((f) => {
917
- const { embedding: _embedding, ...rest } = f;
1107
+ const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
918
1108
  return {
919
1109
  ...rest,
920
1110
  tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
@@ -968,10 +1158,12 @@ ${JSON.stringify(currentFacts, null, 2)}`;
968
1158
  `, [id, entityId, task.description, "pending", task.priority, now, now]);
969
1159
  }
970
1160
  });
1161
+ await this.rebuildMiniSearchIndex(entityId);
1162
+ this.vectorCache.delete(entityId);
971
1163
  for (const fact of insertedFacts) {
972
1164
  await this.embedFact(fact);
973
1165
  }
974
- await this.rebuildMiniSearchIndex(entityId);
1166
+ this.vectorCache.delete(entityId);
975
1167
  }
976
1168
  async _doRunHeal(entityId) {
977
1169
  const now = Date.now();
@@ -1009,7 +1201,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
1009
1201
  const documentAnchors = allFactsRows.filter((f) => f.source_type === "user_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
1010
1202
  const userPrompt = `Heal Candidates:
1011
1203
  ${JSON.stringify(healCandidates.map((f) => {
1012
- const { embedding: _embedding, ...rest } = f;
1204
+ const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
1013
1205
  return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
1014
1206
  }), null, 2)}
1015
1207
 
@@ -1052,10 +1244,12 @@ The following document anchors are provided for contradiction detection only. Do
1052
1244
  insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
1053
1245
  }
1054
1246
  });
1247
+ this.vectorCache.delete(entityId);
1248
+ await this.rebuildMiniSearchIndex(entityId);
1055
1249
  for (const fact of insertedFacts) {
1056
1250
  await this.embedFact(fact);
1057
1251
  }
1058
- await this.rebuildMiniSearchIndex(entityId);
1252
+ this.vectorCache.delete(entityId);
1059
1253
  }
1060
1254
  async runLibrarian(entityId) {
1061
1255
  const jobKey = this._librarianKey(entityId);
@@ -1068,6 +1262,12 @@ The following document anchors are provided for contradiction detection only. Do
1068
1262
  if (this._isReembedActive(entityId)) {
1069
1263
  throw new WikiBusyError("reembed", entityId);
1070
1264
  }
1265
+ if (this._isImportActiveFor(entityId)) {
1266
+ throw new WikiBusyError("import", entityId);
1267
+ }
1268
+ if (this._isForgetActiveFor(entityId)) {
1269
+ throw new WikiBusyError("forget", entityId);
1270
+ }
1071
1271
  this.activeMaintenanceJobs.add(jobKey);
1072
1272
  try {
1073
1273
  await this._doRunLibrarian(entityId);
@@ -1086,6 +1286,12 @@ The following document anchors are provided for contradiction detection only. Do
1086
1286
  if (this._isReembedActive(entityId)) {
1087
1287
  throw new WikiBusyError("reembed", entityId);
1088
1288
  }
1289
+ if (this._isImportActiveFor(entityId)) {
1290
+ throw new WikiBusyError("import", entityId);
1291
+ }
1292
+ if (this._isForgetActiveFor(entityId)) {
1293
+ throw new WikiBusyError("forget", entityId);
1294
+ }
1089
1295
  this.activeMaintenanceJobs.add(jobKey);
1090
1296
  try {
1091
1297
  await this._doRunHeal(entityId);
@@ -1093,9 +1299,9 @@ The following document anchors are provided for contradiction detection only. Do
1093
1299
  this.activeMaintenanceJobs.delete(jobKey);
1094
1300
  }
1095
1301
  }
1096
- async runReembed(entityId) {
1302
+ async runReembed(entityId, opts) {
1097
1303
  const embedFn = this.options.llmProvider.embed;
1098
- if (!embedFn) return { embedded: 0, skipped: 0 };
1304
+ if (!embedFn) return { embedded: 0, skipped: 0, failed: 0 };
1099
1305
  const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
1100
1306
  if (this.activeMaintenanceJobs.has(reembedKey)) {
1101
1307
  throw new WikiBusyError("reembed", entityId ?? "*");
@@ -1116,6 +1322,12 @@ The following document anchors are provided for contradiction detection only. Do
1116
1322
  if (this._isIngestActiveFor(entityId)) {
1117
1323
  throw new WikiBusyError("ingest", entityId);
1118
1324
  }
1325
+ if (this._isImportActiveFor(entityId)) {
1326
+ throw new WikiBusyError("import", entityId);
1327
+ }
1328
+ if (this._isForgetActiveFor(entityId)) {
1329
+ throw new WikiBusyError("forget", entityId);
1330
+ }
1119
1331
  } else {
1120
1332
  if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
1121
1333
  throw new WikiBusyError("reembed", "*");
@@ -1132,6 +1344,12 @@ The following document anchors are provided for contradiction detection only. Do
1132
1344
  if (this.activeIngestJobs.size > 0) {
1133
1345
  throw new WikiBusyError("ingest", "*");
1134
1346
  }
1347
+ if (this._isAnyMaintenanceActiveWithSuffix(":import")) {
1348
+ throw new WikiBusyError("import", "*");
1349
+ }
1350
+ if (this._isAnyMaintenanceActiveWithSuffix(":forget")) {
1351
+ throw new WikiBusyError("forget", "*");
1352
+ }
1135
1353
  }
1136
1354
  this.activeMaintenanceJobs.add(reembedKey);
1137
1355
  try {
@@ -1141,17 +1359,64 @@ The following document anchors are provided for contradiction detection only. Do
1141
1359
  `SELECT * FROM ${this.prefix}entries WHERE ${where}`,
1142
1360
  params
1143
1361
  );
1362
+ if (entityId) {
1363
+ this.vectorCache.delete(entityId);
1364
+ } else {
1365
+ this.vectorCache.clear();
1366
+ }
1367
+ const skipExisting = opts?.skipExisting ?? false;
1368
+ let effectiveSkip = skipExisting;
1369
+ if (skipExisting) {
1370
+ const mismatchRow = await this.db.getFirstAsync(
1371
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
1372
+ );
1373
+ if (mismatchRow) {
1374
+ if (entityId) {
1375
+ const mismatchDim = parseInt(mismatchRow.value, 10);
1376
+ const staleForEntity = await this.db.getFirstAsync(
1377
+ `SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
1378
+ WHERE entity_id = ? AND deleted_at IS NULL
1379
+ AND (
1380
+ embedding_blob IS NULL
1381
+ OR (CAST(length(embedding_blob) AS INTEGER) / 4) != ?
1382
+ )`,
1383
+ [entityId, mismatchDim]
1384
+ );
1385
+ if (staleForEntity && staleForEntity.cnt > 0) effectiveSkip = false;
1386
+ } else {
1387
+ effectiveSkip = false;
1388
+ }
1389
+ }
1390
+ }
1144
1391
  let embedded = 0;
1145
1392
  let skipped = 0;
1146
- for (const row of rows) {
1147
- const success = await this.embedFact(row);
1148
- if (success) embedded++;
1149
- else skipped++;
1150
- }
1151
- if (embedded > 0) {
1152
- await this._reconcileEmbeddingDimension();
1393
+ let failed = 0;
1394
+ try {
1395
+ for (const row of rows) {
1396
+ const existingBlob = row.embedding_blob;
1397
+ const blobIsValid = !!existingBlob && existingBlob.byteLength > 0 && existingBlob.byteLength % 4 === 0;
1398
+ if (effectiveSkip && blobIsValid) {
1399
+ const vec = parseEmbedding(existingBlob, null);
1400
+ if (vec !== null && vec.every((v) => Number.isFinite(v))) {
1401
+ skipped++;
1402
+ continue;
1403
+ }
1404
+ }
1405
+ const success = await this.embedFact(row);
1406
+ if (success) embedded++;
1407
+ else failed++;
1408
+ }
1409
+ if (embedded > 0) {
1410
+ await this._reconcileEmbeddingDimension();
1411
+ }
1412
+ } finally {
1413
+ if (entityId) {
1414
+ this.vectorCache.delete(entityId);
1415
+ } else {
1416
+ this.vectorCache.clear();
1417
+ }
1153
1418
  }
1154
- return { embedded, skipped };
1419
+ return { embedded, skipped, failed };
1155
1420
  } finally {
1156
1421
  this.activeMaintenanceJobs.delete(reembedKey);
1157
1422
  }
@@ -1171,6 +1436,9 @@ The following document anchors are provided for contradiction detection only. Do
1171
1436
  heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
1172
1437
  };
1173
1438
  }
1439
+ clearVectorCache() {
1440
+ this.vectorCache.clear();
1441
+ }
1174
1442
  async _getFullBundle(entityId, opts) {
1175
1443
  const maxEvents = opts?.maxEvents;
1176
1444
  const eventsQuery = maxEvents != null ? `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at ASC`;
@@ -1187,10 +1455,16 @@ The following document anchors are provided for contradiction detection only. Do
1187
1455
  this.db.getAllAsync(eventsQuery, eventsParams)
1188
1456
  ]);
1189
1457
  const facts = factsRaw.map((f) => {
1190
- const { embedding: _embedding, ...rest } = f;
1458
+ const { embedding: _embedding, embedding_blob, ...rest } = f;
1459
+ const safeBlobCopy = opts?.includeBlobs && embedding_blob ? (() => {
1460
+ const c = new ArrayBuffer(embedding_blob.byteLength);
1461
+ new Uint8Array(c).set(embedding_blob);
1462
+ return new Uint8Array(c);
1463
+ })() : void 0;
1464
+ const factBase = safeBlobCopy ? { ...rest, embedding_blob: safeBlobCopy } : rest;
1191
1465
  return {
1192
- ...rest,
1193
- tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
1466
+ ...factBase,
1467
+ tags: typeof factBase.tags === "string" ? JSON.parse(factBase.tags) : factBase.tags
1194
1468
  };
1195
1469
  });
1196
1470
  const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
@@ -1217,7 +1491,7 @@ The following document anchors are provided for contradiction detection only. Do
1217
1491
  for (let i = 0; i < ids.length; i += BATCH) {
1218
1492
  const batch = ids.slice(i, i + BATCH);
1219
1493
  const batchResults = await Promise.all(
1220
- batch.map(async (id) => [id, await this._getFullBundle(id)])
1494
+ batch.map(async (id) => [id, await this._getFullBundle(id, { includeBlobs: true })])
1221
1495
  );
1222
1496
  for (const [id, bundle] of batchResults) {
1223
1497
  entities[id] = bundle;
@@ -1227,172 +1501,339 @@ The following document anchors are provided for contradiction detection only. Do
1227
1501
  }
1228
1502
  async importDump(dump, opts) {
1229
1503
  const merge = opts?.merge ?? false;
1230
- for (const [entityId, bundle] of Object.entries(dump.entities)) {
1231
- await this.db.withTransactionAsync(async () => {
1232
- if (!merge) {
1233
- const now = Date.now();
1234
- await this.db.runAsync(
1235
- `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
1236
- [now, now, entityId]
1237
- );
1238
- await this.db.runAsync(
1239
- `UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
1240
- [now, now, entityId]
1241
- );
1242
- await this.db.runAsync(
1243
- `DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
1244
- [entityId]
1245
- );
1504
+ const entityIds = Object.keys(dump.entities);
1505
+ for (const entityId of entityIds) {
1506
+ if (this.activeMaintenanceJobs.has(this._importKey(entityId))) {
1507
+ throw new WikiBusyError("import", entityId);
1508
+ }
1509
+ if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
1510
+ throw new WikiBusyError("librarian", entityId);
1511
+ }
1512
+ if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
1513
+ throw new WikiBusyError("heal", entityId);
1514
+ }
1515
+ if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
1516
+ throw new WikiBusyError("prune", entityId);
1517
+ }
1518
+ if (this._isReembedActive(entityId)) {
1519
+ throw new WikiBusyError("reembed", entityId);
1520
+ }
1521
+ if (this._isIngestActiveFor(entityId)) {
1522
+ throw new WikiBusyError("ingest", entityId);
1523
+ }
1524
+ if (this._isForgetActiveFor(entityId)) {
1525
+ throw new WikiBusyError("forget", entityId);
1526
+ }
1527
+ }
1528
+ if (this.activeMaintenanceJobs.has(this._globalImportKey())) {
1529
+ throw new WikiBusyError("import", "*");
1530
+ }
1531
+ this.activeMaintenanceJobs.add(this._globalImportKey());
1532
+ for (const entityId of entityIds) {
1533
+ this.activeMaintenanceJobs.add(this._importKey(entityId));
1534
+ }
1535
+ try {
1536
+ for (const [entityId, bundle] of Object.entries(dump.entities)) {
1537
+ await this._doImportEntity(entityId, bundle, merge);
1538
+ }
1539
+ } finally {
1540
+ this.activeMaintenanceJobs.delete(this._globalImportKey());
1541
+ for (const entityId of entityIds) {
1542
+ this.activeMaintenanceJobs.delete(this._importKey(entityId));
1543
+ }
1544
+ }
1545
+ }
1546
+ async _doImportEntity(entityId, bundle, merge) {
1547
+ const upsertedFactIds = /* @__PURE__ */ new Set();
1548
+ const factsWithPreservedBlob = /* @__PURE__ */ new Set();
1549
+ const preservedBlobDims = /* @__PURE__ */ new Set();
1550
+ await this.db.withTransactionAsync(async () => {
1551
+ if (!merge) {
1552
+ const now = Date.now();
1553
+ await this.db.runAsync(
1554
+ `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
1555
+ [now, now, entityId]
1556
+ );
1557
+ await this.db.runAsync(
1558
+ `UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
1559
+ [now, now, entityId]
1560
+ );
1561
+ await this.db.runAsync(
1562
+ `DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
1563
+ [entityId]
1564
+ );
1565
+ }
1566
+ const factIds = bundle.facts.map((fact) => fact.id);
1567
+ const existingFactsById = /* @__PURE__ */ new Map();
1568
+ const factLookupChunkSize = 500;
1569
+ for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
1570
+ const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
1571
+ if (factIdChunk.length === 0) continue;
1572
+ const placeholders = factIdChunk.map(() => "?").join(", ");
1573
+ const existingFacts = await this.db.getAllAsync(
1574
+ `SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
1575
+ factIdChunk
1576
+ );
1577
+ for (const existingFact of existingFacts) {
1578
+ existingFactsById.set(existingFact.id, existingFact);
1246
1579
  }
1247
- const factIds = bundle.facts.map((fact) => fact.id);
1248
- const existingFactsById = /* @__PURE__ */ new Map();
1249
- const factLookupChunkSize = 500;
1250
- for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
1251
- const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
1252
- if (factIdChunk.length === 0) continue;
1253
- const placeholders = factIdChunk.map(() => "?").join(", ");
1254
- const existingFacts = await this.db.getAllAsync(
1255
- `SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
1256
- factIdChunk
1257
- );
1258
- for (const existingFact of existingFacts) {
1259
- existingFactsById.set(existingFact.id, existingFact);
1580
+ }
1581
+ for (const fact of bundle.facts) {
1582
+ const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
1583
+ const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
1584
+ const existing = existingFactsById.get(fact.id);
1585
+ const rawBlobRaw = fact.embedding_blob;
1586
+ let rawBlob = null;
1587
+ if (rawBlobRaw instanceof Uint8Array) {
1588
+ rawBlob = rawBlobRaw;
1589
+ } else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
1590
+ const obj = rawBlobRaw;
1591
+ if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
1592
+ rawBlob = new Uint8Array(obj["data"]);
1593
+ } else if (!Array.isArray(rawBlobRaw)) {
1594
+ const entries = Object.keys(obj);
1595
+ if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
1596
+ const len = entries.length;
1597
+ rawBlob = new Uint8Array(len);
1598
+ for (let i = 0; i < len; i++) rawBlob[i] = obj[String(i)] ?? 0;
1599
+ }
1260
1600
  }
1261
1601
  }
1262
- for (const fact of bundle.facts) {
1263
- const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
1264
- const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
1265
- const existing = existingFactsById.get(fact.id);
1266
- if (existing) {
1267
- if (existing.entity_id !== entityId) {
1268
- this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
1269
- continue;
1270
- }
1271
- if (merge) {
1272
- if (safeUpdatedAt <= existing.updated_at) continue;
1602
+ let blobData = null;
1603
+ if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
1604
+ const copy = new ArrayBuffer(rawBlob.byteLength);
1605
+ new Uint8Array(copy).set(rawBlob);
1606
+ const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
1607
+ let allFinite = true;
1608
+ for (let i = 0; i < floats.length; i++) {
1609
+ if (!isFinite(floats[i])) {
1610
+ allFinite = false;
1611
+ break;
1273
1612
  }
1613
+ }
1614
+ if (allFinite) {
1615
+ blobData = rawBlob;
1616
+ }
1617
+ }
1618
+ if (existing) {
1619
+ if (existing.entity_id !== entityId) {
1620
+ this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
1621
+ continue;
1622
+ }
1623
+ if (merge) {
1624
+ if (safeUpdatedAt <= existing.updated_at) continue;
1625
+ }
1626
+ if (blobData != null) {
1627
+ await this.db.runAsync(
1628
+ `UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = ?, embedding = NULL WHERE id = ?`,
1629
+ [entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData, fact.id]
1630
+ );
1631
+ factsWithPreservedBlob.add(fact.id);
1632
+ if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
1633
+ } else {
1274
1634
  await this.db.runAsync(
1275
- `UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ? WHERE id = ?`,
1635
+ `UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = NULL, embedding = NULL WHERE id = ?`,
1276
1636
  [entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, fact.id]
1277
1637
  );
1278
- existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
1638
+ }
1639
+ existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
1640
+ upsertedFactIds.add(fact.id);
1641
+ } else {
1642
+ if (blobData != null) {
1643
+ await this.db.runAsync(
1644
+ `INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at, embedding_blob) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1645
+ [fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData]
1646
+ );
1647
+ factsWithPreservedBlob.add(fact.id);
1648
+ if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
1279
1649
  } else {
1280
1650
  await this.db.runAsync(
1281
1651
  `INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1282
1652
  [fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at]
1283
1653
  );
1284
- existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
1285
1654
  }
1655
+ existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
1656
+ upsertedFactIds.add(fact.id);
1286
1657
  }
1287
- const taskIds = bundle.tasks.map((task) => task.id);
1288
- const existingTasksById = /* @__PURE__ */ new Map();
1289
- const taskLookupChunkSize = 500;
1290
- for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
1291
- const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
1292
- if (taskIdChunk.length === 0) continue;
1293
- const placeholders = taskIdChunk.map(() => "?").join(", ");
1294
- const existingTasks = await this.db.getAllAsync(
1295
- `SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
1296
- taskIdChunk
1297
- );
1298
- for (const existingTask of existingTasks) {
1299
- existingTasksById.set(existingTask.id, existingTask);
1658
+ }
1659
+ const taskIds = bundle.tasks.map((task) => task.id);
1660
+ const existingTasksById = /* @__PURE__ */ new Map();
1661
+ const taskLookupChunkSize = 500;
1662
+ for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
1663
+ const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
1664
+ if (taskIdChunk.length === 0) continue;
1665
+ const placeholders = taskIdChunk.map(() => "?").join(", ");
1666
+ const existingTasks = await this.db.getAllAsync(
1667
+ `SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
1668
+ taskIdChunk
1669
+ );
1670
+ for (const existingTask of existingTasks) {
1671
+ existingTasksById.set(existingTask.id, existingTask);
1672
+ }
1673
+ }
1674
+ for (const task of bundle.tasks) {
1675
+ const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
1676
+ const existing = existingTasksById.get(task.id);
1677
+ if (existing) {
1678
+ if (existing.entity_id !== entityId) {
1679
+ this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
1680
+ continue;
1681
+ }
1682
+ if (merge) {
1683
+ if (safeUpdatedAt <= existing.updated_at) continue;
1300
1684
  }
1685
+ await this.db.runAsync(
1686
+ `UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
1687
+ [entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
1688
+ );
1689
+ existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
1690
+ } else {
1691
+ await this.db.runAsync(
1692
+ `INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1693
+ [task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
1694
+ );
1695
+ existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
1301
1696
  }
1302
- for (const task of bundle.tasks) {
1303
- const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
1304
- const existing = existingTasksById.get(task.id);
1305
- if (existing) {
1306
- if (existing.entity_id !== entityId) {
1307
- this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
1308
- continue;
1309
- }
1310
- if (merge) {
1311
- if (safeUpdatedAt <= existing.updated_at) continue;
1312
- }
1313
- await this.db.runAsync(
1314
- `UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
1315
- [entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
1316
- );
1317
- existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
1318
- } else {
1697
+ }
1698
+ for (const event of bundle.events) {
1699
+ await this.db.runAsync(
1700
+ `INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
1701
+ VALUES (?, ?, ?, ?, ?, ?)`,
1702
+ [event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
1703
+ );
1704
+ }
1705
+ });
1706
+ this.vectorCache.delete(entityId);
1707
+ await this.rebuildMiniSearchIndex(entityId);
1708
+ for (const fact of bundle.facts) {
1709
+ if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
1710
+ await this.embedFact({
1711
+ id: fact.id,
1712
+ title: fact.title,
1713
+ body: fact.body,
1714
+ tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
1715
+ });
1716
+ }
1717
+ }
1718
+ try {
1719
+ const canonicalRow = await this.db.getFirstAsync(
1720
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
1721
+ );
1722
+ const canonicalDim = canonicalRow ? parseInt(canonicalRow.value, 10) : null;
1723
+ if (preservedBlobDims.size === 1) {
1724
+ const preservedDim = [...preservedBlobDims][0];
1725
+ if (canonicalDim === null || canonicalDim === preservedDim) {
1726
+ await this.storeEmbeddingDimension(preservedDim);
1727
+ const staleMismatch = await this.db.getFirstAsync(
1728
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
1729
+ );
1730
+ if (staleMismatch && parseInt(staleMismatch.value, 10) !== preservedDim) {
1319
1731
  await this.db.runAsync(
1320
- `INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1321
- [task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
1732
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
1733
+ [String(preservedDim)]
1322
1734
  );
1323
- existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
1324
1735
  }
1325
- }
1326
- for (const event of bundle.events) {
1736
+ await this._reconcileEmbeddingDimension();
1737
+ } else {
1327
1738
  await this.db.runAsync(
1328
- `INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
1329
- VALUES (?, ?, ?, ?, ?, ?)`,
1330
- [event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
1739
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
1740
+ [String(canonicalDim)]
1331
1741
  );
1332
1742
  }
1333
- });
1334
- for (const fact of bundle.facts) {
1335
- if (!fact.deleted_at) {
1336
- await this.embedFact({
1337
- id: fact.id,
1338
- title: fact.title,
1339
- body: fact.body,
1340
- tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
1341
- });
1743
+ } else if (preservedBlobDims.size > 1) {
1744
+ if (canonicalDim === null) {
1745
+ const sortedPreservedBlobDims = [...preservedBlobDims].sort((a, b) => a - b);
1746
+ await this.storeEmbeddingDimension(sortedPreservedBlobDims[0]);
1747
+ await this.db.runAsync(
1748
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
1749
+ [String(sortedPreservedBlobDims[0])]
1750
+ );
1751
+ } else {
1752
+ await this.db.runAsync(
1753
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
1754
+ [String(canonicalDim)]
1755
+ );
1342
1756
  }
1343
1757
  }
1758
+ } finally {
1759
+ this.vectorCache.delete(entityId);
1344
1760
  }
1345
- await this.rebuildMiniSearchIndex();
1346
1761
  }
1347
1762
  async forget(entityId, params) {
1348
- const now = Date.now();
1349
- let deletedEntries = 0;
1350
- let deletedTasks = 0;
1351
- if (params.clearAll) {
1352
- const [entriesRes, tasksRes] = await Promise.all([
1353
- this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId]),
1354
- this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId])
1355
- ]);
1356
- await this.db.runAsync(`UPDATE ${this.prefix}checkpoints SET memory_checkpoint = 0, heal_checkpoint = 0 WHERE entity_id = ?`, [entityId]);
1357
- deletedEntries = entriesRes.changes;
1358
- deletedTasks = tasksRes.changes;
1359
- } else {
1360
- const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
1361
- const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
1362
- if (hasIdSelectors && hasSourceSelectors) {
1363
- throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
1364
- }
1365
- const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
1366
- if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
1367
- const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
1368
- if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
1369
- const entryPromise = params.entryId ? this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.entryId, entityId]) : null;
1370
- const taskPromise = params.taskId ? this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.taskId, entityId]) : null;
1371
- let refPromise = null;
1372
- if (sourceRef || sourceHash) {
1373
- let q = `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`;
1374
- const args = [now, now, entityId];
1375
- if (sourceRef) {
1376
- q += ` AND source_ref = ?`;
1377
- args.push(sourceRef);
1763
+ let blockingOperation = null;
1764
+ if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
1765
+ blockingOperation = "librarian";
1766
+ } else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
1767
+ blockingOperation = "heal";
1768
+ } else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
1769
+ blockingOperation = "prune";
1770
+ } else if (this._isReembedActive(entityId)) {
1771
+ blockingOperation = "reembed";
1772
+ } else if (this._isIngestActiveFor(entityId)) {
1773
+ blockingOperation = "ingest";
1774
+ } else if (this._isImportActiveFor(entityId)) {
1775
+ blockingOperation = "import";
1776
+ } else if (this._isForgetActiveFor(entityId)) {
1777
+ blockingOperation = "forget";
1778
+ }
1779
+ if (blockingOperation !== null) {
1780
+ throw new WikiBusyError(blockingOperation, entityId);
1781
+ }
1782
+ const forgetKey = this._forgetKey(entityId);
1783
+ this.activeMaintenanceJobs.add(forgetKey);
1784
+ try {
1785
+ const now = Date.now();
1786
+ let deletedEntries = 0;
1787
+ let deletedTasks = 0;
1788
+ if (params.clearAll) {
1789
+ const [entriesRes, tasksRes] = await Promise.all([
1790
+ this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId]),
1791
+ this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId])
1792
+ ]);
1793
+ await this.db.runAsync(`UPDATE ${this.prefix}checkpoints SET memory_checkpoint = 0, heal_checkpoint = 0 WHERE entity_id = ?`, [entityId]);
1794
+ deletedEntries = entriesRes.changes;
1795
+ deletedTasks = tasksRes.changes;
1796
+ } else {
1797
+ const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
1798
+ const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
1799
+ if (hasIdSelectors && hasSourceSelectors) {
1800
+ throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
1378
1801
  }
1379
- if (sourceHash) {
1380
- q += ` AND source_hash = ?`;
1381
- args.push(sourceHash);
1802
+ const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
1803
+ if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
1804
+ const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
1805
+ if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
1806
+ const entryPromise = params.entryId ? this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.entryId, entityId]) : null;
1807
+ const taskPromise = params.taskId ? this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.taskId, entityId]) : null;
1808
+ let refPromise = null;
1809
+ if (sourceRef || sourceHash) {
1810
+ let q = `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`;
1811
+ const args = [now, now, entityId];
1812
+ if (sourceRef) {
1813
+ q += ` AND source_ref = ?`;
1814
+ args.push(sourceRef);
1815
+ }
1816
+ if (sourceHash) {
1817
+ q += ` AND source_hash = ?`;
1818
+ args.push(sourceHash);
1819
+ }
1820
+ refPromise = this.db.runAsync(q, args);
1382
1821
  }
1383
- refPromise = this.db.runAsync(q, args);
1822
+ const [entryResult, taskResult, refResult] = await Promise.all([
1823
+ entryPromise ?? Promise.resolve(null),
1824
+ taskPromise ?? Promise.resolve(null),
1825
+ refPromise ?? Promise.resolve(null)
1826
+ ]);
1827
+ if (entryResult) deletedEntries += entryResult.changes;
1828
+ if (taskResult) deletedTasks += taskResult.changes;
1829
+ if (refResult) deletedEntries += refResult.changes;
1384
1830
  }
1385
- const [entryResult, taskResult, refResult] = await Promise.all([
1386
- entryPromise ?? Promise.resolve(null),
1387
- taskPromise ?? Promise.resolve(null),
1388
- refPromise ?? Promise.resolve(null)
1389
- ]);
1390
- if (entryResult) deletedEntries += entryResult.changes;
1391
- if (taskResult) deletedTasks += taskResult.changes;
1392
- if (refResult) deletedEntries += refResult.changes;
1831
+ await this.rebuildMiniSearchIndex(entityId);
1832
+ this.vectorCache.delete(entityId);
1833
+ return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
1834
+ } finally {
1835
+ this.activeMaintenanceJobs.delete(forgetKey);
1393
1836
  }
1394
- await this.rebuildMiniSearchIndex(entityId);
1395
- return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
1396
1837
  }
1397
1838
  async ingestDocument(entityId, params) {
1398
1839
  const sourceRef = normalizeSourceRef(params.sourceRef);
@@ -1420,6 +1861,12 @@ The following document anchors are provided for contradiction detection only. Do
1420
1861
  if (this._isReembedActive(entityId)) {
1421
1862
  throw new WikiBusyError("reembed", entityId);
1422
1863
  }
1864
+ if (this._isImportActiveFor(entityId)) {
1865
+ throw new WikiBusyError("import", entityId);
1866
+ }
1867
+ if (this._isForgetActiveFor(entityId)) {
1868
+ throw new WikiBusyError("forget", entityId);
1869
+ }
1423
1870
  this.activeIngestJobs.add(jobKey);
1424
1871
  try {
1425
1872
  const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
@@ -1467,16 +1914,31 @@ ${chunk}`;
1467
1914
  insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
1468
1915
  }
1469
1916
  });
1917
+ await this.rebuildMiniSearchIndex(entityId);
1918
+ this.vectorCache.delete(entityId);
1470
1919
  for (const fact of insertedFacts) {
1471
1920
  await this.embedFact(fact);
1472
1921
  }
1473
- await this.rebuildMiniSearchIndex(entityId);
1922
+ this.vectorCache.delete(entityId);
1474
1923
  return { truncated, chunks: chunks.length };
1475
1924
  } finally {
1476
1925
  this.activeIngestJobs.delete(jobKey);
1477
1926
  }
1478
1927
  }
1479
1928
  };
1929
+ /**
1930
+ * Maximum number of entities whose parsed embedding vectors are held in
1931
+ * memory. This cap is intentionally conservative so the cache remains safe
1932
+ * on memory-constrained runtimes (e.g., mobile/Expo).
1933
+ */
1934
+ _WikiMemory.MAX_VECTOR_CACHE_ENTITIES = 16;
1935
+ /**
1936
+ * Maximum number of fact vectors cached per entity. Keep this high enough to
1937
+ * preserve the parsed-embedding reuse optimization for common mid-sized
1938
+ * entities while still maintaining a bounded memory footprint.
1939
+ */
1940
+ _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
1941
+ var WikiMemory = _WikiMemory;
1480
1942
 
1481
1943
  // src/utils/formatContext.ts
1482
1944
  function validateMaxOption(value, name) {
@@ -1672,8 +2134,23 @@ function formatMemoryDump(dump) {
1672
2134
  name: formatEntityFileName(entityId),
1673
2135
  content: renderEntity(entityId, bundle, dump.generatedAt)
1674
2136
  }));
2137
+ const manifestDump = {
2138
+ generatedAt: dump.generatedAt,
2139
+ entities: Object.fromEntries(
2140
+ Object.entries(dump.entities).map(([entityId, bundle]) => [
2141
+ entityId,
2142
+ {
2143
+ ...bundle,
2144
+ facts: bundle.facts.map((f) => {
2145
+ const { embedding_blob: _blob, ...rest } = f;
2146
+ return rest;
2147
+ })
2148
+ }
2149
+ ])
2150
+ )
2151
+ };
1675
2152
  return {
1676
- manifest: JSON.stringify(dump, null, 2),
2153
+ manifest: JSON.stringify(manifestDump, null, 2),
1677
2154
  files
1678
2155
  };
1679
2156
  }