@equationalapplications/core-llm-wiki 2.6.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -18,7 +18,8 @@ async function setupDatabase(db, prefix) {
18
18
  last_accessed_at INTEGER,
19
19
  access_count INTEGER NOT NULL DEFAULT 0,
20
20
  deleted_at INTEGER,
21
- embedding TEXT
21
+ embedding TEXT,
22
+ embedding_blob BLOB
22
23
  );
23
24
 
24
25
  CREATE INDEX IF NOT EXISTS ${prefix}entries_entity_idx ON ${prefix}entries(entity_id);
@@ -91,6 +92,20 @@ var MIGRATIONS = [
91
92
  await db.execAsync(`ALTER TABLE ${prefix}entries ADD COLUMN embedding TEXT`);
92
93
  }
93
94
  }
95
+ },
96
+ {
97
+ version: 3,
98
+ description: "Add embedding_blob BLOB column for Float32Array vector storage",
99
+ run: async (db, prefix) => {
100
+ const cols = await db.getAllAsync(
101
+ `PRAGMA table_info(${prefix}entries)`
102
+ );
103
+ if (!cols.some((c) => c.name === "embedding_blob")) {
104
+ await db.execAsync(
105
+ `ALTER TABLE ${prefix}entries ADD COLUMN embedding_blob BLOB`
106
+ );
107
+ }
108
+ }
94
109
  }
95
110
  ];
96
111
  for (let i = 1; i < MIGRATIONS.length; i++) {
@@ -148,6 +163,34 @@ function cosineSimilarity(a, b) {
148
163
  return denom === 0 ? 0 : dot / denom;
149
164
  }
150
165
 
166
+ // src/utils/embedding.ts
167
/**
 * Decode a stored embedding into a Float32Array.
 *
 * The binary form takes precedence: when `blob` is present and non-empty its
 * raw bytes are reinterpreted as float32 values and `text` is never consulted,
 * even if the blob turns out to be invalid. Otherwise `text` is parsed as a
 * JSON array of numbers.
 *
 * @param {ArrayBuffer|ArrayBufferView|null|undefined} blob - Raw embedding
 *   bytes. Any buffer view (Uint8Array, Node Buffer, ...) or a bare
 *   ArrayBuffer is accepted.
 * @param {string|null|undefined} text - JSON-encoded number array used when no
 *   blob is available.
 * @returns {Float32Array|null} A freshly allocated vector, or null when no
 *   usable embedding exists (byte length not a multiple of 4, malformed JSON,
 *   non-numeric entries, or any NaN/Infinity value).
 */
function parseEmbedding(blob, text) {
  if (blob && blob.byteLength > 0) {
    if (blob.byteLength % 4 !== 0) return null;
    // Normalise to a byte view first. Passing a bare ArrayBuffer (or a
    // non-byte view) to Uint8Array#set would copy nothing (or copy element
    // values instead of bytes) and silently yield a wrong vector.
    const bytes = ArrayBuffer.isView(blob)
      ? new Uint8Array(blob.buffer, blob.byteOffset, blob.byteLength)
      : new Uint8Array(blob);
    // slice() copies into a fresh, offset-0 buffer: Float32Array needs a
    // 4-byte-aligned start, and the result must not alias the caller's memory.
    const vector = new Float32Array(bytes.slice().buffer);
    return vector.every((value) => Number.isFinite(value)) ? vector : null;
  }
  if (text) {
    try {
      const parsed = JSON.parse(text);
      const allFiniteNumbers =
        Array.isArray(parsed) &&
        parsed.every((v) => typeof v === "number" && Number.isFinite(v));
      if (!allFiniteNumbers) return null;
      const vector = new Float32Array(parsed);
      // Re-check after narrowing: a finite double can overflow to Infinity
      // once it is stored as float32.
      return vector.every((value) => Number.isFinite(value)) ? vector : null;
    } catch {
      return null;
    }
  }
  return null;
}
193
+
151
194
  // src/WikiMemory.ts
152
195
  function parseJsonResponse(text) {
153
196
  const firstBrace = text.indexOf("{");
@@ -352,7 +395,7 @@ function jaccardScore(a, b) {
352
395
  }
353
396
  var FUZZY_THRESHOLD = 0.5;
354
397
  var MIN_TOKENS_TO_QUALIFY = 3;
355
- var WikiMemory = class {
398
+ var _WikiMemory = class _WikiMemory {
356
399
  constructor(db, options) {
357
400
  this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
358
401
  this.activeIngestJobs = /* @__PURE__ */ new Set();
@@ -366,6 +409,7 @@ var WikiMemory = class {
366
409
  }
367
410
  });
368
411
  this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
412
+ this.vectorCache = /* @__PURE__ */ new Map();
369
413
  this.db = db;
370
414
  this.options = options;
371
415
  this.prefix = options.config?.tablePrefix || "llm_wiki_";
@@ -432,10 +476,6 @@ var WikiMemory = class {
432
476
  `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
433
477
  [String(dim)]
434
478
  );
435
- } else {
436
- await this.db.runAsync(
437
- `DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
438
- );
439
479
  }
440
480
  } else {
441
481
  await this.db.runAsync(
@@ -454,7 +494,18 @@ var WikiMemory = class {
454
494
  const mismatch = await this.db.getFirstAsync(
455
495
  `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
456
496
  );
457
- if (mismatch) {
497
+ if (!mismatch) return;
498
+ const newDim = parseInt(mismatch.value, 10);
499
+ const residual = await this.db.getFirstAsync(
500
+ `SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
501
+ WHERE deleted_at IS NULL
502
+ AND (
503
+ (embedding_blob IS NOT NULL AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?)
504
+ OR (embedding_blob IS NULL AND embedding IS NOT NULL)
505
+ )`,
506
+ [newDim]
507
+ );
508
+ if (!residual || residual.cnt === 0) {
458
509
  await this.db.runAsync(
459
510
  `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
460
511
  [mismatch.value]
@@ -485,10 +536,23 @@ var WikiMemory = class {
485
536
  console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
486
537
  return false;
487
538
  }
488
- await this.storeEmbeddingDimension(vector.length);
539
+ const float32Vector = new Float32Array(vector);
540
+ let hasNonFinite = false;
541
+ for (let i = 0; i < float32Vector.length; i++) {
542
+ if (!isFinite(float32Vector[i])) {
543
+ hasNonFinite = true;
544
+ break;
545
+ }
546
+ }
547
+ if (hasNonFinite) {
548
+ console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
549
+ return false;
550
+ }
551
+ await this.storeEmbeddingDimension(float32Vector.length);
552
+ const blob = new Uint8Array(float32Vector.buffer);
489
553
  await this.db.runAsync(
490
- `UPDATE ${this.prefix}entries SET embedding = ? WHERE id = ?`,
491
- [JSON.stringify(vector), fact.id]
554
+ `UPDATE ${this.prefix}entries SET embedding_blob = ?, embedding = NULL WHERE id = ?`,
555
+ [blob, fact.id]
492
556
  );
493
557
  return true;
494
558
  } catch (err) {
@@ -608,9 +672,24 @@ var WikiMemory = class {
608
672
  _globalReembedKey() {
609
673
  return `${this.prefix}:reembed`;
610
674
  }
675
+ _importKey(entityId) {
676
+ return `${this.prefix}:${entityId}:import`;
677
+ }
678
+ _globalImportKey() {
679
+ return `${this.prefix}:import`;
680
+ }
681
+ _forgetKey(entityId) {
682
+ return `${this.prefix}:${entityId}:forget`;
683
+ }
611
684
  _isReembedActive(entityId) {
612
685
  return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
613
686
  }
687
+ _isImportActiveFor(entityId) {
688
+ return this.activeMaintenanceJobs.has(this._importKey(entityId)) || this.activeMaintenanceJobs.has(this._globalImportKey());
689
+ }
690
+ _isForgetActiveFor(entityId) {
691
+ return this.activeMaintenanceJobs.has(this._forgetKey(entityId));
692
+ }
614
693
  /** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
615
694
  _isAnyMaintenanceActiveWithSuffix(suffix) {
616
695
  const entityKeyPrefix = `${this.prefix}:`;
@@ -653,6 +732,10 @@ var WikiMemory = class {
653
732
  blockingOperation = "reembed";
654
733
  } else if (isIngestRunning) {
655
734
  blockingOperation = "ingest";
735
+ } else if (this._isImportActiveFor(entityId)) {
736
+ blockingOperation = "import";
737
+ } else if (this._isForgetActiveFor(entityId)) {
738
+ blockingOperation = "forget";
656
739
  }
657
740
  if (blockingOperation !== null) {
658
741
  throw new WikiBusyError(blockingOperation, entityId);
@@ -697,19 +780,27 @@ var WikiMemory = class {
697
780
  await this.db.execAsync(`VACUUM`);
698
781
  }
699
782
  await this.rebuildMiniSearchIndex(entityId);
783
+ this.vectorCache.delete(entityId);
700
784
  return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
701
785
  } finally {
702
786
  this.activeMaintenanceJobs.delete(pruneKey);
703
787
  }
704
788
  }
705
- async read(entityId, query) {
706
- const maxResults = this.options.config?.maxResults ?? this.options.config?.maxFtsResults ?? 10;
789
+ async read(entityId, query, options) {
790
+ const config = this.options.config;
791
+ const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
792
+ const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
793
+ const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
794
+ const effectivePreFilterLimit = rawPreFilterLimit === void 0 ? void 0 : Number.isFinite(rawPreFilterLimit) ? Math.max(0, Math.trunc(rawPreFilterLimit)) : void 0;
795
+ const hybridWeight = options?.hybridWeight ?? config?.hybridWeight;
796
+ const weight = hybridWeight !== void 0 && !Number.isNaN(hybridWeight) ? Math.max(0, Math.min(1, hybridWeight)) : void 0;
797
+ const skipEmbed = weight === 0;
707
798
  const embedFn = this.options.llmProvider.embed;
708
799
  const trimmedQuery = query.trim();
709
800
  let facts = [];
710
- if (trimmedQuery) {
801
+ if (maxResults === 0) ; else if (trimmedQuery) {
711
802
  let usedEmbed = false;
712
- if (embedFn) {
803
+ if (!skipEmbed && embedFn) {
713
804
  try {
714
805
  const queryVec = await embedFn(trimmedQuery);
715
806
  if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
@@ -728,49 +819,138 @@ var WikiMemory = class {
728
819
  );
729
820
  }
730
821
  }
731
- const scoreRows = await this.db.getAllAsync(
732
- `SELECT id, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
733
- [entityId]
822
+ const mismatchedCount = await this.db.getFirstAsync(
823
+ `SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
824
+ WHERE entity_id = ? AND deleted_at IS NULL
825
+ AND embedding_blob IS NOT NULL
826
+ AND (CAST(length(embedding_blob) AS INTEGER) % 4 = 0)
827
+ AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?`,
828
+ [entityId, queryVec.length]
734
829
  );
735
- const scored = scoreRows.map((row) => {
736
- let score = 0;
737
- if (row.embedding) {
738
- try {
739
- const parsed = JSON.parse(row.embedding);
740
- if (Array.isArray(parsed) && parsed.length === queryVec.length && parsed.every((v) => typeof v === "number" && isFinite(v))) {
741
- score = cosineSimilarity(queryVec, parsed);
830
+ if (mismatchedCount && mismatchedCount.cnt > 0) {
831
+ throw new Error(
832
+ `Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
833
+ );
834
+ }
835
+ let candidateRows;
836
+ let populateCache = true;
837
+ let miniSearchScores;
838
+ if (effectivePreFilterLimit !== void 0) {
839
+ populateCache = false;
840
+ const preResults = this.miniSearch.search(trimmedQuery, {
841
+ filter: (r) => r.entity_id === entityId,
842
+ combineWith: "OR"
843
+ });
844
+ if (preResults.length === 0) {
845
+ candidateRows = null;
846
+ } else {
847
+ const topKResults = preResults.slice(0, effectivePreFilterLimit);
848
+ if (topKResults.length === 0) {
849
+ candidateRows = null;
850
+ } else {
851
+ const topKIds = topKResults.map((r) => r.id);
852
+ const inClauseChunkSize = 500;
853
+ candidateRows = [];
854
+ for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
855
+ const idChunk = topKIds.slice(i, i + inClauseChunkSize);
856
+ const placeholders = idChunk.map(() => "?").join(",");
857
+ const chunkRows = await this.db.getAllAsync(
858
+ `SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
859
+ idChunk
860
+ );
861
+ candidateRows.push(...chunkRows);
862
+ }
863
+ if (weight !== void 0 && weight < 1) {
864
+ const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
865
+ miniSearchScores = new Map(topKResults.map((r) => [r.id, r.score / maxMsScore]));
742
866
  }
743
- } catch {
744
867
  }
745
868
  }
746
- return { row, score };
747
- });
748
- scored.sort((a, b) => {
749
- const scoreDiff = b.score - a.score;
750
- if (scoreDiff !== 0) {
751
- return scoreDiff;
869
+ } else {
870
+ candidateRows = await this.db.getAllAsync(
871
+ `SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
872
+ [entityId]
873
+ );
874
+ if (weight !== void 0 && weight < 1) {
875
+ const msResults = this.miniSearch.search(trimmedQuery, {
876
+ filter: (r) => r.entity_id === entityId,
877
+ combineWith: "OR"
878
+ });
879
+ const maxMsScore = Math.max(1, msResults[0]?.score ?? 1);
880
+ miniSearchScores = new Map(msResults.map((r) => [r.id, r.score / maxMsScore]));
752
881
  }
753
- const updatedAtDiff = (b.row.updated_at ?? 0) - (a.row.updated_at ?? 0);
754
- if (updatedAtDiff !== 0) {
755
- return updatedAtDiff;
882
+ }
883
+ if (candidateRows === null) {
884
+ usedEmbed = true;
885
+ } else {
886
+ let entityCache = this.vectorCache.get(entityId);
887
+ const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
888
+ if (tooLarge && entityCache) {
889
+ this.vectorCache.delete(entityId);
890
+ entityCache = void 0;
756
891
  }
757
- const accessCountDiff = (b.row.access_count ?? 0) - (a.row.access_count ?? 0);
758
- if (accessCountDiff !== 0) {
759
- return accessCountDiff;
892
+ const canCache = populateCache && !tooLarge;
893
+ if (canCache && !entityCache) {
894
+ entityCache = /* @__PURE__ */ new Map();
760
895
  }
761
- return a.row.id.localeCompare(b.row.id);
762
- });
763
- const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
764
- if (topIds.length > 0) {
765
- const placeholders = topIds.map(() => "?").join(",");
766
- const fullRows = await this.db.getAllAsync(
767
- `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
768
- topIds
769
- );
770
- const byId = new Map(fullRows.map((r) => [r.id, r]));
771
- facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
896
+ const scored = candidateRows.map((row) => {
897
+ let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
898
+ if (vector && canCache && entityCache && !entityCache.has(row.id)) {
899
+ entityCache.set(row.id, vector);
900
+ }
901
+ let score = 0;
902
+ if (vector && vector.length === queryVec.length) {
903
+ const cosSim = cosineSimilarity(queryVec, vector);
904
+ if (weight !== void 0) {
905
+ const kwScore = miniSearchScores?.get(row.id) ?? 0;
906
+ score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
907
+ } else {
908
+ score = cosSim;
909
+ }
910
+ } else if (weight !== void 0 && weight < 1) {
911
+ const kwScore = miniSearchScores?.get(row.id) ?? 0;
912
+ score = (1 - weight) * kwScore;
913
+ } else {
914
+ score = -2;
915
+ }
916
+ return { row, score };
917
+ });
918
+ if (canCache && entityCache && entityCache.size > 0) {
919
+ if (!this.vectorCache.has(entityId)) {
920
+ if (this.vectorCache.size >= _WikiMemory.MAX_VECTOR_CACHE_ENTITIES) {
921
+ const oldestKey = this.vectorCache.keys().next().value;
922
+ if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
923
+ }
924
+ this.vectorCache.set(entityId, entityCache);
925
+ }
926
+ }
927
+ scored.sort((a, b) => {
928
+ const scoreDiff = b.score - a.score;
929
+ if (scoreDiff !== 0) return scoreDiff;
930
+ const accessCountDiff = (b.row.access_count ?? 0) - (a.row.access_count ?? 0);
931
+ if (accessCountDiff !== 0) return accessCountDiff;
932
+ const updatedAtDiff = (b.row.updated_at ?? 0) - (a.row.updated_at ?? 0);
933
+ if (updatedAtDiff !== 0) return updatedAtDiff;
934
+ return a.row.id.localeCompare(b.row.id);
935
+ });
936
+ const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
937
+ if (topIds.length > 0) {
938
+ const fullRows = [];
939
+ const phase2ChunkSize = 500;
940
+ for (let i = 0; i < topIds.length; i += phase2ChunkSize) {
941
+ const idChunk = topIds.slice(i, i + phase2ChunkSize);
942
+ const placeholders = idChunk.map(() => "?").join(",");
943
+ const chunkRows = await this.db.getAllAsync(
944
+ `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
945
+ idChunk
946
+ );
947
+ fullRows.push(...chunkRows);
948
+ }
949
+ const byId = new Map(fullRows.map((r) => [r.id, r]));
950
+ facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
951
+ }
952
+ usedEmbed = true;
772
953
  }
773
- usedEmbed = true;
774
954
  } catch (err) {
775
955
  const error = err instanceof Error ? err : new Error(String(err));
776
956
  this.options.onRetrievalFallback?.(error);
@@ -783,25 +963,35 @@ var WikiMemory = class {
783
963
  });
784
964
  const topIds = results.slice(0, maxResults).map((r) => r.id);
785
965
  if (topIds.length > 0) {
786
- const placeholders = topIds.map(() => "?").join(",");
787
- const rows = await this.db.getAllAsync(
788
- `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
789
- topIds
790
- );
791
- const byId = new Map(rows.map((r) => [r.id, r]));
966
+ const kwRows = [];
967
+ const kwChunkSize = 500;
968
+ for (let i = 0; i < topIds.length; i += kwChunkSize) {
969
+ const idChunk = topIds.slice(i, i + kwChunkSize);
970
+ const placeholders = idChunk.map(() => "?").join(",");
971
+ const chunkRows = await this.db.getAllAsync(
972
+ `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
973
+ idChunk
974
+ );
975
+ kwRows.push(...chunkRows);
976
+ }
977
+ const byId = new Map(kwRows.map((r) => [r.id, r]));
792
978
  facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
793
979
  }
794
980
  }
795
981
  if (facts.length > 0) {
796
982
  const ids = facts.map((f) => f.id);
797
- const placeholders = ids.map(() => "?").join(",");
798
983
  const now = Date.now();
799
- await this.db.runAsync(
800
- `UPDATE ${this.prefix}entries
801
- SET access_count = access_count + 1, last_accessed_at = ?
802
- WHERE id IN (${placeholders})`,
803
- [now, ...ids]
804
- );
984
+ const accessChunkSize = 500;
985
+ for (let i = 0; i < ids.length; i += accessChunkSize) {
986
+ const idChunk = ids.slice(i, i + accessChunkSize);
987
+ const placeholders = idChunk.map(() => "?").join(",");
988
+ await this.db.runAsync(
989
+ `UPDATE ${this.prefix}entries
990
+ SET access_count = access_count + 1, last_accessed_at = ?
991
+ WHERE id IN (${placeholders})`,
992
+ [now, ...idChunk]
993
+ );
994
+ }
805
995
  }
806
996
  } else {
807
997
  facts = await this.db.getAllAsync(
@@ -828,7 +1018,7 @@ var WikiMemory = class {
828
1018
  )
829
1019
  ]);
830
1020
  const parsedFacts = facts.map((f) => {
831
- const { embedding: _embedding, ...rest } = f;
1021
+ const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
832
1022
  return {
833
1023
  ...rest,
834
1024
  tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
@@ -860,7 +1050,7 @@ var WikiMemory = class {
860
1050
  if (memoryCheckpoint > count) memoryCheckpoint = 0;
861
1051
  if (count - memoryCheckpoint >= threshold) {
862
1052
  const jobKey = this._librarianKey(entityId);
863
- if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
1053
+ if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId)) && !this._isReembedActive(entityId) && !this._isImportActiveFor(entityId) && !this._isForgetActiveFor(entityId)) {
864
1054
  this.activeMaintenanceJobs.add(jobKey);
865
1055
  this.runLibrarianThenMaybeHeal(entityId, count).catch(console.error).finally(() => this.activeMaintenanceJobs.delete(jobKey));
866
1056
  }
@@ -908,7 +1098,7 @@ var WikiMemory = class {
908
1098
  LIMIT 100
909
1099
  `, [entityId]);
910
1100
  const currentFacts = currentFactsRows.map((f) => {
911
- const { embedding: _embedding, ...rest } = f;
1101
+ const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
912
1102
  return {
913
1103
  ...rest,
914
1104
  tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
@@ -962,10 +1152,12 @@ ${JSON.stringify(currentFacts, null, 2)}`;
962
1152
  `, [id, entityId, task.description, "pending", task.priority, now, now]);
963
1153
  }
964
1154
  });
1155
+ await this.rebuildMiniSearchIndex(entityId);
1156
+ this.vectorCache.delete(entityId);
965
1157
  for (const fact of insertedFacts) {
966
1158
  await this.embedFact(fact);
967
1159
  }
968
- await this.rebuildMiniSearchIndex(entityId);
1160
+ this.vectorCache.delete(entityId);
969
1161
  }
970
1162
  async _doRunHeal(entityId) {
971
1163
  const now = Date.now();
@@ -1003,7 +1195,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
1003
1195
  const documentAnchors = allFactsRows.filter((f) => f.source_type === "user_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
1004
1196
  const userPrompt = `Heal Candidates:
1005
1197
  ${JSON.stringify(healCandidates.map((f) => {
1006
- const { embedding: _embedding, ...rest } = f;
1198
+ const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
1007
1199
  return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
1008
1200
  }), null, 2)}
1009
1201
 
@@ -1046,10 +1238,12 @@ The following document anchors are provided for contradiction detection only. Do
1046
1238
  insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
1047
1239
  }
1048
1240
  });
1241
+ this.vectorCache.delete(entityId);
1242
+ await this.rebuildMiniSearchIndex(entityId);
1049
1243
  for (const fact of insertedFacts) {
1050
1244
  await this.embedFact(fact);
1051
1245
  }
1052
- await this.rebuildMiniSearchIndex(entityId);
1246
+ this.vectorCache.delete(entityId);
1053
1247
  }
1054
1248
  async runLibrarian(entityId) {
1055
1249
  const jobKey = this._librarianKey(entityId);
@@ -1062,6 +1256,12 @@ The following document anchors are provided for contradiction detection only. Do
1062
1256
  if (this._isReembedActive(entityId)) {
1063
1257
  throw new WikiBusyError("reembed", entityId);
1064
1258
  }
1259
+ if (this._isImportActiveFor(entityId)) {
1260
+ throw new WikiBusyError("import", entityId);
1261
+ }
1262
+ if (this._isForgetActiveFor(entityId)) {
1263
+ throw new WikiBusyError("forget", entityId);
1264
+ }
1065
1265
  this.activeMaintenanceJobs.add(jobKey);
1066
1266
  try {
1067
1267
  await this._doRunLibrarian(entityId);
@@ -1080,6 +1280,12 @@ The following document anchors are provided for contradiction detection only. Do
1080
1280
  if (this._isReembedActive(entityId)) {
1081
1281
  throw new WikiBusyError("reembed", entityId);
1082
1282
  }
1283
+ if (this._isImportActiveFor(entityId)) {
1284
+ throw new WikiBusyError("import", entityId);
1285
+ }
1286
+ if (this._isForgetActiveFor(entityId)) {
1287
+ throw new WikiBusyError("forget", entityId);
1288
+ }
1083
1289
  this.activeMaintenanceJobs.add(jobKey);
1084
1290
  try {
1085
1291
  await this._doRunHeal(entityId);
@@ -1087,9 +1293,9 @@ The following document anchors are provided for contradiction detection only. Do
1087
1293
  this.activeMaintenanceJobs.delete(jobKey);
1088
1294
  }
1089
1295
  }
1090
- async runReembed(entityId) {
1296
+ async runReembed(entityId, opts) {
1091
1297
  const embedFn = this.options.llmProvider.embed;
1092
- if (!embedFn) return { embedded: 0, skipped: 0 };
1298
+ if (!embedFn) return { embedded: 0, skipped: 0, failed: 0 };
1093
1299
  const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
1094
1300
  if (this.activeMaintenanceJobs.has(reembedKey)) {
1095
1301
  throw new WikiBusyError("reembed", entityId ?? "*");
@@ -1110,6 +1316,12 @@ The following document anchors are provided for contradiction detection only. Do
1110
1316
  if (this._isIngestActiveFor(entityId)) {
1111
1317
  throw new WikiBusyError("ingest", entityId);
1112
1318
  }
1319
+ if (this._isImportActiveFor(entityId)) {
1320
+ throw new WikiBusyError("import", entityId);
1321
+ }
1322
+ if (this._isForgetActiveFor(entityId)) {
1323
+ throw new WikiBusyError("forget", entityId);
1324
+ }
1113
1325
  } else {
1114
1326
  if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
1115
1327
  throw new WikiBusyError("reembed", "*");
@@ -1126,6 +1338,12 @@ The following document anchors are provided for contradiction detection only. Do
1126
1338
  if (this.activeIngestJobs.size > 0) {
1127
1339
  throw new WikiBusyError("ingest", "*");
1128
1340
  }
1341
+ if (this._isAnyMaintenanceActiveWithSuffix(":import")) {
1342
+ throw new WikiBusyError("import", "*");
1343
+ }
1344
+ if (this._isAnyMaintenanceActiveWithSuffix(":forget")) {
1345
+ throw new WikiBusyError("forget", "*");
1346
+ }
1129
1347
  }
1130
1348
  this.activeMaintenanceJobs.add(reembedKey);
1131
1349
  try {
@@ -1135,17 +1353,64 @@ The following document anchors are provided for contradiction detection only. Do
1135
1353
  `SELECT * FROM ${this.prefix}entries WHERE ${where}`,
1136
1354
  params
1137
1355
  );
1356
+ if (entityId) {
1357
+ this.vectorCache.delete(entityId);
1358
+ } else {
1359
+ this.vectorCache.clear();
1360
+ }
1361
+ const skipExisting = opts?.skipExisting ?? false;
1362
+ let effectiveSkip = skipExisting;
1363
+ if (skipExisting) {
1364
+ const mismatchRow = await this.db.getFirstAsync(
1365
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
1366
+ );
1367
+ if (mismatchRow) {
1368
+ if (entityId) {
1369
+ const mismatchDim = parseInt(mismatchRow.value, 10);
1370
+ const staleForEntity = await this.db.getFirstAsync(
1371
+ `SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
1372
+ WHERE entity_id = ? AND deleted_at IS NULL
1373
+ AND (
1374
+ embedding_blob IS NULL
1375
+ OR (CAST(length(embedding_blob) AS INTEGER) / 4) != ?
1376
+ )`,
1377
+ [entityId, mismatchDim]
1378
+ );
1379
+ if (staleForEntity && staleForEntity.cnt > 0) effectiveSkip = false;
1380
+ } else {
1381
+ effectiveSkip = false;
1382
+ }
1383
+ }
1384
+ }
1138
1385
  let embedded = 0;
1139
1386
  let skipped = 0;
1140
- for (const row of rows) {
1141
- const success = await this.embedFact(row);
1142
- if (success) embedded++;
1143
- else skipped++;
1144
- }
1145
- if (embedded > 0) {
1146
- await this._reconcileEmbeddingDimension();
1387
+ let failed = 0;
1388
+ try {
1389
+ for (const row of rows) {
1390
+ const existingBlob = row.embedding_blob;
1391
+ const blobIsValid = !!existingBlob && existingBlob.byteLength > 0 && existingBlob.byteLength % 4 === 0;
1392
+ if (effectiveSkip && blobIsValid) {
1393
+ const vec = parseEmbedding(existingBlob, null);
1394
+ if (vec !== null && vec.every((v) => Number.isFinite(v))) {
1395
+ skipped++;
1396
+ continue;
1397
+ }
1398
+ }
1399
+ const success = await this.embedFact(row);
1400
+ if (success) embedded++;
1401
+ else failed++;
1402
+ }
1403
+ if (embedded > 0) {
1404
+ await this._reconcileEmbeddingDimension();
1405
+ }
1406
+ } finally {
1407
+ if (entityId) {
1408
+ this.vectorCache.delete(entityId);
1409
+ } else {
1410
+ this.vectorCache.clear();
1411
+ }
1147
1412
  }
1148
- return { embedded, skipped };
1413
+ return { embedded, skipped, failed };
1149
1414
  } finally {
1150
1415
  this.activeMaintenanceJobs.delete(reembedKey);
1151
1416
  }
@@ -1165,6 +1430,9 @@ The following document anchors are provided for contradiction detection only. Do
1165
1430
  heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
1166
1431
  };
1167
1432
  }
1433
+ clearVectorCache() {
1434
+ this.vectorCache.clear();
1435
+ }
1168
1436
  async _getFullBundle(entityId, opts) {
1169
1437
  const maxEvents = opts?.maxEvents;
1170
1438
  const eventsQuery = maxEvents != null ? `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at ASC`;
@@ -1181,10 +1449,16 @@ The following document anchors are provided for contradiction detection only. Do
1181
1449
  this.db.getAllAsync(eventsQuery, eventsParams)
1182
1450
  ]);
1183
1451
  const facts = factsRaw.map((f) => {
1184
- const { embedding: _embedding, ...rest } = f;
1452
+ const { embedding: _embedding, embedding_blob, ...rest } = f;
1453
+ const safeBlobCopy = opts?.includeBlobs && embedding_blob ? (() => {
1454
+ const c = new ArrayBuffer(embedding_blob.byteLength);
1455
+ new Uint8Array(c).set(embedding_blob);
1456
+ return new Uint8Array(c);
1457
+ })() : void 0;
1458
+ const factBase = safeBlobCopy ? { ...rest, embedding_blob: safeBlobCopy } : rest;
1185
1459
  return {
1186
- ...rest,
1187
- tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
1460
+ ...factBase,
1461
+ tags: typeof factBase.tags === "string" ? JSON.parse(factBase.tags) : factBase.tags
1188
1462
  };
1189
1463
  });
1190
1464
  const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
@@ -1211,7 +1485,7 @@ The following document anchors are provided for contradiction detection only. Do
1211
1485
  for (let i = 0; i < ids.length; i += BATCH) {
1212
1486
  const batch = ids.slice(i, i + BATCH);
1213
1487
  const batchResults = await Promise.all(
1214
- batch.map(async (id) => [id, await this._getFullBundle(id)])
1488
+ batch.map(async (id) => [id, await this._getFullBundle(id, { includeBlobs: true })])
1215
1489
  );
1216
1490
  for (const [id, bundle] of batchResults) {
1217
1491
  entities[id] = bundle;
@@ -1221,172 +1495,339 @@ The following document anchors are provided for contradiction detection only. Do
1221
1495
  }
1222
1496
  async importDump(dump, opts) {
1223
1497
  const merge = opts?.merge ?? false;
1224
- for (const [entityId, bundle] of Object.entries(dump.entities)) {
1225
- await this.db.withTransactionAsync(async () => {
1226
- if (!merge) {
1227
- const now = Date.now();
1228
- await this.db.runAsync(
1229
- `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
1230
- [now, now, entityId]
1231
- );
1232
- await this.db.runAsync(
1233
- `UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
1234
- [now, now, entityId]
1235
- );
1236
- await this.db.runAsync(
1237
- `DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
1238
- [entityId]
1239
- );
1498
+ const entityIds = Object.keys(dump.entities);
1499
+ for (const entityId of entityIds) {
1500
+ if (this.activeMaintenanceJobs.has(this._importKey(entityId))) {
1501
+ throw new WikiBusyError("import", entityId);
1502
+ }
1503
+ if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
1504
+ throw new WikiBusyError("librarian", entityId);
1505
+ }
1506
+ if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
1507
+ throw new WikiBusyError("heal", entityId);
1508
+ }
1509
+ if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
1510
+ throw new WikiBusyError("prune", entityId);
1511
+ }
1512
+ if (this._isReembedActive(entityId)) {
1513
+ throw new WikiBusyError("reembed", entityId);
1514
+ }
1515
+ if (this._isIngestActiveFor(entityId)) {
1516
+ throw new WikiBusyError("ingest", entityId);
1517
+ }
1518
+ if (this._isForgetActiveFor(entityId)) {
1519
+ throw new WikiBusyError("forget", entityId);
1520
+ }
1521
+ }
1522
+ if (this.activeMaintenanceJobs.has(this._globalImportKey())) {
1523
+ throw new WikiBusyError("import", "*");
1524
+ }
1525
+ this.activeMaintenanceJobs.add(this._globalImportKey());
1526
+ for (const entityId of entityIds) {
1527
+ this.activeMaintenanceJobs.add(this._importKey(entityId));
1528
+ }
1529
+ try {
1530
+ for (const [entityId, bundle] of Object.entries(dump.entities)) {
1531
+ await this._doImportEntity(entityId, bundle, merge);
1532
+ }
1533
+ } finally {
1534
+ this.activeMaintenanceJobs.delete(this._globalImportKey());
1535
+ for (const entityId of entityIds) {
1536
+ this.activeMaintenanceJobs.delete(this._importKey(entityId));
1537
+ }
1538
+ }
1539
+ }
1540
+ async _doImportEntity(entityId, bundle, merge) {
1541
+ const upsertedFactIds = /* @__PURE__ */ new Set();
1542
+ const factsWithPreservedBlob = /* @__PURE__ */ new Set();
1543
+ const preservedBlobDims = /* @__PURE__ */ new Set();
1544
+ await this.db.withTransactionAsync(async () => {
1545
+ if (!merge) {
1546
+ const now = Date.now();
1547
+ await this.db.runAsync(
1548
+ `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
1549
+ [now, now, entityId]
1550
+ );
1551
+ await this.db.runAsync(
1552
+ `UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
1553
+ [now, now, entityId]
1554
+ );
1555
+ await this.db.runAsync(
1556
+ `DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
1557
+ [entityId]
1558
+ );
1559
+ }
1560
+ const factIds = bundle.facts.map((fact) => fact.id);
1561
+ const existingFactsById = /* @__PURE__ */ new Map();
1562
+ const factLookupChunkSize = 500;
1563
+ for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
1564
+ const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
1565
+ if (factIdChunk.length === 0) continue;
1566
+ const placeholders = factIdChunk.map(() => "?").join(", ");
1567
+ const existingFacts = await this.db.getAllAsync(
1568
+ `SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
1569
+ factIdChunk
1570
+ );
1571
+ for (const existingFact of existingFacts) {
1572
+ existingFactsById.set(existingFact.id, existingFact);
1240
1573
  }
1241
- const factIds = bundle.facts.map((fact) => fact.id);
1242
- const existingFactsById = /* @__PURE__ */ new Map();
1243
- const factLookupChunkSize = 500;
1244
- for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
1245
- const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
1246
- if (factIdChunk.length === 0) continue;
1247
- const placeholders = factIdChunk.map(() => "?").join(", ");
1248
- const existingFacts = await this.db.getAllAsync(
1249
- `SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
1250
- factIdChunk
1251
- );
1252
- for (const existingFact of existingFacts) {
1253
- existingFactsById.set(existingFact.id, existingFact);
1574
+ }
1575
+ for (const fact of bundle.facts) {
1576
+ const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
1577
+ const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
1578
+ const existing = existingFactsById.get(fact.id);
1579
+ const rawBlobRaw = fact.embedding_blob;
1580
+ let rawBlob = null;
1581
+ if (rawBlobRaw instanceof Uint8Array) {
1582
+ rawBlob = rawBlobRaw;
1583
+ } else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
1584
+ const obj = rawBlobRaw;
1585
+ if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
1586
+ rawBlob = new Uint8Array(obj["data"]);
1587
+ } else if (!Array.isArray(rawBlobRaw)) {
1588
+ const entries = Object.keys(obj);
1589
+ if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
1590
+ const len = entries.length;
1591
+ rawBlob = new Uint8Array(len);
1592
+ for (let i = 0; i < len; i++) rawBlob[i] = obj[String(i)] ?? 0;
1593
+ }
1254
1594
  }
1255
1595
  }
1256
- for (const fact of bundle.facts) {
1257
- const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
1258
- const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
1259
- const existing = existingFactsById.get(fact.id);
1260
- if (existing) {
1261
- if (existing.entity_id !== entityId) {
1262
- this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
1263
- continue;
1264
- }
1265
- if (merge) {
1266
- if (safeUpdatedAt <= existing.updated_at) continue;
1596
+ let blobData = null;
1597
+ if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
1598
+ const copy = new ArrayBuffer(rawBlob.byteLength);
1599
+ new Uint8Array(copy).set(rawBlob);
1600
+ const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
1601
+ let allFinite = true;
1602
+ for (let i = 0; i < floats.length; i++) {
1603
+ if (!isFinite(floats[i])) {
1604
+ allFinite = false;
1605
+ break;
1267
1606
  }
1607
+ }
1608
+ if (allFinite) {
1609
+ blobData = rawBlob;
1610
+ }
1611
+ }
1612
+ if (existing) {
1613
+ if (existing.entity_id !== entityId) {
1614
+ this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
1615
+ continue;
1616
+ }
1617
+ if (merge) {
1618
+ if (safeUpdatedAt <= existing.updated_at) continue;
1619
+ }
1620
+ if (blobData != null) {
1621
+ await this.db.runAsync(
1622
+ `UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = ?, embedding = NULL WHERE id = ?`,
1623
+ [entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData, fact.id]
1624
+ );
1625
+ factsWithPreservedBlob.add(fact.id);
1626
+ if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
1627
+ } else {
1268
1628
  await this.db.runAsync(
1269
- `UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ? WHERE id = ?`,
1629
+ `UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = NULL, embedding = NULL WHERE id = ?`,
1270
1630
  [entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, fact.id]
1271
1631
  );
1272
- existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
1632
+ }
1633
+ existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
1634
+ upsertedFactIds.add(fact.id);
1635
+ } else {
1636
+ if (blobData != null) {
1637
+ await this.db.runAsync(
1638
+ `INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at, embedding_blob) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1639
+ [fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData]
1640
+ );
1641
+ factsWithPreservedBlob.add(fact.id);
1642
+ if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
1273
1643
  } else {
1274
1644
  await this.db.runAsync(
1275
1645
  `INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1276
1646
  [fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at]
1277
1647
  );
1278
- existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
1279
1648
  }
1649
+ existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
1650
+ upsertedFactIds.add(fact.id);
1280
1651
  }
1281
- const taskIds = bundle.tasks.map((task) => task.id);
1282
- const existingTasksById = /* @__PURE__ */ new Map();
1283
- const taskLookupChunkSize = 500;
1284
- for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
1285
- const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
1286
- if (taskIdChunk.length === 0) continue;
1287
- const placeholders = taskIdChunk.map(() => "?").join(", ");
1288
- const existingTasks = await this.db.getAllAsync(
1289
- `SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
1290
- taskIdChunk
1291
- );
1292
- for (const existingTask of existingTasks) {
1293
- existingTasksById.set(existingTask.id, existingTask);
1652
+ }
1653
+ const taskIds = bundle.tasks.map((task) => task.id);
1654
+ const existingTasksById = /* @__PURE__ */ new Map();
1655
+ const taskLookupChunkSize = 500;
1656
+ for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
1657
+ const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
1658
+ if (taskIdChunk.length === 0) continue;
1659
+ const placeholders = taskIdChunk.map(() => "?").join(", ");
1660
+ const existingTasks = await this.db.getAllAsync(
1661
+ `SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
1662
+ taskIdChunk
1663
+ );
1664
+ for (const existingTask of existingTasks) {
1665
+ existingTasksById.set(existingTask.id, existingTask);
1666
+ }
1667
+ }
1668
+ for (const task of bundle.tasks) {
1669
+ const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
1670
+ const existing = existingTasksById.get(task.id);
1671
+ if (existing) {
1672
+ if (existing.entity_id !== entityId) {
1673
+ this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
1674
+ continue;
1675
+ }
1676
+ if (merge) {
1677
+ if (safeUpdatedAt <= existing.updated_at) continue;
1294
1678
  }
1679
+ await this.db.runAsync(
1680
+ `UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
1681
+ [entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
1682
+ );
1683
+ existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
1684
+ } else {
1685
+ await this.db.runAsync(
1686
+ `INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1687
+ [task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
1688
+ );
1689
+ existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
1295
1690
  }
1296
- for (const task of bundle.tasks) {
1297
- const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
1298
- const existing = existingTasksById.get(task.id);
1299
- if (existing) {
1300
- if (existing.entity_id !== entityId) {
1301
- this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
1302
- continue;
1303
- }
1304
- if (merge) {
1305
- if (safeUpdatedAt <= existing.updated_at) continue;
1306
- }
1307
- await this.db.runAsync(
1308
- `UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
1309
- [entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
1310
- );
1311
- existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
1312
- } else {
1691
+ }
1692
+ for (const event of bundle.events) {
1693
+ await this.db.runAsync(
1694
+ `INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
1695
+ VALUES (?, ?, ?, ?, ?, ?)`,
1696
+ [event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
1697
+ );
1698
+ }
1699
+ });
1700
+ this.vectorCache.delete(entityId);
1701
+ await this.rebuildMiniSearchIndex(entityId);
1702
+ for (const fact of bundle.facts) {
1703
+ if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
1704
+ await this.embedFact({
1705
+ id: fact.id,
1706
+ title: fact.title,
1707
+ body: fact.body,
1708
+ tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
1709
+ });
1710
+ }
1711
+ }
1712
+ try {
1713
+ const canonicalRow = await this.db.getFirstAsync(
1714
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
1715
+ );
1716
+ const canonicalDim = canonicalRow ? parseInt(canonicalRow.value, 10) : null;
1717
+ if (preservedBlobDims.size === 1) {
1718
+ const preservedDim = [...preservedBlobDims][0];
1719
+ if (canonicalDim === null || canonicalDim === preservedDim) {
1720
+ await this.storeEmbeddingDimension(preservedDim);
1721
+ const staleMismatch = await this.db.getFirstAsync(
1722
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
1723
+ );
1724
+ if (staleMismatch && parseInt(staleMismatch.value, 10) !== preservedDim) {
1313
1725
  await this.db.runAsync(
1314
- `INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1315
- [task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
1726
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
1727
+ [String(preservedDim)]
1316
1728
  );
1317
- existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
1318
1729
  }
1319
- }
1320
- for (const event of bundle.events) {
1730
+ await this._reconcileEmbeddingDimension();
1731
+ } else {
1321
1732
  await this.db.runAsync(
1322
- `INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
1323
- VALUES (?, ?, ?, ?, ?, ?)`,
1324
- [event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
1733
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
1734
+ [String(canonicalDim)]
1325
1735
  );
1326
1736
  }
1327
- });
1328
- for (const fact of bundle.facts) {
1329
- if (!fact.deleted_at) {
1330
- await this.embedFact({
1331
- id: fact.id,
1332
- title: fact.title,
1333
- body: fact.body,
1334
- tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
1335
- });
1737
+ } else if (preservedBlobDims.size > 1) {
1738
+ if (canonicalDim === null) {
1739
+ const sortedPreservedBlobDims = [...preservedBlobDims].sort((a, b) => a - b);
1740
+ await this.storeEmbeddingDimension(sortedPreservedBlobDims[0]);
1741
+ await this.db.runAsync(
1742
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
1743
+ [String(sortedPreservedBlobDims[0])]
1744
+ );
1745
+ } else {
1746
+ await this.db.runAsync(
1747
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
1748
+ [String(canonicalDim)]
1749
+ );
1336
1750
  }
1337
1751
  }
1752
+ } finally {
1753
+ this.vectorCache.delete(entityId);
1338
1754
  }
1339
- await this.rebuildMiniSearchIndex();
1340
1755
  }
1341
1756
  async forget(entityId, params) {
1342
- const now = Date.now();
1343
- let deletedEntries = 0;
1344
- let deletedTasks = 0;
1345
- if (params.clearAll) {
1346
- const [entriesRes, tasksRes] = await Promise.all([
1347
- this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId]),
1348
- this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId])
1349
- ]);
1350
- await this.db.runAsync(`UPDATE ${this.prefix}checkpoints SET memory_checkpoint = 0, heal_checkpoint = 0 WHERE entity_id = ?`, [entityId]);
1351
- deletedEntries = entriesRes.changes;
1352
- deletedTasks = tasksRes.changes;
1353
- } else {
1354
- const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
1355
- const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
1356
- if (hasIdSelectors && hasSourceSelectors) {
1357
- throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
1358
- }
1359
- const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
1360
- if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
1361
- const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
1362
- if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
1363
- const entryPromise = params.entryId ? this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.entryId, entityId]) : null;
1364
- const taskPromise = params.taskId ? this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.taskId, entityId]) : null;
1365
- let refPromise = null;
1366
- if (sourceRef || sourceHash) {
1367
- let q = `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`;
1368
- const args = [now, now, entityId];
1369
- if (sourceRef) {
1370
- q += ` AND source_ref = ?`;
1371
- args.push(sourceRef);
1757
+ let blockingOperation = null;
1758
+ if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
1759
+ blockingOperation = "librarian";
1760
+ } else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
1761
+ blockingOperation = "heal";
1762
+ } else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
1763
+ blockingOperation = "prune";
1764
+ } else if (this._isReembedActive(entityId)) {
1765
+ blockingOperation = "reembed";
1766
+ } else if (this._isIngestActiveFor(entityId)) {
1767
+ blockingOperation = "ingest";
1768
+ } else if (this._isImportActiveFor(entityId)) {
1769
+ blockingOperation = "import";
1770
+ } else if (this._isForgetActiveFor(entityId)) {
1771
+ blockingOperation = "forget";
1772
+ }
1773
+ if (blockingOperation !== null) {
1774
+ throw new WikiBusyError(blockingOperation, entityId);
1775
+ }
1776
+ const forgetKey = this._forgetKey(entityId);
1777
+ this.activeMaintenanceJobs.add(forgetKey);
1778
+ try {
1779
+ const now = Date.now();
1780
+ let deletedEntries = 0;
1781
+ let deletedTasks = 0;
1782
+ if (params.clearAll) {
1783
+ const [entriesRes, tasksRes] = await Promise.all([
1784
+ this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId]),
1785
+ this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId])
1786
+ ]);
1787
+ await this.db.runAsync(`UPDATE ${this.prefix}checkpoints SET memory_checkpoint = 0, heal_checkpoint = 0 WHERE entity_id = ?`, [entityId]);
1788
+ deletedEntries = entriesRes.changes;
1789
+ deletedTasks = tasksRes.changes;
1790
+ } else {
1791
+ const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
1792
+ const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
1793
+ if (hasIdSelectors && hasSourceSelectors) {
1794
+ throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
1372
1795
  }
1373
- if (sourceHash) {
1374
- q += ` AND source_hash = ?`;
1375
- args.push(sourceHash);
1796
+ const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
1797
+ if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
1798
+ const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
1799
+ if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
1800
+ const entryPromise = params.entryId ? this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.entryId, entityId]) : null;
1801
+ const taskPromise = params.taskId ? this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.taskId, entityId]) : null;
1802
+ let refPromise = null;
1803
+ if (sourceRef || sourceHash) {
1804
+ let q = `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`;
1805
+ const args = [now, now, entityId];
1806
+ if (sourceRef) {
1807
+ q += ` AND source_ref = ?`;
1808
+ args.push(sourceRef);
1809
+ }
1810
+ if (sourceHash) {
1811
+ q += ` AND source_hash = ?`;
1812
+ args.push(sourceHash);
1813
+ }
1814
+ refPromise = this.db.runAsync(q, args);
1376
1815
  }
1377
- refPromise = this.db.runAsync(q, args);
1816
+ const [entryResult, taskResult, refResult] = await Promise.all([
1817
+ entryPromise ?? Promise.resolve(null),
1818
+ taskPromise ?? Promise.resolve(null),
1819
+ refPromise ?? Promise.resolve(null)
1820
+ ]);
1821
+ if (entryResult) deletedEntries += entryResult.changes;
1822
+ if (taskResult) deletedTasks += taskResult.changes;
1823
+ if (refResult) deletedEntries += refResult.changes;
1378
1824
  }
1379
- const [entryResult, taskResult, refResult] = await Promise.all([
1380
- entryPromise ?? Promise.resolve(null),
1381
- taskPromise ?? Promise.resolve(null),
1382
- refPromise ?? Promise.resolve(null)
1383
- ]);
1384
- if (entryResult) deletedEntries += entryResult.changes;
1385
- if (taskResult) deletedTasks += taskResult.changes;
1386
- if (refResult) deletedEntries += refResult.changes;
1825
+ await this.rebuildMiniSearchIndex(entityId);
1826
+ this.vectorCache.delete(entityId);
1827
+ return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
1828
+ } finally {
1829
+ this.activeMaintenanceJobs.delete(forgetKey);
1387
1830
  }
1388
- await this.rebuildMiniSearchIndex(entityId);
1389
- return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
1390
1831
  }
1391
1832
  async ingestDocument(entityId, params) {
1392
1833
  const sourceRef = normalizeSourceRef(params.sourceRef);
@@ -1414,6 +1855,12 @@ The following document anchors are provided for contradiction detection only. Do
1414
1855
  if (this._isReembedActive(entityId)) {
1415
1856
  throw new WikiBusyError("reembed", entityId);
1416
1857
  }
1858
+ if (this._isImportActiveFor(entityId)) {
1859
+ throw new WikiBusyError("import", entityId);
1860
+ }
1861
+ if (this._isForgetActiveFor(entityId)) {
1862
+ throw new WikiBusyError("forget", entityId);
1863
+ }
1417
1864
  this.activeIngestJobs.add(jobKey);
1418
1865
  try {
1419
1866
  const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
@@ -1461,16 +1908,31 @@ ${chunk}`;
1461
1908
  insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
1462
1909
  }
1463
1910
  });
1911
+ await this.rebuildMiniSearchIndex(entityId);
1912
+ this.vectorCache.delete(entityId);
1464
1913
  for (const fact of insertedFacts) {
1465
1914
  await this.embedFact(fact);
1466
1915
  }
1467
- await this.rebuildMiniSearchIndex(entityId);
1916
+ this.vectorCache.delete(entityId);
1468
1917
  return { truncated, chunks: chunks.length };
1469
1918
  } finally {
1470
1919
  this.activeIngestJobs.delete(jobKey);
1471
1920
  }
1472
1921
  }
1473
1922
  };
1923
+ /**
1924
+ * Maximum number of entities whose parsed embedding vectors are held in
1925
+ * memory. This cap is intentionally conservative so the cache remains safe
1926
+ * on memory-constrained runtimes (e.g., mobile/Expo).
1927
+ */
1928
+ _WikiMemory.MAX_VECTOR_CACHE_ENTITIES = 16;
1929
+ /**
1930
+ * Maximum number of fact vectors cached per entity. Keep this high enough to
1931
+ * preserve the parsed-embedding reuse optimization for common mid-sized
1932
+ * entities while still maintaining a bounded memory footprint.
1933
+ */
1934
+ _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
1935
+ var WikiMemory = _WikiMemory;
1474
1936
 
1475
1937
  // src/utils/formatContext.ts
1476
1938
  function validateMaxOption(value, name) {
@@ -1666,8 +2128,23 @@ function formatMemoryDump(dump) {
1666
2128
  name: formatEntityFileName(entityId),
1667
2129
  content: renderEntity(entityId, bundle, dump.generatedAt)
1668
2130
  }));
2131
+ const manifestDump = {
2132
+ generatedAt: dump.generatedAt,
2133
+ entities: Object.fromEntries(
2134
+ Object.entries(dump.entities).map(([entityId, bundle]) => [
2135
+ entityId,
2136
+ {
2137
+ ...bundle,
2138
+ facts: bundle.facts.map((f) => {
2139
+ const { embedding_blob: _blob, ...rest } = f;
2140
+ return rest;
2141
+ })
2142
+ }
2143
+ ])
2144
+ )
2145
+ };
1669
2146
  return {
1670
- manifest: JSON.stringify(dump, null, 2),
2147
+ manifest: JSON.stringify(manifestDump, null, 2),
1671
2148
  files
1672
2149
  };
1673
2150
  }