@equationalapplications/core-llm-wiki 2.6.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -18,7 +18,8 @@ async function setupDatabase(db, prefix) {
18
18
  last_accessed_at INTEGER,
19
19
  access_count INTEGER NOT NULL DEFAULT 0,
20
20
  deleted_at INTEGER,
21
- embedding TEXT
21
+ embedding TEXT,
22
+ embedding_blob BLOB
22
23
  );
23
24
 
24
25
  CREATE INDEX IF NOT EXISTS ${prefix}entries_entity_idx ON ${prefix}entries(entity_id);
@@ -91,6 +92,20 @@ var MIGRATIONS = [
91
92
  await db.execAsync(`ALTER TABLE ${prefix}entries ADD COLUMN embedding TEXT`);
92
93
  }
93
94
  }
95
+ },
96
+ {
97
+ version: 3,
98
+ description: "Add embedding_blob BLOB column for Float32Array vector storage",
99
+ run: async (db, prefix) => {
100
+ const cols = await db.getAllAsync(
101
+ `PRAGMA table_info(${prefix}entries)`
102
+ );
103
+ if (!cols.some((c) => c.name === "embedding_blob")) {
104
+ await db.execAsync(
105
+ `ALTER TABLE ${prefix}entries ADD COLUMN embedding_blob BLOB`
106
+ );
107
+ }
108
+ }
94
109
  }
95
110
  ];
96
111
  for (let i = 1; i < MIGRATIONS.length; i++) {
@@ -148,6 +163,34 @@ function cosineSimilarity(a, b) {
148
163
  return denom === 0 ? 0 : dot / denom;
149
164
  }
150
165
 
166
+ // src/utils/embedding.ts
167
/**
 * Decode a stored embedding into a Float32Array.
 *
 * The binary `blob` takes precedence whenever it holds at least one byte; the
 * JSON `text` form is only consulted when no blob bytes are present.
 *
 * @param {ArrayBuffer|ArrayBufferView|null|undefined} blob - Raw float32 bytes
 *   (platform byte order). May be a Uint8Array/Buffer, any other typed-array
 *   or DataView over the bytes, or a bare ArrayBuffer.
 * @param {string|null|undefined} text - JSON array of finite numbers.
 * @returns {Float32Array|null} A vector backed by its own buffer, or null when
 *   the input is missing, malformed, not a multiple of 4 bytes, or contains a
 *   value that is not finite as a float32.
 */
function parseEmbedding(blob, text) {
  if (blob && blob.byteLength > 0) {
    if (blob.byteLength % 4 !== 0) return null;
    // Always address the *bytes* of the input. Uint8Array#set(blob) copies
    // elements instead: a bare ArrayBuffer has no `length` (nothing copied,
    // an all-zero vector came back) and a Float32Array would be truncated
    // element-by-element into bytes.
    const bytes = ArrayBuffer.isView(blob)
      ? new Uint8Array(blob.buffer, blob.byteOffset, blob.byteLength)
      : new Uint8Array(blob);
    // Anything that merely looks like a buffer (has byteLength) but does not
    // expose that many bytes is rejected rather than decoded as garbage.
    if (bytes.byteLength !== blob.byteLength) return null;
    // slice() yields a fresh, offset-0 buffer: satisfies Float32Array's
    // 4-byte alignment requirement and detaches the result from the caller.
    const vector = new Float32Array(bytes.slice().buffer);
    return vector.every((value) => Number.isFinite(value)) ? vector : null;
  }
  if (text) {
    try {
      const parsed = JSON.parse(text);
      const isNumericArray =
        Array.isArray(parsed) &&
        parsed.every((v) => typeof v === "number" && Number.isFinite(v));
      if (!isNumericArray) return null;
      const vector = new Float32Array(parsed);
      // Re-check after narrowing: a finite double can overflow to ±Infinity
      // once stored as float32.
      return vector.every((value) => Number.isFinite(value)) ? vector : null;
    } catch {
      return null;
    }
  }
  return null;
}
193
+
151
194
  // src/WikiMemory.ts
152
195
  function parseJsonResponse(text) {
153
196
  const firstBrace = text.indexOf("{");
@@ -352,7 +395,7 @@ function jaccardScore(a, b) {
352
395
  }
353
396
  var FUZZY_THRESHOLD = 0.5;
354
397
  var MIN_TOKENS_TO_QUALIFY = 3;
355
- var WikiMemory = class {
398
+ var _WikiMemory = class _WikiMemory {
356
399
  constructor(db, options) {
357
400
  this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
358
401
  this.activeIngestJobs = /* @__PURE__ */ new Set();
@@ -366,6 +409,7 @@ var WikiMemory = class {
366
409
  }
367
410
  });
368
411
  this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
412
+ this.vectorCache = /* @__PURE__ */ new Map();
369
413
  this.db = db;
370
414
  this.options = options;
371
415
  this.prefix = options.config?.tablePrefix || "llm_wiki_";
@@ -432,10 +476,6 @@ var WikiMemory = class {
432
476
  `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
433
477
  [String(dim)]
434
478
  );
435
- } else {
436
- await this.db.runAsync(
437
- `DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
438
- );
439
479
  }
440
480
  } else {
441
481
  await this.db.runAsync(
@@ -454,7 +494,18 @@ var WikiMemory = class {
454
494
  const mismatch = await this.db.getFirstAsync(
455
495
  `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
456
496
  );
457
- if (mismatch) {
497
+ if (!mismatch) return;
498
+ const newDim = parseInt(mismatch.value, 10);
499
+ const residual = await this.db.getFirstAsync(
500
+ `SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
501
+ WHERE deleted_at IS NULL
502
+ AND (
503
+ (embedding_blob IS NOT NULL AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?)
504
+ OR (embedding_blob IS NULL AND embedding IS NOT NULL)
505
+ )`,
506
+ [newDim]
507
+ );
508
+ if (!residual || residual.cnt === 0) {
458
509
  await this.db.runAsync(
459
510
  `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
460
511
  [mismatch.value]
@@ -485,11 +536,29 @@ var WikiMemory = class {
485
536
  console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
486
537
  return false;
487
538
  }
488
- await this.storeEmbeddingDimension(vector.length);
539
+ const float32Vector = new Float32Array(vector);
540
+ let hasNonFinite = false;
541
+ for (let i = 0; i < float32Vector.length; i++) {
542
+ if (!isFinite(float32Vector[i])) {
543
+ hasNonFinite = true;
544
+ break;
545
+ }
546
+ }
547
+ if (hasNonFinite) {
548
+ console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
549
+ return false;
550
+ }
551
+ await this.storeEmbeddingDimension(float32Vector.length);
552
+ const blob = new Uint8Array(float32Vector.buffer);
489
553
  await this.db.runAsync(
490
- `UPDATE ${this.prefix}entries SET embedding = ? WHERE id = ?`,
491
- [JSON.stringify(vector), fact.id]
554
+ `UPDATE ${this.prefix}entries SET embedding_blob = ?, embedding = NULL WHERE id = ?`,
555
+ [blob, fact.id]
492
556
  );
557
+ try {
558
+ await this._notifyEmbeddingPersisted(fact.entity_id, fact.id, float32Vector);
559
+ } catch (hookErr) {
560
+ console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for ${fact.id}:`, hookErr);
561
+ }
493
562
  return true;
494
563
  } catch (err) {
495
564
  console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
@@ -505,6 +574,9 @@ var WikiMemory = class {
505
574
  _warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
506
575
  console.warn(`[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`);
507
576
  }
577
+ async _notifyEmbeddingPersisted(entityId, factId, vector) {
578
+ await this.options.vectorRanker?.onEmbeddingPersisted?.({ entityId, factId, vector });
579
+ }
508
580
  async setup() {
509
581
  const entriesExistedBeforeSetup = await this.db.getFirstAsync(
510
582
  `SELECT name FROM sqlite_master WHERE type='table' AND name=?`,
@@ -608,9 +680,24 @@ var WikiMemory = class {
608
680
  _globalReembedKey() {
609
681
  return `${this.prefix}:reembed`;
610
682
  }
683
+ _importKey(entityId) {
684
+ return `${this.prefix}:${entityId}:import`;
685
+ }
686
+ _globalImportKey() {
687
+ return `${this.prefix}:import`;
688
+ }
689
+ _forgetKey(entityId) {
690
+ return `${this.prefix}:${entityId}:forget`;
691
+ }
611
692
  _isReembedActive(entityId) {
612
693
  return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
613
694
  }
695
+ _isImportActiveFor(entityId) {
696
+ return this.activeMaintenanceJobs.has(this._importKey(entityId)) || this.activeMaintenanceJobs.has(this._globalImportKey());
697
+ }
698
+ _isForgetActiveFor(entityId) {
699
+ return this.activeMaintenanceJobs.has(this._forgetKey(entityId));
700
+ }
614
701
  /** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
615
702
  _isAnyMaintenanceActiveWithSuffix(suffix) {
616
703
  const entityKeyPrefix = `${this.prefix}:`;
@@ -653,6 +740,10 @@ var WikiMemory = class {
653
740
  blockingOperation = "reembed";
654
741
  } else if (isIngestRunning) {
655
742
  blockingOperation = "ingest";
743
+ } else if (this._isImportActiveFor(entityId)) {
744
+ blockingOperation = "import";
745
+ } else if (this._isForgetActiveFor(entityId)) {
746
+ blockingOperation = "forget";
656
747
  }
657
748
  if (blockingOperation !== null) {
658
749
  throw new WikiBusyError(blockingOperation, entityId);
@@ -668,8 +759,15 @@ var WikiMemory = class {
668
759
  let deletedEntries = 0;
669
760
  let deletedTasks = 0;
670
761
  let deletedEvents = 0;
762
+ const deletedEntryIds = [];
671
763
  if (retainSoftDeletedFor !== null) {
672
764
  const cutoff = now - retainSoftDeletedFor * 864e5;
765
+ const entriesToDelete = await this.db.getAllAsync(
766
+ `SELECT id FROM ${this.prefix}entries
767
+ WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at < ?`,
768
+ [entityId, cutoff]
769
+ );
770
+ deletedEntryIds.push(...entriesToDelete.map((e) => e.id));
673
771
  const entryResult = await this.db.runAsync(
674
772
  `DELETE FROM ${this.prefix}entries
675
773
  WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at < ?`,
@@ -697,19 +795,39 @@ var WikiMemory = class {
697
795
  await this.db.execAsync(`VACUUM`);
698
796
  }
699
797
  await this.rebuildMiniSearchIndex(entityId);
798
+ this.vectorCache.delete(entityId);
799
+ const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
800
+ for (const factId of uniqueDeletedIds) {
801
+ try {
802
+ await this._notifyEmbeddingPersisted(entityId, factId, null);
803
+ } catch (hookErr) {
804
+ console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during prune for ${factId}:`, hookErr);
805
+ }
806
+ }
700
807
  return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
701
808
  } finally {
702
809
  this.activeMaintenanceJobs.delete(pruneKey);
703
810
  }
704
811
  }
705
- async read(entityId, query) {
706
- const maxResults = this.options.config?.maxResults ?? this.options.config?.maxFtsResults ?? 10;
812
+ async read(entityId, query, options) {
813
+ const config = this.options.config;
814
+ const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
815
+ const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
816
+ const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
817
+ const effectivePreFilterLimit = rawPreFilterLimit === void 0 ? void 0 : Number.isFinite(rawPreFilterLimit) ? Math.max(0, Math.trunc(rawPreFilterLimit)) : void 0;
818
+ const hybridWeight = options?.hybridWeight ?? config?.hybridWeight;
819
+ const weight = hybridWeight !== void 0 && !Number.isNaN(hybridWeight) ? Math.max(0, Math.min(1, hybridWeight)) : void 0;
820
+ const skipEmbed = weight === 0;
707
821
  const embedFn = this.options.llmProvider.embed;
708
822
  const trimmedQuery = query.trim();
709
823
  let facts = [];
710
- if (trimmedQuery) {
824
+ if (maxResults === 0) ; else if (trimmedQuery) {
711
825
  let usedEmbed = false;
712
- if (embedFn) {
826
+ if (!skipEmbed && embedFn) {
827
+ let rankerShouldRethrow = false;
828
+ let pendingRankerFallbackError;
829
+ let usedKeywordFallback = false;
830
+ let scoredAlreadySortedAndLimited = false;
713
831
  try {
714
832
  const queryVec = await embedFn(trimmedQuery);
715
833
  if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
@@ -728,51 +846,350 @@ var WikiMemory = class {
728
846
  );
729
847
  }
730
848
  }
731
- const scoreRows = await this.db.getAllAsync(
732
- `SELECT id, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
733
- [entityId]
849
+ const mismatchedCount = await this.db.getFirstAsync(
850
+ `SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
851
+ WHERE entity_id = ? AND deleted_at IS NULL
852
+ AND embedding_blob IS NOT NULL
853
+ AND (CAST(length(embedding_blob) AS INTEGER) % 4 = 0)
854
+ AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?`,
855
+ [entityId, queryVec.length]
734
856
  );
735
- const scored = scoreRows.map((row) => {
736
- let score = 0;
737
- if (row.embedding) {
738
- try {
739
- const parsed = JSON.parse(row.embedding);
740
- if (Array.isArray(parsed) && parsed.length === queryVec.length && parsed.every((v) => typeof v === "number" && isFinite(v))) {
741
- score = cosineSimilarity(queryVec, parsed);
857
+ if (mismatchedCount && mismatchedCount.cnt > 0) {
858
+ throw new Error(
859
+ `Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
860
+ );
861
+ }
862
+ const useRanker = Boolean(this.options.vectorRanker);
863
+ let candidateRows;
864
+ let populateCache = true;
865
+ let miniSearchScores;
866
+ if (effectivePreFilterLimit !== void 0) {
867
+ populateCache = false;
868
+ const preResults = this.miniSearch.search(trimmedQuery, {
869
+ filter: (r) => r.entity_id === entityId,
870
+ combineWith: "OR"
871
+ });
872
+ if (preResults.length === 0) {
873
+ candidateRows = null;
874
+ } else {
875
+ const topKResults = preResults.slice(0, effectivePreFilterLimit);
876
+ if (topKResults.length === 0) {
877
+ candidateRows = null;
878
+ } else {
879
+ const topKIds = topKResults.map((r) => r.id);
880
+ const inClauseChunkSize = 500;
881
+ if (useRanker) {
882
+ const rows = [];
883
+ for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
884
+ const idChunk = topKIds.slice(i, i + inClauseChunkSize);
885
+ const placeholders = idChunk.map(() => "?").join(",");
886
+ const chunkRows = await this.db.getAllAsync(
887
+ `SELECT id, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
888
+ idChunk
889
+ );
890
+ rows.push(...chunkRows);
891
+ }
892
+ candidateRows = rows;
893
+ } else {
894
+ const rows = [];
895
+ for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
896
+ const idChunk = topKIds.slice(i, i + inClauseChunkSize);
897
+ const placeholders = idChunk.map(() => "?").join(",");
898
+ const chunkRows = await this.db.getAllAsync(
899
+ `SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
900
+ idChunk
901
+ );
902
+ rows.push(...chunkRows);
903
+ }
904
+ candidateRows = rows;
905
+ }
906
+ if (weight !== void 0 && weight < 1) {
907
+ const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
908
+ miniSearchScores = new Map(topKResults.map((r) => [r.id, r.score / maxMsScore]));
742
909
  }
743
- } catch {
744
910
  }
745
911
  }
746
- return { row, score };
747
- });
748
- scored.sort((a, b) => {
749
- const scoreDiff = b.score - a.score;
750
- if (scoreDiff !== 0) {
751
- return scoreDiff;
912
+ } else {
913
+ if (useRanker) {
914
+ candidateRows = await this.db.getAllAsync(
915
+ `SELECT id, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
916
+ [entityId]
917
+ );
918
+ } else {
919
+ candidateRows = await this.db.getAllAsync(
920
+ `SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
921
+ [entityId]
922
+ );
752
923
  }
753
- const updatedAtDiff = (b.row.updated_at ?? 0) - (a.row.updated_at ?? 0);
754
- if (updatedAtDiff !== 0) {
755
- return updatedAtDiff;
924
+ if (weight !== void 0 && weight < 1) {
925
+ const msResults = this.miniSearch.search(trimmedQuery, {
926
+ filter: (r) => r.entity_id === entityId,
927
+ combineWith: "OR"
928
+ });
929
+ const maxMsScore = Math.max(1, msResults[0]?.score ?? 1);
930
+ miniSearchScores = new Map(msResults.map((r) => [r.id, r.score / maxMsScore]));
931
+ }
932
+ }
933
+ if (candidateRows === null) {
934
+ usedEmbed = true;
935
+ } else {
936
+ let scored;
937
+ if (useRanker) {
938
+ const candidateIds = effectivePreFilterLimit !== void 0 ? candidateRows.map((r) => r.id) : void 0;
939
+ try {
940
+ const oversampledLimit = Math.max(maxResults * 2, maxResults + 50);
941
+ scored = await this._rankWithVectorRanker({
942
+ entityId,
943
+ queryVec,
944
+ candidateIds,
945
+ weight,
946
+ miniSearchScores,
947
+ limit: oversampledLimit
948
+ });
949
+ if (scored.length > 0) {
950
+ const scoredIds2 = new Set(scored.map((s) => s.id));
951
+ const metaMap = /* @__PURE__ */ new Map();
952
+ for (const r of candidateRows) {
953
+ if (scoredIds2.has(r.id)) {
954
+ metaMap.set(r.id, { updated_at: r.updated_at, access_count: r.access_count });
955
+ }
956
+ }
957
+ scored = scored.map((s) => {
958
+ const meta = metaMap.get(s.id);
959
+ return { ...s, updated_at: meta?.updated_at ?? null, access_count: meta?.access_count ?? null };
960
+ });
961
+ }
962
+ const scoredIds = new Set(scored.map((s) => s.id));
963
+ const isHybrid = weight !== void 0 && weight < 1;
964
+ const maxBackfill = isHybrid ? maxResults : Math.max(0, maxResults - scored.length);
965
+ if (maxBackfill > 0) {
966
+ if (isHybrid) {
967
+ const topK = [];
968
+ for (const row of candidateRows) {
969
+ if (scoredIds.has(row.id)) continue;
970
+ const kwScore = miniSearchScores?.get(row.id) ?? 0;
971
+ const candidate = { row, kwScore };
972
+ if (topK.length < maxBackfill) {
973
+ let insertIdx = topK.length;
974
+ for (let i = 0; i < topK.length; i++) {
975
+ const cmp = this._compareScoredRows(
976
+ {
977
+ id: candidate.row.id,
978
+ score: candidate.kwScore,
979
+ updated_at: candidate.row.updated_at,
980
+ access_count: candidate.row.access_count
981
+ },
982
+ {
983
+ id: topK[i].row.id,
984
+ score: topK[i].kwScore,
985
+ updated_at: topK[i].row.updated_at,
986
+ access_count: topK[i].row.access_count
987
+ }
988
+ );
989
+ if (cmp < 0) {
990
+ insertIdx = i;
991
+ break;
992
+ }
993
+ }
994
+ topK.splice(insertIdx, 0, candidate);
995
+ } else {
996
+ const cmpWorst = this._compareScoredRows(
997
+ {
998
+ id: candidate.row.id,
999
+ score: candidate.kwScore,
1000
+ updated_at: candidate.row.updated_at,
1001
+ access_count: candidate.row.access_count
1002
+ },
1003
+ {
1004
+ id: topK[maxBackfill - 1].row.id,
1005
+ score: topK[maxBackfill - 1].kwScore,
1006
+ updated_at: topK[maxBackfill - 1].row.updated_at,
1007
+ access_count: topK[maxBackfill - 1].row.access_count
1008
+ }
1009
+ );
1010
+ if (cmpWorst < 0) {
1011
+ let insertIdx = maxBackfill - 1;
1012
+ for (let i = 0; i < topK.length; i++) {
1013
+ const cmp = this._compareScoredRows(
1014
+ {
1015
+ id: candidate.row.id,
1016
+ score: candidate.kwScore,
1017
+ updated_at: candidate.row.updated_at,
1018
+ access_count: candidate.row.access_count
1019
+ },
1020
+ {
1021
+ id: topK[i].row.id,
1022
+ score: topK[i].kwScore,
1023
+ updated_at: topK[i].row.updated_at,
1024
+ access_count: topK[i].row.access_count
1025
+ }
1026
+ );
1027
+ if (cmp < 0) {
1028
+ insertIdx = i;
1029
+ break;
1030
+ }
1031
+ }
1032
+ topK.splice(insertIdx, 0, candidate);
1033
+ topK.pop();
1034
+ }
1035
+ }
1036
+ }
1037
+ for (const { row, kwScore } of topK) {
1038
+ scored.push({
1039
+ id: row.id,
1040
+ score: (1 - weight) * kwScore,
1041
+ updated_at: row.updated_at,
1042
+ access_count: row.access_count
1043
+ });
1044
+ }
1045
+ } else {
1046
+ const omitted = [];
1047
+ for (const row of candidateRows) {
1048
+ if (scoredIds.has(row.id)) continue;
1049
+ omitted.push({ id: row.id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
1050
+ }
1051
+ if (omitted.length > 0) {
1052
+ this._tieBreakSort(omitted);
1053
+ scored.push(...omitted.slice(0, maxBackfill));
1054
+ }
1055
+ }
1056
+ }
1057
+ } catch (rankerErr) {
1058
+ const rankerError = rankerErr instanceof Error ? rankerErr : new Error(String(rankerErr));
1059
+ const policy = this.options.vectorRankerFallback ?? "js-cosine";
1060
+ this.options.onVectorRankerFallback?.({ error: rankerError, policy });
1061
+ if (policy === "throw") {
1062
+ rankerShouldRethrow = true;
1063
+ throw rankerError;
1064
+ } else if (policy === "js-cosine") {
1065
+ let fallbackRows = candidateRows;
1066
+ if (fallbackRows && fallbackRows.length > 0 && !("embedding_blob" in fallbackRows[0])) {
1067
+ const rowIds = fallbackRows.map((r) => r.id);
1068
+ const embeddingsMap = /* @__PURE__ */ new Map();
1069
+ const chunkSize = 500;
1070
+ for (let i = 0; i < rowIds.length; i += chunkSize) {
1071
+ const idChunk = rowIds.slice(i, i + chunkSize);
1072
+ const placeholders = idChunk.map(() => "?").join(",");
1073
+ const embeddingRows = await this.db.getAllAsync(
1074
+ `SELECT id, embedding_blob, embedding FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
1075
+ [...idChunk, entityId]
1076
+ );
1077
+ for (const row of embeddingRows) {
1078
+ embeddingsMap.set(row.id, { embedding_blob: row.embedding_blob, embedding: row.embedding });
1079
+ }
1080
+ }
1081
+ fallbackRows = fallbackRows.map((r) => ({
1082
+ ...r,
1083
+ embedding_blob: embeddingsMap.get(r.id)?.embedding_blob ?? null,
1084
+ embedding: embeddingsMap.get(r.id)?.embedding ?? null
1085
+ }));
1086
+ }
1087
+ scored = await this._rankWithJsCosine({
1088
+ entityId,
1089
+ queryVec,
1090
+ candidateRows: fallbackRows,
1091
+ weight,
1092
+ miniSearchScores,
1093
+ populateCache,
1094
+ limit: maxResults
1095
+ });
1096
+ scoredAlreadySortedAndLimited = true;
1097
+ } else if (policy === "keyword") {
1098
+ const msResults = this.miniSearch.search(trimmedQuery, {
1099
+ filter: (r) => r.entity_id === entityId,
1100
+ combineWith: "OR"
1101
+ });
1102
+ const topResults = msResults.slice(0, maxResults);
1103
+ const resultIds = new Set(topResults.map((r) => r.id));
1104
+ const candidateMap = /* @__PURE__ */ new Map();
1105
+ for (const r of candidateRows) {
1106
+ if (resultIds.has(r.id)) {
1107
+ candidateMap.set(r.id, { updated_at: r.updated_at, access_count: r.access_count });
1108
+ }
1109
+ }
1110
+ scored = topResults.map((r) => {
1111
+ const meta = candidateMap.get(r.id);
1112
+ return {
1113
+ id: r.id,
1114
+ score: r.score ?? 0,
1115
+ access_count: meta?.access_count ?? null,
1116
+ updated_at: meta?.updated_at ?? null
1117
+ };
1118
+ });
1119
+ usedKeywordFallback = true;
1120
+ } else {
1121
+ scored = [];
1122
+ }
1123
+ if (this.options.propagateRankerFailureToRetrievalFallback) {
1124
+ const mirrored = new Error("Vector ranker failed, falling back");
1125
+ mirrored.cause = rankerError;
1126
+ pendingRankerFallbackError = mirrored;
1127
+ }
1128
+ }
1129
+ } else {
1130
+ scored = await this._rankWithJsCosine({
1131
+ entityId,
1132
+ queryVec,
1133
+ candidateRows,
1134
+ weight,
1135
+ miniSearchScores,
1136
+ populateCache,
1137
+ limit: maxResults
1138
+ });
1139
+ scoredAlreadySortedAndLimited = true;
756
1140
  }
757
- const accessCountDiff = (b.row.access_count ?? 0) - (a.row.access_count ?? 0);
758
- if (accessCountDiff !== 0) {
759
- return accessCountDiff;
1141
+ if (scored.length > 0) {
1142
+ if (!usedKeywordFallback && !scoredAlreadySortedAndLimited) {
1143
+ this._tieBreakSort(scored);
1144
+ }
1145
+ const topIds = (scoredAlreadySortedAndLimited ? scored : scored.slice(0, maxResults)).map((s) => s.id);
1146
+ if (topIds.length > 0) {
1147
+ const fullRows = [];
1148
+ const phase2ChunkSize = 500;
1149
+ for (let i = 0; i < topIds.length; i += phase2ChunkSize) {
1150
+ const idChunk = topIds.slice(i, i + phase2ChunkSize);
1151
+ const placeholders = idChunk.map(() => "?").join(",");
1152
+ const chunkRows = await this.db.getAllAsync(
1153
+ `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
1154
+ [...idChunk, entityId]
1155
+ );
1156
+ fullRows.push(...chunkRows);
1157
+ }
1158
+ const byId = new Map(fullRows.map((r) => [r.id, r]));
1159
+ facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
1160
+ if (facts.length < topIds.length) {
1161
+ const missingIds = topIds.filter((id) => !byId.has(id));
1162
+ const missingCount = missingIds.length;
1163
+ const sample = missingIds.slice(0, 5);
1164
+ const sampleSuffix = sample.length > 0 ? ` Missing ID sample: ${sample.join(", ")}${missingIds.length > sample.length ? ", ..." : ""}.` : "";
1165
+ const error = new Error(
1166
+ `Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs for entity ${entityId}. Rows may have been concurrently soft-deleted or filtered by deleted_at during hydration, or vector ranker output may include IDs that do not exist for this entity.` + sampleSuffix
1167
+ );
1168
+ this.options.onRetrievalFallback?.(error);
1169
+ }
1170
+ }
1171
+ if (pendingRankerFallbackError) {
1172
+ this.options.onRetrievalFallback?.(pendingRankerFallbackError);
1173
+ pendingRankerFallbackError = void 0;
1174
+ }
1175
+ usedEmbed = true;
1176
+ } else {
1177
+ if (pendingRankerFallbackError) {
1178
+ this.options.onRetrievalFallback?.(pendingRankerFallbackError);
1179
+ pendingRankerFallbackError = void 0;
1180
+ }
1181
+ usedEmbed = true;
760
1182
  }
761
- return a.row.id.localeCompare(b.row.id);
762
- });
763
- const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
764
- if (topIds.length > 0) {
765
- const placeholders = topIds.map(() => "?").join(",");
766
- const fullRows = await this.db.getAllAsync(
767
- `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
768
- topIds
769
- );
770
- const byId = new Map(fullRows.map((r) => [r.id, r]));
771
- facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
772
1183
  }
773
- usedEmbed = true;
774
1184
  } catch (err) {
775
1185
  const error = err instanceof Error ? err : new Error(String(err));
1186
+ if (rankerShouldRethrow) {
1187
+ throw error;
1188
+ }
1189
+ if (pendingRankerFallbackError) {
1190
+ error.cause = pendingRankerFallbackError;
1191
+ pendingRankerFallbackError = void 0;
1192
+ }
776
1193
  this.options.onRetrievalFallback?.(error);
777
1194
  }
778
1195
  }
@@ -783,25 +1200,35 @@ var WikiMemory = class {
783
1200
  });
784
1201
  const topIds = results.slice(0, maxResults).map((r) => r.id);
785
1202
  if (topIds.length > 0) {
786
- const placeholders = topIds.map(() => "?").join(",");
787
- const rows = await this.db.getAllAsync(
788
- `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
789
- topIds
790
- );
791
- const byId = new Map(rows.map((r) => [r.id, r]));
1203
+ const kwRows = [];
1204
+ const kwChunkSize = 500;
1205
+ for (let i = 0; i < topIds.length; i += kwChunkSize) {
1206
+ const idChunk = topIds.slice(i, i + kwChunkSize);
1207
+ const placeholders = idChunk.map(() => "?").join(",");
1208
+ const chunkRows = await this.db.getAllAsync(
1209
+ `SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
1210
+ [...idChunk, entityId]
1211
+ );
1212
+ kwRows.push(...chunkRows);
1213
+ }
1214
+ const byId = new Map(kwRows.map((r) => [r.id, r]));
792
1215
  facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
793
1216
  }
794
1217
  }
795
1218
  if (facts.length > 0) {
796
1219
  const ids = facts.map((f) => f.id);
797
- const placeholders = ids.map(() => "?").join(",");
798
1220
  const now = Date.now();
799
- await this.db.runAsync(
800
- `UPDATE ${this.prefix}entries
801
- SET access_count = access_count + 1, last_accessed_at = ?
802
- WHERE id IN (${placeholders})`,
803
- [now, ...ids]
804
- );
1221
+ const accessChunkSize = 500;
1222
+ for (let i = 0; i < ids.length; i += accessChunkSize) {
1223
+ const idChunk = ids.slice(i, i + accessChunkSize);
1224
+ const placeholders = idChunk.map(() => "?").join(",");
1225
+ await this.db.runAsync(
1226
+ `UPDATE ${this.prefix}entries
1227
+ SET access_count = access_count + 1, last_accessed_at = ?
1228
+ WHERE id IN (${placeholders})`,
1229
+ [now, ...idChunk]
1230
+ );
1231
+ }
805
1232
  }
806
1233
  } else {
807
1234
  facts = await this.db.getAllAsync(
@@ -828,7 +1255,7 @@ var WikiMemory = class {
828
1255
  )
829
1256
  ]);
830
1257
  const parsedFacts = facts.map((f) => {
831
- const { embedding: _embedding, ...rest } = f;
1258
+ const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
832
1259
  return {
833
1260
  ...rest,
834
1261
  tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
@@ -836,6 +1263,113 @@ var WikiMemory = class {
836
1263
  });
837
1264
  return { facts: parsedFacts, tasks, events: events.reverse() };
838
1265
  }
1266
+ /**
1267
+ * Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
1268
+ */
1269
+ _tieBreakSort(items) {
1270
+ items.sort((a, b) => this._compareScoredRows(a, b));
1271
+ }
1272
+ /**
1273
+ * Comparator for score + deterministic tie-break fields.
1274
+ * Negative return means "a ranks ahead of b" for descending score order.
1275
+ */
1276
+ _compareScoredRows(a, b) {
1277
+ const scoreDiff = b.score - a.score;
1278
+ if (scoreDiff !== 0) return scoreDiff;
1279
+ const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
1280
+ if (accessCountDiff !== 0) return accessCountDiff;
1281
+ const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
1282
+ if (updatedAtDiff !== 0) return updatedAtDiff;
1283
+ return a.id.localeCompare(b.id);
1284
+ }
1285
+ /**
1286
+ * Score candidate rows using in-process JS cosine similarity.
1287
+ * Applies hybrid blending (if weight set) and tie-break sorting before returning.
1288
+ */
1289
+ async _rankWithJsCosine(args) {
1290
+ const { entityId, queryVec, candidateRows, weight, miniSearchScores, populateCache, limit } = args;
1291
+ let entityCache = this.vectorCache.get(entityId);
1292
+ const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
1293
+ if (tooLarge && entityCache) {
1294
+ this.vectorCache.delete(entityId);
1295
+ entityCache = void 0;
1296
+ }
1297
+ const canCache = populateCache && !tooLarge;
1298
+ if (canCache && !entityCache) {
1299
+ entityCache = /* @__PURE__ */ new Map();
1300
+ }
1301
+ const scored = candidateRows.map((row) => {
1302
+ let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
1303
+ if (vector && canCache && entityCache && !entityCache.has(row.id)) {
1304
+ entityCache.set(row.id, vector);
1305
+ }
1306
+ let score = 0;
1307
+ if (vector && vector.length === queryVec.length) {
1308
+ const cosSim = cosineSimilarity(queryVec, vector);
1309
+ if (weight !== void 0) {
1310
+ const kwScore = miniSearchScores?.get(row.id) ?? 0;
1311
+ score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
1312
+ } else {
1313
+ score = cosSim;
1314
+ }
1315
+ } else if (weight !== void 0 && weight < 1) {
1316
+ const kwScore = miniSearchScores?.get(row.id) ?? 0;
1317
+ score = (1 - weight) * kwScore;
1318
+ } else {
1319
+ score = -2;
1320
+ }
1321
+ return { id: row.id, score, updated_at: row.updated_at, access_count: row.access_count };
1322
+ });
1323
+ if (canCache && entityCache && entityCache.size > 0) {
1324
+ if (!this.vectorCache.has(entityId)) {
1325
+ if (this.vectorCache.size >= _WikiMemory.MAX_VECTOR_CACHE_ENTITIES) {
1326
+ const oldestKey = this.vectorCache.keys().next().value;
1327
+ if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
1328
+ }
1329
+ this.vectorCache.set(entityId, entityCache);
1330
+ }
1331
+ }
1332
+ this._tieBreakSort(scored);
1333
+ return scored.slice(0, limit);
1334
+ }
1335
+ /**
1336
+ * Delegate semantic ranking to the injected VectorRanker.
1337
+ * Caller should pass an oversampledLimit to preserve recall after re-ranking.
1338
+ * Returns scored results ready for hybrid blending and tie-break sorting.
1339
+ */
1340
+ async _rankWithVectorRanker(args) {
1341
+ const { entityId, queryVec, candidateIds, weight, miniSearchScores, limit } = args;
1342
+ const ranker = this.options.vectorRanker;
1343
+ if (!ranker) {
1344
+ throw new Error("vectorRanker not configured");
1345
+ }
1346
+ const rankerResults = await ranker.rankBySimilarity({
1347
+ entityId,
1348
+ queryVec,
1349
+ candidateIds,
1350
+ limit
1351
+ });
1352
+ const allowedIds = candidateIds ? new Set(candidateIds) : void 0;
1353
+ const seen = /* @__PURE__ */ new Set();
1354
+ const normalized = [];
1355
+ for (const r of rankerResults) {
1356
+ if (normalized.length >= limit) break;
1357
+ if (seen.has(r.id)) continue;
1358
+ if (allowedIds && !allowedIds.has(r.id)) continue;
1359
+ if (!Number.isFinite(r.semanticScore)) continue;
1360
+ seen.add(r.id);
1361
+ normalized.push(r);
1362
+ }
1363
+ const scored = normalized.map((r) => {
1364
+ let score = r.semanticScore;
1365
+ if (weight !== void 0) {
1366
+ const kwScore = miniSearchScores?.get(r.id) ?? 0;
1367
+ score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
1368
+ }
1369
+ return { id: r.id, score };
1370
+ });
1371
+ return scored;
1372
+ }
839
1373
  async getMemoryBundle(entityId) {
840
1374
  return this._getFullBundle(entityId, { maxEvents: 10 });
841
1375
  }
@@ -860,7 +1394,7 @@ var WikiMemory = class {
860
1394
  if (memoryCheckpoint > count) memoryCheckpoint = 0;
861
1395
  if (count - memoryCheckpoint >= threshold) {
862
1396
  const jobKey = this._librarianKey(entityId);
863
- if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
1397
+ if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId)) && !this._isReembedActive(entityId) && !this._isImportActiveFor(entityId) && !this._isForgetActiveFor(entityId)) {
864
1398
  this.activeMaintenanceJobs.add(jobKey);
865
1399
  this.runLibrarianThenMaybeHeal(entityId, count).catch(console.error).finally(() => this.activeMaintenanceJobs.delete(jobKey));
866
1400
  }
@@ -908,7 +1442,7 @@ var WikiMemory = class {
908
1442
  LIMIT 100
909
1443
  `, [entityId]);
910
1444
  const currentFacts = currentFactsRows.map((f) => {
911
- const { embedding: _embedding, ...rest } = f;
1445
+ const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
912
1446
  return {
913
1447
  ...rest,
914
1448
  tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
@@ -952,7 +1486,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
952
1486
  INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
953
1487
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
954
1488
  `, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
955
- insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
1489
+ insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
956
1490
  }
957
1491
  for (const task of validTasks) {
958
1492
  const id = generateId("task_");
@@ -962,10 +1496,12 @@ ${JSON.stringify(currentFacts, null, 2)}`;
962
1496
  `, [id, entityId, task.description, "pending", task.priority, now, now]);
963
1497
  }
964
1498
  });
1499
+ await this.rebuildMiniSearchIndex(entityId);
1500
+ this.vectorCache.delete(entityId);
965
1501
  for (const fact of insertedFacts) {
966
1502
  await this.embedFact(fact);
967
1503
  }
968
- await this.rebuildMiniSearchIndex(entityId);
1504
+ this.vectorCache.delete(entityId);
969
1505
  }
970
1506
  async _doRunHeal(entityId) {
971
1507
  const now = Date.now();
@@ -1003,7 +1539,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
1003
1539
  const documentAnchors = allFactsRows.filter((f) => f.source_type === "user_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
1004
1540
  const userPrompt = `Heal Candidates:
1005
1541
  ${JSON.stringify(healCandidates.map((f) => {
1006
- const { embedding: _embedding, ...rest } = f;
1542
+ const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
1007
1543
  return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
1008
1544
  }), null, 2)}
1009
1545
 
@@ -1030,6 +1566,7 @@ The following document anchors are provided for contradiction detection only. Do
1030
1566
  const safeDeleted = deleted.filter((id) => mutableIds.has(id));
1031
1567
  const validNewFacts = newFacts.map(validateFact).filter((f) => f !== null);
1032
1568
  const insertedFacts = [];
1569
+ const uniqueDeletedFactIds = Array.from(new Set(safeDeleted));
1033
1570
  await this.db.withTransactionAsync(async () => {
1034
1571
  for (const id of safeDowngraded) {
1035
1572
  await this.db.runAsync(`UPDATE ${this.prefix}entries SET confidence = 'tentative', updated_at = ? WHERE id = ? AND entity_id = ?`, [now, id, entityId]);
@@ -1043,13 +1580,22 @@ The following document anchors are provided for contradiction detection only. Do
1043
1580
  INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
1044
1581
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1045
1582
  `, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
1046
- insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
1583
+ insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
1047
1584
  }
1048
1585
  });
1586
+ this.vectorCache.delete(entityId);
1587
+ await this.rebuildMiniSearchIndex(entityId);
1588
+ for (const factId of uniqueDeletedFactIds) {
1589
+ try {
1590
+ await this._notifyEmbeddingPersisted(entityId, factId, null);
1591
+ } catch (hookErr) {
1592
+ console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal for ${factId}:`, hookErr);
1593
+ }
1594
+ }
1049
1595
  for (const fact of insertedFacts) {
1050
1596
  await this.embedFact(fact);
1051
1597
  }
1052
- await this.rebuildMiniSearchIndex(entityId);
1598
+ this.vectorCache.delete(entityId);
1053
1599
  }
1054
1600
  async runLibrarian(entityId) {
1055
1601
  const jobKey = this._librarianKey(entityId);
@@ -1062,6 +1608,12 @@ The following document anchors are provided for contradiction detection only. Do
1062
1608
  if (this._isReembedActive(entityId)) {
1063
1609
  throw new WikiBusyError("reembed", entityId);
1064
1610
  }
1611
+ if (this._isImportActiveFor(entityId)) {
1612
+ throw new WikiBusyError("import", entityId);
1613
+ }
1614
+ if (this._isForgetActiveFor(entityId)) {
1615
+ throw new WikiBusyError("forget", entityId);
1616
+ }
1065
1617
  this.activeMaintenanceJobs.add(jobKey);
1066
1618
  try {
1067
1619
  await this._doRunLibrarian(entityId);
@@ -1080,6 +1632,12 @@ The following document anchors are provided for contradiction detection only. Do
1080
1632
  if (this._isReembedActive(entityId)) {
1081
1633
  throw new WikiBusyError("reembed", entityId);
1082
1634
  }
1635
+ if (this._isImportActiveFor(entityId)) {
1636
+ throw new WikiBusyError("import", entityId);
1637
+ }
1638
+ if (this._isForgetActiveFor(entityId)) {
1639
+ throw new WikiBusyError("forget", entityId);
1640
+ }
1083
1641
  this.activeMaintenanceJobs.add(jobKey);
1084
1642
  try {
1085
1643
  await this._doRunHeal(entityId);
@@ -1087,9 +1645,9 @@ The following document anchors are provided for contradiction detection only. Do
1087
1645
  this.activeMaintenanceJobs.delete(jobKey);
1088
1646
  }
1089
1647
  }
1090
- async runReembed(entityId) {
1648
+ async runReembed(entityId, opts) {
1091
1649
  const embedFn = this.options.llmProvider.embed;
1092
- if (!embedFn) return { embedded: 0, skipped: 0 };
1650
+ if (!embedFn) return { embedded: 0, skipped: 0, failed: 0 };
1093
1651
  const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
1094
1652
  if (this.activeMaintenanceJobs.has(reembedKey)) {
1095
1653
  throw new WikiBusyError("reembed", entityId ?? "*");
@@ -1110,6 +1668,12 @@ The following document anchors are provided for contradiction detection only. Do
1110
1668
  if (this._isIngestActiveFor(entityId)) {
1111
1669
  throw new WikiBusyError("ingest", entityId);
1112
1670
  }
1671
+ if (this._isImportActiveFor(entityId)) {
1672
+ throw new WikiBusyError("import", entityId);
1673
+ }
1674
+ if (this._isForgetActiveFor(entityId)) {
1675
+ throw new WikiBusyError("forget", entityId);
1676
+ }
1113
1677
  } else {
1114
1678
  if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
1115
1679
  throw new WikiBusyError("reembed", "*");
@@ -1126,6 +1690,12 @@ The following document anchors are provided for contradiction detection only. Do
1126
1690
  if (this.activeIngestJobs.size > 0) {
1127
1691
  throw new WikiBusyError("ingest", "*");
1128
1692
  }
1693
+ if (this._isAnyMaintenanceActiveWithSuffix(":import")) {
1694
+ throw new WikiBusyError("import", "*");
1695
+ }
1696
+ if (this._isAnyMaintenanceActiveWithSuffix(":forget")) {
1697
+ throw new WikiBusyError("forget", "*");
1698
+ }
1129
1699
  }
1130
1700
  this.activeMaintenanceJobs.add(reembedKey);
1131
1701
  try {
@@ -1135,17 +1705,64 @@ The following document anchors are provided for contradiction detection only. Do
1135
1705
  `SELECT * FROM ${this.prefix}entries WHERE ${where}`,
1136
1706
  params
1137
1707
  );
1708
+ if (entityId) {
1709
+ this.vectorCache.delete(entityId);
1710
+ } else {
1711
+ this.vectorCache.clear();
1712
+ }
1713
+ const skipExisting = opts?.skipExisting ?? false;
1714
+ let effectiveSkip = skipExisting;
1715
+ if (skipExisting) {
1716
+ const mismatchRow = await this.db.getFirstAsync(
1717
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
1718
+ );
1719
+ if (mismatchRow) {
1720
+ if (entityId) {
1721
+ const mismatchDim = parseInt(mismatchRow.value, 10);
1722
+ const staleForEntity = await this.db.getFirstAsync(
1723
+ `SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
1724
+ WHERE entity_id = ? AND deleted_at IS NULL
1725
+ AND (
1726
+ embedding_blob IS NULL
1727
+ OR (CAST(length(embedding_blob) AS INTEGER) / 4) != ?
1728
+ )`,
1729
+ [entityId, mismatchDim]
1730
+ );
1731
+ if (staleForEntity && staleForEntity.cnt > 0) effectiveSkip = false;
1732
+ } else {
1733
+ effectiveSkip = false;
1734
+ }
1735
+ }
1736
+ }
1138
1737
  let embedded = 0;
1139
1738
  let skipped = 0;
1140
- for (const row of rows) {
1141
- const success = await this.embedFact(row);
1142
- if (success) embedded++;
1143
- else skipped++;
1144
- }
1145
- if (embedded > 0) {
1146
- await this._reconcileEmbeddingDimension();
1739
+ let failed = 0;
1740
+ try {
1741
+ for (const row of rows) {
1742
+ const existingBlob = row.embedding_blob;
1743
+ const blobIsValid = !!existingBlob && existingBlob.byteLength > 0 && existingBlob.byteLength % 4 === 0;
1744
+ if (effectiveSkip && blobIsValid) {
1745
+ const vec = parseEmbedding(existingBlob, null);
1746
+ if (vec !== null && vec.every((v) => Number.isFinite(v))) {
1747
+ skipped++;
1748
+ continue;
1749
+ }
1750
+ }
1751
+ const success = await this.embedFact(row);
1752
+ if (success) embedded++;
1753
+ else failed++;
1754
+ }
1755
+ if (embedded > 0) {
1756
+ await this._reconcileEmbeddingDimension();
1757
+ }
1758
+ } finally {
1759
+ if (entityId) {
1760
+ this.vectorCache.delete(entityId);
1761
+ } else {
1762
+ this.vectorCache.clear();
1763
+ }
1147
1764
  }
1148
- return { embedded, skipped };
1765
+ return { embedded, skipped, failed };
1149
1766
  } finally {
1150
1767
  this.activeMaintenanceJobs.delete(reembedKey);
1151
1768
  }
@@ -1165,6 +1782,9 @@ The following document anchors are provided for contradiction detection only. Do
1165
1782
  heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
1166
1783
  };
1167
1784
  }
1785
+ clearVectorCache() {
1786
+ this.vectorCache.clear();
1787
+ }
1168
1788
  async _getFullBundle(entityId, opts) {
1169
1789
  const maxEvents = opts?.maxEvents;
1170
1790
  const eventsQuery = maxEvents != null ? `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at ASC`;
@@ -1181,10 +1801,16 @@ The following document anchors are provided for contradiction detection only. Do
1181
1801
  this.db.getAllAsync(eventsQuery, eventsParams)
1182
1802
  ]);
1183
1803
  const facts = factsRaw.map((f) => {
1184
- const { embedding: _embedding, ...rest } = f;
1804
+ const { embedding: _embedding, embedding_blob, ...rest } = f;
1805
+ const safeBlobCopy = opts?.includeBlobs && embedding_blob ? (() => {
1806
+ const c = new ArrayBuffer(embedding_blob.byteLength);
1807
+ new Uint8Array(c).set(embedding_blob);
1808
+ return new Uint8Array(c);
1809
+ })() : void 0;
1810
+ const factBase = safeBlobCopy ? { ...rest, embedding_blob: safeBlobCopy } : rest;
1185
1811
  return {
1186
- ...rest,
1187
- tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
1812
+ ...factBase,
1813
+ tags: typeof factBase.tags === "string" ? JSON.parse(factBase.tags) : factBase.tags
1188
1814
  };
1189
1815
  });
1190
1816
  const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
@@ -1211,7 +1837,7 @@ The following document anchors are provided for contradiction detection only. Do
1211
1837
  for (let i = 0; i < ids.length; i += BATCH) {
1212
1838
  const batch = ids.slice(i, i + BATCH);
1213
1839
  const batchResults = await Promise.all(
1214
- batch.map(async (id) => [id, await this._getFullBundle(id)])
1840
+ batch.map(async (id) => [id, await this._getFullBundle(id, { includeBlobs: true })])
1215
1841
  );
1216
1842
  for (const [id, bundle] of batchResults) {
1217
1843
  entities[id] = bundle;
@@ -1221,172 +1847,406 @@ The following document anchors are provided for contradiction detection only. Do
1221
1847
  }
1222
1848
  async importDump(dump, opts) {
1223
1849
  const merge = opts?.merge ?? false;
1224
- for (const [entityId, bundle] of Object.entries(dump.entities)) {
1225
- await this.db.withTransactionAsync(async () => {
1226
- if (!merge) {
1227
- const now = Date.now();
1228
- await this.db.runAsync(
1229
- `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
1230
- [now, now, entityId]
1231
- );
1232
- await this.db.runAsync(
1233
- `UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
1234
- [now, now, entityId]
1235
- );
1236
- await this.db.runAsync(
1237
- `DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
1238
- [entityId]
1239
- );
1850
+ const entityIds = Object.keys(dump.entities);
1851
+ for (const entityId of entityIds) {
1852
+ if (this.activeMaintenanceJobs.has(this._importKey(entityId))) {
1853
+ throw new WikiBusyError("import", entityId);
1854
+ }
1855
+ if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
1856
+ throw new WikiBusyError("librarian", entityId);
1857
+ }
1858
+ if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
1859
+ throw new WikiBusyError("heal", entityId);
1860
+ }
1861
+ if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
1862
+ throw new WikiBusyError("prune", entityId);
1863
+ }
1864
+ if (this._isReembedActive(entityId)) {
1865
+ throw new WikiBusyError("reembed", entityId);
1866
+ }
1867
+ if (this._isIngestActiveFor(entityId)) {
1868
+ throw new WikiBusyError("ingest", entityId);
1869
+ }
1870
+ if (this._isForgetActiveFor(entityId)) {
1871
+ throw new WikiBusyError("forget", entityId);
1872
+ }
1873
+ }
1874
+ if (this.activeMaintenanceJobs.has(this._globalImportKey())) {
1875
+ throw new WikiBusyError("import", "*");
1876
+ }
1877
+ this.activeMaintenanceJobs.add(this._globalImportKey());
1878
+ for (const entityId of entityIds) {
1879
+ this.activeMaintenanceJobs.add(this._importKey(entityId));
1880
+ }
1881
+ try {
1882
+ for (const [entityId, bundle] of Object.entries(dump.entities)) {
1883
+ await this._doImportEntity(entityId, bundle, merge);
1884
+ }
1885
+ } finally {
1886
+ this.activeMaintenanceJobs.delete(this._globalImportKey());
1887
+ for (const entityId of entityIds) {
1888
+ this.activeMaintenanceJobs.delete(this._importKey(entityId));
1889
+ }
1890
+ }
1891
+ }
1892
+ async _doImportEntity(entityId, bundle, merge) {
1893
+ const upsertedFactIds = /* @__PURE__ */ new Set();
1894
+ const upsertedDeletedFactIds = /* @__PURE__ */ new Set();
1895
+ const factsWithPreservedBlob = /* @__PURE__ */ new Map();
1896
+ const preservedBlobDims = /* @__PURE__ */ new Set();
1897
+ const softDeletedFactIds = [];
1898
+ await this.db.withTransactionAsync(async () => {
1899
+ if (!merge) {
1900
+ const toDelete = await this.db.getAllAsync(
1901
+ `SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
1902
+ [entityId]
1903
+ );
1904
+ softDeletedFactIds.push(...toDelete.map((r) => r.id));
1905
+ const now = Date.now();
1906
+ await this.db.runAsync(
1907
+ `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
1908
+ [now, now, entityId]
1909
+ );
1910
+ await this.db.runAsync(
1911
+ `UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
1912
+ [now, now, entityId]
1913
+ );
1914
+ await this.db.runAsync(
1915
+ `DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
1916
+ [entityId]
1917
+ );
1918
+ }
1919
+ const factIds = bundle.facts.map((fact) => fact.id);
1920
+ const existingFactsById = /* @__PURE__ */ new Map();
1921
+ const factLookupChunkSize = 500;
1922
+ for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
1923
+ const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
1924
+ if (factIdChunk.length === 0) continue;
1925
+ const placeholders = factIdChunk.map(() => "?").join(", ");
1926
+ const existingFacts = await this.db.getAllAsync(
1927
+ `SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
1928
+ factIdChunk
1929
+ );
1930
+ for (const existingFact of existingFacts) {
1931
+ existingFactsById.set(existingFact.id, existingFact);
1240
1932
  }
1241
- const factIds = bundle.facts.map((fact) => fact.id);
1242
- const existingFactsById = /* @__PURE__ */ new Map();
1243
- const factLookupChunkSize = 500;
1244
- for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
1245
- const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
1246
- if (factIdChunk.length === 0) continue;
1247
- const placeholders = factIdChunk.map(() => "?").join(", ");
1248
- const existingFacts = await this.db.getAllAsync(
1249
- `SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
1250
- factIdChunk
1251
- );
1252
- for (const existingFact of existingFacts) {
1253
- existingFactsById.set(existingFact.id, existingFact);
1933
+ }
1934
+ for (const fact of bundle.facts) {
1935
+ const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
1936
+ const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
1937
+ const existing = existingFactsById.get(fact.id);
1938
+ const rawBlobRaw = fact.embedding_blob;
1939
+ let rawBlob = null;
1940
+ if (rawBlobRaw instanceof Uint8Array) {
1941
+ rawBlob = rawBlobRaw;
1942
+ } else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
1943
+ const obj = rawBlobRaw;
1944
+ if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
1945
+ rawBlob = new Uint8Array(obj["data"]);
1946
+ } else if (!Array.isArray(rawBlobRaw)) {
1947
+ const entries = Object.keys(obj);
1948
+ if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
1949
+ const len = entries.length;
1950
+ rawBlob = new Uint8Array(len);
1951
+ for (let i = 0; i < len; i++) rawBlob[i] = obj[String(i)] ?? 0;
1952
+ }
1254
1953
  }
1255
1954
  }
1256
- for (const fact of bundle.facts) {
1257
- const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
1258
- const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
1259
- const existing = existingFactsById.get(fact.id);
1260
- if (existing) {
1261
- if (existing.entity_id !== entityId) {
1262
- this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
1263
- continue;
1264
- }
1265
- if (merge) {
1266
- if (safeUpdatedAt <= existing.updated_at) continue;
1955
+ let blobData = null;
1956
+ if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
1957
+ const copy = new ArrayBuffer(rawBlob.byteLength);
1958
+ const alignedBlob = new Uint8Array(copy);
1959
+ alignedBlob.set(rawBlob);
1960
+ const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
1961
+ let allFinite = true;
1962
+ for (let i = 0; i < floats.length; i++) {
1963
+ if (!isFinite(floats[i])) {
1964
+ allFinite = false;
1965
+ break;
1267
1966
  }
1967
+ }
1968
+ if (allFinite) {
1969
+ blobData = alignedBlob;
1970
+ }
1971
+ }
1972
+ if (existing) {
1973
+ if (existing.entity_id !== entityId) {
1974
+ this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
1975
+ continue;
1976
+ }
1977
+ if (merge) {
1978
+ if (safeUpdatedAt <= existing.updated_at) continue;
1979
+ }
1980
+ if (blobData != null) {
1981
+ await this.db.runAsync(
1982
+ `UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = ?, embedding = NULL WHERE id = ?`,
1983
+ [entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData, fact.id]
1984
+ );
1985
+ factsWithPreservedBlob.set(fact.id, blobData);
1986
+ if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
1987
+ } else {
1268
1988
  await this.db.runAsync(
1269
- `UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ? WHERE id = ?`,
1989
+ `UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = NULL, embedding = NULL WHERE id = ?`,
1270
1990
  [entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, fact.id]
1271
1991
  );
1272
- existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
1992
+ }
1993
+ existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
1994
+ upsertedFactIds.add(fact.id);
1995
+ if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
1996
+ } else {
1997
+ if (blobData != null) {
1998
+ await this.db.runAsync(
1999
+ `INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at, embedding_blob) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
2000
+ [fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData]
2001
+ );
2002
+ factsWithPreservedBlob.set(fact.id, blobData);
2003
+ if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
1273
2004
  } else {
1274
2005
  await this.db.runAsync(
1275
2006
  `INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1276
2007
  [fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at]
1277
2008
  );
1278
- existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
1279
2009
  }
2010
+ existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
2011
+ upsertedFactIds.add(fact.id);
2012
+ if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
1280
2013
  }
1281
- const taskIds = bundle.tasks.map((task) => task.id);
1282
- const existingTasksById = /* @__PURE__ */ new Map();
1283
- const taskLookupChunkSize = 500;
1284
- for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
1285
- const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
1286
- if (taskIdChunk.length === 0) continue;
1287
- const placeholders = taskIdChunk.map(() => "?").join(", ");
1288
- const existingTasks = await this.db.getAllAsync(
1289
- `SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
1290
- taskIdChunk
1291
- );
1292
- for (const existingTask of existingTasks) {
1293
- existingTasksById.set(existingTask.id, existingTask);
2014
+ }
2015
+ const taskIds = bundle.tasks.map((task) => task.id);
2016
+ const existingTasksById = /* @__PURE__ */ new Map();
2017
+ const taskLookupChunkSize = 500;
2018
+ for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
2019
+ const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
2020
+ if (taskIdChunk.length === 0) continue;
2021
+ const placeholders = taskIdChunk.map(() => "?").join(", ");
2022
+ const existingTasks = await this.db.getAllAsync(
2023
+ `SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
2024
+ taskIdChunk
2025
+ );
2026
+ for (const existingTask of existingTasks) {
2027
+ existingTasksById.set(existingTask.id, existingTask);
2028
+ }
2029
+ }
2030
+ for (const task of bundle.tasks) {
2031
+ const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
2032
+ const existing = existingTasksById.get(task.id);
2033
+ if (existing) {
2034
+ if (existing.entity_id !== entityId) {
2035
+ this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
2036
+ continue;
2037
+ }
2038
+ if (merge) {
2039
+ if (safeUpdatedAt <= existing.updated_at) continue;
1294
2040
  }
2041
+ await this.db.runAsync(
2042
+ `UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
2043
+ [entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
2044
+ );
2045
+ existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
2046
+ } else {
2047
+ await this.db.runAsync(
2048
+ `INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
2049
+ [task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
2050
+ );
2051
+ existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
1295
2052
  }
1296
- for (const task of bundle.tasks) {
1297
- const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
1298
- const existing = existingTasksById.get(task.id);
1299
- if (existing) {
1300
- if (existing.entity_id !== entityId) {
1301
- this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
1302
- continue;
1303
- }
1304
- if (merge) {
1305
- if (safeUpdatedAt <= existing.updated_at) continue;
1306
- }
1307
- await this.db.runAsync(
1308
- `UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
1309
- [entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
1310
- );
1311
- existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
1312
- } else {
2053
+ }
2054
+ for (const event of bundle.events) {
2055
+ await this.db.runAsync(
2056
+ `INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
2057
+ VALUES (?, ?, ?, ?, ?, ?)`,
2058
+ [event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
2059
+ );
2060
+ }
2061
+ });
2062
+ this.vectorCache.delete(entityId);
2063
+ await this.rebuildMiniSearchIndex(entityId);
2064
+ for (const fact of bundle.facts) {
2065
+ if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
2066
+ await this.embedFact({
2067
+ id: fact.id,
2068
+ entity_id: entityId,
2069
+ // Use authoritative entityId from dump key, not fact.entity_id
2070
+ title: fact.title,
2071
+ body: fact.body,
2072
+ tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
2073
+ });
2074
+ }
2075
+ }
2076
+ for (const fact of bundle.facts) {
2077
+ const blobData = factsWithPreservedBlob.get(fact.id);
2078
+ if (blobData && !fact.deleted_at && upsertedFactIds.has(fact.id)) {
2079
+ try {
2080
+ const float32Vector = new Float32Array(blobData.buffer, blobData.byteOffset, blobData.byteLength / 4);
2081
+ await this._notifyEmbeddingPersisted(entityId, fact.id, float32Vector);
2082
+ } catch (hookErr) {
2083
+ console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for preserved-blob fact ${fact.id}:`, hookErr);
2084
+ }
2085
+ }
2086
+ }
2087
+ for (const factId of softDeletedFactIds) {
2088
+ if (!upsertedFactIds.has(factId) || upsertedDeletedFactIds.has(factId)) {
2089
+ try {
2090
+ await this._notifyEmbeddingPersisted(entityId, factId, null);
2091
+ } catch (hookErr) {
2092
+ console.warn(`[WikiMemory] onEmbeddingPersisted(vector=null) hook failed for soft-deleted fact ${factId}:`, hookErr);
2093
+ }
2094
+ }
2095
+ }
2096
+ try {
2097
+ const canonicalRow = await this.db.getFirstAsync(
2098
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
2099
+ );
2100
+ const canonicalDim = canonicalRow ? parseInt(canonicalRow.value, 10) : null;
2101
+ if (preservedBlobDims.size === 1) {
2102
+ const preservedDim = [...preservedBlobDims][0];
2103
+ if (canonicalDim === null || canonicalDim === preservedDim) {
2104
+ await this.storeEmbeddingDimension(preservedDim);
2105
+ const staleMismatch = await this.db.getFirstAsync(
2106
+ `SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
2107
+ );
2108
+ if (staleMismatch && parseInt(staleMismatch.value, 10) !== preservedDim) {
1313
2109
  await this.db.runAsync(
1314
- `INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1315
- [task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
2110
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
2111
+ [String(preservedDim)]
1316
2112
  );
1317
- existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
1318
2113
  }
1319
- }
1320
- for (const event of bundle.events) {
2114
+ await this._reconcileEmbeddingDimension();
2115
+ } else {
1321
2116
  await this.db.runAsync(
1322
- `INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
1323
- VALUES (?, ?, ?, ?, ?, ?)`,
1324
- [event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
2117
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
2118
+ [String(canonicalDim)]
1325
2119
  );
1326
2120
  }
1327
- });
1328
- for (const fact of bundle.facts) {
1329
- if (!fact.deleted_at) {
1330
- await this.embedFact({
1331
- id: fact.id,
1332
- title: fact.title,
1333
- body: fact.body,
1334
- tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
1335
- });
2121
+ } else if (preservedBlobDims.size > 1) {
2122
+ if (canonicalDim === null) {
2123
+ const sortedPreservedBlobDims = [...preservedBlobDims].sort((a, b) => a - b);
2124
+ await this.storeEmbeddingDimension(sortedPreservedBlobDims[0]);
2125
+ await this.db.runAsync(
2126
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
2127
+ [String(sortedPreservedBlobDims[0])]
2128
+ );
2129
+ } else {
2130
+ await this.db.runAsync(
2131
+ `INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
2132
+ [String(canonicalDim)]
2133
+ );
1336
2134
  }
1337
2135
  }
2136
+ } finally {
2137
+ this.vectorCache.delete(entityId);
1338
2138
  }
1339
- await this.rebuildMiniSearchIndex();
1340
2139
  }
1341
2140
  async forget(entityId, params) {
1342
- const now = Date.now();
1343
- let deletedEntries = 0;
1344
- let deletedTasks = 0;
1345
- if (params.clearAll) {
1346
- const [entriesRes, tasksRes] = await Promise.all([
1347
- this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId]),
1348
- this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId])
1349
- ]);
1350
- await this.db.runAsync(`UPDATE ${this.prefix}checkpoints SET memory_checkpoint = 0, heal_checkpoint = 0 WHERE entity_id = ?`, [entityId]);
1351
- deletedEntries = entriesRes.changes;
1352
- deletedTasks = tasksRes.changes;
1353
- } else {
1354
- const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
1355
- const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
1356
- if (hasIdSelectors && hasSourceSelectors) {
1357
- throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
1358
- }
1359
- const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
1360
- if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
1361
- const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
1362
- if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
1363
- const entryPromise = params.entryId ? this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.entryId, entityId]) : null;
1364
- const taskPromise = params.taskId ? this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.taskId, entityId]) : null;
1365
- let refPromise = null;
1366
- if (sourceRef || sourceHash) {
1367
- let q = `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`;
1368
- const args = [now, now, entityId];
1369
- if (sourceRef) {
1370
- q += ` AND source_ref = ?`;
1371
- args.push(sourceRef);
2141
+ let blockingOperation = null;
2142
+ if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
2143
+ blockingOperation = "librarian";
2144
+ } else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
2145
+ blockingOperation = "heal";
2146
+ } else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
2147
+ blockingOperation = "prune";
2148
+ } else if (this._isReembedActive(entityId)) {
2149
+ blockingOperation = "reembed";
2150
+ } else if (this._isIngestActiveFor(entityId)) {
2151
+ blockingOperation = "ingest";
2152
+ } else if (this._isImportActiveFor(entityId)) {
2153
+ blockingOperation = "import";
2154
+ } else if (this._isForgetActiveFor(entityId)) {
2155
+ blockingOperation = "forget";
2156
+ }
2157
+ if (blockingOperation !== null) {
2158
+ throw new WikiBusyError(blockingOperation, entityId);
2159
+ }
2160
+ const forgetKey = this._forgetKey(entityId);
2161
+ this.activeMaintenanceJobs.add(forgetKey);
2162
+ try {
2163
+ const now = Date.now();
2164
+ let deletedEntries = 0;
2165
+ let deletedTasks = 0;
2166
+ const deletedEntryIds = [];
2167
+ if (params.clearAll) {
2168
+ const entriesToDelete = await this.db.getAllAsync(
2169
+ `SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
2170
+ [entityId]
2171
+ );
2172
+ deletedEntryIds.push(...entriesToDelete.map((e) => e.id));
2173
+ const [entriesRes, tasksRes] = await Promise.all([
2174
+ this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId]),
2175
+ this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId])
2176
+ ]);
2177
+ await this.db.runAsync(`UPDATE ${this.prefix}checkpoints SET memory_checkpoint = 0, heal_checkpoint = 0 WHERE entity_id = ?`, [entityId]);
2178
+ deletedEntries = entriesRes.changes;
2179
+ deletedTasks = tasksRes.changes;
2180
+ } else {
2181
+ const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
2182
+ const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
2183
+ if (hasIdSelectors && hasSourceSelectors) {
2184
+ throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
1372
2185
  }
1373
- if (sourceHash) {
1374
- q += ` AND source_hash = ?`;
1375
- args.push(sourceHash);
2186
+ const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
2187
+ if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
2188
+ const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
2189
+ if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
2190
+ if (params.entryId) {
2191
+ const entry = await this.db.getFirstAsync(
2192
+ `SELECT id FROM ${this.prefix}entries WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`,
2193
+ [params.entryId, entityId]
2194
+ );
2195
+ if (entry) deletedEntryIds.push(entry.id);
2196
+ }
2197
+ if (sourceRef || sourceHash) {
2198
+ let q = `SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`;
2199
+ const args = [entityId];
2200
+ if (sourceRef) {
2201
+ q += ` AND source_ref = ?`;
2202
+ args.push(sourceRef);
2203
+ }
2204
+ if (sourceHash) {
2205
+ q += ` AND source_hash = ?`;
2206
+ args.push(sourceHash);
2207
+ }
2208
+ const entriesToDelete = await this.db.getAllAsync(q, args);
2209
+ deletedEntryIds.push(...entriesToDelete.map((e) => e.id));
2210
+ }
2211
+ const entryPromise = params.entryId ? this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.entryId, entityId]) : null;
2212
+ const taskPromise = params.taskId ? this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.taskId, entityId]) : null;
2213
+ let refPromise = null;
2214
+ if (sourceRef || sourceHash) {
2215
+ let q = `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`;
2216
+ const args = [now, now, entityId];
2217
+ if (sourceRef) {
2218
+ q += ` AND source_ref = ?`;
2219
+ args.push(sourceRef);
2220
+ }
2221
+ if (sourceHash) {
2222
+ q += ` AND source_hash = ?`;
2223
+ args.push(sourceHash);
2224
+ }
2225
+ refPromise = this.db.runAsync(q, args);
1376
2226
  }
1377
- refPromise = this.db.runAsync(q, args);
2227
+ const [entryResult, taskResult, refResult] = await Promise.all([
2228
+ entryPromise ?? Promise.resolve(null),
2229
+ taskPromise ?? Promise.resolve(null),
2230
+ refPromise ?? Promise.resolve(null)
2231
+ ]);
2232
+ if (entryResult) deletedEntries += entryResult.changes;
2233
+ if (taskResult) deletedTasks += taskResult.changes;
2234
+ if (refResult) deletedEntries += refResult.changes;
1378
2235
  }
1379
- const [entryResult, taskResult, refResult] = await Promise.all([
1380
- entryPromise ?? Promise.resolve(null),
1381
- taskPromise ?? Promise.resolve(null),
1382
- refPromise ?? Promise.resolve(null)
1383
- ]);
1384
- if (entryResult) deletedEntries += entryResult.changes;
1385
- if (taskResult) deletedTasks += taskResult.changes;
1386
- if (refResult) deletedEntries += refResult.changes;
2236
+ await this.rebuildMiniSearchIndex(entityId);
2237
+ this.vectorCache.delete(entityId);
2238
+ const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
2239
+ for (const factId of uniqueDeletedIds) {
2240
+ try {
2241
+ await this._notifyEmbeddingPersisted(entityId, factId, null);
2242
+ } catch (hookErr) {
2243
+ console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during forget for ${factId}:`, hookErr);
2244
+ }
2245
+ }
2246
+ return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
2247
+ } finally {
2248
+ this.activeMaintenanceJobs.delete(forgetKey);
1387
2249
  }
1388
- await this.rebuildMiniSearchIndex(entityId);
1389
- return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
1390
2250
  }
1391
2251
  async ingestDocument(entityId, params) {
1392
2252
  const sourceRef = normalizeSourceRef(params.sourceRef);
@@ -1414,6 +2274,12 @@ The following document anchors are provided for contradiction detection only. Do
1414
2274
  if (this._isReembedActive(entityId)) {
1415
2275
  throw new WikiBusyError("reembed", entityId);
1416
2276
  }
2277
+ if (this._isImportActiveFor(entityId)) {
2278
+ throw new WikiBusyError("import", entityId);
2279
+ }
2280
+ if (this._isForgetActiveFor(entityId)) {
2281
+ throw new WikiBusyError("forget", entityId);
2282
+ }
1417
2283
  this.activeIngestJobs.add(jobKey);
1418
2284
  try {
1419
2285
  const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
@@ -1446,7 +2312,15 @@ ${chunk}`;
1446
2312
  }
1447
2313
  const now = Date.now();
1448
2314
  const insertedFacts = [];
2315
+ const deletedSourceFactIds = [];
1449
2316
  await this.db.withTransactionAsync(async () => {
2317
+ const existingSourceFacts = await this.db.getAllAsync(
2318
+ `SELECT id FROM ${this.prefix}entries WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
2319
+ [sourceRef, entityId]
2320
+ );
2321
+ for (const row of existingSourceFacts) {
2322
+ deletedSourceFactIds.push(row.id);
2323
+ }
1450
2324
  await this.db.runAsync(
1451
2325
  `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
1452
2326
  [now, now, sourceRef, entityId]
@@ -1458,19 +2332,42 @@ ${chunk}`;
1458
2332
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
1459
2333
  [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]
1460
2334
  );
1461
- insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
2335
+ insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
1462
2336
  }
1463
2337
  });
2338
+ await this.rebuildMiniSearchIndex(entityId);
2339
+ this.vectorCache.delete(entityId);
2340
+ const uniqueDeletedSourceFactIds = Array.from(new Set(deletedSourceFactIds));
2341
+ for (const factId of uniqueDeletedSourceFactIds) {
2342
+ try {
2343
+ await this._notifyEmbeddingPersisted(entityId, factId, null);
2344
+ } catch (hookErr) {
2345
+ console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during ingest for ${factId}:`, hookErr);
2346
+ }
2347
+ }
1464
2348
  for (const fact of insertedFacts) {
1465
2349
  await this.embedFact(fact);
1466
2350
  }
1467
- await this.rebuildMiniSearchIndex(entityId);
2351
+ this.vectorCache.delete(entityId);
1468
2352
  return { truncated, chunks: chunks.length };
1469
2353
  } finally {
1470
2354
  this.activeIngestJobs.delete(jobKey);
1471
2355
  }
1472
2356
  }
1473
2357
  };
2358
+ /**
2359
+ * Maximum number of entities whose parsed embedding vectors are held in
2360
+ * memory. This cap is intentionally conservative so the cache remains safe
2361
+ * on memory-constrained runtimes (e.g., mobile/Expo).
2362
+ */
2363
+ _WikiMemory.MAX_VECTOR_CACHE_ENTITIES = 16;
2364
+ /**
2365
+ * Maximum number of fact vectors cached per entity. Keep this high enough to
2366
+ * preserve the parsed-embedding reuse optimization for common mid-sized
2367
+ * entities while still maintaining a bounded memory footprint.
2368
+ */
2369
+ _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
2370
+ var WikiMemory = _WikiMemory;
1474
2371
 
1475
2372
  // src/utils/formatContext.ts
1476
2373
  function validateMaxOption(value, name) {
@@ -1666,8 +2563,23 @@ function formatMemoryDump(dump) {
1666
2563
  name: formatEntityFileName(entityId),
1667
2564
  content: renderEntity(entityId, bundle, dump.generatedAt)
1668
2565
  }));
2566
+ const manifestDump = {
2567
+ generatedAt: dump.generatedAt,
2568
+ entities: Object.fromEntries(
2569
+ Object.entries(dump.entities).map(([entityId, bundle]) => [
2570
+ entityId,
2571
+ {
2572
+ ...bundle,
2573
+ facts: bundle.facts.map((f) => {
2574
+ const { embedding_blob: _blob, ...rest } = f;
2575
+ return rest;
2576
+ })
2577
+ }
2578
+ ])
2579
+ )
2580
+ };
1669
2581
  return {
1670
- manifest: JSON.stringify(dump, null, 2),
2582
+ manifest: JSON.stringify(manifestDump, null, 2),
1671
2583
  files
1672
2584
  };
1673
2585
  }