@equationalapplications/core-llm-wiki 2.6.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +329 -1
- package/dist/index.d.mts +184 -11
- package/dist/index.d.ts +184 -11
- package/dist/index.js +1134 -222
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1134 -222
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -24,7 +24,8 @@ async function setupDatabase(db, prefix) {
|
|
|
24
24
|
last_accessed_at INTEGER,
|
|
25
25
|
access_count INTEGER NOT NULL DEFAULT 0,
|
|
26
26
|
deleted_at INTEGER,
|
|
27
|
-
embedding TEXT
|
|
27
|
+
embedding TEXT,
|
|
28
|
+
embedding_blob BLOB
|
|
28
29
|
);
|
|
29
30
|
|
|
30
31
|
CREATE INDEX IF NOT EXISTS ${prefix}entries_entity_idx ON ${prefix}entries(entity_id);
|
|
@@ -97,6 +98,20 @@ var MIGRATIONS = [
|
|
|
97
98
|
await db.execAsync(`ALTER TABLE ${prefix}entries ADD COLUMN embedding TEXT`);
|
|
98
99
|
}
|
|
99
100
|
}
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
version: 3,
|
|
104
|
+
description: "Add embedding_blob BLOB column for Float32Array vector storage",
|
|
105
|
+
run: async (db, prefix) => {
|
|
106
|
+
const cols = await db.getAllAsync(
|
|
107
|
+
`PRAGMA table_info(${prefix}entries)`
|
|
108
|
+
);
|
|
109
|
+
if (!cols.some((c) => c.name === "embedding_blob")) {
|
|
110
|
+
await db.execAsync(
|
|
111
|
+
`ALTER TABLE ${prefix}entries ADD COLUMN embedding_blob BLOB`
|
|
112
|
+
);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
100
115
|
}
|
|
101
116
|
];
|
|
102
117
|
for (let i = 1; i < MIGRATIONS.length; i++) {
|
|
@@ -154,6 +169,34 @@ function cosineSimilarity(a, b) {
|
|
|
154
169
|
return denom === 0 ? 0 : dot / denom;
|
|
155
170
|
}
|
|
156
171
|
|
|
172
|
+
// src/utils/embedding.ts
|
|
173
|
+
function parseEmbedding(blob, text) {
|
|
174
|
+
if (blob && blob.byteLength > 0) {
|
|
175
|
+
if (blob.byteLength % 4 !== 0) return null;
|
|
176
|
+
const copy = new ArrayBuffer(blob.byteLength);
|
|
177
|
+
new Uint8Array(copy).set(blob);
|
|
178
|
+
const vector = new Float32Array(copy);
|
|
179
|
+
for (const value of vector) {
|
|
180
|
+
if (!Number.isFinite(value)) return null;
|
|
181
|
+
}
|
|
182
|
+
return vector;
|
|
183
|
+
}
|
|
184
|
+
if (text) {
|
|
185
|
+
try {
|
|
186
|
+
const arr = JSON.parse(text);
|
|
187
|
+
if (!Array.isArray(arr) || !arr.every((v) => typeof v === "number" && isFinite(v))) return null;
|
|
188
|
+
const vector = new Float32Array(arr);
|
|
189
|
+
for (const value of vector) {
|
|
190
|
+
if (!Number.isFinite(value)) return null;
|
|
191
|
+
}
|
|
192
|
+
return vector;
|
|
193
|
+
} catch {
|
|
194
|
+
return null;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
return null;
|
|
198
|
+
}
|
|
199
|
+
|
|
157
200
|
// src/WikiMemory.ts
|
|
158
201
|
function parseJsonResponse(text) {
|
|
159
202
|
const firstBrace = text.indexOf("{");
|
|
@@ -358,7 +401,7 @@ function jaccardScore(a, b) {
|
|
|
358
401
|
}
|
|
359
402
|
var FUZZY_THRESHOLD = 0.5;
|
|
360
403
|
var MIN_TOKENS_TO_QUALIFY = 3;
|
|
361
|
-
var
|
|
404
|
+
var _WikiMemory = class _WikiMemory {
|
|
362
405
|
constructor(db, options) {
|
|
363
406
|
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
364
407
|
this.activeIngestJobs = /* @__PURE__ */ new Set();
|
|
@@ -372,6 +415,7 @@ var WikiMemory = class {
|
|
|
372
415
|
}
|
|
373
416
|
});
|
|
374
417
|
this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
|
|
418
|
+
this.vectorCache = /* @__PURE__ */ new Map();
|
|
375
419
|
this.db = db;
|
|
376
420
|
this.options = options;
|
|
377
421
|
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
@@ -438,10 +482,6 @@ var WikiMemory = class {
|
|
|
438
482
|
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
439
483
|
[String(dim)]
|
|
440
484
|
);
|
|
441
|
-
} else {
|
|
442
|
-
await this.db.runAsync(
|
|
443
|
-
`DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
444
|
-
);
|
|
445
485
|
}
|
|
446
486
|
} else {
|
|
447
487
|
await this.db.runAsync(
|
|
@@ -460,7 +500,18 @@ var WikiMemory = class {
|
|
|
460
500
|
const mismatch = await this.db.getFirstAsync(
|
|
461
501
|
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
462
502
|
);
|
|
463
|
-
if (mismatch)
|
|
503
|
+
if (!mismatch) return;
|
|
504
|
+
const newDim = parseInt(mismatch.value, 10);
|
|
505
|
+
const residual = await this.db.getFirstAsync(
|
|
506
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
507
|
+
WHERE deleted_at IS NULL
|
|
508
|
+
AND (
|
|
509
|
+
(embedding_blob IS NOT NULL AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?)
|
|
510
|
+
OR (embedding_blob IS NULL AND embedding IS NOT NULL)
|
|
511
|
+
)`,
|
|
512
|
+
[newDim]
|
|
513
|
+
);
|
|
514
|
+
if (!residual || residual.cnt === 0) {
|
|
464
515
|
await this.db.runAsync(
|
|
465
516
|
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
|
|
466
517
|
[mismatch.value]
|
|
@@ -491,11 +542,29 @@ var WikiMemory = class {
|
|
|
491
542
|
console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
|
|
492
543
|
return false;
|
|
493
544
|
}
|
|
494
|
-
|
|
545
|
+
const float32Vector = new Float32Array(vector);
|
|
546
|
+
let hasNonFinite = false;
|
|
547
|
+
for (let i = 0; i < float32Vector.length; i++) {
|
|
548
|
+
if (!isFinite(float32Vector[i])) {
|
|
549
|
+
hasNonFinite = true;
|
|
550
|
+
break;
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
if (hasNonFinite) {
|
|
554
|
+
console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
|
|
555
|
+
return false;
|
|
556
|
+
}
|
|
557
|
+
await this.storeEmbeddingDimension(float32Vector.length);
|
|
558
|
+
const blob = new Uint8Array(float32Vector.buffer);
|
|
495
559
|
await this.db.runAsync(
|
|
496
|
-
`UPDATE ${this.prefix}entries SET embedding =
|
|
497
|
-
[
|
|
560
|
+
`UPDATE ${this.prefix}entries SET embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
561
|
+
[blob, fact.id]
|
|
498
562
|
);
|
|
563
|
+
try {
|
|
564
|
+
await this._notifyEmbeddingPersisted(fact.entity_id, fact.id, float32Vector);
|
|
565
|
+
} catch (hookErr) {
|
|
566
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for ${fact.id}:`, hookErr);
|
|
567
|
+
}
|
|
499
568
|
return true;
|
|
500
569
|
} catch (err) {
|
|
501
570
|
console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
|
|
@@ -511,6 +580,9 @@ var WikiMemory = class {
|
|
|
511
580
|
_warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
|
|
512
581
|
console.warn(`[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`);
|
|
513
582
|
}
|
|
583
|
+
async _notifyEmbeddingPersisted(entityId, factId, vector) {
|
|
584
|
+
await this.options.vectorRanker?.onEmbeddingPersisted?.({ entityId, factId, vector });
|
|
585
|
+
}
|
|
514
586
|
async setup() {
|
|
515
587
|
const entriesExistedBeforeSetup = await this.db.getFirstAsync(
|
|
516
588
|
`SELECT name FROM sqlite_master WHERE type='table' AND name=?`,
|
|
@@ -614,9 +686,24 @@ var WikiMemory = class {
|
|
|
614
686
|
_globalReembedKey() {
|
|
615
687
|
return `${this.prefix}:reembed`;
|
|
616
688
|
}
|
|
689
|
+
_importKey(entityId) {
|
|
690
|
+
return `${this.prefix}:${entityId}:import`;
|
|
691
|
+
}
|
|
692
|
+
_globalImportKey() {
|
|
693
|
+
return `${this.prefix}:import`;
|
|
694
|
+
}
|
|
695
|
+
_forgetKey(entityId) {
|
|
696
|
+
return `${this.prefix}:${entityId}:forget`;
|
|
697
|
+
}
|
|
617
698
|
_isReembedActive(entityId) {
|
|
618
699
|
return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
|
|
619
700
|
}
|
|
701
|
+
_isImportActiveFor(entityId) {
|
|
702
|
+
return this.activeMaintenanceJobs.has(this._importKey(entityId)) || this.activeMaintenanceJobs.has(this._globalImportKey());
|
|
703
|
+
}
|
|
704
|
+
_isForgetActiveFor(entityId) {
|
|
705
|
+
return this.activeMaintenanceJobs.has(this._forgetKey(entityId));
|
|
706
|
+
}
|
|
620
707
|
/** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
|
|
621
708
|
_isAnyMaintenanceActiveWithSuffix(suffix) {
|
|
622
709
|
const entityKeyPrefix = `${this.prefix}:`;
|
|
@@ -659,6 +746,10 @@ var WikiMemory = class {
|
|
|
659
746
|
blockingOperation = "reembed";
|
|
660
747
|
} else if (isIngestRunning) {
|
|
661
748
|
blockingOperation = "ingest";
|
|
749
|
+
} else if (this._isImportActiveFor(entityId)) {
|
|
750
|
+
blockingOperation = "import";
|
|
751
|
+
} else if (this._isForgetActiveFor(entityId)) {
|
|
752
|
+
blockingOperation = "forget";
|
|
662
753
|
}
|
|
663
754
|
if (blockingOperation !== null) {
|
|
664
755
|
throw new WikiBusyError(blockingOperation, entityId);
|
|
@@ -674,8 +765,15 @@ var WikiMemory = class {
|
|
|
674
765
|
let deletedEntries = 0;
|
|
675
766
|
let deletedTasks = 0;
|
|
676
767
|
let deletedEvents = 0;
|
|
768
|
+
const deletedEntryIds = [];
|
|
677
769
|
if (retainSoftDeletedFor !== null) {
|
|
678
770
|
const cutoff = now - retainSoftDeletedFor * 864e5;
|
|
771
|
+
const entriesToDelete = await this.db.getAllAsync(
|
|
772
|
+
`SELECT id FROM ${this.prefix}entries
|
|
773
|
+
WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at < ?`,
|
|
774
|
+
[entityId, cutoff]
|
|
775
|
+
);
|
|
776
|
+
deletedEntryIds.push(...entriesToDelete.map((e) => e.id));
|
|
679
777
|
const entryResult = await this.db.runAsync(
|
|
680
778
|
`DELETE FROM ${this.prefix}entries
|
|
681
779
|
WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at < ?`,
|
|
@@ -703,19 +801,39 @@ var WikiMemory = class {
|
|
|
703
801
|
await this.db.execAsync(`VACUUM`);
|
|
704
802
|
}
|
|
705
803
|
await this.rebuildMiniSearchIndex(entityId);
|
|
804
|
+
this.vectorCache.delete(entityId);
|
|
805
|
+
const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
|
|
806
|
+
for (const factId of uniqueDeletedIds) {
|
|
807
|
+
try {
|
|
808
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
809
|
+
} catch (hookErr) {
|
|
810
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during prune for ${factId}:`, hookErr);
|
|
811
|
+
}
|
|
812
|
+
}
|
|
706
813
|
return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
|
|
707
814
|
} finally {
|
|
708
815
|
this.activeMaintenanceJobs.delete(pruneKey);
|
|
709
816
|
}
|
|
710
817
|
}
|
|
711
|
-
async read(entityId, query) {
|
|
712
|
-
const
|
|
818
|
+
async read(entityId, query, options) {
|
|
819
|
+
const config = this.options.config;
|
|
820
|
+
const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
|
|
821
|
+
const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
|
|
822
|
+
const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
|
|
823
|
+
const effectivePreFilterLimit = rawPreFilterLimit === void 0 ? void 0 : Number.isFinite(rawPreFilterLimit) ? Math.max(0, Math.trunc(rawPreFilterLimit)) : void 0;
|
|
824
|
+
const hybridWeight = options?.hybridWeight ?? config?.hybridWeight;
|
|
825
|
+
const weight = hybridWeight !== void 0 && !Number.isNaN(hybridWeight) ? Math.max(0, Math.min(1, hybridWeight)) : void 0;
|
|
826
|
+
const skipEmbed = weight === 0;
|
|
713
827
|
const embedFn = this.options.llmProvider.embed;
|
|
714
828
|
const trimmedQuery = query.trim();
|
|
715
829
|
let facts = [];
|
|
716
|
-
if (trimmedQuery) {
|
|
830
|
+
if (maxResults === 0) ; else if (trimmedQuery) {
|
|
717
831
|
let usedEmbed = false;
|
|
718
|
-
if (embedFn) {
|
|
832
|
+
if (!skipEmbed && embedFn) {
|
|
833
|
+
let rankerShouldRethrow = false;
|
|
834
|
+
let pendingRankerFallbackError;
|
|
835
|
+
let usedKeywordFallback = false;
|
|
836
|
+
let scoredAlreadySortedAndLimited = false;
|
|
719
837
|
try {
|
|
720
838
|
const queryVec = await embedFn(trimmedQuery);
|
|
721
839
|
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
@@ -734,51 +852,350 @@ var WikiMemory = class {
|
|
|
734
852
|
);
|
|
735
853
|
}
|
|
736
854
|
}
|
|
737
|
-
const
|
|
738
|
-
`SELECT
|
|
739
|
-
|
|
855
|
+
const mismatchedCount = await this.db.getFirstAsync(
|
|
856
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
857
|
+
WHERE entity_id = ? AND deleted_at IS NULL
|
|
858
|
+
AND embedding_blob IS NOT NULL
|
|
859
|
+
AND (CAST(length(embedding_blob) AS INTEGER) % 4 = 0)
|
|
860
|
+
AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?`,
|
|
861
|
+
[entityId, queryVec.length]
|
|
740
862
|
);
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
863
|
+
if (mismatchedCount && mismatchedCount.cnt > 0) {
|
|
864
|
+
throw new Error(
|
|
865
|
+
`Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
|
|
866
|
+
);
|
|
867
|
+
}
|
|
868
|
+
const useRanker = Boolean(this.options.vectorRanker);
|
|
869
|
+
let candidateRows;
|
|
870
|
+
let populateCache = true;
|
|
871
|
+
let miniSearchScores;
|
|
872
|
+
if (effectivePreFilterLimit !== void 0) {
|
|
873
|
+
populateCache = false;
|
|
874
|
+
const preResults = this.miniSearch.search(trimmedQuery, {
|
|
875
|
+
filter: (r) => r.entity_id === entityId,
|
|
876
|
+
combineWith: "OR"
|
|
877
|
+
});
|
|
878
|
+
if (preResults.length === 0) {
|
|
879
|
+
candidateRows = null;
|
|
880
|
+
} else {
|
|
881
|
+
const topKResults = preResults.slice(0, effectivePreFilterLimit);
|
|
882
|
+
if (topKResults.length === 0) {
|
|
883
|
+
candidateRows = null;
|
|
884
|
+
} else {
|
|
885
|
+
const topKIds = topKResults.map((r) => r.id);
|
|
886
|
+
const inClauseChunkSize = 500;
|
|
887
|
+
if (useRanker) {
|
|
888
|
+
const rows = [];
|
|
889
|
+
for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
|
|
890
|
+
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
891
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
892
|
+
const chunkRows = await this.db.getAllAsync(
|
|
893
|
+
`SELECT id, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
894
|
+
idChunk
|
|
895
|
+
);
|
|
896
|
+
rows.push(...chunkRows);
|
|
897
|
+
}
|
|
898
|
+
candidateRows = rows;
|
|
899
|
+
} else {
|
|
900
|
+
const rows = [];
|
|
901
|
+
for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
|
|
902
|
+
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
903
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
904
|
+
const chunkRows = await this.db.getAllAsync(
|
|
905
|
+
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
906
|
+
idChunk
|
|
907
|
+
);
|
|
908
|
+
rows.push(...chunkRows);
|
|
909
|
+
}
|
|
910
|
+
candidateRows = rows;
|
|
911
|
+
}
|
|
912
|
+
if (weight !== void 0 && weight < 1) {
|
|
913
|
+
const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
|
|
914
|
+
miniSearchScores = new Map(topKResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
748
915
|
}
|
|
749
|
-
} catch {
|
|
750
916
|
}
|
|
751
917
|
}
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
918
|
+
} else {
|
|
919
|
+
if (useRanker) {
|
|
920
|
+
candidateRows = await this.db.getAllAsync(
|
|
921
|
+
`SELECT id, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
922
|
+
[entityId]
|
|
923
|
+
);
|
|
924
|
+
} else {
|
|
925
|
+
candidateRows = await this.db.getAllAsync(
|
|
926
|
+
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
927
|
+
[entityId]
|
|
928
|
+
);
|
|
758
929
|
}
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
930
|
+
if (weight !== void 0 && weight < 1) {
|
|
931
|
+
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
932
|
+
filter: (r) => r.entity_id === entityId,
|
|
933
|
+
combineWith: "OR"
|
|
934
|
+
});
|
|
935
|
+
const maxMsScore = Math.max(1, msResults[0]?.score ?? 1);
|
|
936
|
+
miniSearchScores = new Map(msResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
937
|
+
}
|
|
938
|
+
}
|
|
939
|
+
if (candidateRows === null) {
|
|
940
|
+
usedEmbed = true;
|
|
941
|
+
} else {
|
|
942
|
+
let scored;
|
|
943
|
+
if (useRanker) {
|
|
944
|
+
const candidateIds = effectivePreFilterLimit !== void 0 ? candidateRows.map((r) => r.id) : void 0;
|
|
945
|
+
try {
|
|
946
|
+
const oversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
947
|
+
scored = await this._rankWithVectorRanker({
|
|
948
|
+
entityId,
|
|
949
|
+
queryVec,
|
|
950
|
+
candidateIds,
|
|
951
|
+
weight,
|
|
952
|
+
miniSearchScores,
|
|
953
|
+
limit: oversampledLimit
|
|
954
|
+
});
|
|
955
|
+
if (scored.length > 0) {
|
|
956
|
+
const scoredIds2 = new Set(scored.map((s) => s.id));
|
|
957
|
+
const metaMap = /* @__PURE__ */ new Map();
|
|
958
|
+
for (const r of candidateRows) {
|
|
959
|
+
if (scoredIds2.has(r.id)) {
|
|
960
|
+
metaMap.set(r.id, { updated_at: r.updated_at, access_count: r.access_count });
|
|
961
|
+
}
|
|
962
|
+
}
|
|
963
|
+
scored = scored.map((s) => {
|
|
964
|
+
const meta = metaMap.get(s.id);
|
|
965
|
+
return { ...s, updated_at: meta?.updated_at ?? null, access_count: meta?.access_count ?? null };
|
|
966
|
+
});
|
|
967
|
+
}
|
|
968
|
+
const scoredIds = new Set(scored.map((s) => s.id));
|
|
969
|
+
const isHybrid = weight !== void 0 && weight < 1;
|
|
970
|
+
const maxBackfill = isHybrid ? maxResults : Math.max(0, maxResults - scored.length);
|
|
971
|
+
if (maxBackfill > 0) {
|
|
972
|
+
if (isHybrid) {
|
|
973
|
+
const topK = [];
|
|
974
|
+
for (const row of candidateRows) {
|
|
975
|
+
if (scoredIds.has(row.id)) continue;
|
|
976
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
977
|
+
const candidate = { row, kwScore };
|
|
978
|
+
if (topK.length < maxBackfill) {
|
|
979
|
+
let insertIdx = topK.length;
|
|
980
|
+
for (let i = 0; i < topK.length; i++) {
|
|
981
|
+
const cmp = this._compareScoredRows(
|
|
982
|
+
{
|
|
983
|
+
id: candidate.row.id,
|
|
984
|
+
score: candidate.kwScore,
|
|
985
|
+
updated_at: candidate.row.updated_at,
|
|
986
|
+
access_count: candidate.row.access_count
|
|
987
|
+
},
|
|
988
|
+
{
|
|
989
|
+
id: topK[i].row.id,
|
|
990
|
+
score: topK[i].kwScore,
|
|
991
|
+
updated_at: topK[i].row.updated_at,
|
|
992
|
+
access_count: topK[i].row.access_count
|
|
993
|
+
}
|
|
994
|
+
);
|
|
995
|
+
if (cmp < 0) {
|
|
996
|
+
insertIdx = i;
|
|
997
|
+
break;
|
|
998
|
+
}
|
|
999
|
+
}
|
|
1000
|
+
topK.splice(insertIdx, 0, candidate);
|
|
1001
|
+
} else {
|
|
1002
|
+
const cmpWorst = this._compareScoredRows(
|
|
1003
|
+
{
|
|
1004
|
+
id: candidate.row.id,
|
|
1005
|
+
score: candidate.kwScore,
|
|
1006
|
+
updated_at: candidate.row.updated_at,
|
|
1007
|
+
access_count: candidate.row.access_count
|
|
1008
|
+
},
|
|
1009
|
+
{
|
|
1010
|
+
id: topK[maxBackfill - 1].row.id,
|
|
1011
|
+
score: topK[maxBackfill - 1].kwScore,
|
|
1012
|
+
updated_at: topK[maxBackfill - 1].row.updated_at,
|
|
1013
|
+
access_count: topK[maxBackfill - 1].row.access_count
|
|
1014
|
+
}
|
|
1015
|
+
);
|
|
1016
|
+
if (cmpWorst < 0) {
|
|
1017
|
+
let insertIdx = maxBackfill - 1;
|
|
1018
|
+
for (let i = 0; i < topK.length; i++) {
|
|
1019
|
+
const cmp = this._compareScoredRows(
|
|
1020
|
+
{
|
|
1021
|
+
id: candidate.row.id,
|
|
1022
|
+
score: candidate.kwScore,
|
|
1023
|
+
updated_at: candidate.row.updated_at,
|
|
1024
|
+
access_count: candidate.row.access_count
|
|
1025
|
+
},
|
|
1026
|
+
{
|
|
1027
|
+
id: topK[i].row.id,
|
|
1028
|
+
score: topK[i].kwScore,
|
|
1029
|
+
updated_at: topK[i].row.updated_at,
|
|
1030
|
+
access_count: topK[i].row.access_count
|
|
1031
|
+
}
|
|
1032
|
+
);
|
|
1033
|
+
if (cmp < 0) {
|
|
1034
|
+
insertIdx = i;
|
|
1035
|
+
break;
|
|
1036
|
+
}
|
|
1037
|
+
}
|
|
1038
|
+
topK.splice(insertIdx, 0, candidate);
|
|
1039
|
+
topK.pop();
|
|
1040
|
+
}
|
|
1041
|
+
}
|
|
1042
|
+
}
|
|
1043
|
+
for (const { row, kwScore } of topK) {
|
|
1044
|
+
scored.push({
|
|
1045
|
+
id: row.id,
|
|
1046
|
+
score: (1 - weight) * kwScore,
|
|
1047
|
+
updated_at: row.updated_at,
|
|
1048
|
+
access_count: row.access_count
|
|
1049
|
+
});
|
|
1050
|
+
}
|
|
1051
|
+
} else {
|
|
1052
|
+
const omitted = [];
|
|
1053
|
+
for (const row of candidateRows) {
|
|
1054
|
+
if (scoredIds.has(row.id)) continue;
|
|
1055
|
+
omitted.push({ id: row.id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
|
|
1056
|
+
}
|
|
1057
|
+
if (omitted.length > 0) {
|
|
1058
|
+
this._tieBreakSort(omitted);
|
|
1059
|
+
scored.push(...omitted.slice(0, maxBackfill));
|
|
1060
|
+
}
|
|
1061
|
+
}
|
|
1062
|
+
}
|
|
1063
|
+
} catch (rankerErr) {
|
|
1064
|
+
const rankerError = rankerErr instanceof Error ? rankerErr : new Error(String(rankerErr));
|
|
1065
|
+
const policy = this.options.vectorRankerFallback ?? "js-cosine";
|
|
1066
|
+
this.options.onVectorRankerFallback?.({ error: rankerError, policy });
|
|
1067
|
+
if (policy === "throw") {
|
|
1068
|
+
rankerShouldRethrow = true;
|
|
1069
|
+
throw rankerError;
|
|
1070
|
+
} else if (policy === "js-cosine") {
|
|
1071
|
+
let fallbackRows = candidateRows;
|
|
1072
|
+
if (fallbackRows && fallbackRows.length > 0 && !("embedding_blob" in fallbackRows[0])) {
|
|
1073
|
+
const rowIds = fallbackRows.map((r) => r.id);
|
|
1074
|
+
const embeddingsMap = /* @__PURE__ */ new Map();
|
|
1075
|
+
const chunkSize = 500;
|
|
1076
|
+
for (let i = 0; i < rowIds.length; i += chunkSize) {
|
|
1077
|
+
const idChunk = rowIds.slice(i, i + chunkSize);
|
|
1078
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1079
|
+
const embeddingRows = await this.db.getAllAsync(
|
|
1080
|
+
`SELECT id, embedding_blob, embedding FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1081
|
+
[...idChunk, entityId]
|
|
1082
|
+
);
|
|
1083
|
+
for (const row of embeddingRows) {
|
|
1084
|
+
embeddingsMap.set(row.id, { embedding_blob: row.embedding_blob, embedding: row.embedding });
|
|
1085
|
+
}
|
|
1086
|
+
}
|
|
1087
|
+
fallbackRows = fallbackRows.map((r) => ({
|
|
1088
|
+
...r,
|
|
1089
|
+
embedding_blob: embeddingsMap.get(r.id)?.embedding_blob ?? null,
|
|
1090
|
+
embedding: embeddingsMap.get(r.id)?.embedding ?? null
|
|
1091
|
+
}));
|
|
1092
|
+
}
|
|
1093
|
+
scored = await this._rankWithJsCosine({
|
|
1094
|
+
entityId,
|
|
1095
|
+
queryVec,
|
|
1096
|
+
candidateRows: fallbackRows,
|
|
1097
|
+
weight,
|
|
1098
|
+
miniSearchScores,
|
|
1099
|
+
populateCache,
|
|
1100
|
+
limit: maxResults
|
|
1101
|
+
});
|
|
1102
|
+
scoredAlreadySortedAndLimited = true;
|
|
1103
|
+
} else if (policy === "keyword") {
|
|
1104
|
+
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
1105
|
+
filter: (r) => r.entity_id === entityId,
|
|
1106
|
+
combineWith: "OR"
|
|
1107
|
+
});
|
|
1108
|
+
const topResults = msResults.slice(0, maxResults);
|
|
1109
|
+
const resultIds = new Set(topResults.map((r) => r.id));
|
|
1110
|
+
const candidateMap = /* @__PURE__ */ new Map();
|
|
1111
|
+
for (const r of candidateRows) {
|
|
1112
|
+
if (resultIds.has(r.id)) {
|
|
1113
|
+
candidateMap.set(r.id, { updated_at: r.updated_at, access_count: r.access_count });
|
|
1114
|
+
}
|
|
1115
|
+
}
|
|
1116
|
+
scored = topResults.map((r) => {
|
|
1117
|
+
const meta = candidateMap.get(r.id);
|
|
1118
|
+
return {
|
|
1119
|
+
id: r.id,
|
|
1120
|
+
score: r.score ?? 0,
|
|
1121
|
+
access_count: meta?.access_count ?? null,
|
|
1122
|
+
updated_at: meta?.updated_at ?? null
|
|
1123
|
+
};
|
|
1124
|
+
});
|
|
1125
|
+
usedKeywordFallback = true;
|
|
1126
|
+
} else {
|
|
1127
|
+
scored = [];
|
|
1128
|
+
}
|
|
1129
|
+
if (this.options.propagateRankerFailureToRetrievalFallback) {
|
|
1130
|
+
const mirrored = new Error("Vector ranker failed, falling back");
|
|
1131
|
+
mirrored.cause = rankerError;
|
|
1132
|
+
pendingRankerFallbackError = mirrored;
|
|
1133
|
+
}
|
|
1134
|
+
}
|
|
1135
|
+
} else {
|
|
1136
|
+
scored = await this._rankWithJsCosine({
|
|
1137
|
+
entityId,
|
|
1138
|
+
queryVec,
|
|
1139
|
+
candidateRows,
|
|
1140
|
+
weight,
|
|
1141
|
+
miniSearchScores,
|
|
1142
|
+
populateCache,
|
|
1143
|
+
limit: maxResults
|
|
1144
|
+
});
|
|
1145
|
+
scoredAlreadySortedAndLimited = true;
|
|
762
1146
|
}
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
1147
|
+
if (scored.length > 0) {
|
|
1148
|
+
if (!usedKeywordFallback && !scoredAlreadySortedAndLimited) {
|
|
1149
|
+
this._tieBreakSort(scored);
|
|
1150
|
+
}
|
|
1151
|
+
const topIds = (scoredAlreadySortedAndLimited ? scored : scored.slice(0, maxResults)).map((s) => s.id);
|
|
1152
|
+
if (topIds.length > 0) {
|
|
1153
|
+
const fullRows = [];
|
|
1154
|
+
const phase2ChunkSize = 500;
|
|
1155
|
+
for (let i = 0; i < topIds.length; i += phase2ChunkSize) {
|
|
1156
|
+
const idChunk = topIds.slice(i, i + phase2ChunkSize);
|
|
1157
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1158
|
+
const chunkRows = await this.db.getAllAsync(
|
|
1159
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1160
|
+
[...idChunk, entityId]
|
|
1161
|
+
);
|
|
1162
|
+
fullRows.push(...chunkRows);
|
|
1163
|
+
}
|
|
1164
|
+
const byId = new Map(fullRows.map((r) => [r.id, r]));
|
|
1165
|
+
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
1166
|
+
if (facts.length < topIds.length) {
|
|
1167
|
+
const missingIds = topIds.filter((id) => !byId.has(id));
|
|
1168
|
+
const missingCount = missingIds.length;
|
|
1169
|
+
const sample = missingIds.slice(0, 5);
|
|
1170
|
+
const sampleSuffix = sample.length > 0 ? ` Missing ID sample: ${sample.join(", ")}${missingIds.length > sample.length ? ", ..." : ""}.` : "";
|
|
1171
|
+
const error = new Error(
|
|
1172
|
+
`Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs for entity ${entityId}. Rows may have been concurrently soft-deleted or filtered by deleted_at during hydration, or vector ranker output may include IDs that do not exist for this entity.` + sampleSuffix
|
|
1173
|
+
);
|
|
1174
|
+
this.options.onRetrievalFallback?.(error);
|
|
1175
|
+
}
|
|
1176
|
+
}
|
|
1177
|
+
if (pendingRankerFallbackError) {
|
|
1178
|
+
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
1179
|
+
pendingRankerFallbackError = void 0;
|
|
1180
|
+
}
|
|
1181
|
+
usedEmbed = true;
|
|
1182
|
+
} else {
|
|
1183
|
+
if (pendingRankerFallbackError) {
|
|
1184
|
+
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
1185
|
+
pendingRankerFallbackError = void 0;
|
|
1186
|
+
}
|
|
1187
|
+
usedEmbed = true;
|
|
766
1188
|
}
|
|
767
|
-
return a.row.id.localeCompare(b.row.id);
|
|
768
|
-
});
|
|
769
|
-
const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
|
|
770
|
-
if (topIds.length > 0) {
|
|
771
|
-
const placeholders = topIds.map(() => "?").join(",");
|
|
772
|
-
const fullRows = await this.db.getAllAsync(
|
|
773
|
-
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
774
|
-
topIds
|
|
775
|
-
);
|
|
776
|
-
const byId = new Map(fullRows.map((r) => [r.id, r]));
|
|
777
|
-
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
778
1189
|
}
|
|
779
|
-
usedEmbed = true;
|
|
780
1190
|
} catch (err) {
|
|
781
1191
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
1192
|
+
if (rankerShouldRethrow) {
|
|
1193
|
+
throw error;
|
|
1194
|
+
}
|
|
1195
|
+
if (pendingRankerFallbackError) {
|
|
1196
|
+
error.cause = pendingRankerFallbackError;
|
|
1197
|
+
pendingRankerFallbackError = void 0;
|
|
1198
|
+
}
|
|
782
1199
|
this.options.onRetrievalFallback?.(error);
|
|
783
1200
|
}
|
|
784
1201
|
}
|
|
@@ -789,25 +1206,35 @@ var WikiMemory = class {
|
|
|
789
1206
|
});
|
|
790
1207
|
const topIds = results.slice(0, maxResults).map((r) => r.id);
|
|
791
1208
|
if (topIds.length > 0) {
|
|
792
|
-
const
|
|
793
|
-
const
|
|
794
|
-
|
|
795
|
-
topIds
|
|
796
|
-
|
|
797
|
-
|
|
1209
|
+
const kwRows = [];
|
|
1210
|
+
const kwChunkSize = 500;
|
|
1211
|
+
for (let i = 0; i < topIds.length; i += kwChunkSize) {
|
|
1212
|
+
const idChunk = topIds.slice(i, i + kwChunkSize);
|
|
1213
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1214
|
+
const chunkRows = await this.db.getAllAsync(
|
|
1215
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1216
|
+
[...idChunk, entityId]
|
|
1217
|
+
);
|
|
1218
|
+
kwRows.push(...chunkRows);
|
|
1219
|
+
}
|
|
1220
|
+
const byId = new Map(kwRows.map((r) => [r.id, r]));
|
|
798
1221
|
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
799
1222
|
}
|
|
800
1223
|
}
|
|
801
1224
|
if (facts.length > 0) {
|
|
802
1225
|
const ids = facts.map((f) => f.id);
|
|
803
|
-
const placeholders = ids.map(() => "?").join(",");
|
|
804
1226
|
const now = Date.now();
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
1227
|
+
const accessChunkSize = 500;
|
|
1228
|
+
for (let i = 0; i < ids.length; i += accessChunkSize) {
|
|
1229
|
+
const idChunk = ids.slice(i, i + accessChunkSize);
|
|
1230
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1231
|
+
await this.db.runAsync(
|
|
1232
|
+
`UPDATE ${this.prefix}entries
|
|
1233
|
+
SET access_count = access_count + 1, last_accessed_at = ?
|
|
1234
|
+
WHERE id IN (${placeholders})`,
|
|
1235
|
+
[now, ...idChunk]
|
|
1236
|
+
);
|
|
1237
|
+
}
|
|
811
1238
|
}
|
|
812
1239
|
} else {
|
|
813
1240
|
facts = await this.db.getAllAsync(
|
|
@@ -834,7 +1261,7 @@ var WikiMemory = class {
|
|
|
834
1261
|
)
|
|
835
1262
|
]);
|
|
836
1263
|
const parsedFacts = facts.map((f) => {
|
|
837
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1264
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
838
1265
|
return {
|
|
839
1266
|
...rest,
|
|
840
1267
|
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
@@ -842,6 +1269,113 @@ var WikiMemory = class {
|
|
|
842
1269
|
});
|
|
843
1270
|
return { facts: parsedFacts, tasks, events: events.reverse() };
|
|
844
1271
|
}
|
|
1272
|
+
/**
|
|
1273
|
+
* Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
|
|
1274
|
+
*/
|
|
1275
|
+
_tieBreakSort(items) {
|
|
1276
|
+
items.sort((a, b) => this._compareScoredRows(a, b));
|
|
1277
|
+
}
|
|
1278
|
+
/**
|
|
1279
|
+
* Comparator for score + deterministic tie-break fields.
|
|
1280
|
+
* Negative return means "a ranks ahead of b" for descending score order.
|
|
1281
|
+
*/
|
|
1282
|
+
_compareScoredRows(a, b) {
|
|
1283
|
+
const scoreDiff = b.score - a.score;
|
|
1284
|
+
if (scoreDiff !== 0) return scoreDiff;
|
|
1285
|
+
const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
|
|
1286
|
+
if (accessCountDiff !== 0) return accessCountDiff;
|
|
1287
|
+
const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
|
|
1288
|
+
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
1289
|
+
return a.id.localeCompare(b.id);
|
|
1290
|
+
}
|
|
1291
|
+
/**
|
|
1292
|
+
* Score candidate rows using in-process JS cosine similarity.
|
|
1293
|
+
* Applies hybrid blending (if weight set) and tie-break sorting before returning.
|
|
1294
|
+
*/
|
|
1295
|
+
async _rankWithJsCosine(args) {
|
|
1296
|
+
const { entityId, queryVec, candidateRows, weight, miniSearchScores, populateCache, limit } = args;
|
|
1297
|
+
let entityCache = this.vectorCache.get(entityId);
|
|
1298
|
+
const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
1299
|
+
if (tooLarge && entityCache) {
|
|
1300
|
+
this.vectorCache.delete(entityId);
|
|
1301
|
+
entityCache = void 0;
|
|
1302
|
+
}
|
|
1303
|
+
const canCache = populateCache && !tooLarge;
|
|
1304
|
+
if (canCache && !entityCache) {
|
|
1305
|
+
entityCache = /* @__PURE__ */ new Map();
|
|
1306
|
+
}
|
|
1307
|
+
const scored = candidateRows.map((row) => {
|
|
1308
|
+
let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
|
|
1309
|
+
if (vector && canCache && entityCache && !entityCache.has(row.id)) {
|
|
1310
|
+
entityCache.set(row.id, vector);
|
|
1311
|
+
}
|
|
1312
|
+
let score = 0;
|
|
1313
|
+
if (vector && vector.length === queryVec.length) {
|
|
1314
|
+
const cosSim = cosineSimilarity(queryVec, vector);
|
|
1315
|
+
if (weight !== void 0) {
|
|
1316
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1317
|
+
score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
|
|
1318
|
+
} else {
|
|
1319
|
+
score = cosSim;
|
|
1320
|
+
}
|
|
1321
|
+
} else if (weight !== void 0 && weight < 1) {
|
|
1322
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1323
|
+
score = (1 - weight) * kwScore;
|
|
1324
|
+
} else {
|
|
1325
|
+
score = -2;
|
|
1326
|
+
}
|
|
1327
|
+
return { id: row.id, score, updated_at: row.updated_at, access_count: row.access_count };
|
|
1328
|
+
});
|
|
1329
|
+
if (canCache && entityCache && entityCache.size > 0) {
|
|
1330
|
+
if (!this.vectorCache.has(entityId)) {
|
|
1331
|
+
if (this.vectorCache.size >= _WikiMemory.MAX_VECTOR_CACHE_ENTITIES) {
|
|
1332
|
+
const oldestKey = this.vectorCache.keys().next().value;
|
|
1333
|
+
if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
|
|
1334
|
+
}
|
|
1335
|
+
this.vectorCache.set(entityId, entityCache);
|
|
1336
|
+
}
|
|
1337
|
+
}
|
|
1338
|
+
this._tieBreakSort(scored);
|
|
1339
|
+
return scored.slice(0, limit);
|
|
1340
|
+
}
|
|
1341
|
+
/**
|
|
1342
|
+
* Delegate semantic ranking to the injected VectorRanker.
|
|
1343
|
+
* Caller should pass an oversampledLimit to preserve recall after re-ranking.
|
|
1344
|
+
* Applies hybrid blending itself when a weight is given; rows are returned in ranker order, ready for tie-break sorting.
|
|
1345
|
+
*/
|
|
1346
|
+
async _rankWithVectorRanker(args) {
|
|
1347
|
+
const { entityId, queryVec, candidateIds, weight, miniSearchScores, limit } = args;
|
|
1348
|
+
const ranker = this.options.vectorRanker;
|
|
1349
|
+
if (!ranker) {
|
|
1350
|
+
throw new Error("vectorRanker not configured");
|
|
1351
|
+
}
|
|
1352
|
+
const rankerResults = await ranker.rankBySimilarity({
|
|
1353
|
+
entityId,
|
|
1354
|
+
queryVec,
|
|
1355
|
+
candidateIds,
|
|
1356
|
+
limit
|
|
1357
|
+
});
|
|
1358
|
+
const allowedIds = candidateIds ? new Set(candidateIds) : void 0;
|
|
1359
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1360
|
+
const normalized = [];
|
|
1361
|
+
for (const r of rankerResults) {
|
|
1362
|
+
if (normalized.length >= limit) break;
|
|
1363
|
+
if (seen.has(r.id)) continue;
|
|
1364
|
+
if (allowedIds && !allowedIds.has(r.id)) continue;
|
|
1365
|
+
if (!Number.isFinite(r.semanticScore)) continue;
|
|
1366
|
+
seen.add(r.id);
|
|
1367
|
+
normalized.push(r);
|
|
1368
|
+
}
|
|
1369
|
+
const scored = normalized.map((r) => {
|
|
1370
|
+
let score = r.semanticScore;
|
|
1371
|
+
if (weight !== void 0) {
|
|
1372
|
+
const kwScore = miniSearchScores?.get(r.id) ?? 0;
|
|
1373
|
+
score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
|
|
1374
|
+
}
|
|
1375
|
+
return { id: r.id, score };
|
|
1376
|
+
});
|
|
1377
|
+
return scored;
|
|
1378
|
+
}
|
|
845
1379
|
async getMemoryBundle(entityId) {
|
|
846
1380
|
return this._getFullBundle(entityId, { maxEvents: 10 });
|
|
847
1381
|
}
|
|
@@ -866,7 +1400,7 @@ var WikiMemory = class {
|
|
|
866
1400
|
if (memoryCheckpoint > count) memoryCheckpoint = 0;
|
|
867
1401
|
if (count - memoryCheckpoint >= threshold) {
|
|
868
1402
|
const jobKey = this._librarianKey(entityId);
|
|
869
|
-
if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1403
|
+
if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId)) && !this._isReembedActive(entityId) && !this._isImportActiveFor(entityId) && !this._isForgetActiveFor(entityId)) {
|
|
870
1404
|
this.activeMaintenanceJobs.add(jobKey);
|
|
871
1405
|
this.runLibrarianThenMaybeHeal(entityId, count).catch(console.error).finally(() => this.activeMaintenanceJobs.delete(jobKey));
|
|
872
1406
|
}
|
|
@@ -914,7 +1448,7 @@ var WikiMemory = class {
|
|
|
914
1448
|
LIMIT 100
|
|
915
1449
|
`, [entityId]);
|
|
916
1450
|
const currentFacts = currentFactsRows.map((f) => {
|
|
917
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1451
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
918
1452
|
return {
|
|
919
1453
|
...rest,
|
|
920
1454
|
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
@@ -958,7 +1492,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
958
1492
|
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
|
|
959
1493
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
960
1494
|
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
|
|
961
|
-
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1495
|
+
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
962
1496
|
}
|
|
963
1497
|
for (const task of validTasks) {
|
|
964
1498
|
const id = generateId("task_");
|
|
@@ -968,10 +1502,12 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
968
1502
|
`, [id, entityId, task.description, "pending", task.priority, now, now]);
|
|
969
1503
|
}
|
|
970
1504
|
});
|
|
1505
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1506
|
+
this.vectorCache.delete(entityId);
|
|
971
1507
|
for (const fact of insertedFacts) {
|
|
972
1508
|
await this.embedFact(fact);
|
|
973
1509
|
}
|
|
974
|
-
|
|
1510
|
+
this.vectorCache.delete(entityId);
|
|
975
1511
|
}
|
|
976
1512
|
async _doRunHeal(entityId) {
|
|
977
1513
|
const now = Date.now();
|
|
@@ -1009,7 +1545,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
1009
1545
|
const documentAnchors = allFactsRows.filter((f) => f.source_type === "user_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
|
|
1010
1546
|
const userPrompt = `Heal Candidates:
|
|
1011
1547
|
${JSON.stringify(healCandidates.map((f) => {
|
|
1012
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1548
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
1013
1549
|
return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
|
|
1014
1550
|
}), null, 2)}
|
|
1015
1551
|
|
|
@@ -1036,6 +1572,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1036
1572
|
const safeDeleted = deleted.filter((id) => mutableIds.has(id));
|
|
1037
1573
|
const validNewFacts = newFacts.map(validateFact).filter((f) => f !== null);
|
|
1038
1574
|
const insertedFacts = [];
|
|
1575
|
+
const uniqueDeletedFactIds = Array.from(new Set(safeDeleted));
|
|
1039
1576
|
await this.db.withTransactionAsync(async () => {
|
|
1040
1577
|
for (const id of safeDowngraded) {
|
|
1041
1578
|
await this.db.runAsync(`UPDATE ${this.prefix}entries SET confidence = 'tentative', updated_at = ? WHERE id = ? AND entity_id = ?`, [now, id, entityId]);
|
|
@@ -1049,13 +1586,22 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1049
1586
|
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
|
|
1050
1587
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
1051
1588
|
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
|
|
1052
|
-
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1589
|
+
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1053
1590
|
}
|
|
1054
1591
|
});
|
|
1592
|
+
this.vectorCache.delete(entityId);
|
|
1593
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1594
|
+
for (const factId of uniqueDeletedFactIds) {
|
|
1595
|
+
try {
|
|
1596
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
1597
|
+
} catch (hookErr) {
|
|
1598
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal for ${factId}:`, hookErr);
|
|
1599
|
+
}
|
|
1600
|
+
}
|
|
1055
1601
|
for (const fact of insertedFacts) {
|
|
1056
1602
|
await this.embedFact(fact);
|
|
1057
1603
|
}
|
|
1058
|
-
|
|
1604
|
+
this.vectorCache.delete(entityId);
|
|
1059
1605
|
}
|
|
1060
1606
|
async runLibrarian(entityId) {
|
|
1061
1607
|
const jobKey = this._librarianKey(entityId);
|
|
@@ -1068,6 +1614,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1068
1614
|
if (this._isReembedActive(entityId)) {
|
|
1069
1615
|
throw new WikiBusyError("reembed", entityId);
|
|
1070
1616
|
}
|
|
1617
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1618
|
+
throw new WikiBusyError("import", entityId);
|
|
1619
|
+
}
|
|
1620
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1621
|
+
throw new WikiBusyError("forget", entityId);
|
|
1622
|
+
}
|
|
1071
1623
|
this.activeMaintenanceJobs.add(jobKey);
|
|
1072
1624
|
try {
|
|
1073
1625
|
await this._doRunLibrarian(entityId);
|
|
@@ -1086,6 +1638,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1086
1638
|
if (this._isReembedActive(entityId)) {
|
|
1087
1639
|
throw new WikiBusyError("reembed", entityId);
|
|
1088
1640
|
}
|
|
1641
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1642
|
+
throw new WikiBusyError("import", entityId);
|
|
1643
|
+
}
|
|
1644
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1645
|
+
throw new WikiBusyError("forget", entityId);
|
|
1646
|
+
}
|
|
1089
1647
|
this.activeMaintenanceJobs.add(jobKey);
|
|
1090
1648
|
try {
|
|
1091
1649
|
await this._doRunHeal(entityId);
|
|
@@ -1093,9 +1651,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1093
1651
|
this.activeMaintenanceJobs.delete(jobKey);
|
|
1094
1652
|
}
|
|
1095
1653
|
}
|
|
1096
|
-
async runReembed(entityId) {
|
|
1654
|
+
async runReembed(entityId, opts) {
|
|
1097
1655
|
const embedFn = this.options.llmProvider.embed;
|
|
1098
|
-
if (!embedFn) return { embedded: 0, skipped: 0 };
|
|
1656
|
+
if (!embedFn) return { embedded: 0, skipped: 0, failed: 0 };
|
|
1099
1657
|
const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
|
|
1100
1658
|
if (this.activeMaintenanceJobs.has(reembedKey)) {
|
|
1101
1659
|
throw new WikiBusyError("reembed", entityId ?? "*");
|
|
@@ -1116,6 +1674,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1116
1674
|
if (this._isIngestActiveFor(entityId)) {
|
|
1117
1675
|
throw new WikiBusyError("ingest", entityId);
|
|
1118
1676
|
}
|
|
1677
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1678
|
+
throw new WikiBusyError("import", entityId);
|
|
1679
|
+
}
|
|
1680
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1681
|
+
throw new WikiBusyError("forget", entityId);
|
|
1682
|
+
}
|
|
1119
1683
|
} else {
|
|
1120
1684
|
if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
|
|
1121
1685
|
throw new WikiBusyError("reembed", "*");
|
|
@@ -1132,6 +1696,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1132
1696
|
if (this.activeIngestJobs.size > 0) {
|
|
1133
1697
|
throw new WikiBusyError("ingest", "*");
|
|
1134
1698
|
}
|
|
1699
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":import")) {
|
|
1700
|
+
throw new WikiBusyError("import", "*");
|
|
1701
|
+
}
|
|
1702
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":forget")) {
|
|
1703
|
+
throw new WikiBusyError("forget", "*");
|
|
1704
|
+
}
|
|
1135
1705
|
}
|
|
1136
1706
|
this.activeMaintenanceJobs.add(reembedKey);
|
|
1137
1707
|
try {
|
|
@@ -1141,17 +1711,64 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1141
1711
|
`SELECT * FROM ${this.prefix}entries WHERE ${where}`,
|
|
1142
1712
|
params
|
|
1143
1713
|
);
|
|
1714
|
+
if (entityId) {
|
|
1715
|
+
this.vectorCache.delete(entityId);
|
|
1716
|
+
} else {
|
|
1717
|
+
this.vectorCache.clear();
|
|
1718
|
+
}
|
|
1719
|
+
const skipExisting = opts?.skipExisting ?? false;
|
|
1720
|
+
let effectiveSkip = skipExisting;
|
|
1721
|
+
if (skipExisting) {
|
|
1722
|
+
const mismatchRow = await this.db.getFirstAsync(
|
|
1723
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
1724
|
+
);
|
|
1725
|
+
if (mismatchRow) {
|
|
1726
|
+
if (entityId) {
|
|
1727
|
+
const mismatchDim = parseInt(mismatchRow.value, 10);
|
|
1728
|
+
const staleForEntity = await this.db.getFirstAsync(
|
|
1729
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
1730
|
+
WHERE entity_id = ? AND deleted_at IS NULL
|
|
1731
|
+
AND (
|
|
1732
|
+
embedding_blob IS NULL
|
|
1733
|
+
OR (CAST(length(embedding_blob) AS INTEGER) / 4) != ?
|
|
1734
|
+
)`,
|
|
1735
|
+
[entityId, mismatchDim]
|
|
1736
|
+
);
|
|
1737
|
+
if (staleForEntity && staleForEntity.cnt > 0) effectiveSkip = false;
|
|
1738
|
+
} else {
|
|
1739
|
+
effectiveSkip = false;
|
|
1740
|
+
}
|
|
1741
|
+
}
|
|
1742
|
+
}
|
|
1144
1743
|
let embedded = 0;
|
|
1145
1744
|
let skipped = 0;
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1745
|
+
let failed = 0;
|
|
1746
|
+
try {
|
|
1747
|
+
for (const row of rows) {
|
|
1748
|
+
const existingBlob = row.embedding_blob;
|
|
1749
|
+
const blobIsValid = !!existingBlob && existingBlob.byteLength > 0 && existingBlob.byteLength % 4 === 0;
|
|
1750
|
+
if (effectiveSkip && blobIsValid) {
|
|
1751
|
+
const vec = parseEmbedding(existingBlob, null);
|
|
1752
|
+
if (vec !== null && vec.every((v) => Number.isFinite(v))) {
|
|
1753
|
+
skipped++;
|
|
1754
|
+
continue;
|
|
1755
|
+
}
|
|
1756
|
+
}
|
|
1757
|
+
const success = await this.embedFact(row);
|
|
1758
|
+
if (success) embedded++;
|
|
1759
|
+
else failed++;
|
|
1760
|
+
}
|
|
1761
|
+
if (embedded > 0) {
|
|
1762
|
+
await this._reconcileEmbeddingDimension();
|
|
1763
|
+
}
|
|
1764
|
+
} finally {
|
|
1765
|
+
if (entityId) {
|
|
1766
|
+
this.vectorCache.delete(entityId);
|
|
1767
|
+
} else {
|
|
1768
|
+
this.vectorCache.clear();
|
|
1769
|
+
}
|
|
1153
1770
|
}
|
|
1154
|
-
return { embedded, skipped };
|
|
1771
|
+
return { embedded, skipped, failed };
|
|
1155
1772
|
} finally {
|
|
1156
1773
|
this.activeMaintenanceJobs.delete(reembedKey);
|
|
1157
1774
|
}
|
|
@@ -1171,6 +1788,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1171
1788
|
heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
|
|
1172
1789
|
};
|
|
1173
1790
|
}
|
|
1791
|
+
clearVectorCache() {
|
|
1792
|
+
this.vectorCache.clear();
|
|
1793
|
+
}
|
|
1174
1794
|
async _getFullBundle(entityId, opts) {
|
|
1175
1795
|
const maxEvents = opts?.maxEvents;
|
|
1176
1796
|
const eventsQuery = maxEvents != null ? `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at ASC`;
|
|
@@ -1187,10 +1807,16 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1187
1807
|
this.db.getAllAsync(eventsQuery, eventsParams)
|
|
1188
1808
|
]);
|
|
1189
1809
|
const facts = factsRaw.map((f) => {
|
|
1190
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1810
|
+
const { embedding: _embedding, embedding_blob, ...rest } = f;
|
|
1811
|
+
const safeBlobCopy = opts?.includeBlobs && embedding_blob ? (() => {
|
|
1812
|
+
const c = new ArrayBuffer(embedding_blob.byteLength);
|
|
1813
|
+
new Uint8Array(c).set(embedding_blob);
|
|
1814
|
+
return new Uint8Array(c);
|
|
1815
|
+
})() : void 0;
|
|
1816
|
+
const factBase = safeBlobCopy ? { ...rest, embedding_blob: safeBlobCopy } : rest;
|
|
1191
1817
|
return {
|
|
1192
|
-
...
|
|
1193
|
-
tags: typeof
|
|
1818
|
+
...factBase,
|
|
1819
|
+
tags: typeof factBase.tags === "string" ? JSON.parse(factBase.tags) : factBase.tags
|
|
1194
1820
|
};
|
|
1195
1821
|
});
|
|
1196
1822
|
const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
|
|
@@ -1217,7 +1843,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1217
1843
|
for (let i = 0; i < ids.length; i += BATCH) {
|
|
1218
1844
|
const batch = ids.slice(i, i + BATCH);
|
|
1219
1845
|
const batchResults = await Promise.all(
|
|
1220
|
-
batch.map(async (id) => [id, await this._getFullBundle(id)])
|
|
1846
|
+
batch.map(async (id) => [id, await this._getFullBundle(id, { includeBlobs: true })])
|
|
1221
1847
|
);
|
|
1222
1848
|
for (const [id, bundle] of batchResults) {
|
|
1223
1849
|
entities[id] = bundle;
|
|
@@ -1227,172 +1853,406 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1227
1853
|
}
|
|
1228
1854
|
async importDump(dump, opts) {
|
|
1229
1855
|
const merge = opts?.merge ?? false;
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1856
|
+
const entityIds = Object.keys(dump.entities);
|
|
1857
|
+
for (const entityId of entityIds) {
|
|
1858
|
+
if (this.activeMaintenanceJobs.has(this._importKey(entityId))) {
|
|
1859
|
+
throw new WikiBusyError("import", entityId);
|
|
1860
|
+
}
|
|
1861
|
+
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
1862
|
+
throw new WikiBusyError("librarian", entityId);
|
|
1863
|
+
}
|
|
1864
|
+
if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
1865
|
+
throw new WikiBusyError("heal", entityId);
|
|
1866
|
+
}
|
|
1867
|
+
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1868
|
+
throw new WikiBusyError("prune", entityId);
|
|
1869
|
+
}
|
|
1870
|
+
if (this._isReembedActive(entityId)) {
|
|
1871
|
+
throw new WikiBusyError("reembed", entityId);
|
|
1872
|
+
}
|
|
1873
|
+
if (this._isIngestActiveFor(entityId)) {
|
|
1874
|
+
throw new WikiBusyError("ingest", entityId);
|
|
1875
|
+
}
|
|
1876
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1877
|
+
throw new WikiBusyError("forget", entityId);
|
|
1878
|
+
}
|
|
1879
|
+
}
|
|
1880
|
+
if (this.activeMaintenanceJobs.has(this._globalImportKey())) {
|
|
1881
|
+
throw new WikiBusyError("import", "*");
|
|
1882
|
+
}
|
|
1883
|
+
this.activeMaintenanceJobs.add(this._globalImportKey());
|
|
1884
|
+
for (const entityId of entityIds) {
|
|
1885
|
+
this.activeMaintenanceJobs.add(this._importKey(entityId));
|
|
1886
|
+
}
|
|
1887
|
+
try {
|
|
1888
|
+
for (const [entityId, bundle] of Object.entries(dump.entities)) {
|
|
1889
|
+
await this._doImportEntity(entityId, bundle, merge);
|
|
1890
|
+
}
|
|
1891
|
+
} finally {
|
|
1892
|
+
this.activeMaintenanceJobs.delete(this._globalImportKey());
|
|
1893
|
+
for (const entityId of entityIds) {
|
|
1894
|
+
this.activeMaintenanceJobs.delete(this._importKey(entityId));
|
|
1895
|
+
}
|
|
1896
|
+
}
|
|
1897
|
+
}
|
|
1898
|
+
async _doImportEntity(entityId, bundle, merge) {
|
|
1899
|
+
const upsertedFactIds = /* @__PURE__ */ new Set();
|
|
1900
|
+
const upsertedDeletedFactIds = /* @__PURE__ */ new Set();
|
|
1901
|
+
const factsWithPreservedBlob = /* @__PURE__ */ new Map();
|
|
1902
|
+
const preservedBlobDims = /* @__PURE__ */ new Set();
|
|
1903
|
+
const softDeletedFactIds = [];
|
|
1904
|
+
await this.db.withTransactionAsync(async () => {
|
|
1905
|
+
if (!merge) {
|
|
1906
|
+
const toDelete = await this.db.getAllAsync(
|
|
1907
|
+
`SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1908
|
+
[entityId]
|
|
1909
|
+
);
|
|
1910
|
+
softDeletedFactIds.push(...toDelete.map((r) => r.id));
|
|
1911
|
+
const now = Date.now();
|
|
1912
|
+
await this.db.runAsync(
|
|
1913
|
+
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1914
|
+
[now, now, entityId]
|
|
1915
|
+
);
|
|
1916
|
+
await this.db.runAsync(
|
|
1917
|
+
`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1918
|
+
[now, now, entityId]
|
|
1919
|
+
);
|
|
1920
|
+
await this.db.runAsync(
|
|
1921
|
+
`DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
|
|
1922
|
+
[entityId]
|
|
1923
|
+
);
|
|
1924
|
+
}
|
|
1925
|
+
const factIds = bundle.facts.map((fact) => fact.id);
|
|
1926
|
+
const existingFactsById = /* @__PURE__ */ new Map();
|
|
1927
|
+
const factLookupChunkSize = 500;
|
|
1928
|
+
for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
|
|
1929
|
+
const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
|
|
1930
|
+
if (factIdChunk.length === 0) continue;
|
|
1931
|
+
const placeholders = factIdChunk.map(() => "?").join(", ");
|
|
1932
|
+
const existingFacts = await this.db.getAllAsync(
|
|
1933
|
+
`SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
|
|
1934
|
+
factIdChunk
|
|
1935
|
+
);
|
|
1936
|
+
for (const existingFact of existingFacts) {
|
|
1937
|
+
existingFactsById.set(existingFact.id, existingFact);
|
|
1246
1938
|
}
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
const
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1939
|
+
}
|
|
1940
|
+
for (const fact of bundle.facts) {
|
|
1941
|
+
const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
|
|
1942
|
+
const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
|
|
1943
|
+
const existing = existingFactsById.get(fact.id);
|
|
1944
|
+
const rawBlobRaw = fact.embedding_blob;
|
|
1945
|
+
let rawBlob = null;
|
|
1946
|
+
if (rawBlobRaw instanceof Uint8Array) {
|
|
1947
|
+
rawBlob = rawBlobRaw;
|
|
1948
|
+
} else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
|
|
1949
|
+
const obj = rawBlobRaw;
|
|
1950
|
+
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
1951
|
+
rawBlob = new Uint8Array(obj["data"]);
|
|
1952
|
+
} else if (!Array.isArray(rawBlobRaw)) {
|
|
1953
|
+
const entries = Object.keys(obj);
|
|
1954
|
+
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
1955
|
+
const len = entries.length;
|
|
1956
|
+
rawBlob = new Uint8Array(len);
|
|
1957
|
+
for (let i = 0; i < len; i++) rawBlob[i] = obj[String(i)] ?? 0;
|
|
1958
|
+
}
|
|
1260
1959
|
}
|
|
1261
1960
|
}
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
const
|
|
1265
|
-
const
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1961
|
+
let blobData = null;
|
|
1962
|
+
if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
|
|
1963
|
+
const copy = new ArrayBuffer(rawBlob.byteLength);
|
|
1964
|
+
const alignedBlob = new Uint8Array(copy);
|
|
1965
|
+
alignedBlob.set(rawBlob);
|
|
1966
|
+
const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
|
|
1967
|
+
let allFinite = true;
|
|
1968
|
+
for (let i = 0; i < floats.length; i++) {
|
|
1969
|
+
if (!isFinite(floats[i])) {
|
|
1970
|
+
allFinite = false;
|
|
1971
|
+
break;
|
|
1273
1972
|
}
|
|
1973
|
+
}
|
|
1974
|
+
if (allFinite) {
|
|
1975
|
+
blobData = alignedBlob;
|
|
1976
|
+
}
|
|
1977
|
+
}
|
|
1978
|
+
if (existing) {
|
|
1979
|
+
if (existing.entity_id !== entityId) {
|
|
1980
|
+
this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
|
|
1981
|
+
continue;
|
|
1982
|
+
}
|
|
1983
|
+
if (merge) {
|
|
1984
|
+
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
1985
|
+
}
|
|
1986
|
+
if (blobData != null) {
|
|
1987
|
+
await this.db.runAsync(
|
|
1988
|
+
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
1989
|
+
[entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData, fact.id]
|
|
1990
|
+
);
|
|
1991
|
+
factsWithPreservedBlob.set(fact.id, blobData);
|
|
1992
|
+
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
1993
|
+
} else {
|
|
1274
1994
|
await this.db.runAsync(
|
|
1275
|
-
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at =
|
|
1995
|
+
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = NULL, embedding = NULL WHERE id = ?`,
|
|
1276
1996
|
[entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, fact.id]
|
|
1277
1997
|
);
|
|
1278
|
-
|
|
1998
|
+
}
|
|
1999
|
+
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
2000
|
+
upsertedFactIds.add(fact.id);
|
|
2001
|
+
if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
|
|
2002
|
+
} else {
|
|
2003
|
+
if (blobData != null) {
|
|
2004
|
+
await this.db.runAsync(
|
|
2005
|
+
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at, embedding_blob) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
2006
|
+
[fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData]
|
|
2007
|
+
);
|
|
2008
|
+
factsWithPreservedBlob.set(fact.id, blobData);
|
|
2009
|
+
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
1279
2010
|
} else {
|
|
1280
2011
|
await this.db.runAsync(
|
|
1281
2012
|
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1282
2013
|
[fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at]
|
|
1283
2014
|
);
|
|
1284
|
-
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1285
2015
|
}
|
|
2016
|
+
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
2017
|
+
upsertedFactIds.add(fact.id);
|
|
2018
|
+
if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
|
|
1286
2019
|
}
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
2020
|
+
}
|
|
2021
|
+
const taskIds = bundle.tasks.map((task) => task.id);
|
|
2022
|
+
const existingTasksById = /* @__PURE__ */ new Map();
|
|
2023
|
+
const taskLookupChunkSize = 500;
|
|
2024
|
+
for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
|
|
2025
|
+
const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
|
|
2026
|
+
if (taskIdChunk.length === 0) continue;
|
|
2027
|
+
const placeholders = taskIdChunk.map(() => "?").join(", ");
|
|
2028
|
+
const existingTasks = await this.db.getAllAsync(
|
|
2029
|
+
`SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
|
|
2030
|
+
taskIdChunk
|
|
2031
|
+
);
|
|
2032
|
+
for (const existingTask of existingTasks) {
|
|
2033
|
+
existingTasksById.set(existingTask.id, existingTask);
|
|
2034
|
+
}
|
|
2035
|
+
}
|
|
2036
|
+
for (const task of bundle.tasks) {
|
|
2037
|
+
const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
|
|
2038
|
+
const existing = existingTasksById.get(task.id);
|
|
2039
|
+
if (existing) {
|
|
2040
|
+
if (existing.entity_id !== entityId) {
|
|
2041
|
+
this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
|
|
2042
|
+
continue;
|
|
2043
|
+
}
|
|
2044
|
+
if (merge) {
|
|
2045
|
+
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
1300
2046
|
}
|
|
2047
|
+
await this.db.runAsync(
|
|
2048
|
+
`UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
|
|
2049
|
+
[entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
|
|
2050
|
+
);
|
|
2051
|
+
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
2052
|
+
} else {
|
|
2053
|
+
await this.db.runAsync(
|
|
2054
|
+
`INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
2055
|
+
[task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
|
|
2056
|
+
);
|
|
2057
|
+
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1301
2058
|
}
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
2059
|
+
}
|
|
2060
|
+
for (const event of bundle.events) {
|
|
2061
|
+
await this.db.runAsync(
|
|
2062
|
+
`INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
|
|
2063
|
+
VALUES (?, ?, ?, ?, ?, ?)`,
|
|
2064
|
+
[event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
|
|
2065
|
+
);
|
|
2066
|
+
}
|
|
2067
|
+
});
|
|
2068
|
+
this.vectorCache.delete(entityId);
|
|
2069
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
2070
|
+
for (const fact of bundle.facts) {
|
|
2071
|
+
if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
|
|
2072
|
+
await this.embedFact({
|
|
2073
|
+
id: fact.id,
|
|
2074
|
+
entity_id: entityId,
|
|
2075
|
+
// Use authoritative entityId from dump key, not fact.entity_id
|
|
2076
|
+
title: fact.title,
|
|
2077
|
+
body: fact.body,
|
|
2078
|
+
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
2079
|
+
});
|
|
2080
|
+
}
|
|
2081
|
+
}
|
|
2082
|
+
for (const fact of bundle.facts) {
|
|
2083
|
+
const blobData = factsWithPreservedBlob.get(fact.id);
|
|
2084
|
+
if (blobData && !fact.deleted_at && upsertedFactIds.has(fact.id)) {
|
|
2085
|
+
try {
|
|
2086
|
+
const float32Vector = new Float32Array(blobData.buffer, blobData.byteOffset, blobData.byteLength / 4);
|
|
2087
|
+
await this._notifyEmbeddingPersisted(entityId, fact.id, float32Vector);
|
|
2088
|
+
} catch (hookErr) {
|
|
2089
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for preserved-blob fact ${fact.id}:`, hookErr);
|
|
2090
|
+
}
|
|
2091
|
+
}
|
|
2092
|
+
}
|
|
2093
|
+
for (const factId of softDeletedFactIds) {
|
|
2094
|
+
if (!upsertedFactIds.has(factId) || upsertedDeletedFactIds.has(factId)) {
|
|
2095
|
+
try {
|
|
2096
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
2097
|
+
} catch (hookErr) {
|
|
2098
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted(vector=null) hook failed for soft-deleted fact ${factId}:`, hookErr);
|
|
2099
|
+
}
|
|
2100
|
+
}
|
|
2101
|
+
}
|
|
2102
|
+
try {
|
|
2103
|
+
const canonicalRow = await this.db.getFirstAsync(
|
|
2104
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
|
|
2105
|
+
);
|
|
2106
|
+
const canonicalDim = canonicalRow ? parseInt(canonicalRow.value, 10) : null;
|
|
2107
|
+
if (preservedBlobDims.size === 1) {
|
|
2108
|
+
const preservedDim = [...preservedBlobDims][0];
|
|
2109
|
+
if (canonicalDim === null || canonicalDim === preservedDim) {
|
|
2110
|
+
await this.storeEmbeddingDimension(preservedDim);
|
|
2111
|
+
const staleMismatch = await this.db.getFirstAsync(
|
|
2112
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
2113
|
+
);
|
|
2114
|
+
if (staleMismatch && parseInt(staleMismatch.value, 10) !== preservedDim) {
|
|
1319
2115
|
await this.db.runAsync(
|
|
1320
|
-
`INSERT INTO ${this.prefix}
|
|
1321
|
-
[
|
|
2116
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
2117
|
+
[String(preservedDim)]
|
|
1322
2118
|
);
|
|
1323
|
-
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1324
2119
|
}
|
|
1325
|
-
|
|
1326
|
-
|
|
2120
|
+
await this._reconcileEmbeddingDimension();
|
|
2121
|
+
} else {
|
|
1327
2122
|
await this.db.runAsync(
|
|
1328
|
-
`INSERT OR
|
|
1329
|
-
|
|
1330
|
-
[event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
|
|
2123
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
2124
|
+
[String(canonicalDim)]
|
|
1331
2125
|
);
|
|
1332
2126
|
}
|
|
1333
|
-
})
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
await this.
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
2127
|
+
} else if (preservedBlobDims.size > 1) {
|
|
2128
|
+
if (canonicalDim === null) {
|
|
2129
|
+
const sortedPreservedBlobDims = [...preservedBlobDims].sort((a, b) => a - b);
|
|
2130
|
+
await this.storeEmbeddingDimension(sortedPreservedBlobDims[0]);
|
|
2131
|
+
await this.db.runAsync(
|
|
2132
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
2133
|
+
[String(sortedPreservedBlobDims[0])]
|
|
2134
|
+
);
|
|
2135
|
+
} else {
|
|
2136
|
+
await this.db.runAsync(
|
|
2137
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
2138
|
+
[String(canonicalDim)]
|
|
2139
|
+
);
|
|
1342
2140
|
}
|
|
1343
2141
|
}
|
|
2142
|
+
} finally {
|
|
2143
|
+
this.vectorCache.delete(entityId);
|
|
1344
2144
|
}
|
|
1345
|
-
await this.rebuildMiniSearchIndex();
|
|
1346
2145
|
}
|
|
1347
2146
|
async forget(entityId, params) {
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
if (
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
} else {
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
const
|
|
1371
|
-
let
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
2147
|
+
let blockingOperation = null;
|
|
2148
|
+
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
2149
|
+
blockingOperation = "librarian";
|
|
2150
|
+
} else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
2151
|
+
blockingOperation = "heal";
|
|
2152
|
+
} else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
2153
|
+
blockingOperation = "prune";
|
|
2154
|
+
} else if (this._isReembedActive(entityId)) {
|
|
2155
|
+
blockingOperation = "reembed";
|
|
2156
|
+
} else if (this._isIngestActiveFor(entityId)) {
|
|
2157
|
+
blockingOperation = "ingest";
|
|
2158
|
+
} else if (this._isImportActiveFor(entityId)) {
|
|
2159
|
+
blockingOperation = "import";
|
|
2160
|
+
} else if (this._isForgetActiveFor(entityId)) {
|
|
2161
|
+
blockingOperation = "forget";
|
|
2162
|
+
}
|
|
2163
|
+
if (blockingOperation !== null) {
|
|
2164
|
+
throw new WikiBusyError(blockingOperation, entityId);
|
|
2165
|
+
}
|
|
2166
|
+
const forgetKey = this._forgetKey(entityId);
|
|
2167
|
+
this.activeMaintenanceJobs.add(forgetKey);
|
|
2168
|
+
try {
|
|
2169
|
+
const now = Date.now();
|
|
2170
|
+
let deletedEntries = 0;
|
|
2171
|
+
let deletedTasks = 0;
|
|
2172
|
+
const deletedEntryIds = [];
|
|
2173
|
+
if (params.clearAll) {
|
|
2174
|
+
const entriesToDelete = await this.db.getAllAsync(
|
|
2175
|
+
`SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
2176
|
+
[entityId]
|
|
2177
|
+
);
|
|
2178
|
+
deletedEntryIds.push(...entriesToDelete.map((e) => e.id));
|
|
2179
|
+
const [entriesRes, tasksRes] = await Promise.all([
|
|
2180
|
+
this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId]),
|
|
2181
|
+
this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId])
|
|
2182
|
+
]);
|
|
2183
|
+
await this.db.runAsync(`UPDATE ${this.prefix}checkpoints SET memory_checkpoint = 0, heal_checkpoint = 0 WHERE entity_id = ?`, [entityId]);
|
|
2184
|
+
deletedEntries = entriesRes.changes;
|
|
2185
|
+
deletedTasks = tasksRes.changes;
|
|
2186
|
+
} else {
|
|
2187
|
+
const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
|
|
2188
|
+
const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
|
|
2189
|
+
if (hasIdSelectors && hasSourceSelectors) {
|
|
2190
|
+
throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
|
|
1378
2191
|
}
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
2192
|
+
const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
|
|
2193
|
+
if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
|
|
2194
|
+
const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
|
|
2195
|
+
if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
2196
|
+
if (params.entryId) {
|
|
2197
|
+
const entry = await this.db.getFirstAsync(
|
|
2198
|
+
`SELECT id FROM ${this.prefix}entries WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
2199
|
+
[params.entryId, entityId]
|
|
2200
|
+
);
|
|
2201
|
+
if (entry) deletedEntryIds.push(entry.id);
|
|
2202
|
+
}
|
|
2203
|
+
if (sourceRef || sourceHash) {
|
|
2204
|
+
let q = `SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`;
|
|
2205
|
+
const args = [entityId];
|
|
2206
|
+
if (sourceRef) {
|
|
2207
|
+
q += ` AND source_ref = ?`;
|
|
2208
|
+
args.push(sourceRef);
|
|
2209
|
+
}
|
|
2210
|
+
if (sourceHash) {
|
|
2211
|
+
q += ` AND source_hash = ?`;
|
|
2212
|
+
args.push(sourceHash);
|
|
2213
|
+
}
|
|
2214
|
+
const entriesToDelete = await this.db.getAllAsync(q, args);
|
|
2215
|
+
deletedEntryIds.push(...entriesToDelete.map((e) => e.id));
|
|
2216
|
+
}
|
|
2217
|
+
const entryPromise = params.entryId ? this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.entryId, entityId]) : null;
|
|
2218
|
+
const taskPromise = params.taskId ? this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.taskId, entityId]) : null;
|
|
2219
|
+
let refPromise = null;
|
|
2220
|
+
if (sourceRef || sourceHash) {
|
|
2221
|
+
let q = `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`;
|
|
2222
|
+
const args = [now, now, entityId];
|
|
2223
|
+
if (sourceRef) {
|
|
2224
|
+
q += ` AND source_ref = ?`;
|
|
2225
|
+
args.push(sourceRef);
|
|
2226
|
+
}
|
|
2227
|
+
if (sourceHash) {
|
|
2228
|
+
q += ` AND source_hash = ?`;
|
|
2229
|
+
args.push(sourceHash);
|
|
2230
|
+
}
|
|
2231
|
+
refPromise = this.db.runAsync(q, args);
|
|
1382
2232
|
}
|
|
1383
|
-
|
|
2233
|
+
const [entryResult, taskResult, refResult] = await Promise.all([
|
|
2234
|
+
entryPromise ?? Promise.resolve(null),
|
|
2235
|
+
taskPromise ?? Promise.resolve(null),
|
|
2236
|
+
refPromise ?? Promise.resolve(null)
|
|
2237
|
+
]);
|
|
2238
|
+
if (entryResult) deletedEntries += entryResult.changes;
|
|
2239
|
+
if (taskResult) deletedTasks += taskResult.changes;
|
|
2240
|
+
if (refResult) deletedEntries += refResult.changes;
|
|
1384
2241
|
}
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
2242
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
2243
|
+
this.vectorCache.delete(entityId);
|
|
2244
|
+
const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
|
|
2245
|
+
for (const factId of uniqueDeletedIds) {
|
|
2246
|
+
try {
|
|
2247
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
2248
|
+
} catch (hookErr) {
|
|
2249
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during forget for ${factId}:`, hookErr);
|
|
2250
|
+
}
|
|
2251
|
+
}
|
|
2252
|
+
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
2253
|
+
} finally {
|
|
2254
|
+
this.activeMaintenanceJobs.delete(forgetKey);
|
|
1393
2255
|
}
|
|
1394
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
1395
|
-
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
1396
2256
|
}
|
|
1397
2257
|
async ingestDocument(entityId, params) {
|
|
1398
2258
|
const sourceRef = normalizeSourceRef(params.sourceRef);
|
|
@@ -1420,6 +2280,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1420
2280
|
if (this._isReembedActive(entityId)) {
|
|
1421
2281
|
throw new WikiBusyError("reembed", entityId);
|
|
1422
2282
|
}
|
|
2283
|
+
if (this._isImportActiveFor(entityId)) {
|
|
2284
|
+
throw new WikiBusyError("import", entityId);
|
|
2285
|
+
}
|
|
2286
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
2287
|
+
throw new WikiBusyError("forget", entityId);
|
|
2288
|
+
}
|
|
1423
2289
|
this.activeIngestJobs.add(jobKey);
|
|
1424
2290
|
try {
|
|
1425
2291
|
const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
|
|
@@ -1452,7 +2318,15 @@ ${chunk}`;
|
|
|
1452
2318
|
}
|
|
1453
2319
|
const now = Date.now();
|
|
1454
2320
|
const insertedFacts = [];
|
|
2321
|
+
const deletedSourceFactIds = [];
|
|
1455
2322
|
await this.db.withTransactionAsync(async () => {
|
|
2323
|
+
const existingSourceFacts = await this.db.getAllAsync(
|
|
2324
|
+
`SELECT id FROM ${this.prefix}entries WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
2325
|
+
[sourceRef, entityId]
|
|
2326
|
+
);
|
|
2327
|
+
for (const row of existingSourceFacts) {
|
|
2328
|
+
deletedSourceFactIds.push(row.id);
|
|
2329
|
+
}
|
|
1456
2330
|
await this.db.runAsync(
|
|
1457
2331
|
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
1458
2332
|
[now, now, sourceRef, entityId]
|
|
@@ -1464,19 +2338,42 @@ ${chunk}`;
|
|
|
1464
2338
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1465
2339
|
[id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]
|
|
1466
2340
|
);
|
|
1467
|
-
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2341
|
+
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1468
2342
|
}
|
|
1469
2343
|
});
|
|
2344
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
2345
|
+
this.vectorCache.delete(entityId);
|
|
2346
|
+
const uniqueDeletedSourceFactIds = Array.from(new Set(deletedSourceFactIds));
|
|
2347
|
+
for (const factId of uniqueDeletedSourceFactIds) {
|
|
2348
|
+
try {
|
|
2349
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
2350
|
+
} catch (hookErr) {
|
|
2351
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during ingest for ${factId}:`, hookErr);
|
|
2352
|
+
}
|
|
2353
|
+
}
|
|
1470
2354
|
for (const fact of insertedFacts) {
|
|
1471
2355
|
await this.embedFact(fact);
|
|
1472
2356
|
}
|
|
1473
|
-
|
|
2357
|
+
this.vectorCache.delete(entityId);
|
|
1474
2358
|
return { truncated, chunks: chunks.length };
|
|
1475
2359
|
} finally {
|
|
1476
2360
|
this.activeIngestJobs.delete(jobKey);
|
|
1477
2361
|
}
|
|
1478
2362
|
}
|
|
1479
2363
|
};
|
|
2364
|
+
/**
|
|
2365
|
+
* Maximum number of entities whose parsed embedding vectors are held in
|
|
2366
|
+
* memory. This cap is intentionally conservative so the cache remains safe
|
|
2367
|
+
* on memory-constrained runtimes (e.g., mobile/Expo).
|
|
2368
|
+
*/
|
|
2369
|
+
_WikiMemory.MAX_VECTOR_CACHE_ENTITIES = 16;
|
|
2370
|
+
/**
|
|
2371
|
+
* Maximum number of fact vectors cached per entity. Keep this high enough to
|
|
2372
|
+
* preserve the parsed-embedding reuse optimization for common mid-sized
|
|
2373
|
+
* entities while still maintaining a bounded memory footprint.
|
|
2374
|
+
*/
|
|
2375
|
+
_WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
|
|
2376
|
+
var WikiMemory = _WikiMemory;
|
|
1480
2377
|
|
|
1481
2378
|
// src/utils/formatContext.ts
|
|
1482
2379
|
function validateMaxOption(value, name) {
|
|
@@ -1672,8 +2569,23 @@ function formatMemoryDump(dump) {
|
|
|
1672
2569
|
name: formatEntityFileName(entityId),
|
|
1673
2570
|
content: renderEntity(entityId, bundle, dump.generatedAt)
|
|
1674
2571
|
}));
|
|
2572
|
+
const manifestDump = {
|
|
2573
|
+
generatedAt: dump.generatedAt,
|
|
2574
|
+
entities: Object.fromEntries(
|
|
2575
|
+
Object.entries(dump.entities).map(([entityId, bundle]) => [
|
|
2576
|
+
entityId,
|
|
2577
|
+
{
|
|
2578
|
+
...bundle,
|
|
2579
|
+
facts: bundle.facts.map((f) => {
|
|
2580
|
+
const { embedding_blob: _blob, ...rest } = f;
|
|
2581
|
+
return rest;
|
|
2582
|
+
})
|
|
2583
|
+
}
|
|
2584
|
+
])
|
|
2585
|
+
)
|
|
2586
|
+
};
|
|
1675
2587
|
return {
|
|
1676
|
-
manifest: JSON.stringify(
|
|
2588
|
+
manifest: JSON.stringify(manifestDump, null, 2),
|
|
1677
2589
|
files
|
|
1678
2590
|
};
|
|
1679
2591
|
}
|