@equationalapplications/core-llm-wiki 2.6.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +329 -1
- package/dist/index.d.mts +184 -11
- package/dist/index.d.ts +184 -11
- package/dist/index.js +1134 -222
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1134 -222
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -18,7 +18,8 @@ async function setupDatabase(db, prefix) {
|
|
|
18
18
|
last_accessed_at INTEGER,
|
|
19
19
|
access_count INTEGER NOT NULL DEFAULT 0,
|
|
20
20
|
deleted_at INTEGER,
|
|
21
|
-
embedding TEXT
|
|
21
|
+
embedding TEXT,
|
|
22
|
+
embedding_blob BLOB
|
|
22
23
|
);
|
|
23
24
|
|
|
24
25
|
CREATE INDEX IF NOT EXISTS ${prefix}entries_entity_idx ON ${prefix}entries(entity_id);
|
|
@@ -91,6 +92,20 @@ var MIGRATIONS = [
|
|
|
91
92
|
await db.execAsync(`ALTER TABLE ${prefix}entries ADD COLUMN embedding TEXT`);
|
|
92
93
|
}
|
|
93
94
|
}
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
version: 3,
|
|
98
|
+
description: "Add embedding_blob BLOB column for Float32Array vector storage",
|
|
99
|
+
run: async (db, prefix) => {
|
|
100
|
+
const cols = await db.getAllAsync(
|
|
101
|
+
`PRAGMA table_info(${prefix}entries)`
|
|
102
|
+
);
|
|
103
|
+
if (!cols.some((c) => c.name === "embedding_blob")) {
|
|
104
|
+
await db.execAsync(
|
|
105
|
+
`ALTER TABLE ${prefix}entries ADD COLUMN embedding_blob BLOB`
|
|
106
|
+
);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
94
109
|
}
|
|
95
110
|
];
|
|
96
111
|
for (let i = 1; i < MIGRATIONS.length; i++) {
|
|
@@ -148,6 +163,34 @@ function cosineSimilarity(a, b) {
|
|
|
148
163
|
return denom === 0 ? 0 : dot / denom;
|
|
149
164
|
}
|
|
150
165
|
|
|
166
|
+
// src/utils/embedding.ts
|
|
167
|
+
/**
 * Decode a stored embedding into a Float32Array.
 *
 * The binary `blob` form is preferred; the JSON `text` form is only consulted
 * when no non-empty blob is supplied.
 *
 * @param {ArrayBuffer|ArrayBufferView|null|undefined} blob - Raw float32 bytes.
 *   Accepts a Uint8Array (including subclasses), any other typed array or
 *   DataView, or a bare ArrayBuffer.
 * @param {string|null|undefined} text - JSON array of numbers (fallback form).
 * @returns {Float32Array|null} The decoded vector, or null when the input is
 *   missing, malformed, has a byte length that is not a multiple of 4, or
 *   contains a value that is not finite as a float32.
 */
function parseEmbedding(blob, text) {
  if (blob && blob.byteLength > 0) {
    if (blob.byteLength % 4 !== 0) return null;
    // Normalise to a byte view before copying. TypedArray#set copies *elements*,
    // so a bare ArrayBuffer (no length) would copy nothing and a Float32Array
    // would copy numeric values rather than the underlying bytes.
    let bytes = blob;
    if (blob instanceof ArrayBuffer) {
      bytes = new Uint8Array(blob);
    } else if (ArrayBuffer.isView(blob) && !(blob instanceof Uint8Array)) {
      bytes = new Uint8Array(blob.buffer, blob.byteOffset, blob.byteLength);
    }
    // Copy into a fresh buffer so the Float32Array view starts at offset 0
    // regardless of the source view's byteOffset.
    const copy = new ArrayBuffer(bytes.byteLength);
    new Uint8Array(copy).set(bytes);
    const vector = new Float32Array(copy);
    for (const value of vector) {
      if (!Number.isFinite(value)) return null;
    }
    return vector;
  }
  if (text) {
    try {
      const arr = JSON.parse(text);
      if (!Array.isArray(arr) || !arr.every((v) => typeof v === "number" && Number.isFinite(v))) return null;
      const vector = new Float32Array(arr);
      // Re-check after narrowing: a finite double can overflow to Infinity as float32.
      for (const value of vector) {
        if (!Number.isFinite(value)) return null;
      }
      return vector;
    } catch {
      return null;
    }
  }
  return null;
}
|
|
193
|
+
|
|
151
194
|
// src/WikiMemory.ts
|
|
152
195
|
function parseJsonResponse(text) {
|
|
153
196
|
const firstBrace = text.indexOf("{");
|
|
@@ -352,7 +395,7 @@ function jaccardScore(a, b) {
|
|
|
352
395
|
}
|
|
353
396
|
var FUZZY_THRESHOLD = 0.5;
|
|
354
397
|
var MIN_TOKENS_TO_QUALIFY = 3;
|
|
355
|
-
var
|
|
398
|
+
var _WikiMemory = class _WikiMemory {
|
|
356
399
|
constructor(db, options) {
|
|
357
400
|
this.activeMaintenanceJobs = /* @__PURE__ */ new Set();
|
|
358
401
|
this.activeIngestJobs = /* @__PURE__ */ new Set();
|
|
@@ -366,6 +409,7 @@ var WikiMemory = class {
|
|
|
366
409
|
}
|
|
367
410
|
});
|
|
368
411
|
this.miniSearchEntryIdsByEntity = /* @__PURE__ */ new Map();
|
|
412
|
+
this.vectorCache = /* @__PURE__ */ new Map();
|
|
369
413
|
this.db = db;
|
|
370
414
|
this.options = options;
|
|
371
415
|
this.prefix = options.config?.tablePrefix || "llm_wiki_";
|
|
@@ -432,10 +476,6 @@ var WikiMemory = class {
|
|
|
432
476
|
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
433
477
|
[String(dim)]
|
|
434
478
|
);
|
|
435
|
-
} else {
|
|
436
|
-
await this.db.runAsync(
|
|
437
|
-
`DELETE FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
438
|
-
);
|
|
439
479
|
}
|
|
440
480
|
} else {
|
|
441
481
|
await this.db.runAsync(
|
|
@@ -454,7 +494,18 @@ var WikiMemory = class {
|
|
|
454
494
|
const mismatch = await this.db.getFirstAsync(
|
|
455
495
|
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
456
496
|
);
|
|
457
|
-
if (mismatch)
|
|
497
|
+
if (!mismatch) return;
|
|
498
|
+
const newDim = parseInt(mismatch.value, 10);
|
|
499
|
+
const residual = await this.db.getFirstAsync(
|
|
500
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
501
|
+
WHERE deleted_at IS NULL
|
|
502
|
+
AND (
|
|
503
|
+
(embedding_blob IS NOT NULL AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?)
|
|
504
|
+
OR (embedding_blob IS NULL AND embedding IS NOT NULL)
|
|
505
|
+
)`,
|
|
506
|
+
[newDim]
|
|
507
|
+
);
|
|
508
|
+
if (!residual || residual.cnt === 0) {
|
|
458
509
|
await this.db.runAsync(
|
|
459
510
|
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension', ?)`,
|
|
460
511
|
[mismatch.value]
|
|
@@ -485,11 +536,29 @@ var WikiMemory = class {
|
|
|
485
536
|
console.warn(`[WikiMemory] embedFact: embed() returned an invalid vector for ${fact.id}; skipping.`);
|
|
486
537
|
return false;
|
|
487
538
|
}
|
|
488
|
-
|
|
539
|
+
const float32Vector = new Float32Array(vector);
|
|
540
|
+
let hasNonFinite = false;
|
|
541
|
+
for (let i = 0; i < float32Vector.length; i++) {
|
|
542
|
+
if (!isFinite(float32Vector[i])) {
|
|
543
|
+
hasNonFinite = true;
|
|
544
|
+
break;
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
if (hasNonFinite) {
|
|
548
|
+
console.warn(`[WikiMemory] embedFact: embed() returned values that overflow float32 for ${fact.id}; skipping.`);
|
|
549
|
+
return false;
|
|
550
|
+
}
|
|
551
|
+
await this.storeEmbeddingDimension(float32Vector.length);
|
|
552
|
+
const blob = new Uint8Array(float32Vector.buffer);
|
|
489
553
|
await this.db.runAsync(
|
|
490
|
-
`UPDATE ${this.prefix}entries SET embedding =
|
|
491
|
-
[
|
|
554
|
+
`UPDATE ${this.prefix}entries SET embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
555
|
+
[blob, fact.id]
|
|
492
556
|
);
|
|
557
|
+
try {
|
|
558
|
+
await this._notifyEmbeddingPersisted(fact.entity_id, fact.id, float32Vector);
|
|
559
|
+
} catch (hookErr) {
|
|
560
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for ${fact.id}:`, hookErr);
|
|
561
|
+
}
|
|
493
562
|
return true;
|
|
494
563
|
} catch (err) {
|
|
495
564
|
console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
|
|
@@ -505,6 +574,9 @@ var WikiMemory = class {
|
|
|
505
574
|
_warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
|
|
506
575
|
console.warn(`[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`);
|
|
507
576
|
}
|
|
577
|
+
async _notifyEmbeddingPersisted(entityId, factId, vector) {
|
|
578
|
+
await this.options.vectorRanker?.onEmbeddingPersisted?.({ entityId, factId, vector });
|
|
579
|
+
}
|
|
508
580
|
async setup() {
|
|
509
581
|
const entriesExistedBeforeSetup = await this.db.getFirstAsync(
|
|
510
582
|
`SELECT name FROM sqlite_master WHERE type='table' AND name=?`,
|
|
@@ -608,9 +680,24 @@ var WikiMemory = class {
|
|
|
608
680
|
_globalReembedKey() {
|
|
609
681
|
return `${this.prefix}:reembed`;
|
|
610
682
|
}
|
|
683
|
+
_importKey(entityId) {
|
|
684
|
+
return `${this.prefix}:${entityId}:import`;
|
|
685
|
+
}
|
|
686
|
+
_globalImportKey() {
|
|
687
|
+
return `${this.prefix}:import`;
|
|
688
|
+
}
|
|
689
|
+
_forgetKey(entityId) {
|
|
690
|
+
return `${this.prefix}:${entityId}:forget`;
|
|
691
|
+
}
|
|
611
692
|
_isReembedActive(entityId) {
|
|
612
693
|
return this.activeMaintenanceJobs.has(this._reembedKey(entityId)) || this.activeMaintenanceJobs.has(this._globalReembedKey());
|
|
613
694
|
}
|
|
695
|
+
_isImportActiveFor(entityId) {
|
|
696
|
+
return this.activeMaintenanceJobs.has(this._importKey(entityId)) || this.activeMaintenanceJobs.has(this._globalImportKey());
|
|
697
|
+
}
|
|
698
|
+
_isForgetActiveFor(entityId) {
|
|
699
|
+
return this.activeMaintenanceJobs.has(this._forgetKey(entityId));
|
|
700
|
+
}
|
|
614
701
|
/** Returns true if any maintenance job has the given operation suffix (e.g. ':prune'). */
|
|
615
702
|
_isAnyMaintenanceActiveWithSuffix(suffix) {
|
|
616
703
|
const entityKeyPrefix = `${this.prefix}:`;
|
|
@@ -653,6 +740,10 @@ var WikiMemory = class {
|
|
|
653
740
|
blockingOperation = "reembed";
|
|
654
741
|
} else if (isIngestRunning) {
|
|
655
742
|
blockingOperation = "ingest";
|
|
743
|
+
} else if (this._isImportActiveFor(entityId)) {
|
|
744
|
+
blockingOperation = "import";
|
|
745
|
+
} else if (this._isForgetActiveFor(entityId)) {
|
|
746
|
+
blockingOperation = "forget";
|
|
656
747
|
}
|
|
657
748
|
if (blockingOperation !== null) {
|
|
658
749
|
throw new WikiBusyError(blockingOperation, entityId);
|
|
@@ -668,8 +759,15 @@ var WikiMemory = class {
|
|
|
668
759
|
let deletedEntries = 0;
|
|
669
760
|
let deletedTasks = 0;
|
|
670
761
|
let deletedEvents = 0;
|
|
762
|
+
const deletedEntryIds = [];
|
|
671
763
|
if (retainSoftDeletedFor !== null) {
|
|
672
764
|
const cutoff = now - retainSoftDeletedFor * 864e5;
|
|
765
|
+
const entriesToDelete = await this.db.getAllAsync(
|
|
766
|
+
`SELECT id FROM ${this.prefix}entries
|
|
767
|
+
WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at < ?`,
|
|
768
|
+
[entityId, cutoff]
|
|
769
|
+
);
|
|
770
|
+
deletedEntryIds.push(...entriesToDelete.map((e) => e.id));
|
|
673
771
|
const entryResult = await this.db.runAsync(
|
|
674
772
|
`DELETE FROM ${this.prefix}entries
|
|
675
773
|
WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at < ?`,
|
|
@@ -697,19 +795,39 @@ var WikiMemory = class {
|
|
|
697
795
|
await this.db.execAsync(`VACUUM`);
|
|
698
796
|
}
|
|
699
797
|
await this.rebuildMiniSearchIndex(entityId);
|
|
798
|
+
this.vectorCache.delete(entityId);
|
|
799
|
+
const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
|
|
800
|
+
for (const factId of uniqueDeletedIds) {
|
|
801
|
+
try {
|
|
802
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
803
|
+
} catch (hookErr) {
|
|
804
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during prune for ${factId}:`, hookErr);
|
|
805
|
+
}
|
|
806
|
+
}
|
|
700
807
|
return { entries: deletedEntries, tasks: deletedTasks, events: deletedEvents };
|
|
701
808
|
} finally {
|
|
702
809
|
this.activeMaintenanceJobs.delete(pruneKey);
|
|
703
810
|
}
|
|
704
811
|
}
|
|
705
|
-
async read(entityId, query) {
|
|
706
|
-
const
|
|
812
|
+
async read(entityId, query, options) {
|
|
813
|
+
const config = this.options.config;
|
|
814
|
+
const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
|
|
815
|
+
const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
|
|
816
|
+
const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
|
|
817
|
+
const effectivePreFilterLimit = rawPreFilterLimit === void 0 ? void 0 : Number.isFinite(rawPreFilterLimit) ? Math.max(0, Math.trunc(rawPreFilterLimit)) : void 0;
|
|
818
|
+
const hybridWeight = options?.hybridWeight ?? config?.hybridWeight;
|
|
819
|
+
const weight = hybridWeight !== void 0 && !Number.isNaN(hybridWeight) ? Math.max(0, Math.min(1, hybridWeight)) : void 0;
|
|
820
|
+
const skipEmbed = weight === 0;
|
|
707
821
|
const embedFn = this.options.llmProvider.embed;
|
|
708
822
|
const trimmedQuery = query.trim();
|
|
709
823
|
let facts = [];
|
|
710
|
-
if (trimmedQuery) {
|
|
824
|
+
if (maxResults === 0) ; else if (trimmedQuery) {
|
|
711
825
|
let usedEmbed = false;
|
|
712
|
-
if (embedFn) {
|
|
826
|
+
if (!skipEmbed && embedFn) {
|
|
827
|
+
let rankerShouldRethrow = false;
|
|
828
|
+
let pendingRankerFallbackError;
|
|
829
|
+
let usedKeywordFallback = false;
|
|
830
|
+
let scoredAlreadySortedAndLimited = false;
|
|
713
831
|
try {
|
|
714
832
|
const queryVec = await embedFn(trimmedQuery);
|
|
715
833
|
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
@@ -728,51 +846,350 @@ var WikiMemory = class {
|
|
|
728
846
|
);
|
|
729
847
|
}
|
|
730
848
|
}
|
|
731
|
-
const
|
|
732
|
-
`SELECT
|
|
733
|
-
|
|
849
|
+
const mismatchedCount = await this.db.getFirstAsync(
|
|
850
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
851
|
+
WHERE entity_id = ? AND deleted_at IS NULL
|
|
852
|
+
AND embedding_blob IS NOT NULL
|
|
853
|
+
AND (CAST(length(embedding_blob) AS INTEGER) % 4 = 0)
|
|
854
|
+
AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?`,
|
|
855
|
+
[entityId, queryVec.length]
|
|
734
856
|
);
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
857
|
+
if (mismatchedCount && mismatchedCount.cnt > 0) {
|
|
858
|
+
throw new Error(
|
|
859
|
+
`Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
|
|
860
|
+
);
|
|
861
|
+
}
|
|
862
|
+
const useRanker = Boolean(this.options.vectorRanker);
|
|
863
|
+
let candidateRows;
|
|
864
|
+
let populateCache = true;
|
|
865
|
+
let miniSearchScores;
|
|
866
|
+
if (effectivePreFilterLimit !== void 0) {
|
|
867
|
+
populateCache = false;
|
|
868
|
+
const preResults = this.miniSearch.search(trimmedQuery, {
|
|
869
|
+
filter: (r) => r.entity_id === entityId,
|
|
870
|
+
combineWith: "OR"
|
|
871
|
+
});
|
|
872
|
+
if (preResults.length === 0) {
|
|
873
|
+
candidateRows = null;
|
|
874
|
+
} else {
|
|
875
|
+
const topKResults = preResults.slice(0, effectivePreFilterLimit);
|
|
876
|
+
if (topKResults.length === 0) {
|
|
877
|
+
candidateRows = null;
|
|
878
|
+
} else {
|
|
879
|
+
const topKIds = topKResults.map((r) => r.id);
|
|
880
|
+
const inClauseChunkSize = 500;
|
|
881
|
+
if (useRanker) {
|
|
882
|
+
const rows = [];
|
|
883
|
+
for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
|
|
884
|
+
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
885
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
886
|
+
const chunkRows = await this.db.getAllAsync(
|
|
887
|
+
`SELECT id, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
888
|
+
idChunk
|
|
889
|
+
);
|
|
890
|
+
rows.push(...chunkRows);
|
|
891
|
+
}
|
|
892
|
+
candidateRows = rows;
|
|
893
|
+
} else {
|
|
894
|
+
const rows = [];
|
|
895
|
+
for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
|
|
896
|
+
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
897
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
898
|
+
const chunkRows = await this.db.getAllAsync(
|
|
899
|
+
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
900
|
+
idChunk
|
|
901
|
+
);
|
|
902
|
+
rows.push(...chunkRows);
|
|
903
|
+
}
|
|
904
|
+
candidateRows = rows;
|
|
905
|
+
}
|
|
906
|
+
if (weight !== void 0 && weight < 1) {
|
|
907
|
+
const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
|
|
908
|
+
miniSearchScores = new Map(topKResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
742
909
|
}
|
|
743
|
-
} catch {
|
|
744
910
|
}
|
|
745
911
|
}
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
912
|
+
} else {
|
|
913
|
+
if (useRanker) {
|
|
914
|
+
candidateRows = await this.db.getAllAsync(
|
|
915
|
+
`SELECT id, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
916
|
+
[entityId]
|
|
917
|
+
);
|
|
918
|
+
} else {
|
|
919
|
+
candidateRows = await this.db.getAllAsync(
|
|
920
|
+
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
921
|
+
[entityId]
|
|
922
|
+
);
|
|
752
923
|
}
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
924
|
+
if (weight !== void 0 && weight < 1) {
|
|
925
|
+
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
926
|
+
filter: (r) => r.entity_id === entityId,
|
|
927
|
+
combineWith: "OR"
|
|
928
|
+
});
|
|
929
|
+
const maxMsScore = Math.max(1, msResults[0]?.score ?? 1);
|
|
930
|
+
miniSearchScores = new Map(msResults.map((r) => [r.id, r.score / maxMsScore]));
|
|
931
|
+
}
|
|
932
|
+
}
|
|
933
|
+
if (candidateRows === null) {
|
|
934
|
+
usedEmbed = true;
|
|
935
|
+
} else {
|
|
936
|
+
let scored;
|
|
937
|
+
if (useRanker) {
|
|
938
|
+
const candidateIds = effectivePreFilterLimit !== void 0 ? candidateRows.map((r) => r.id) : void 0;
|
|
939
|
+
try {
|
|
940
|
+
const oversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
941
|
+
scored = await this._rankWithVectorRanker({
|
|
942
|
+
entityId,
|
|
943
|
+
queryVec,
|
|
944
|
+
candidateIds,
|
|
945
|
+
weight,
|
|
946
|
+
miniSearchScores,
|
|
947
|
+
limit: oversampledLimit
|
|
948
|
+
});
|
|
949
|
+
if (scored.length > 0) {
|
|
950
|
+
const scoredIds2 = new Set(scored.map((s) => s.id));
|
|
951
|
+
const metaMap = /* @__PURE__ */ new Map();
|
|
952
|
+
for (const r of candidateRows) {
|
|
953
|
+
if (scoredIds2.has(r.id)) {
|
|
954
|
+
metaMap.set(r.id, { updated_at: r.updated_at, access_count: r.access_count });
|
|
955
|
+
}
|
|
956
|
+
}
|
|
957
|
+
scored = scored.map((s) => {
|
|
958
|
+
const meta = metaMap.get(s.id);
|
|
959
|
+
return { ...s, updated_at: meta?.updated_at ?? null, access_count: meta?.access_count ?? null };
|
|
960
|
+
});
|
|
961
|
+
}
|
|
962
|
+
const scoredIds = new Set(scored.map((s) => s.id));
|
|
963
|
+
const isHybrid = weight !== void 0 && weight < 1;
|
|
964
|
+
const maxBackfill = isHybrid ? maxResults : Math.max(0, maxResults - scored.length);
|
|
965
|
+
if (maxBackfill > 0) {
|
|
966
|
+
if (isHybrid) {
|
|
967
|
+
const topK = [];
|
|
968
|
+
for (const row of candidateRows) {
|
|
969
|
+
if (scoredIds.has(row.id)) continue;
|
|
970
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
971
|
+
const candidate = { row, kwScore };
|
|
972
|
+
if (topK.length < maxBackfill) {
|
|
973
|
+
let insertIdx = topK.length;
|
|
974
|
+
for (let i = 0; i < topK.length; i++) {
|
|
975
|
+
const cmp = this._compareScoredRows(
|
|
976
|
+
{
|
|
977
|
+
id: candidate.row.id,
|
|
978
|
+
score: candidate.kwScore,
|
|
979
|
+
updated_at: candidate.row.updated_at,
|
|
980
|
+
access_count: candidate.row.access_count
|
|
981
|
+
},
|
|
982
|
+
{
|
|
983
|
+
id: topK[i].row.id,
|
|
984
|
+
score: topK[i].kwScore,
|
|
985
|
+
updated_at: topK[i].row.updated_at,
|
|
986
|
+
access_count: topK[i].row.access_count
|
|
987
|
+
}
|
|
988
|
+
);
|
|
989
|
+
if (cmp < 0) {
|
|
990
|
+
insertIdx = i;
|
|
991
|
+
break;
|
|
992
|
+
}
|
|
993
|
+
}
|
|
994
|
+
topK.splice(insertIdx, 0, candidate);
|
|
995
|
+
} else {
|
|
996
|
+
const cmpWorst = this._compareScoredRows(
|
|
997
|
+
{
|
|
998
|
+
id: candidate.row.id,
|
|
999
|
+
score: candidate.kwScore,
|
|
1000
|
+
updated_at: candidate.row.updated_at,
|
|
1001
|
+
access_count: candidate.row.access_count
|
|
1002
|
+
},
|
|
1003
|
+
{
|
|
1004
|
+
id: topK[maxBackfill - 1].row.id,
|
|
1005
|
+
score: topK[maxBackfill - 1].kwScore,
|
|
1006
|
+
updated_at: topK[maxBackfill - 1].row.updated_at,
|
|
1007
|
+
access_count: topK[maxBackfill - 1].row.access_count
|
|
1008
|
+
}
|
|
1009
|
+
);
|
|
1010
|
+
if (cmpWorst < 0) {
|
|
1011
|
+
let insertIdx = maxBackfill - 1;
|
|
1012
|
+
for (let i = 0; i < topK.length; i++) {
|
|
1013
|
+
const cmp = this._compareScoredRows(
|
|
1014
|
+
{
|
|
1015
|
+
id: candidate.row.id,
|
|
1016
|
+
score: candidate.kwScore,
|
|
1017
|
+
updated_at: candidate.row.updated_at,
|
|
1018
|
+
access_count: candidate.row.access_count
|
|
1019
|
+
},
|
|
1020
|
+
{
|
|
1021
|
+
id: topK[i].row.id,
|
|
1022
|
+
score: topK[i].kwScore,
|
|
1023
|
+
updated_at: topK[i].row.updated_at,
|
|
1024
|
+
access_count: topK[i].row.access_count
|
|
1025
|
+
}
|
|
1026
|
+
);
|
|
1027
|
+
if (cmp < 0) {
|
|
1028
|
+
insertIdx = i;
|
|
1029
|
+
break;
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
topK.splice(insertIdx, 0, candidate);
|
|
1033
|
+
topK.pop();
|
|
1034
|
+
}
|
|
1035
|
+
}
|
|
1036
|
+
}
|
|
1037
|
+
for (const { row, kwScore } of topK) {
|
|
1038
|
+
scored.push({
|
|
1039
|
+
id: row.id,
|
|
1040
|
+
score: (1 - weight) * kwScore,
|
|
1041
|
+
updated_at: row.updated_at,
|
|
1042
|
+
access_count: row.access_count
|
|
1043
|
+
});
|
|
1044
|
+
}
|
|
1045
|
+
} else {
|
|
1046
|
+
const omitted = [];
|
|
1047
|
+
for (const row of candidateRows) {
|
|
1048
|
+
if (scoredIds.has(row.id)) continue;
|
|
1049
|
+
omitted.push({ id: row.id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
|
|
1050
|
+
}
|
|
1051
|
+
if (omitted.length > 0) {
|
|
1052
|
+
this._tieBreakSort(omitted);
|
|
1053
|
+
scored.push(...omitted.slice(0, maxBackfill));
|
|
1054
|
+
}
|
|
1055
|
+
}
|
|
1056
|
+
}
|
|
1057
|
+
} catch (rankerErr) {
|
|
1058
|
+
const rankerError = rankerErr instanceof Error ? rankerErr : new Error(String(rankerErr));
|
|
1059
|
+
const policy = this.options.vectorRankerFallback ?? "js-cosine";
|
|
1060
|
+
this.options.onVectorRankerFallback?.({ error: rankerError, policy });
|
|
1061
|
+
if (policy === "throw") {
|
|
1062
|
+
rankerShouldRethrow = true;
|
|
1063
|
+
throw rankerError;
|
|
1064
|
+
} else if (policy === "js-cosine") {
|
|
1065
|
+
let fallbackRows = candidateRows;
|
|
1066
|
+
if (fallbackRows && fallbackRows.length > 0 && !("embedding_blob" in fallbackRows[0])) {
|
|
1067
|
+
const rowIds = fallbackRows.map((r) => r.id);
|
|
1068
|
+
const embeddingsMap = /* @__PURE__ */ new Map();
|
|
1069
|
+
const chunkSize = 500;
|
|
1070
|
+
for (let i = 0; i < rowIds.length; i += chunkSize) {
|
|
1071
|
+
const idChunk = rowIds.slice(i, i + chunkSize);
|
|
1072
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1073
|
+
const embeddingRows = await this.db.getAllAsync(
|
|
1074
|
+
`SELECT id, embedding_blob, embedding FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1075
|
+
[...idChunk, entityId]
|
|
1076
|
+
);
|
|
1077
|
+
for (const row of embeddingRows) {
|
|
1078
|
+
embeddingsMap.set(row.id, { embedding_blob: row.embedding_blob, embedding: row.embedding });
|
|
1079
|
+
}
|
|
1080
|
+
}
|
|
1081
|
+
fallbackRows = fallbackRows.map((r) => ({
|
|
1082
|
+
...r,
|
|
1083
|
+
embedding_blob: embeddingsMap.get(r.id)?.embedding_blob ?? null,
|
|
1084
|
+
embedding: embeddingsMap.get(r.id)?.embedding ?? null
|
|
1085
|
+
}));
|
|
1086
|
+
}
|
|
1087
|
+
scored = await this._rankWithJsCosine({
|
|
1088
|
+
entityId,
|
|
1089
|
+
queryVec,
|
|
1090
|
+
candidateRows: fallbackRows,
|
|
1091
|
+
weight,
|
|
1092
|
+
miniSearchScores,
|
|
1093
|
+
populateCache,
|
|
1094
|
+
limit: maxResults
|
|
1095
|
+
});
|
|
1096
|
+
scoredAlreadySortedAndLimited = true;
|
|
1097
|
+
} else if (policy === "keyword") {
|
|
1098
|
+
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
1099
|
+
filter: (r) => r.entity_id === entityId,
|
|
1100
|
+
combineWith: "OR"
|
|
1101
|
+
});
|
|
1102
|
+
const topResults = msResults.slice(0, maxResults);
|
|
1103
|
+
const resultIds = new Set(topResults.map((r) => r.id));
|
|
1104
|
+
const candidateMap = /* @__PURE__ */ new Map();
|
|
1105
|
+
for (const r of candidateRows) {
|
|
1106
|
+
if (resultIds.has(r.id)) {
|
|
1107
|
+
candidateMap.set(r.id, { updated_at: r.updated_at, access_count: r.access_count });
|
|
1108
|
+
}
|
|
1109
|
+
}
|
|
1110
|
+
scored = topResults.map((r) => {
|
|
1111
|
+
const meta = candidateMap.get(r.id);
|
|
1112
|
+
return {
|
|
1113
|
+
id: r.id,
|
|
1114
|
+
score: r.score ?? 0,
|
|
1115
|
+
access_count: meta?.access_count ?? null,
|
|
1116
|
+
updated_at: meta?.updated_at ?? null
|
|
1117
|
+
};
|
|
1118
|
+
});
|
|
1119
|
+
usedKeywordFallback = true;
|
|
1120
|
+
} else {
|
|
1121
|
+
scored = [];
|
|
1122
|
+
}
|
|
1123
|
+
if (this.options.propagateRankerFailureToRetrievalFallback) {
|
|
1124
|
+
const mirrored = new Error("Vector ranker failed, falling back");
|
|
1125
|
+
mirrored.cause = rankerError;
|
|
1126
|
+
pendingRankerFallbackError = mirrored;
|
|
1127
|
+
}
|
|
1128
|
+
}
|
|
1129
|
+
} else {
|
|
1130
|
+
scored = await this._rankWithJsCosine({
|
|
1131
|
+
entityId,
|
|
1132
|
+
queryVec,
|
|
1133
|
+
candidateRows,
|
|
1134
|
+
weight,
|
|
1135
|
+
miniSearchScores,
|
|
1136
|
+
populateCache,
|
|
1137
|
+
limit: maxResults
|
|
1138
|
+
});
|
|
1139
|
+
scoredAlreadySortedAndLimited = true;
|
|
756
1140
|
}
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
1141
|
+
if (scored.length > 0) {
|
|
1142
|
+
if (!usedKeywordFallback && !scoredAlreadySortedAndLimited) {
|
|
1143
|
+
this._tieBreakSort(scored);
|
|
1144
|
+
}
|
|
1145
|
+
const topIds = (scoredAlreadySortedAndLimited ? scored : scored.slice(0, maxResults)).map((s) => s.id);
|
|
1146
|
+
if (topIds.length > 0) {
|
|
1147
|
+
const fullRows = [];
|
|
1148
|
+
const phase2ChunkSize = 500;
|
|
1149
|
+
for (let i = 0; i < topIds.length; i += phase2ChunkSize) {
|
|
1150
|
+
const idChunk = topIds.slice(i, i + phase2ChunkSize);
|
|
1151
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1152
|
+
const chunkRows = await this.db.getAllAsync(
|
|
1153
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1154
|
+
[...idChunk, entityId]
|
|
1155
|
+
);
|
|
1156
|
+
fullRows.push(...chunkRows);
|
|
1157
|
+
}
|
|
1158
|
+
const byId = new Map(fullRows.map((r) => [r.id, r]));
|
|
1159
|
+
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
1160
|
+
if (facts.length < topIds.length) {
|
|
1161
|
+
const missingIds = topIds.filter((id) => !byId.has(id));
|
|
1162
|
+
const missingCount = missingIds.length;
|
|
1163
|
+
const sample = missingIds.slice(0, 5);
|
|
1164
|
+
const sampleSuffix = sample.length > 0 ? ` Missing ID sample: ${sample.join(", ")}${missingIds.length > sample.length ? ", ..." : ""}.` : "";
|
|
1165
|
+
const error = new Error(
|
|
1166
|
+
`Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs for entity ${entityId}. Rows may have been concurrently soft-deleted or filtered by deleted_at during hydration, or vector ranker output may include IDs that do not exist for this entity.` + sampleSuffix
|
|
1167
|
+
);
|
|
1168
|
+
this.options.onRetrievalFallback?.(error);
|
|
1169
|
+
}
|
|
1170
|
+
}
|
|
1171
|
+
if (pendingRankerFallbackError) {
|
|
1172
|
+
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
1173
|
+
pendingRankerFallbackError = void 0;
|
|
1174
|
+
}
|
|
1175
|
+
usedEmbed = true;
|
|
1176
|
+
} else {
|
|
1177
|
+
if (pendingRankerFallbackError) {
|
|
1178
|
+
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
1179
|
+
pendingRankerFallbackError = void 0;
|
|
1180
|
+
}
|
|
1181
|
+
usedEmbed = true;
|
|
760
1182
|
}
|
|
761
|
-
return a.row.id.localeCompare(b.row.id);
|
|
762
|
-
});
|
|
763
|
-
const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
|
|
764
|
-
if (topIds.length > 0) {
|
|
765
|
-
const placeholders = topIds.map(() => "?").join(",");
|
|
766
|
-
const fullRows = await this.db.getAllAsync(
|
|
767
|
-
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
768
|
-
topIds
|
|
769
|
-
);
|
|
770
|
-
const byId = new Map(fullRows.map((r) => [r.id, r]));
|
|
771
|
-
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
772
1183
|
}
|
|
773
|
-
usedEmbed = true;
|
|
774
1184
|
} catch (err) {
|
|
775
1185
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
1186
|
+
if (rankerShouldRethrow) {
|
|
1187
|
+
throw error;
|
|
1188
|
+
}
|
|
1189
|
+
if (pendingRankerFallbackError) {
|
|
1190
|
+
error.cause = pendingRankerFallbackError;
|
|
1191
|
+
pendingRankerFallbackError = void 0;
|
|
1192
|
+
}
|
|
776
1193
|
this.options.onRetrievalFallback?.(error);
|
|
777
1194
|
}
|
|
778
1195
|
}
|
|
@@ -783,25 +1200,35 @@ var WikiMemory = class {
|
|
|
783
1200
|
});
|
|
784
1201
|
const topIds = results.slice(0, maxResults).map((r) => r.id);
|
|
785
1202
|
if (topIds.length > 0) {
|
|
786
|
-
const
|
|
787
|
-
const
|
|
788
|
-
|
|
789
|
-
topIds
|
|
790
|
-
|
|
791
|
-
|
|
1203
|
+
const kwRows = [];
|
|
1204
|
+
const kwChunkSize = 500;
|
|
1205
|
+
for (let i = 0; i < topIds.length; i += kwChunkSize) {
|
|
1206
|
+
const idChunk = topIds.slice(i, i + kwChunkSize);
|
|
1207
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1208
|
+
const chunkRows = await this.db.getAllAsync(
|
|
1209
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1210
|
+
[...idChunk, entityId]
|
|
1211
|
+
);
|
|
1212
|
+
kwRows.push(...chunkRows);
|
|
1213
|
+
}
|
|
1214
|
+
const byId = new Map(kwRows.map((r) => [r.id, r]));
|
|
792
1215
|
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
793
1216
|
}
|
|
794
1217
|
}
|
|
795
1218
|
if (facts.length > 0) {
|
|
796
1219
|
const ids = facts.map((f) => f.id);
|
|
797
|
-
const placeholders = ids.map(() => "?").join(",");
|
|
798
1220
|
const now = Date.now();
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
1221
|
+
const accessChunkSize = 500;
|
|
1222
|
+
for (let i = 0; i < ids.length; i += accessChunkSize) {
|
|
1223
|
+
const idChunk = ids.slice(i, i + accessChunkSize);
|
|
1224
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1225
|
+
await this.db.runAsync(
|
|
1226
|
+
`UPDATE ${this.prefix}entries
|
|
1227
|
+
SET access_count = access_count + 1, last_accessed_at = ?
|
|
1228
|
+
WHERE id IN (${placeholders})`,
|
|
1229
|
+
[now, ...idChunk]
|
|
1230
|
+
);
|
|
1231
|
+
}
|
|
805
1232
|
}
|
|
806
1233
|
} else {
|
|
807
1234
|
facts = await this.db.getAllAsync(
|
|
@@ -828,7 +1255,7 @@ var WikiMemory = class {
|
|
|
828
1255
|
)
|
|
829
1256
|
]);
|
|
830
1257
|
const parsedFacts = facts.map((f) => {
|
|
831
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1258
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
832
1259
|
return {
|
|
833
1260
|
...rest,
|
|
834
1261
|
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
@@ -836,6 +1263,113 @@ var WikiMemory = class {
|
|
|
836
1263
|
});
|
|
837
1264
|
return { facts: parsedFacts, tasks, events: events.reverse() };
|
|
838
1265
|
}
|
|
1266
|
+
/**
|
|
1267
|
+
* Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
|
|
1268
|
+
*/
|
|
1269
|
+
_tieBreakSort(items) {
|
|
1270
|
+
items.sort((a, b) => this._compareScoredRows(a, b));
|
|
1271
|
+
}
|
|
1272
|
+
/**
|
|
1273
|
+
* Comparator for score + deterministic tie-break fields.
|
|
1274
|
+
* Negative return means "a ranks ahead of b" for descending score order.
|
|
1275
|
+
*/
|
|
1276
|
+
_compareScoredRows(a, b) {
|
|
1277
|
+
const scoreDiff = b.score - a.score;
|
|
1278
|
+
if (scoreDiff !== 0) return scoreDiff;
|
|
1279
|
+
const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
|
|
1280
|
+
if (accessCountDiff !== 0) return accessCountDiff;
|
|
1281
|
+
const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
|
|
1282
|
+
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
1283
|
+
return a.id.localeCompare(b.id);
|
|
1284
|
+
}
|
|
1285
|
+
/**
|
|
1286
|
+
* Score candidate rows using in-process JS cosine similarity.
|
|
1287
|
+
* Applies hybrid blending (if weight set) and tie-break sorting before returning.
|
|
1288
|
+
*/
|
|
1289
|
+
async _rankWithJsCosine(args) {
|
|
1290
|
+
const { entityId, queryVec, candidateRows, weight, miniSearchScores, populateCache, limit } = args;
|
|
1291
|
+
let entityCache = this.vectorCache.get(entityId);
|
|
1292
|
+
const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
1293
|
+
if (tooLarge && entityCache) {
|
|
1294
|
+
this.vectorCache.delete(entityId);
|
|
1295
|
+
entityCache = void 0;
|
|
1296
|
+
}
|
|
1297
|
+
const canCache = populateCache && !tooLarge;
|
|
1298
|
+
if (canCache && !entityCache) {
|
|
1299
|
+
entityCache = /* @__PURE__ */ new Map();
|
|
1300
|
+
}
|
|
1301
|
+
const scored = candidateRows.map((row) => {
|
|
1302
|
+
let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
|
|
1303
|
+
if (vector && canCache && entityCache && !entityCache.has(row.id)) {
|
|
1304
|
+
entityCache.set(row.id, vector);
|
|
1305
|
+
}
|
|
1306
|
+
let score = 0;
|
|
1307
|
+
if (vector && vector.length === queryVec.length) {
|
|
1308
|
+
const cosSim = cosineSimilarity(queryVec, vector);
|
|
1309
|
+
if (weight !== void 0) {
|
|
1310
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1311
|
+
score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
|
|
1312
|
+
} else {
|
|
1313
|
+
score = cosSim;
|
|
1314
|
+
}
|
|
1315
|
+
} else if (weight !== void 0 && weight < 1) {
|
|
1316
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1317
|
+
score = (1 - weight) * kwScore;
|
|
1318
|
+
} else {
|
|
1319
|
+
score = -2;
|
|
1320
|
+
}
|
|
1321
|
+
return { id: row.id, score, updated_at: row.updated_at, access_count: row.access_count };
|
|
1322
|
+
});
|
|
1323
|
+
if (canCache && entityCache && entityCache.size > 0) {
|
|
1324
|
+
if (!this.vectorCache.has(entityId)) {
|
|
1325
|
+
if (this.vectorCache.size >= _WikiMemory.MAX_VECTOR_CACHE_ENTITIES) {
|
|
1326
|
+
const oldestKey = this.vectorCache.keys().next().value;
|
|
1327
|
+
if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
|
|
1328
|
+
}
|
|
1329
|
+
this.vectorCache.set(entityId, entityCache);
|
|
1330
|
+
}
|
|
1331
|
+
}
|
|
1332
|
+
this._tieBreakSort(scored);
|
|
1333
|
+
return scored.slice(0, limit);
|
|
1334
|
+
}
|
|
1335
|
+
/**
|
|
1336
|
+
* Delegate semantic ranking to the injected VectorRanker.
|
|
1337
|
+
* Caller should pass an oversampledLimit to preserve recall after re-ranking.
|
|
1338
|
+
* Returns scored results ready for hybrid blending and tie-break sorting.
|
|
1339
|
+
*/
|
|
1340
|
+
async _rankWithVectorRanker(args) {
|
|
1341
|
+
const { entityId, queryVec, candidateIds, weight, miniSearchScores, limit } = args;
|
|
1342
|
+
const ranker = this.options.vectorRanker;
|
|
1343
|
+
if (!ranker) {
|
|
1344
|
+
throw new Error("vectorRanker not configured");
|
|
1345
|
+
}
|
|
1346
|
+
const rankerResults = await ranker.rankBySimilarity({
|
|
1347
|
+
entityId,
|
|
1348
|
+
queryVec,
|
|
1349
|
+
candidateIds,
|
|
1350
|
+
limit
|
|
1351
|
+
});
|
|
1352
|
+
const allowedIds = candidateIds ? new Set(candidateIds) : void 0;
|
|
1353
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1354
|
+
const normalized = [];
|
|
1355
|
+
for (const r of rankerResults) {
|
|
1356
|
+
if (normalized.length >= limit) break;
|
|
1357
|
+
if (seen.has(r.id)) continue;
|
|
1358
|
+
if (allowedIds && !allowedIds.has(r.id)) continue;
|
|
1359
|
+
if (!Number.isFinite(r.semanticScore)) continue;
|
|
1360
|
+
seen.add(r.id);
|
|
1361
|
+
normalized.push(r);
|
|
1362
|
+
}
|
|
1363
|
+
const scored = normalized.map((r) => {
|
|
1364
|
+
let score = r.semanticScore;
|
|
1365
|
+
if (weight !== void 0) {
|
|
1366
|
+
const kwScore = miniSearchScores?.get(r.id) ?? 0;
|
|
1367
|
+
score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
|
|
1368
|
+
}
|
|
1369
|
+
return { id: r.id, score };
|
|
1370
|
+
});
|
|
1371
|
+
return scored;
|
|
1372
|
+
}
|
|
839
1373
|
async getMemoryBundle(entityId) {
|
|
840
1374
|
return this._getFullBundle(entityId, { maxEvents: 10 });
|
|
841
1375
|
}
|
|
@@ -860,7 +1394,7 @@ var WikiMemory = class {
|
|
|
860
1394
|
if (memoryCheckpoint > count) memoryCheckpoint = 0;
|
|
861
1395
|
if (count - memoryCheckpoint >= threshold) {
|
|
862
1396
|
const jobKey = this._librarianKey(entityId);
|
|
863
|
-
if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1397
|
+
if (!this.activeMaintenanceJobs.has(jobKey) && !this.activeMaintenanceJobs.has(this._pruneKey(entityId)) && !this._isReembedActive(entityId) && !this._isImportActiveFor(entityId) && !this._isForgetActiveFor(entityId)) {
|
|
864
1398
|
this.activeMaintenanceJobs.add(jobKey);
|
|
865
1399
|
this.runLibrarianThenMaybeHeal(entityId, count).catch(console.error).finally(() => this.activeMaintenanceJobs.delete(jobKey));
|
|
866
1400
|
}
|
|
@@ -908,7 +1442,7 @@ var WikiMemory = class {
|
|
|
908
1442
|
LIMIT 100
|
|
909
1443
|
`, [entityId]);
|
|
910
1444
|
const currentFacts = currentFactsRows.map((f) => {
|
|
911
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1445
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
912
1446
|
return {
|
|
913
1447
|
...rest,
|
|
914
1448
|
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
@@ -952,7 +1486,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
952
1486
|
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
|
|
953
1487
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
954
1488
|
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
|
|
955
|
-
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1489
|
+
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
956
1490
|
}
|
|
957
1491
|
for (const task of validTasks) {
|
|
958
1492
|
const id = generateId("task_");
|
|
@@ -962,10 +1496,12 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
962
1496
|
`, [id, entityId, task.description, "pending", task.priority, now, now]);
|
|
963
1497
|
}
|
|
964
1498
|
});
|
|
1499
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1500
|
+
this.vectorCache.delete(entityId);
|
|
965
1501
|
for (const fact of insertedFacts) {
|
|
966
1502
|
await this.embedFact(fact);
|
|
967
1503
|
}
|
|
968
|
-
|
|
1504
|
+
this.vectorCache.delete(entityId);
|
|
969
1505
|
}
|
|
970
1506
|
async _doRunHeal(entityId) {
|
|
971
1507
|
const now = Date.now();
|
|
@@ -1003,7 +1539,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
1003
1539
|
const documentAnchors = allFactsRows.filter((f) => f.source_type === "user_document").map(({ id, title, source_ref }) => ({ id, title, source_ref }));
|
|
1004
1540
|
const userPrompt = `Heal Candidates:
|
|
1005
1541
|
${JSON.stringify(healCandidates.map((f) => {
|
|
1006
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1542
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
1007
1543
|
return { ...rest, tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags };
|
|
1008
1544
|
}), null, 2)}
|
|
1009
1545
|
|
|
@@ -1030,6 +1566,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1030
1566
|
const safeDeleted = deleted.filter((id) => mutableIds.has(id));
|
|
1031
1567
|
const validNewFacts = newFacts.map(validateFact).filter((f) => f !== null);
|
|
1032
1568
|
const insertedFacts = [];
|
|
1569
|
+
const uniqueDeletedFactIds = Array.from(new Set(safeDeleted));
|
|
1033
1570
|
await this.db.withTransactionAsync(async () => {
|
|
1034
1571
|
for (const id of safeDowngraded) {
|
|
1035
1572
|
await this.db.runAsync(`UPDATE ${this.prefix}entries SET confidence = 'tentative', updated_at = ? WHERE id = ? AND entity_id = ?`, [now, id, entityId]);
|
|
@@ -1043,13 +1580,22 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1043
1580
|
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
|
|
1044
1581
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
1045
1582
|
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
|
|
1046
|
-
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1583
|
+
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1047
1584
|
}
|
|
1048
1585
|
});
|
|
1586
|
+
this.vectorCache.delete(entityId);
|
|
1587
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
1588
|
+
for (const factId of uniqueDeletedFactIds) {
|
|
1589
|
+
try {
|
|
1590
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
1591
|
+
} catch (hookErr) {
|
|
1592
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal for ${factId}:`, hookErr);
|
|
1593
|
+
}
|
|
1594
|
+
}
|
|
1049
1595
|
for (const fact of insertedFacts) {
|
|
1050
1596
|
await this.embedFact(fact);
|
|
1051
1597
|
}
|
|
1052
|
-
|
|
1598
|
+
this.vectorCache.delete(entityId);
|
|
1053
1599
|
}
|
|
1054
1600
|
async runLibrarian(entityId) {
|
|
1055
1601
|
const jobKey = this._librarianKey(entityId);
|
|
@@ -1062,6 +1608,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1062
1608
|
if (this._isReembedActive(entityId)) {
|
|
1063
1609
|
throw new WikiBusyError("reembed", entityId);
|
|
1064
1610
|
}
|
|
1611
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1612
|
+
throw new WikiBusyError("import", entityId);
|
|
1613
|
+
}
|
|
1614
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1615
|
+
throw new WikiBusyError("forget", entityId);
|
|
1616
|
+
}
|
|
1065
1617
|
this.activeMaintenanceJobs.add(jobKey);
|
|
1066
1618
|
try {
|
|
1067
1619
|
await this._doRunLibrarian(entityId);
|
|
@@ -1080,6 +1632,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1080
1632
|
if (this._isReembedActive(entityId)) {
|
|
1081
1633
|
throw new WikiBusyError("reembed", entityId);
|
|
1082
1634
|
}
|
|
1635
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1636
|
+
throw new WikiBusyError("import", entityId);
|
|
1637
|
+
}
|
|
1638
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1639
|
+
throw new WikiBusyError("forget", entityId);
|
|
1640
|
+
}
|
|
1083
1641
|
this.activeMaintenanceJobs.add(jobKey);
|
|
1084
1642
|
try {
|
|
1085
1643
|
await this._doRunHeal(entityId);
|
|
@@ -1087,9 +1645,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1087
1645
|
this.activeMaintenanceJobs.delete(jobKey);
|
|
1088
1646
|
}
|
|
1089
1647
|
}
|
|
1090
|
-
async runReembed(entityId) {
|
|
1648
|
+
async runReembed(entityId, opts) {
|
|
1091
1649
|
const embedFn = this.options.llmProvider.embed;
|
|
1092
|
-
if (!embedFn) return { embedded: 0, skipped: 0 };
|
|
1650
|
+
if (!embedFn) return { embedded: 0, skipped: 0, failed: 0 };
|
|
1093
1651
|
const reembedKey = entityId ? this._reembedKey(entityId) : this._globalReembedKey();
|
|
1094
1652
|
if (this.activeMaintenanceJobs.has(reembedKey)) {
|
|
1095
1653
|
throw new WikiBusyError("reembed", entityId ?? "*");
|
|
@@ -1110,6 +1668,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1110
1668
|
if (this._isIngestActiveFor(entityId)) {
|
|
1111
1669
|
throw new WikiBusyError("ingest", entityId);
|
|
1112
1670
|
}
|
|
1671
|
+
if (this._isImportActiveFor(entityId)) {
|
|
1672
|
+
throw new WikiBusyError("import", entityId);
|
|
1673
|
+
}
|
|
1674
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1675
|
+
throw new WikiBusyError("forget", entityId);
|
|
1676
|
+
}
|
|
1113
1677
|
} else {
|
|
1114
1678
|
if (this._isAnyMaintenanceActiveWithSuffix(":reembed")) {
|
|
1115
1679
|
throw new WikiBusyError("reembed", "*");
|
|
@@ -1126,6 +1690,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1126
1690
|
if (this.activeIngestJobs.size > 0) {
|
|
1127
1691
|
throw new WikiBusyError("ingest", "*");
|
|
1128
1692
|
}
|
|
1693
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":import")) {
|
|
1694
|
+
throw new WikiBusyError("import", "*");
|
|
1695
|
+
}
|
|
1696
|
+
if (this._isAnyMaintenanceActiveWithSuffix(":forget")) {
|
|
1697
|
+
throw new WikiBusyError("forget", "*");
|
|
1698
|
+
}
|
|
1129
1699
|
}
|
|
1130
1700
|
this.activeMaintenanceJobs.add(reembedKey);
|
|
1131
1701
|
try {
|
|
@@ -1135,17 +1705,64 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1135
1705
|
`SELECT * FROM ${this.prefix}entries WHERE ${where}`,
|
|
1136
1706
|
params
|
|
1137
1707
|
);
|
|
1708
|
+
if (entityId) {
|
|
1709
|
+
this.vectorCache.delete(entityId);
|
|
1710
|
+
} else {
|
|
1711
|
+
this.vectorCache.clear();
|
|
1712
|
+
}
|
|
1713
|
+
const skipExisting = opts?.skipExisting ?? false;
|
|
1714
|
+
let effectiveSkip = skipExisting;
|
|
1715
|
+
if (skipExisting) {
|
|
1716
|
+
const mismatchRow = await this.db.getFirstAsync(
|
|
1717
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
1718
|
+
);
|
|
1719
|
+
if (mismatchRow) {
|
|
1720
|
+
if (entityId) {
|
|
1721
|
+
const mismatchDim = parseInt(mismatchRow.value, 10);
|
|
1722
|
+
const staleForEntity = await this.db.getFirstAsync(
|
|
1723
|
+
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
1724
|
+
WHERE entity_id = ? AND deleted_at IS NULL
|
|
1725
|
+
AND (
|
|
1726
|
+
embedding_blob IS NULL
|
|
1727
|
+
OR (CAST(length(embedding_blob) AS INTEGER) / 4) != ?
|
|
1728
|
+
)`,
|
|
1729
|
+
[entityId, mismatchDim]
|
|
1730
|
+
);
|
|
1731
|
+
if (staleForEntity && staleForEntity.cnt > 0) effectiveSkip = false;
|
|
1732
|
+
} else {
|
|
1733
|
+
effectiveSkip = false;
|
|
1734
|
+
}
|
|
1735
|
+
}
|
|
1736
|
+
}
|
|
1138
1737
|
let embedded = 0;
|
|
1139
1738
|
let skipped = 0;
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1739
|
+
let failed = 0;
|
|
1740
|
+
try {
|
|
1741
|
+
for (const row of rows) {
|
|
1742
|
+
const existingBlob = row.embedding_blob;
|
|
1743
|
+
const blobIsValid = !!existingBlob && existingBlob.byteLength > 0 && existingBlob.byteLength % 4 === 0;
|
|
1744
|
+
if (effectiveSkip && blobIsValid) {
|
|
1745
|
+
const vec = parseEmbedding(existingBlob, null);
|
|
1746
|
+
if (vec !== null && vec.every((v) => Number.isFinite(v))) {
|
|
1747
|
+
skipped++;
|
|
1748
|
+
continue;
|
|
1749
|
+
}
|
|
1750
|
+
}
|
|
1751
|
+
const success = await this.embedFact(row);
|
|
1752
|
+
if (success) embedded++;
|
|
1753
|
+
else failed++;
|
|
1754
|
+
}
|
|
1755
|
+
if (embedded > 0) {
|
|
1756
|
+
await this._reconcileEmbeddingDimension();
|
|
1757
|
+
}
|
|
1758
|
+
} finally {
|
|
1759
|
+
if (entityId) {
|
|
1760
|
+
this.vectorCache.delete(entityId);
|
|
1761
|
+
} else {
|
|
1762
|
+
this.vectorCache.clear();
|
|
1763
|
+
}
|
|
1147
1764
|
}
|
|
1148
|
-
return { embedded, skipped };
|
|
1765
|
+
return { embedded, skipped, failed };
|
|
1149
1766
|
} finally {
|
|
1150
1767
|
this.activeMaintenanceJobs.delete(reembedKey);
|
|
1151
1768
|
}
|
|
@@ -1165,6 +1782,9 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1165
1782
|
heal: this.activeMaintenanceJobs.has(this._healKey(entityId))
|
|
1166
1783
|
};
|
|
1167
1784
|
}
|
|
1785
|
+
clearVectorCache() {
|
|
1786
|
+
this.vectorCache.clear();
|
|
1787
|
+
}
|
|
1168
1788
|
async _getFullBundle(entityId, opts) {
|
|
1169
1789
|
const maxEvents = opts?.maxEvents;
|
|
1170
1790
|
const eventsQuery = maxEvents != null ? `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at DESC LIMIT ?` : `SELECT * FROM ${this.prefix}events WHERE entity_id = ? ORDER BY created_at ASC`;
|
|
@@ -1181,10 +1801,16 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1181
1801
|
this.db.getAllAsync(eventsQuery, eventsParams)
|
|
1182
1802
|
]);
|
|
1183
1803
|
const facts = factsRaw.map((f) => {
|
|
1184
|
-
const { embedding: _embedding, ...rest } = f;
|
|
1804
|
+
const { embedding: _embedding, embedding_blob, ...rest } = f;
|
|
1805
|
+
const safeBlobCopy = opts?.includeBlobs && embedding_blob ? (() => {
|
|
1806
|
+
const c = new ArrayBuffer(embedding_blob.byteLength);
|
|
1807
|
+
new Uint8Array(c).set(embedding_blob);
|
|
1808
|
+
return new Uint8Array(c);
|
|
1809
|
+
})() : void 0;
|
|
1810
|
+
const factBase = safeBlobCopy ? { ...rest, embedding_blob: safeBlobCopy } : rest;
|
|
1185
1811
|
return {
|
|
1186
|
-
...
|
|
1187
|
-
tags: typeof
|
|
1812
|
+
...factBase,
|
|
1813
|
+
tags: typeof factBase.tags === "string" ? JSON.parse(factBase.tags) : factBase.tags
|
|
1188
1814
|
};
|
|
1189
1815
|
});
|
|
1190
1816
|
const events = maxEvents != null ? eventsRaw.slice().reverse() : eventsRaw;
|
|
@@ -1211,7 +1837,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1211
1837
|
for (let i = 0; i < ids.length; i += BATCH) {
|
|
1212
1838
|
const batch = ids.slice(i, i + BATCH);
|
|
1213
1839
|
const batchResults = await Promise.all(
|
|
1214
|
-
batch.map(async (id) => [id, await this._getFullBundle(id)])
|
|
1840
|
+
batch.map(async (id) => [id, await this._getFullBundle(id, { includeBlobs: true })])
|
|
1215
1841
|
);
|
|
1216
1842
|
for (const [id, bundle] of batchResults) {
|
|
1217
1843
|
entities[id] = bundle;
|
|
@@ -1221,172 +1847,406 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1221
1847
|
}
|
|
1222
1848
|
async importDump(dump, opts) {
|
|
1223
1849
|
const merge = opts?.merge ?? false;
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1850
|
+
const entityIds = Object.keys(dump.entities);
|
|
1851
|
+
for (const entityId of entityIds) {
|
|
1852
|
+
if (this.activeMaintenanceJobs.has(this._importKey(entityId))) {
|
|
1853
|
+
throw new WikiBusyError("import", entityId);
|
|
1854
|
+
}
|
|
1855
|
+
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
1856
|
+
throw new WikiBusyError("librarian", entityId);
|
|
1857
|
+
}
|
|
1858
|
+
if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
1859
|
+
throw new WikiBusyError("heal", entityId);
|
|
1860
|
+
}
|
|
1861
|
+
if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
1862
|
+
throw new WikiBusyError("prune", entityId);
|
|
1863
|
+
}
|
|
1864
|
+
if (this._isReembedActive(entityId)) {
|
|
1865
|
+
throw new WikiBusyError("reembed", entityId);
|
|
1866
|
+
}
|
|
1867
|
+
if (this._isIngestActiveFor(entityId)) {
|
|
1868
|
+
throw new WikiBusyError("ingest", entityId);
|
|
1869
|
+
}
|
|
1870
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
1871
|
+
throw new WikiBusyError("forget", entityId);
|
|
1872
|
+
}
|
|
1873
|
+
}
|
|
1874
|
+
if (this.activeMaintenanceJobs.has(this._globalImportKey())) {
|
|
1875
|
+
throw new WikiBusyError("import", "*");
|
|
1876
|
+
}
|
|
1877
|
+
this.activeMaintenanceJobs.add(this._globalImportKey());
|
|
1878
|
+
for (const entityId of entityIds) {
|
|
1879
|
+
this.activeMaintenanceJobs.add(this._importKey(entityId));
|
|
1880
|
+
}
|
|
1881
|
+
try {
|
|
1882
|
+
for (const [entityId, bundle] of Object.entries(dump.entities)) {
|
|
1883
|
+
await this._doImportEntity(entityId, bundle, merge);
|
|
1884
|
+
}
|
|
1885
|
+
} finally {
|
|
1886
|
+
this.activeMaintenanceJobs.delete(this._globalImportKey());
|
|
1887
|
+
for (const entityId of entityIds) {
|
|
1888
|
+
this.activeMaintenanceJobs.delete(this._importKey(entityId));
|
|
1889
|
+
}
|
|
1890
|
+
}
|
|
1891
|
+
}
|
|
1892
|
+
async _doImportEntity(entityId, bundle, merge) {
|
|
1893
|
+
const upsertedFactIds = /* @__PURE__ */ new Set();
|
|
1894
|
+
const upsertedDeletedFactIds = /* @__PURE__ */ new Set();
|
|
1895
|
+
const factsWithPreservedBlob = /* @__PURE__ */ new Map();
|
|
1896
|
+
const preservedBlobDims = /* @__PURE__ */ new Set();
|
|
1897
|
+
const softDeletedFactIds = [];
|
|
1898
|
+
await this.db.withTransactionAsync(async () => {
|
|
1899
|
+
if (!merge) {
|
|
1900
|
+
const toDelete = await this.db.getAllAsync(
|
|
1901
|
+
`SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1902
|
+
[entityId]
|
|
1903
|
+
);
|
|
1904
|
+
softDeletedFactIds.push(...toDelete.map((r) => r.id));
|
|
1905
|
+
const now = Date.now();
|
|
1906
|
+
await this.db.runAsync(
|
|
1907
|
+
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1908
|
+
[now, now, entityId]
|
|
1909
|
+
);
|
|
1910
|
+
await this.db.runAsync(
|
|
1911
|
+
`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1912
|
+
[now, now, entityId]
|
|
1913
|
+
);
|
|
1914
|
+
await this.db.runAsync(
|
|
1915
|
+
`DELETE FROM ${this.prefix}checkpoints WHERE entity_id = ?`,
|
|
1916
|
+
[entityId]
|
|
1917
|
+
);
|
|
1918
|
+
}
|
|
1919
|
+
const factIds = bundle.facts.map((fact) => fact.id);
|
|
1920
|
+
const existingFactsById = /* @__PURE__ */ new Map();
|
|
1921
|
+
const factLookupChunkSize = 500;
|
|
1922
|
+
for (let i = 0; i < factIds.length; i += factLookupChunkSize) {
|
|
1923
|
+
const factIdChunk = factIds.slice(i, i + factLookupChunkSize);
|
|
1924
|
+
if (factIdChunk.length === 0) continue;
|
|
1925
|
+
const placeholders = factIdChunk.map(() => "?").join(", ");
|
|
1926
|
+
const existingFacts = await this.db.getAllAsync(
|
|
1927
|
+
`SELECT id, entity_id, updated_at FROM ${this.prefix}entries WHERE id IN (${placeholders})`,
|
|
1928
|
+
factIdChunk
|
|
1929
|
+
);
|
|
1930
|
+
for (const existingFact of existingFacts) {
|
|
1931
|
+
existingFactsById.set(existingFact.id, existingFact);
|
|
1240
1932
|
}
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
const
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1933
|
+
}
|
|
1934
|
+
for (const fact of bundle.facts) {
|
|
1935
|
+
const tagsJson = JSON.stringify(Array.isArray(fact.tags) ? fact.tags : []);
|
|
1936
|
+
const safeUpdatedAt = Number.isFinite(fact.updated_at) ? fact.updated_at : 0;
|
|
1937
|
+
const existing = existingFactsById.get(fact.id);
|
|
1938
|
+
const rawBlobRaw = fact.embedding_blob;
|
|
1939
|
+
let rawBlob = null;
|
|
1940
|
+
if (rawBlobRaw instanceof Uint8Array) {
|
|
1941
|
+
rawBlob = rawBlobRaw;
|
|
1942
|
+
} else if (rawBlobRaw !== null && rawBlobRaw !== void 0 && typeof rawBlobRaw === "object") {
|
|
1943
|
+
const obj = rawBlobRaw;
|
|
1944
|
+
if (obj["type"] === "Buffer" && Array.isArray(obj["data"])) {
|
|
1945
|
+
rawBlob = new Uint8Array(obj["data"]);
|
|
1946
|
+
} else if (!Array.isArray(rawBlobRaw)) {
|
|
1947
|
+
const entries = Object.keys(obj);
|
|
1948
|
+
if (entries.length > 0 && entries.every((k) => /^\d+$/.test(k))) {
|
|
1949
|
+
const len = entries.length;
|
|
1950
|
+
rawBlob = new Uint8Array(len);
|
|
1951
|
+
for (let i = 0; i < len; i++) rawBlob[i] = obj[String(i)] ?? 0;
|
|
1952
|
+
}
|
|
1254
1953
|
}
|
|
1255
1954
|
}
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
const
|
|
1259
|
-
const
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1955
|
+
let blobData = null;
|
|
1956
|
+
if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
|
|
1957
|
+
const copy = new ArrayBuffer(rawBlob.byteLength);
|
|
1958
|
+
const alignedBlob = new Uint8Array(copy);
|
|
1959
|
+
alignedBlob.set(rawBlob);
|
|
1960
|
+
const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
|
|
1961
|
+
let allFinite = true;
|
|
1962
|
+
for (let i = 0; i < floats.length; i++) {
|
|
1963
|
+
if (!isFinite(floats[i])) {
|
|
1964
|
+
allFinite = false;
|
|
1965
|
+
break;
|
|
1267
1966
|
}
|
|
1967
|
+
}
|
|
1968
|
+
if (allFinite) {
|
|
1969
|
+
blobData = alignedBlob;
|
|
1970
|
+
}
|
|
1971
|
+
}
|
|
1972
|
+
if (existing) {
|
|
1973
|
+
if (existing.entity_id !== entityId) {
|
|
1974
|
+
this._warnCrossEntityCollision("entry", fact.id, existing.entity_id, entityId);
|
|
1975
|
+
continue;
|
|
1976
|
+
}
|
|
1977
|
+
if (merge) {
|
|
1978
|
+
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
1979
|
+
}
|
|
1980
|
+
if (blobData != null) {
|
|
1981
|
+
await this.db.runAsync(
|
|
1982
|
+
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
1983
|
+
[entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData, fact.id]
|
|
1984
|
+
);
|
|
1985
|
+
factsWithPreservedBlob.set(fact.id, blobData);
|
|
1986
|
+
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
1987
|
+
} else {
|
|
1268
1988
|
await this.db.runAsync(
|
|
1269
|
-
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at =
|
|
1989
|
+
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = NULL, embedding = NULL WHERE id = ?`,
|
|
1270
1990
|
[entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, fact.id]
|
|
1271
1991
|
);
|
|
1272
|
-
|
|
1992
|
+
}
|
|
1993
|
+
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1994
|
+
upsertedFactIds.add(fact.id);
|
|
1995
|
+
if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
|
|
1996
|
+
} else {
|
|
1997
|
+
if (blobData != null) {
|
|
1998
|
+
await this.db.runAsync(
|
|
1999
|
+
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at, embedding_blob) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
2000
|
+
[fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData]
|
|
2001
|
+
);
|
|
2002
|
+
factsWithPreservedBlob.set(fact.id, blobData);
|
|
2003
|
+
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
1273
2004
|
} else {
|
|
1274
2005
|
await this.db.runAsync(
|
|
1275
2006
|
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1276
2007
|
[fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at]
|
|
1277
2008
|
);
|
|
1278
|
-
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1279
2009
|
}
|
|
2010
|
+
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
2011
|
+
upsertedFactIds.add(fact.id);
|
|
2012
|
+
if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
|
|
1280
2013
|
}
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
2014
|
+
}
|
|
2015
|
+
const taskIds = bundle.tasks.map((task) => task.id);
|
|
2016
|
+
const existingTasksById = /* @__PURE__ */ new Map();
|
|
2017
|
+
const taskLookupChunkSize = 500;
|
|
2018
|
+
for (let i = 0; i < taskIds.length; i += taskLookupChunkSize) {
|
|
2019
|
+
const taskIdChunk = taskIds.slice(i, i + taskLookupChunkSize);
|
|
2020
|
+
if (taskIdChunk.length === 0) continue;
|
|
2021
|
+
const placeholders = taskIdChunk.map(() => "?").join(", ");
|
|
2022
|
+
const existingTasks = await this.db.getAllAsync(
|
|
2023
|
+
`SELECT id, entity_id, updated_at FROM ${this.prefix}tasks WHERE id IN (${placeholders})`,
|
|
2024
|
+
taskIdChunk
|
|
2025
|
+
);
|
|
2026
|
+
for (const existingTask of existingTasks) {
|
|
2027
|
+
existingTasksById.set(existingTask.id, existingTask);
|
|
2028
|
+
}
|
|
2029
|
+
}
|
|
2030
|
+
for (const task of bundle.tasks) {
|
|
2031
|
+
const safeUpdatedAt = Number.isFinite(task.updated_at) ? task.updated_at : 0;
|
|
2032
|
+
const existing = existingTasksById.get(task.id);
|
|
2033
|
+
if (existing) {
|
|
2034
|
+
if (existing.entity_id !== entityId) {
|
|
2035
|
+
this._warnCrossEntityCollision("task", task.id, existing.entity_id, entityId);
|
|
2036
|
+
continue;
|
|
2037
|
+
}
|
|
2038
|
+
if (merge) {
|
|
2039
|
+
if (safeUpdatedAt <= existing.updated_at) continue;
|
|
1294
2040
|
}
|
|
2041
|
+
await this.db.runAsync(
|
|
2042
|
+
`UPDATE ${this.prefix}tasks SET entity_id = ?, description = ?, status = ?, priority = ?, created_at = ?, updated_at = ?, resolved_at = ?, deleted_at = ? WHERE id = ?`,
|
|
2043
|
+
[entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at, task.id]
|
|
2044
|
+
);
|
|
2045
|
+
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
2046
|
+
} else {
|
|
2047
|
+
await this.db.runAsync(
|
|
2048
|
+
`INSERT INTO ${this.prefix}tasks (id, entity_id, description, status, priority, created_at, updated_at, resolved_at, deleted_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
2049
|
+
[task.id, entityId, task.description, task.status, task.priority, task.created_at, safeUpdatedAt, task.resolved_at, task.deleted_at]
|
|
2050
|
+
);
|
|
2051
|
+
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1295
2052
|
}
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
2053
|
+
}
|
|
2054
|
+
for (const event of bundle.events) {
|
|
2055
|
+
await this.db.runAsync(
|
|
2056
|
+
`INSERT OR IGNORE INTO ${this.prefix}events (id, entity_id, event_type, summary, related_entry_id, created_at)
|
|
2057
|
+
VALUES (?, ?, ?, ?, ?, ?)`,
|
|
2058
|
+
[event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
|
|
2059
|
+
);
|
|
2060
|
+
}
|
|
2061
|
+
});
|
|
2062
|
+
this.vectorCache.delete(entityId);
|
|
2063
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
2064
|
+
for (const fact of bundle.facts) {
|
|
2065
|
+
if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
|
|
2066
|
+
await this.embedFact({
|
|
2067
|
+
id: fact.id,
|
|
2068
|
+
entity_id: entityId,
|
|
2069
|
+
// Use authoritative entityId from dump key, not fact.entity_id
|
|
2070
|
+
title: fact.title,
|
|
2071
|
+
body: fact.body,
|
|
2072
|
+
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
2073
|
+
});
|
|
2074
|
+
}
|
|
2075
|
+
}
|
|
2076
|
+
for (const fact of bundle.facts) {
|
|
2077
|
+
const blobData = factsWithPreservedBlob.get(fact.id);
|
|
2078
|
+
if (blobData && !fact.deleted_at && upsertedFactIds.has(fact.id)) {
|
|
2079
|
+
try {
|
|
2080
|
+
const float32Vector = new Float32Array(blobData.buffer, blobData.byteOffset, blobData.byteLength / 4);
|
|
2081
|
+
await this._notifyEmbeddingPersisted(entityId, fact.id, float32Vector);
|
|
2082
|
+
} catch (hookErr) {
|
|
2083
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for preserved-blob fact ${fact.id}:`, hookErr);
|
|
2084
|
+
}
|
|
2085
|
+
}
|
|
2086
|
+
}
|
|
2087
|
+
for (const factId of softDeletedFactIds) {
|
|
2088
|
+
if (!upsertedFactIds.has(factId) || upsertedDeletedFactIds.has(factId)) {
|
|
2089
|
+
try {
|
|
2090
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
2091
|
+
} catch (hookErr) {
|
|
2092
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted(vector=null) hook failed for soft-deleted fact ${factId}:`, hookErr);
|
|
2093
|
+
}
|
|
2094
|
+
}
|
|
2095
|
+
}
|
|
2096
|
+
try {
|
|
2097
|
+
const canonicalRow = await this.db.getFirstAsync(
|
|
2098
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
|
|
2099
|
+
);
|
|
2100
|
+
const canonicalDim = canonicalRow ? parseInt(canonicalRow.value, 10) : null;
|
|
2101
|
+
if (preservedBlobDims.size === 1) {
|
|
2102
|
+
const preservedDim = [...preservedBlobDims][0];
|
|
2103
|
+
if (canonicalDim === null || canonicalDim === preservedDim) {
|
|
2104
|
+
await this.storeEmbeddingDimension(preservedDim);
|
|
2105
|
+
const staleMismatch = await this.db.getFirstAsync(
|
|
2106
|
+
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension_mismatch'`
|
|
2107
|
+
);
|
|
2108
|
+
if (staleMismatch && parseInt(staleMismatch.value, 10) !== preservedDim) {
|
|
1313
2109
|
await this.db.runAsync(
|
|
1314
|
-
`INSERT INTO ${this.prefix}
|
|
1315
|
-
[
|
|
2110
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
2111
|
+
[String(preservedDim)]
|
|
1316
2112
|
);
|
|
1317
|
-
existingTasksById.set(task.id, { id: task.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1318
2113
|
}
|
|
1319
|
-
|
|
1320
|
-
|
|
2114
|
+
await this._reconcileEmbeddingDimension();
|
|
2115
|
+
} else {
|
|
1321
2116
|
await this.db.runAsync(
|
|
1322
|
-
`INSERT OR
|
|
1323
|
-
|
|
1324
|
-
[event.id, entityId, event.event_type, event.summary, event.related_entry_id ?? null, event.created_at]
|
|
2117
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
2118
|
+
[String(canonicalDim)]
|
|
1325
2119
|
);
|
|
1326
2120
|
}
|
|
1327
|
-
})
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
await this.
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
2121
|
+
} else if (preservedBlobDims.size > 1) {
|
|
2122
|
+
if (canonicalDim === null) {
|
|
2123
|
+
const sortedPreservedBlobDims = [...preservedBlobDims].sort((a, b) => a - b);
|
|
2124
|
+
await this.storeEmbeddingDimension(sortedPreservedBlobDims[0]);
|
|
2125
|
+
await this.db.runAsync(
|
|
2126
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
2127
|
+
[String(sortedPreservedBlobDims[0])]
|
|
2128
|
+
);
|
|
2129
|
+
} else {
|
|
2130
|
+
await this.db.runAsync(
|
|
2131
|
+
`INSERT OR REPLACE INTO ${this.prefix}meta (key, value) VALUES ('embedding_dimension_mismatch', ?)`,
|
|
2132
|
+
[String(canonicalDim)]
|
|
2133
|
+
);
|
|
1336
2134
|
}
|
|
1337
2135
|
}
|
|
2136
|
+
} finally {
|
|
2137
|
+
this.vectorCache.delete(entityId);
|
|
1338
2138
|
}
|
|
1339
|
-
await this.rebuildMiniSearchIndex();
|
|
1340
2139
|
}
|
|
1341
2140
|
async forget(entityId, params) {
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
if (
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
} else {
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
const
|
|
1365
|
-
let
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
2141
|
+
let blockingOperation = null;
|
|
2142
|
+
if (this.activeMaintenanceJobs.has(this._librarianKey(entityId))) {
|
|
2143
|
+
blockingOperation = "librarian";
|
|
2144
|
+
} else if (this.activeMaintenanceJobs.has(this._healKey(entityId))) {
|
|
2145
|
+
blockingOperation = "heal";
|
|
2146
|
+
} else if (this.activeMaintenanceJobs.has(this._pruneKey(entityId))) {
|
|
2147
|
+
blockingOperation = "prune";
|
|
2148
|
+
} else if (this._isReembedActive(entityId)) {
|
|
2149
|
+
blockingOperation = "reembed";
|
|
2150
|
+
} else if (this._isIngestActiveFor(entityId)) {
|
|
2151
|
+
blockingOperation = "ingest";
|
|
2152
|
+
} else if (this._isImportActiveFor(entityId)) {
|
|
2153
|
+
blockingOperation = "import";
|
|
2154
|
+
} else if (this._isForgetActiveFor(entityId)) {
|
|
2155
|
+
blockingOperation = "forget";
|
|
2156
|
+
}
|
|
2157
|
+
if (blockingOperation !== null) {
|
|
2158
|
+
throw new WikiBusyError(blockingOperation, entityId);
|
|
2159
|
+
}
|
|
2160
|
+
const forgetKey = this._forgetKey(entityId);
|
|
2161
|
+
this.activeMaintenanceJobs.add(forgetKey);
|
|
2162
|
+
try {
|
|
2163
|
+
const now = Date.now();
|
|
2164
|
+
let deletedEntries = 0;
|
|
2165
|
+
let deletedTasks = 0;
|
|
2166
|
+
const deletedEntryIds = [];
|
|
2167
|
+
if (params.clearAll) {
|
|
2168
|
+
const entriesToDelete = await this.db.getAllAsync(
|
|
2169
|
+
`SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
2170
|
+
[entityId]
|
|
2171
|
+
);
|
|
2172
|
+
deletedEntryIds.push(...entriesToDelete.map((e) => e.id));
|
|
2173
|
+
const [entriesRes, tasksRes] = await Promise.all([
|
|
2174
|
+
this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId]),
|
|
2175
|
+
this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId])
|
|
2176
|
+
]);
|
|
2177
|
+
await this.db.runAsync(`UPDATE ${this.prefix}checkpoints SET memory_checkpoint = 0, heal_checkpoint = 0 WHERE entity_id = ?`, [entityId]);
|
|
2178
|
+
deletedEntries = entriesRes.changes;
|
|
2179
|
+
deletedTasks = tasksRes.changes;
|
|
2180
|
+
} else {
|
|
2181
|
+
const hasIdSelectors = params.entryId !== void 0 || params.taskId !== void 0;
|
|
2182
|
+
const hasSourceSelectors = params.sourceRef !== void 0 || params.sourceHash !== void 0;
|
|
2183
|
+
if (hasIdSelectors && hasSourceSelectors) {
|
|
2184
|
+
throw new Error("forget() params are mutually exclusive: use entryId/taskId together, or sourceRef/sourceHash together, but not both in the same call");
|
|
1372
2185
|
}
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
2186
|
+
const sourceRef = params.sourceRef !== void 0 ? normalizeSourceRef(params.sourceRef) : null;
|
|
2187
|
+
if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
|
|
2188
|
+
const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
|
|
2189
|
+
if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
2190
|
+
if (params.entryId) {
|
|
2191
|
+
const entry = await this.db.getFirstAsync(
|
|
2192
|
+
`SELECT id FROM ${this.prefix}entries WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
2193
|
+
[params.entryId, entityId]
|
|
2194
|
+
);
|
|
2195
|
+
if (entry) deletedEntryIds.push(entry.id);
|
|
2196
|
+
}
|
|
2197
|
+
if (sourceRef || sourceHash) {
|
|
2198
|
+
let q = `SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`;
|
|
2199
|
+
const args = [entityId];
|
|
2200
|
+
if (sourceRef) {
|
|
2201
|
+
q += ` AND source_ref = ?`;
|
|
2202
|
+
args.push(sourceRef);
|
|
2203
|
+
}
|
|
2204
|
+
if (sourceHash) {
|
|
2205
|
+
q += ` AND source_hash = ?`;
|
|
2206
|
+
args.push(sourceHash);
|
|
2207
|
+
}
|
|
2208
|
+
const entriesToDelete = await this.db.getAllAsync(q, args);
|
|
2209
|
+
deletedEntryIds.push(...entriesToDelete.map((e) => e.id));
|
|
2210
|
+
}
|
|
2211
|
+
const entryPromise = params.entryId ? this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.entryId, entityId]) : null;
|
|
2212
|
+
const taskPromise = params.taskId ? this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.taskId, entityId]) : null;
|
|
2213
|
+
let refPromise = null;
|
|
2214
|
+
if (sourceRef || sourceHash) {
|
|
2215
|
+
let q = `UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`;
|
|
2216
|
+
const args = [now, now, entityId];
|
|
2217
|
+
if (sourceRef) {
|
|
2218
|
+
q += ` AND source_ref = ?`;
|
|
2219
|
+
args.push(sourceRef);
|
|
2220
|
+
}
|
|
2221
|
+
if (sourceHash) {
|
|
2222
|
+
q += ` AND source_hash = ?`;
|
|
2223
|
+
args.push(sourceHash);
|
|
2224
|
+
}
|
|
2225
|
+
refPromise = this.db.runAsync(q, args);
|
|
1376
2226
|
}
|
|
1377
|
-
|
|
2227
|
+
const [entryResult, taskResult, refResult] = await Promise.all([
|
|
2228
|
+
entryPromise ?? Promise.resolve(null),
|
|
2229
|
+
taskPromise ?? Promise.resolve(null),
|
|
2230
|
+
refPromise ?? Promise.resolve(null)
|
|
2231
|
+
]);
|
|
2232
|
+
if (entryResult) deletedEntries += entryResult.changes;
|
|
2233
|
+
if (taskResult) deletedTasks += taskResult.changes;
|
|
2234
|
+
if (refResult) deletedEntries += refResult.changes;
|
|
1378
2235
|
}
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
2236
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
2237
|
+
this.vectorCache.delete(entityId);
|
|
2238
|
+
const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
|
|
2239
|
+
for (const factId of uniqueDeletedIds) {
|
|
2240
|
+
try {
|
|
2241
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
2242
|
+
} catch (hookErr) {
|
|
2243
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during forget for ${factId}:`, hookErr);
|
|
2244
|
+
}
|
|
2245
|
+
}
|
|
2246
|
+
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
2247
|
+
} finally {
|
|
2248
|
+
this.activeMaintenanceJobs.delete(forgetKey);
|
|
1387
2249
|
}
|
|
1388
|
-
await this.rebuildMiniSearchIndex(entityId);
|
|
1389
|
-
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
1390
2250
|
}
|
|
1391
2251
|
async ingestDocument(entityId, params) {
|
|
1392
2252
|
const sourceRef = normalizeSourceRef(params.sourceRef);
|
|
@@ -1414,6 +2274,12 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1414
2274
|
if (this._isReembedActive(entityId)) {
|
|
1415
2275
|
throw new WikiBusyError("reembed", entityId);
|
|
1416
2276
|
}
|
|
2277
|
+
if (this._isImportActiveFor(entityId)) {
|
|
2278
|
+
throw new WikiBusyError("import", entityId);
|
|
2279
|
+
}
|
|
2280
|
+
if (this._isForgetActiveFor(entityId)) {
|
|
2281
|
+
throw new WikiBusyError("forget", entityId);
|
|
2282
|
+
}
|
|
1417
2283
|
this.activeIngestJobs.add(jobKey);
|
|
1418
2284
|
try {
|
|
1419
2285
|
const { chunks, truncated } = chunkText(params.documentChunk, maxChunkLength, chunkOverlap);
|
|
@@ -1446,7 +2312,15 @@ ${chunk}`;
|
|
|
1446
2312
|
}
|
|
1447
2313
|
const now = Date.now();
|
|
1448
2314
|
const insertedFacts = [];
|
|
2315
|
+
const deletedSourceFactIds = [];
|
|
1449
2316
|
await this.db.withTransactionAsync(async () => {
|
|
2317
|
+
const existingSourceFacts = await this.db.getAllAsync(
|
|
2318
|
+
`SELECT id FROM ${this.prefix}entries WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
2319
|
+
[sourceRef, entityId]
|
|
2320
|
+
);
|
|
2321
|
+
for (const row of existingSourceFacts) {
|
|
2322
|
+
deletedSourceFactIds.push(row.id);
|
|
2323
|
+
}
|
|
1450
2324
|
await this.db.runAsync(
|
|
1451
2325
|
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
1452
2326
|
[now, now, sourceRef, entityId]
|
|
@@ -1458,19 +2332,42 @@ ${chunk}`;
|
|
|
1458
2332
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1459
2333
|
[id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]
|
|
1460
2334
|
);
|
|
1461
|
-
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2335
|
+
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1462
2336
|
}
|
|
1463
2337
|
});
|
|
2338
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
2339
|
+
this.vectorCache.delete(entityId);
|
|
2340
|
+
const uniqueDeletedSourceFactIds = Array.from(new Set(deletedSourceFactIds));
|
|
2341
|
+
for (const factId of uniqueDeletedSourceFactIds) {
|
|
2342
|
+
try {
|
|
2343
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
2344
|
+
} catch (hookErr) {
|
|
2345
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during ingest for ${factId}:`, hookErr);
|
|
2346
|
+
}
|
|
2347
|
+
}
|
|
1464
2348
|
for (const fact of insertedFacts) {
|
|
1465
2349
|
await this.embedFact(fact);
|
|
1466
2350
|
}
|
|
1467
|
-
|
|
2351
|
+
this.vectorCache.delete(entityId);
|
|
1468
2352
|
return { truncated, chunks: chunks.length };
|
|
1469
2353
|
} finally {
|
|
1470
2354
|
this.activeIngestJobs.delete(jobKey);
|
|
1471
2355
|
}
|
|
1472
2356
|
}
|
|
1473
2357
|
};
|
|
2358
|
+
/**
|
|
2359
|
+
* Maximum number of entities whose parsed embedding vectors are held in
|
|
2360
|
+
* memory. This cap is intentionally conservative so the cache remains safe
|
|
2361
|
+
* on memory-constrained runtimes (e.g., mobile/Expo).
|
|
2362
|
+
*/
|
|
2363
|
+
_WikiMemory.MAX_VECTOR_CACHE_ENTITIES = 16;
|
|
2364
|
+
/**
|
|
2365
|
+
* Maximum number of fact vectors cached per entity. Keep this high enough to
|
|
2366
|
+
* preserve the parsed-embedding reuse optimization for common mid-sized
|
|
2367
|
+
* entities while still maintaining a bounded memory footprint.
|
|
2368
|
+
*/
|
|
2369
|
+
_WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY = 500;
|
|
2370
|
+
var WikiMemory = _WikiMemory;
|
|
1474
2371
|
|
|
1475
2372
|
// src/utils/formatContext.ts
|
|
1476
2373
|
function validateMaxOption(value, name) {
|
|
@@ -1666,8 +2563,23 @@ function formatMemoryDump(dump) {
|
|
|
1666
2563
|
name: formatEntityFileName(entityId),
|
|
1667
2564
|
content: renderEntity(entityId, bundle, dump.generatedAt)
|
|
1668
2565
|
}));
|
|
2566
|
+
const manifestDump = {
|
|
2567
|
+
generatedAt: dump.generatedAt,
|
|
2568
|
+
entities: Object.fromEntries(
|
|
2569
|
+
Object.entries(dump.entities).map(([entityId, bundle]) => [
|
|
2570
|
+
entityId,
|
|
2571
|
+
{
|
|
2572
|
+
...bundle,
|
|
2573
|
+
facts: bundle.facts.map((f) => {
|
|
2574
|
+
const { embedding_blob: _blob, ...rest } = f;
|
|
2575
|
+
return rest;
|
|
2576
|
+
})
|
|
2577
|
+
}
|
|
2578
|
+
])
|
|
2579
|
+
)
|
|
2580
|
+
};
|
|
1669
2581
|
return {
|
|
1670
|
-
manifest: JSON.stringify(
|
|
2582
|
+
manifest: JSON.stringify(manifestDump, null, 2),
|
|
1671
2583
|
files
|
|
1672
2584
|
};
|
|
1673
2585
|
}
|