@equationalapplications/core-llm-wiki 3.0.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +192 -1
- package/dist/index.d.mts +164 -7
- package/dist/index.d.ts +164 -7
- package/dist/index.js +664 -86
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +664 -87
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -132,6 +132,18 @@ var WikiBusyError = class extends Error {
|
|
|
132
132
|
this.entityId = entityId;
|
|
133
133
|
}
|
|
134
134
|
};
|
|
135
|
+
/**
 * Error raised when a prune run removed some rows and then stopped at a failing fact.
 *
 * Instance fields (all copied verbatim from the constructor arguments):
 *  - deleted:       count reported as already deleted
 *  - failedAt:      identifier of the item the run stopped on
 *  - remaining:     count reported as still pending
 *  - deletedTasks:  defaults to 0 when omitted
 *  - deletedEvents: defaults to 0 when omitted
 *  - cause:         underlying error, stored as a plain own property
 */
var PrunePartialFailureError = class extends Error {
  constructor(deleted, failedAt, remaining, cause, deletedTasks = 0, deletedEvents = 0) {
    super(`Prune partially failed: deleted ${deleted}, failed at ${failedAt}, ${remaining} remaining`);
    // Assigned in one step; key order matches the order callers may observe via Object.keys().
    Object.assign(this, {
      name: "PrunePartialFailureError",
      deleted,
      failedAt,
      remaining,
      deletedTasks,
      deletedEvents,
      cause
    });
  }
};
|
|
135
147
|
|
|
136
148
|
// src/prompts.ts
|
|
137
149
|
var LIBRARIAN_SYSTEM_PROMPT = `You are a knowledge extraction agent. Your job is to analyze recent episodic events and extract stable facts and actionable tasks about the user or entity.
|
|
@@ -198,6 +210,7 @@ function parseEmbedding(blob, text) {
|
|
|
198
210
|
}
|
|
199
211
|
|
|
200
212
|
// src/WikiMemory.ts
|
|
213
|
+
// Symbol key set to `true` on the Error created when the deletion-path
// onEmbeddingPersisted hook times out; the prune path reads it from the failure
// cause to tell a timeout apart from any other hook failure.
var HOOK_TIMEOUT_MARKER = /* @__PURE__ */ Symbol("WikiMemoryHookTimeout");
|
|
201
214
|
function parseJsonResponse(text) {
|
|
202
215
|
const firstBrace = text.indexOf("{");
|
|
203
216
|
const firstBracket = text.indexOf("[");
|
|
@@ -560,6 +573,11 @@ var _WikiMemory = class _WikiMemory {
|
|
|
560
573
|
`UPDATE ${this.prefix}entries SET embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
561
574
|
[blob, fact.id]
|
|
562
575
|
);
|
|
576
|
+
try {
|
|
577
|
+
await this._notifyEmbeddingPersisted(fact.entity_id, fact.id, float32Vector);
|
|
578
|
+
} catch (hookErr) {
|
|
579
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for ${fact.id}:`, hookErr);
|
|
580
|
+
}
|
|
563
581
|
return true;
|
|
564
582
|
} catch (err) {
|
|
565
583
|
console.warn(`[WikiMemory] embedFact failed for ${fact.id}:`, err);
|
|
@@ -575,6 +593,57 @@ var _WikiMemory = class _WikiMemory {
|
|
|
575
593
|
_warnCrossEntityCollision(type, id, existingEntityId, targetEntityId) {
|
|
576
594
|
console.warn(`[WikiMemory] importDump: ${type} id "${id}" already belongs to entity "${existingEntityId}"; skipping for entity "${targetEntityId}"`);
|
|
577
595
|
}
|
|
596
|
+
async _notifyEmbeddingPersisted(entityId, factId, vector) {
|
|
597
|
+
if (!this.options.vectorRanker?.onEmbeddingPersisted) return;
|
|
598
|
+
const vectorCopy = vector ? vector.slice() : null;
|
|
599
|
+
await this.options.vectorRanker.onEmbeddingPersisted({
|
|
600
|
+
entityId,
|
|
601
|
+
factId,
|
|
602
|
+
vector: vectorCopy
|
|
603
|
+
});
|
|
604
|
+
}
|
|
605
|
+
/**
|
|
606
|
+
* GDPR-critical variant: awaits the hook with a timeout and rethrows failures.
|
|
607
|
+
* Use ONLY on deletion paths. forget() calls after soft-delete UPDATE; runPrune()
|
|
608
|
+
* calls before hard DELETE. For best-effort sync, use _notifyEmbeddingPersisted.
|
|
609
|
+
*/
|
|
610
|
+
async _notifyEmbeddingPersistedOrThrow(entityId, factId, vector) {
|
|
611
|
+
if (!this.options.vectorRanker?.onEmbeddingPersisted) return;
|
|
612
|
+
if (this.options.forceDeleteIgnoreRankerHook === true) return;
|
|
613
|
+
const vectorCopy = vector ? vector.slice() : null;
|
|
614
|
+
const rawTimeout = this.options.deletionHookTimeoutMs ?? 3e4;
|
|
615
|
+
if (typeof rawTimeout !== "number" || !Number.isFinite(rawTimeout) || rawTimeout <= 0) {
|
|
616
|
+
throw new Error("Invalid deletionHookTimeoutMs: must be a positive finite number");
|
|
617
|
+
}
|
|
618
|
+
const timeoutMs = rawTimeout;
|
|
619
|
+
let timeoutHandle;
|
|
620
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
621
|
+
timeoutHandle = setTimeout(
|
|
622
|
+
() => {
|
|
623
|
+
const timeoutError = new Error(`onEmbeddingPersisted timed out after ${timeoutMs}ms`);
|
|
624
|
+
timeoutError[HOOK_TIMEOUT_MARKER] = true;
|
|
625
|
+
reject(timeoutError);
|
|
626
|
+
},
|
|
627
|
+
timeoutMs
|
|
628
|
+
);
|
|
629
|
+
});
|
|
630
|
+
const hookPromise = Promise.resolve(
|
|
631
|
+
this.options.vectorRanker.onEmbeddingPersisted({
|
|
632
|
+
entityId,
|
|
633
|
+
factId,
|
|
634
|
+
vector: vectorCopy
|
|
635
|
+
})
|
|
636
|
+
);
|
|
637
|
+
try {
|
|
638
|
+
await Promise.race([hookPromise, timeoutPromise]);
|
|
639
|
+
} catch (err) {
|
|
640
|
+
hookPromise.catch(() => {
|
|
641
|
+
});
|
|
642
|
+
throw err;
|
|
643
|
+
} finally {
|
|
644
|
+
if (timeoutHandle) clearTimeout(timeoutHandle);
|
|
645
|
+
}
|
|
646
|
+
}
|
|
578
647
|
async setup() {
|
|
579
648
|
const entriesExistedBeforeSetup = await this.db.getFirstAsync(
|
|
580
649
|
`SELECT name FROM sqlite_master WHERE type='table' AND name=?`,
|
|
@@ -759,18 +828,69 @@ var _WikiMemory = class _WikiMemory {
|
|
|
759
828
|
let deletedEvents = 0;
|
|
760
829
|
if (retainSoftDeletedFor !== null) {
|
|
761
830
|
const cutoff = now - retainSoftDeletedFor * 864e5;
|
|
762
|
-
const
|
|
763
|
-
`
|
|
831
|
+
const entriesToDelete = await this.db.getAllAsync(
|
|
832
|
+
`SELECT id, entity_id FROM ${this.prefix}entries
|
|
764
833
|
WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at < ?`,
|
|
765
834
|
[entityId, cutoff]
|
|
766
835
|
);
|
|
767
|
-
|
|
836
|
+
const succeeded = [];
|
|
837
|
+
let failure = null;
|
|
838
|
+
for (const row of entriesToDelete) {
|
|
839
|
+
try {
|
|
840
|
+
await this._notifyEmbeddingPersistedOrThrow(row.entity_id, row.id, null);
|
|
841
|
+
succeeded.push({ entity_id: row.entity_id, id: row.id });
|
|
842
|
+
} catch (err) {
|
|
843
|
+
failure = { factId: row.id, cause: err };
|
|
844
|
+
break;
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
if (succeeded.length > 0) {
|
|
848
|
+
const chunkSize = 500;
|
|
849
|
+
for (let i = 0; i < succeeded.length; i += chunkSize) {
|
|
850
|
+
const chunk = succeeded.slice(i, i + chunkSize);
|
|
851
|
+
const placeholders = chunk.map(() => "?").join(",");
|
|
852
|
+
const entryResult = await this.db.runAsync(
|
|
853
|
+
`DELETE FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at < ? AND id IN (${placeholders})`,
|
|
854
|
+
[entityId, cutoff, ...chunk.map((r) => r.id)]
|
|
855
|
+
);
|
|
856
|
+
deletedEntries += entryResult.changes;
|
|
857
|
+
}
|
|
858
|
+
}
|
|
768
859
|
const taskResult = await this.db.runAsync(
|
|
769
860
|
`DELETE FROM ${this.prefix}tasks
|
|
770
861
|
WHERE entity_id = ? AND deleted_at IS NOT NULL AND deleted_at < ?`,
|
|
771
862
|
[entityId, cutoff]
|
|
772
863
|
);
|
|
773
864
|
deletedTasks = taskResult.changes;
|
|
865
|
+
if (failure) {
|
|
866
|
+
await this.rebuildMiniSearchIndex(entityId);
|
|
867
|
+
this.vectorCache.delete(entityId);
|
|
868
|
+
const remaining = entriesToDelete.length - succeeded.length - 1;
|
|
869
|
+
const isTimeout = failure.cause?.[HOOK_TIMEOUT_MARKER] === true;
|
|
870
|
+
if (isTimeout) {
|
|
871
|
+
throw new PrunePartialFailureError(
|
|
872
|
+
succeeded.length,
|
|
873
|
+
failure.factId,
|
|
874
|
+
remaining,
|
|
875
|
+
new Error("Deletion hook timed out"),
|
|
876
|
+
deletedTasks,
|
|
877
|
+
0
|
|
878
|
+
// events not yet deleted at this point
|
|
879
|
+
);
|
|
880
|
+
}
|
|
881
|
+
const errMsg = failure.cause?.message ?? "";
|
|
882
|
+
const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
|
|
883
|
+
const sanitizedCause = isValidationError ? failure.cause : this._sanitizeRankerError(failure.cause);
|
|
884
|
+
throw new PrunePartialFailureError(
|
|
885
|
+
succeeded.length,
|
|
886
|
+
failure.factId,
|
|
887
|
+
remaining,
|
|
888
|
+
sanitizedCause,
|
|
889
|
+
deletedTasks,
|
|
890
|
+
0
|
|
891
|
+
// events not yet deleted at this point
|
|
892
|
+
);
|
|
893
|
+
}
|
|
774
894
|
}
|
|
775
895
|
if (retainEventsFor !== null) {
|
|
776
896
|
const cutoff = now - retainEventsFor * 864e5;
|
|
@@ -807,6 +927,10 @@ var _WikiMemory = class _WikiMemory {
|
|
|
807
927
|
if (maxResults === 0) ; else if (trimmedQuery) {
|
|
808
928
|
let usedEmbed = false;
|
|
809
929
|
if (!skipEmbed && embedFn) {
|
|
930
|
+
let rankerShouldRethrow = false;
|
|
931
|
+
let pendingRankerFallbackError;
|
|
932
|
+
let usedKeywordFallback = false;
|
|
933
|
+
let scoredAlreadySortedAndLimited = false;
|
|
810
934
|
try {
|
|
811
935
|
const queryVec = await embedFn(trimmedQuery);
|
|
812
936
|
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
@@ -838,6 +962,7 @@ var _WikiMemory = class _WikiMemory {
|
|
|
838
962
|
`Some facts have embeddings that do not match the current model dimension. Call runReembed() to rebuild all embeddings consistently.`
|
|
839
963
|
);
|
|
840
964
|
}
|
|
965
|
+
const useRanker = Boolean(this.options.vectorRanker);
|
|
841
966
|
let candidateRows;
|
|
842
967
|
let populateCache = true;
|
|
843
968
|
let miniSearchScores;
|
|
@@ -856,15 +981,30 @@ var _WikiMemory = class _WikiMemory {
|
|
|
856
981
|
} else {
|
|
857
982
|
const topKIds = topKResults.map((r) => r.id);
|
|
858
983
|
const inClauseChunkSize = 500;
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
984
|
+
if (useRanker) {
|
|
985
|
+
const rows = [];
|
|
986
|
+
for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
|
|
987
|
+
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
988
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
989
|
+
const chunkRows = await this.db.getAllAsync(
|
|
990
|
+
`SELECT id, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
991
|
+
idChunk
|
|
992
|
+
);
|
|
993
|
+
rows.push(...chunkRows);
|
|
994
|
+
}
|
|
995
|
+
candidateRows = rows;
|
|
996
|
+
} else {
|
|
997
|
+
const rows = [];
|
|
998
|
+
for (let i = 0; i < topKIds.length; i += inClauseChunkSize) {
|
|
999
|
+
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
1000
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1001
|
+
const chunkRows = await this.db.getAllAsync(
|
|
1002
|
+
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1003
|
+
idChunk
|
|
1004
|
+
);
|
|
1005
|
+
rows.push(...chunkRows);
|
|
1006
|
+
}
|
|
1007
|
+
candidateRows = rows;
|
|
868
1008
|
}
|
|
869
1009
|
if (weight !== void 0 && weight < 1) {
|
|
870
1010
|
const maxMsScore = Math.max(1, topKResults[0]?.score ?? 1);
|
|
@@ -873,10 +1013,17 @@ var _WikiMemory = class _WikiMemory {
|
|
|
873
1013
|
}
|
|
874
1014
|
}
|
|
875
1015
|
} else {
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
1016
|
+
if (useRanker) {
|
|
1017
|
+
candidateRows = await this.db.getAllAsync(
|
|
1018
|
+
`SELECT id, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1019
|
+
[entityId]
|
|
1020
|
+
);
|
|
1021
|
+
} else {
|
|
1022
|
+
candidateRows = await this.db.getAllAsync(
|
|
1023
|
+
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
1024
|
+
[entityId]
|
|
1025
|
+
);
|
|
1026
|
+
}
|
|
880
1027
|
if (weight !== void 0 && weight < 1) {
|
|
881
1028
|
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
882
1029
|
filter: (r) => r.entity_id === entityId,
|
|
@@ -889,76 +1036,267 @@ var _WikiMemory = class _WikiMemory {
|
|
|
889
1036
|
if (candidateRows === null) {
|
|
890
1037
|
usedEmbed = true;
|
|
891
1038
|
} else {
|
|
892
|
-
let
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
1039
|
+
let scored;
|
|
1040
|
+
if (useRanker) {
|
|
1041
|
+
const candidateIds = effectivePreFilterLimit !== void 0 ? candidateRows.map((r) => r.id) : void 0;
|
|
1042
|
+
try {
|
|
1043
|
+
const oversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
1044
|
+
scored = await this._rankWithVectorRanker({
|
|
1045
|
+
entityId,
|
|
1046
|
+
queryVec,
|
|
1047
|
+
candidateIds,
|
|
1048
|
+
weight,
|
|
1049
|
+
miniSearchScores,
|
|
1050
|
+
limit: oversampledLimit
|
|
1051
|
+
});
|
|
1052
|
+
if (scored.length > 0) {
|
|
1053
|
+
const scoredIds2 = new Set(scored.map((s) => s.id));
|
|
1054
|
+
const metaMap = /* @__PURE__ */ new Map();
|
|
1055
|
+
for (const r of candidateRows) {
|
|
1056
|
+
if (scoredIds2.has(r.id)) {
|
|
1057
|
+
metaMap.set(r.id, { updated_at: r.updated_at, access_count: r.access_count });
|
|
1058
|
+
}
|
|
1059
|
+
}
|
|
1060
|
+
scored = scored.map((s) => {
|
|
1061
|
+
const meta = metaMap.get(s.id);
|
|
1062
|
+
return { ...s, updated_at: meta?.updated_at ?? null, access_count: meta?.access_count ?? null };
|
|
1063
|
+
});
|
|
1064
|
+
}
|
|
1065
|
+
const scoredIds = new Set(scored.map((s) => s.id));
|
|
1066
|
+
const isHybrid = weight !== void 0 && weight < 1;
|
|
1067
|
+
const maxBackfill = isHybrid ? maxResults : Math.max(0, maxResults - scored.length);
|
|
1068
|
+
if (maxBackfill > 0) {
|
|
1069
|
+
if (isHybrid) {
|
|
1070
|
+
const topK = [];
|
|
1071
|
+
for (const row of candidateRows) {
|
|
1072
|
+
if (scoredIds.has(row.id)) continue;
|
|
1073
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1074
|
+
const candidate = { row, kwScore };
|
|
1075
|
+
if (topK.length < maxBackfill) {
|
|
1076
|
+
let insertIdx = topK.length;
|
|
1077
|
+
for (let i = 0; i < topK.length; i++) {
|
|
1078
|
+
const cmp = this._compareScoredRows(
|
|
1079
|
+
{
|
|
1080
|
+
id: candidate.row.id,
|
|
1081
|
+
score: candidate.kwScore,
|
|
1082
|
+
updated_at: candidate.row.updated_at,
|
|
1083
|
+
access_count: candidate.row.access_count
|
|
1084
|
+
},
|
|
1085
|
+
{
|
|
1086
|
+
id: topK[i].row.id,
|
|
1087
|
+
score: topK[i].kwScore,
|
|
1088
|
+
updated_at: topK[i].row.updated_at,
|
|
1089
|
+
access_count: topK[i].row.access_count
|
|
1090
|
+
}
|
|
1091
|
+
);
|
|
1092
|
+
if (cmp < 0) {
|
|
1093
|
+
insertIdx = i;
|
|
1094
|
+
break;
|
|
1095
|
+
}
|
|
1096
|
+
}
|
|
1097
|
+
topK.splice(insertIdx, 0, candidate);
|
|
1098
|
+
} else {
|
|
1099
|
+
const cmpWorst = this._compareScoredRows(
|
|
1100
|
+
{
|
|
1101
|
+
id: candidate.row.id,
|
|
1102
|
+
score: candidate.kwScore,
|
|
1103
|
+
updated_at: candidate.row.updated_at,
|
|
1104
|
+
access_count: candidate.row.access_count
|
|
1105
|
+
},
|
|
1106
|
+
{
|
|
1107
|
+
id: topK[maxBackfill - 1].row.id,
|
|
1108
|
+
score: topK[maxBackfill - 1].kwScore,
|
|
1109
|
+
updated_at: topK[maxBackfill - 1].row.updated_at,
|
|
1110
|
+
access_count: topK[maxBackfill - 1].row.access_count
|
|
1111
|
+
}
|
|
1112
|
+
);
|
|
1113
|
+
if (cmpWorst < 0) {
|
|
1114
|
+
let insertIdx = maxBackfill - 1;
|
|
1115
|
+
for (let i = 0; i < topK.length; i++) {
|
|
1116
|
+
const cmp = this._compareScoredRows(
|
|
1117
|
+
{
|
|
1118
|
+
id: candidate.row.id,
|
|
1119
|
+
score: candidate.kwScore,
|
|
1120
|
+
updated_at: candidate.row.updated_at,
|
|
1121
|
+
access_count: candidate.row.access_count
|
|
1122
|
+
},
|
|
1123
|
+
{
|
|
1124
|
+
id: topK[i].row.id,
|
|
1125
|
+
score: topK[i].kwScore,
|
|
1126
|
+
updated_at: topK[i].row.updated_at,
|
|
1127
|
+
access_count: topK[i].row.access_count
|
|
1128
|
+
}
|
|
1129
|
+
);
|
|
1130
|
+
if (cmp < 0) {
|
|
1131
|
+
insertIdx = i;
|
|
1132
|
+
break;
|
|
1133
|
+
}
|
|
1134
|
+
}
|
|
1135
|
+
topK.splice(insertIdx, 0, candidate);
|
|
1136
|
+
topK.pop();
|
|
1137
|
+
}
|
|
1138
|
+
}
|
|
1139
|
+
}
|
|
1140
|
+
for (const { row, kwScore } of topK) {
|
|
1141
|
+
scored.push({
|
|
1142
|
+
id: row.id,
|
|
1143
|
+
score: (1 - weight) * kwScore,
|
|
1144
|
+
updated_at: row.updated_at,
|
|
1145
|
+
access_count: row.access_count
|
|
1146
|
+
});
|
|
1147
|
+
}
|
|
1148
|
+
} else {
|
|
1149
|
+
const omitted = [];
|
|
1150
|
+
for (const row of candidateRows) {
|
|
1151
|
+
if (scoredIds.has(row.id)) continue;
|
|
1152
|
+
omitted.push({ id: row.id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
|
|
1153
|
+
}
|
|
1154
|
+
if (omitted.length > 0) {
|
|
1155
|
+
this._tieBreakSort(omitted);
|
|
1156
|
+
scored.push(...omitted.slice(0, maxBackfill));
|
|
1157
|
+
}
|
|
1158
|
+
}
|
|
1159
|
+
}
|
|
1160
|
+
} catch (rankerErr) {
|
|
1161
|
+
const rankerError = rankerErr instanceof Error ? rankerErr : new Error(String(rankerErr));
|
|
1162
|
+
const policy = this.options.vectorRankerFallback ?? "js-cosine";
|
|
1163
|
+
this.options.onVectorRankerFallback?.({
|
|
1164
|
+
error: this._sanitizeRankerError(rankerError),
|
|
1165
|
+
policy
|
|
1166
|
+
});
|
|
1167
|
+
if (policy === "throw") {
|
|
1168
|
+
rankerShouldRethrow = true;
|
|
1169
|
+
throw rankerError;
|
|
1170
|
+
} else if (policy === "js-cosine") {
|
|
1171
|
+
let fallbackRows = candidateRows;
|
|
1172
|
+
if (fallbackRows && fallbackRows.length > 0 && !("embedding_blob" in fallbackRows[0])) {
|
|
1173
|
+
const rowIds = fallbackRows.map((r) => r.id);
|
|
1174
|
+
const embeddingsMap = /* @__PURE__ */ new Map();
|
|
1175
|
+
const chunkSize = 500;
|
|
1176
|
+
for (let i = 0; i < rowIds.length; i += chunkSize) {
|
|
1177
|
+
const idChunk = rowIds.slice(i, i + chunkSize);
|
|
1178
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1179
|
+
const embeddingRows = await this.db.getAllAsync(
|
|
1180
|
+
`SELECT id, embedding_blob, embedding FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1181
|
+
[...idChunk, entityId]
|
|
1182
|
+
);
|
|
1183
|
+
for (const row of embeddingRows) {
|
|
1184
|
+
embeddingsMap.set(row.id, { embedding_blob: row.embedding_blob, embedding: row.embedding });
|
|
1185
|
+
}
|
|
1186
|
+
}
|
|
1187
|
+
fallbackRows = fallbackRows.map((r) => ({
|
|
1188
|
+
...r,
|
|
1189
|
+
embedding_blob: embeddingsMap.get(r.id)?.embedding_blob ?? null,
|
|
1190
|
+
embedding: embeddingsMap.get(r.id)?.embedding ?? null
|
|
1191
|
+
}));
|
|
1192
|
+
}
|
|
1193
|
+
scored = await this._rankWithJsCosine({
|
|
1194
|
+
entityId,
|
|
1195
|
+
queryVec,
|
|
1196
|
+
candidateRows: fallbackRows,
|
|
1197
|
+
weight,
|
|
1198
|
+
miniSearchScores,
|
|
1199
|
+
populateCache,
|
|
1200
|
+
limit: maxResults
|
|
1201
|
+
});
|
|
1202
|
+
scoredAlreadySortedAndLimited = true;
|
|
1203
|
+
} else if (policy === "keyword") {
|
|
1204
|
+
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
1205
|
+
filter: (r) => r.entity_id === entityId,
|
|
1206
|
+
combineWith: "OR"
|
|
1207
|
+
});
|
|
1208
|
+
const topResults = msResults.slice(0, maxResults);
|
|
1209
|
+
const resultIds = new Set(topResults.map((r) => r.id));
|
|
1210
|
+
const candidateMap = /* @__PURE__ */ new Map();
|
|
1211
|
+
for (const r of candidateRows) {
|
|
1212
|
+
if (resultIds.has(r.id)) {
|
|
1213
|
+
candidateMap.set(r.id, { updated_at: r.updated_at, access_count: r.access_count });
|
|
1214
|
+
}
|
|
1215
|
+
}
|
|
1216
|
+
scored = topResults.map((r) => {
|
|
1217
|
+
const meta = candidateMap.get(r.id);
|
|
1218
|
+
return {
|
|
1219
|
+
id: r.id,
|
|
1220
|
+
score: r.score ?? 0,
|
|
1221
|
+
access_count: meta?.access_count ?? null,
|
|
1222
|
+
updated_at: meta?.updated_at ?? null
|
|
1223
|
+
};
|
|
1224
|
+
});
|
|
1225
|
+
usedKeywordFallback = true;
|
|
913
1226
|
} else {
|
|
914
|
-
|
|
1227
|
+
scored = [];
|
|
915
1228
|
}
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
}
|
|
922
|
-
return { row, score };
|
|
923
|
-
});
|
|
924
|
-
if (canCache && entityCache && entityCache.size > 0) {
|
|
925
|
-
if (!this.vectorCache.has(entityId)) {
|
|
926
|
-
if (this.vectorCache.size >= _WikiMemory.MAX_VECTOR_CACHE_ENTITIES) {
|
|
927
|
-
const oldestKey = this.vectorCache.keys().next().value;
|
|
928
|
-
if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
|
|
1229
|
+
if (this.options.propagateRankerFailureToRetrievalFallback) {
|
|
1230
|
+
const mirrored = new Error("Vector ranker failed, falling back", {
|
|
1231
|
+
cause: this._sanitizeRankerError(rankerErr)
|
|
1232
|
+
});
|
|
1233
|
+
pendingRankerFallbackError = mirrored;
|
|
929
1234
|
}
|
|
930
|
-
this.vectorCache.set(entityId, entityCache);
|
|
931
1235
|
}
|
|
1236
|
+
} else {
|
|
1237
|
+
scored = await this._rankWithJsCosine({
|
|
1238
|
+
entityId,
|
|
1239
|
+
queryVec,
|
|
1240
|
+
candidateRows,
|
|
1241
|
+
weight,
|
|
1242
|
+
miniSearchScores,
|
|
1243
|
+
populateCache,
|
|
1244
|
+
limit: maxResults
|
|
1245
|
+
});
|
|
1246
|
+
scoredAlreadySortedAndLimited = true;
|
|
932
1247
|
}
|
|
933
|
-
scored.
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
const accessCountDiff = (b.row.access_count ?? 0) - (a.row.access_count ?? 0);
|
|
937
|
-
if (accessCountDiff !== 0) return accessCountDiff;
|
|
938
|
-
const updatedAtDiff = (b.row.updated_at ?? 0) - (a.row.updated_at ?? 0);
|
|
939
|
-
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
940
|
-
return a.row.id.localeCompare(b.row.id);
|
|
941
|
-
});
|
|
942
|
-
const topIds = scored.slice(0, maxResults).map((s) => s.row.id);
|
|
943
|
-
if (topIds.length > 0) {
|
|
944
|
-
const fullRows = [];
|
|
945
|
-
const phase2ChunkSize = 500;
|
|
946
|
-
for (let i = 0; i < topIds.length; i += phase2ChunkSize) {
|
|
947
|
-
const idChunk = topIds.slice(i, i + phase2ChunkSize);
|
|
948
|
-
const placeholders = idChunk.map(() => "?").join(",");
|
|
949
|
-
const chunkRows = await this.db.getAllAsync(
|
|
950
|
-
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
951
|
-
idChunk
|
|
952
|
-
);
|
|
953
|
-
fullRows.push(...chunkRows);
|
|
1248
|
+
if (scored.length > 0) {
|
|
1249
|
+
if (!usedKeywordFallback && !scoredAlreadySortedAndLimited) {
|
|
1250
|
+
this._tieBreakSort(scored);
|
|
954
1251
|
}
|
|
955
|
-
const
|
|
956
|
-
|
|
1252
|
+
const topIds = (scoredAlreadySortedAndLimited ? scored : scored.slice(0, maxResults)).map((s) => s.id);
|
|
1253
|
+
if (topIds.length > 0) {
|
|
1254
|
+
const fullRows = [];
|
|
1255
|
+
const phase2ChunkSize = 500;
|
|
1256
|
+
for (let i = 0; i < topIds.length; i += phase2ChunkSize) {
|
|
1257
|
+
const idChunk = topIds.slice(i, i + phase2ChunkSize);
|
|
1258
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1259
|
+
const chunkRows = await this.db.getAllAsync(
|
|
1260
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1261
|
+
[...idChunk, entityId]
|
|
1262
|
+
);
|
|
1263
|
+
fullRows.push(...chunkRows);
|
|
1264
|
+
}
|
|
1265
|
+
const byId = new Map(fullRows.map((r) => [r.id, r]));
|
|
1266
|
+
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
1267
|
+
if (facts.length < topIds.length) {
|
|
1268
|
+
const missingIds = topIds.filter((id) => !byId.has(id));
|
|
1269
|
+
const missingCount = missingIds.length;
|
|
1270
|
+
const sample = missingIds.slice(0, 5);
|
|
1271
|
+
const sampleSuffix = sample.length > 0 ? ` Missing ID sample: ${sample.join(", ")}${missingIds.length > sample.length ? ", ..." : ""}.` : "";
|
|
1272
|
+
const error = new Error(
|
|
1273
|
+
`Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs for entity ${entityId}. Rows may have been concurrently soft-deleted or filtered by deleted_at during hydration, or vector ranker output may include IDs that do not exist for this entity.` + sampleSuffix
|
|
1274
|
+
);
|
|
1275
|
+
this.options.onRetrievalFallback?.(error);
|
|
1276
|
+
}
|
|
1277
|
+
}
|
|
1278
|
+
if (pendingRankerFallbackError) {
|
|
1279
|
+
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
1280
|
+
pendingRankerFallbackError = void 0;
|
|
1281
|
+
}
|
|
1282
|
+
usedEmbed = true;
|
|
1283
|
+
} else {
|
|
1284
|
+
if (pendingRankerFallbackError) {
|
|
1285
|
+
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
1286
|
+
pendingRankerFallbackError = void 0;
|
|
1287
|
+
}
|
|
1288
|
+
usedEmbed = true;
|
|
957
1289
|
}
|
|
958
|
-
usedEmbed = true;
|
|
959
1290
|
}
|
|
960
1291
|
} catch (err) {
|
|
961
1292
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
1293
|
+
if (rankerShouldRethrow) {
|
|
1294
|
+
throw error;
|
|
1295
|
+
}
|
|
1296
|
+
if (pendingRankerFallbackError) {
|
|
1297
|
+
error.cause = pendingRankerFallbackError;
|
|
1298
|
+
pendingRankerFallbackError = void 0;
|
|
1299
|
+
}
|
|
962
1300
|
this.options.onRetrievalFallback?.(error);
|
|
963
1301
|
}
|
|
964
1302
|
}
|
|
@@ -975,8 +1313,8 @@ var _WikiMemory = class _WikiMemory {
|
|
|
975
1313
|
const idChunk = topIds.slice(i, i + kwChunkSize);
|
|
976
1314
|
const placeholders = idChunk.map(() => "?").join(",");
|
|
977
1315
|
const chunkRows = await this.db.getAllAsync(
|
|
978
|
-
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
979
|
-
idChunk
|
|
1316
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1317
|
+
[...idChunk, entityId]
|
|
980
1318
|
);
|
|
981
1319
|
kwRows.push(...chunkRows);
|
|
982
1320
|
}
|
|
@@ -1032,6 +1370,133 @@ var _WikiMemory = class _WikiMemory {
|
|
|
1032
1370
|
});
|
|
1033
1371
|
return { facts: parsedFacts, tasks, events: events.reverse() };
|
|
1034
1372
|
}
|
|
1373
|
+
/**
|
|
1374
|
+
* Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
|
|
1375
|
+
*/
|
|
1376
|
+
_tieBreakSort(items) {
|
|
1377
|
+
items.sort((a, b) => this._compareScoredRows(a, b));
|
|
1378
|
+
}
|
|
1379
|
+
/**
|
|
1380
|
+
* Comparator for score + deterministic tie-break fields.
|
|
1381
|
+
* Negative return means "a ranks ahead of b" for descending score order.
|
|
1382
|
+
*/
|
|
1383
|
+
_compareScoredRows(a, b) {
|
|
1384
|
+
const scoreDiff = b.score - a.score;
|
|
1385
|
+
if (scoreDiff !== 0) return scoreDiff;
|
|
1386
|
+
const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
|
|
1387
|
+
if (accessCountDiff !== 0) return accessCountDiff;
|
|
1388
|
+
const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
|
|
1389
|
+
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
1390
|
+
return a.id.localeCompare(b.id);
|
|
1391
|
+
}
|
|
1392
|
+
/**
|
|
1393
|
+
* Strip potentially sensitive data from ranker errors before exposing to host callbacks.
|
|
1394
|
+
* Preserves error type for debugging but removes message/stack that may contain credentials.
|
|
1395
|
+
* Recursively sanitizes one level of .cause; deeper chains collapse to type only.
|
|
1396
|
+
*/
|
|
1397
|
+
_sanitizeRankerError(err) {
|
|
1398
|
+
if (this.options.sanitizeRankerErrors === false) {
|
|
1399
|
+
return err instanceof Error ? err : new Error(String(err));
|
|
1400
|
+
}
|
|
1401
|
+
const typeName = err instanceof Error ? err.constructor?.name ?? "Error" : typeof err;
|
|
1402
|
+
const innerCause = err instanceof Error && err.cause !== void 0 ? new Error(`Caused by: ${err.cause?.constructor?.name ?? typeof err.cause}`) : void 0;
|
|
1403
|
+
const sanitized = new Error(
|
|
1404
|
+
`VectorRanker ${typeName} (message scrubbed for security)`,
|
|
1405
|
+
innerCause ? { cause: innerCause } : void 0
|
|
1406
|
+
);
|
|
1407
|
+
sanitized.name = typeName;
|
|
1408
|
+
return sanitized;
|
|
1409
|
+
}
|
|
1410
|
+
/**
|
|
1411
|
+
* Score candidate rows using in-process JS cosine similarity.
|
|
1412
|
+
* Applies hybrid blending (if weight set) and tie-break sorting before returning.
|
|
1413
|
+
*/
|
|
1414
|
+
async _rankWithJsCosine(args) {
|
|
1415
|
+
const queryVec = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
|
|
1416
|
+
const { entityId, candidateRows, weight, miniSearchScores, populateCache, limit } = args;
|
|
1417
|
+
let entityCache = this.vectorCache.get(entityId);
|
|
1418
|
+
const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
1419
|
+
if (tooLarge && entityCache) {
|
|
1420
|
+
this.vectorCache.delete(entityId);
|
|
1421
|
+
entityCache = void 0;
|
|
1422
|
+
}
|
|
1423
|
+
const canCache = populateCache && !tooLarge;
|
|
1424
|
+
if (canCache && !entityCache) {
|
|
1425
|
+
entityCache = /* @__PURE__ */ new Map();
|
|
1426
|
+
}
|
|
1427
|
+
const scored = candidateRows.map((row) => {
|
|
1428
|
+
let vector = entityCache?.get(row.id) ?? parseEmbedding(row.embedding_blob, row.embedding);
|
|
1429
|
+
if (vector && canCache && entityCache && !entityCache.has(row.id)) {
|
|
1430
|
+
entityCache.set(row.id, vector);
|
|
1431
|
+
}
|
|
1432
|
+
let score = 0;
|
|
1433
|
+
if (vector && vector.length === queryVec.length) {
|
|
1434
|
+
const cosSim = cosineSimilarity(queryVec, vector);
|
|
1435
|
+
if (weight !== void 0) {
|
|
1436
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1437
|
+
score = weight * Math.max(0, cosSim) + (1 - weight) * kwScore;
|
|
1438
|
+
} else {
|
|
1439
|
+
score = cosSim;
|
|
1440
|
+
}
|
|
1441
|
+
} else if (weight !== void 0 && weight < 1) {
|
|
1442
|
+
const kwScore = miniSearchScores?.get(row.id) ?? 0;
|
|
1443
|
+
score = (1 - weight) * kwScore;
|
|
1444
|
+
} else {
|
|
1445
|
+
score = -2;
|
|
1446
|
+
}
|
|
1447
|
+
return { id: row.id, score, updated_at: row.updated_at, access_count: row.access_count };
|
|
1448
|
+
});
|
|
1449
|
+
if (canCache && entityCache && entityCache.size > 0) {
|
|
1450
|
+
if (!this.vectorCache.has(entityId)) {
|
|
1451
|
+
if (this.vectorCache.size >= _WikiMemory.MAX_VECTOR_CACHE_ENTITIES) {
|
|
1452
|
+
const oldestKey = this.vectorCache.keys().next().value;
|
|
1453
|
+
if (oldestKey !== void 0) this.vectorCache.delete(oldestKey);
|
|
1454
|
+
}
|
|
1455
|
+
this.vectorCache.set(entityId, entityCache);
|
|
1456
|
+
}
|
|
1457
|
+
}
|
|
1458
|
+
this._tieBreakSort(scored);
|
|
1459
|
+
return scored.slice(0, limit);
|
|
1460
|
+
}
|
|
1461
|
+
/**
|
|
1462
|
+
* Delegate semantic ranking to the injected VectorRanker.
|
|
1463
|
+
* Caller should pass an oversampledLimit to preserve recall after re-ranking.
|
|
1464
|
+
* Returns scored results ready for hybrid blending and tie-break sorting.
|
|
1465
|
+
*/
|
|
1466
|
+
async _rankWithVectorRanker(args) {
|
|
1467
|
+
const { entityId, candidateIds, weight, miniSearchScores, limit } = args;
|
|
1468
|
+
const ranker = this.options.vectorRanker;
|
|
1469
|
+
if (!ranker) {
|
|
1470
|
+
throw new Error("vectorRanker not configured");
|
|
1471
|
+
}
|
|
1472
|
+
const queryVecCopy = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
|
|
1473
|
+
const rankerResults = await ranker.rankBySimilarity({
|
|
1474
|
+
entityId,
|
|
1475
|
+
queryVec: queryVecCopy,
|
|
1476
|
+
candidateIds,
|
|
1477
|
+
limit
|
|
1478
|
+
});
|
|
1479
|
+
const allowedIds = candidateIds ? new Set(candidateIds) : void 0;
|
|
1480
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1481
|
+
const normalized = [];
|
|
1482
|
+
for (const r of rankerResults) {
|
|
1483
|
+
if (normalized.length >= limit) break;
|
|
1484
|
+
if (seen.has(r.id)) continue;
|
|
1485
|
+
if (allowedIds && !allowedIds.has(r.id)) continue;
|
|
1486
|
+
if (!Number.isFinite(r.semanticScore)) continue;
|
|
1487
|
+
seen.add(r.id);
|
|
1488
|
+
normalized.push(r);
|
|
1489
|
+
}
|
|
1490
|
+
const scored = normalized.map((r) => {
|
|
1491
|
+
let score = r.semanticScore;
|
|
1492
|
+
if (weight !== void 0) {
|
|
1493
|
+
const kwScore = miniSearchScores?.get(r.id) ?? 0;
|
|
1494
|
+
score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
|
|
1495
|
+
}
|
|
1496
|
+
return { id: r.id, score };
|
|
1497
|
+
});
|
|
1498
|
+
return scored;
|
|
1499
|
+
}
|
|
1035
1500
|
async getMemoryBundle(entityId) {
|
|
1036
1501
|
return this._getFullBundle(entityId, { maxEvents: 10 });
|
|
1037
1502
|
}
|
|
@@ -1148,7 +1613,7 @@ ${JSON.stringify(currentFacts, null, 2)}`;
|
|
|
1148
1613
|
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
|
|
1149
1614
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
1150
1615
|
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
|
|
1151
|
-
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1616
|
+
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1152
1617
|
}
|
|
1153
1618
|
for (const task of validTasks) {
|
|
1154
1619
|
const id = generateId("task_");
|
|
@@ -1228,6 +1693,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1228
1693
|
const safeDeleted = deleted.filter((id) => mutableIds.has(id));
|
|
1229
1694
|
const validNewFacts = newFacts.map(validateFact).filter((f) => f !== null);
|
|
1230
1695
|
const insertedFacts = [];
|
|
1696
|
+
const uniqueDeletedFactIds = Array.from(new Set(safeDeleted));
|
|
1231
1697
|
await this.db.withTransactionAsync(async () => {
|
|
1232
1698
|
for (const id of safeDowngraded) {
|
|
1233
1699
|
await this.db.runAsync(`UPDATE ${this.prefix}entries SET confidence = 'tentative', updated_at = ? WHERE id = ? AND entity_id = ?`, [now, id, entityId]);
|
|
@@ -1241,11 +1707,18 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1241
1707
|
INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, created_at, updated_at)
|
|
1242
1708
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
1243
1709
|
`, [id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "agent_inferred", now, now]);
|
|
1244
|
-
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1710
|
+
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1245
1711
|
}
|
|
1246
1712
|
});
|
|
1247
1713
|
this.vectorCache.delete(entityId);
|
|
1248
1714
|
await this.rebuildMiniSearchIndex(entityId);
|
|
1715
|
+
for (const factId of uniqueDeletedFactIds) {
|
|
1716
|
+
try {
|
|
1717
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
1718
|
+
} catch (hookErr) {
|
|
1719
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during heal for ${factId}:`, hookErr);
|
|
1720
|
+
}
|
|
1721
|
+
}
|
|
1249
1722
|
for (const fact of insertedFacts) {
|
|
1250
1723
|
await this.embedFact(fact);
|
|
1251
1724
|
}
|
|
@@ -1545,10 +2018,17 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1545
2018
|
}
|
|
1546
2019
|
async _doImportEntity(entityId, bundle, merge) {
|
|
1547
2020
|
const upsertedFactIds = /* @__PURE__ */ new Set();
|
|
1548
|
-
const
|
|
2021
|
+
const upsertedDeletedFactIds = /* @__PURE__ */ new Set();
|
|
2022
|
+
const factsWithPreservedBlob = /* @__PURE__ */ new Map();
|
|
1549
2023
|
const preservedBlobDims = /* @__PURE__ */ new Set();
|
|
2024
|
+
const softDeletedFactIds = [];
|
|
1550
2025
|
await this.db.withTransactionAsync(async () => {
|
|
1551
2026
|
if (!merge) {
|
|
2027
|
+
const toDelete = await this.db.getAllAsync(
|
|
2028
|
+
`SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
2029
|
+
[entityId]
|
|
2030
|
+
);
|
|
2031
|
+
softDeletedFactIds.push(...toDelete.map((r) => r.id));
|
|
1552
2032
|
const now = Date.now();
|
|
1553
2033
|
await this.db.runAsync(
|
|
1554
2034
|
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
@@ -1602,7 +2082,8 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1602
2082
|
let blobData = null;
|
|
1603
2083
|
if (rawBlob !== null && rawBlob.byteLength > 0 && rawBlob.byteLength % 4 === 0) {
|
|
1604
2084
|
const copy = new ArrayBuffer(rawBlob.byteLength);
|
|
1605
|
-
new Uint8Array(copy)
|
|
2085
|
+
const alignedBlob = new Uint8Array(copy);
|
|
2086
|
+
alignedBlob.set(rawBlob);
|
|
1606
2087
|
const floats = new Float32Array(copy, 0, rawBlob.byteLength / 4);
|
|
1607
2088
|
let allFinite = true;
|
|
1608
2089
|
for (let i = 0; i < floats.length; i++) {
|
|
@@ -1612,7 +2093,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1612
2093
|
}
|
|
1613
2094
|
}
|
|
1614
2095
|
if (allFinite) {
|
|
1615
|
-
blobData =
|
|
2096
|
+
blobData = alignedBlob;
|
|
1616
2097
|
}
|
|
1617
2098
|
}
|
|
1618
2099
|
if (existing) {
|
|
@@ -1628,7 +2109,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1628
2109
|
`UPDATE ${this.prefix}entries SET entity_id = ?, title = ?, body = ?, tags = ?, confidence = ?, source_type = ?, source_hash = ?, source_ref = ?, created_at = ?, updated_at = ?, last_accessed_at = ?, access_count = ?, deleted_at = ?, embedding_blob = ?, embedding = NULL WHERE id = ?`,
|
|
1629
2110
|
[entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData, fact.id]
|
|
1630
2111
|
);
|
|
1631
|
-
factsWithPreservedBlob.
|
|
2112
|
+
factsWithPreservedBlob.set(fact.id, blobData);
|
|
1632
2113
|
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
1633
2114
|
} else {
|
|
1634
2115
|
await this.db.runAsync(
|
|
@@ -1638,13 +2119,14 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1638
2119
|
}
|
|
1639
2120
|
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1640
2121
|
upsertedFactIds.add(fact.id);
|
|
2122
|
+
if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
|
|
1641
2123
|
} else {
|
|
1642
2124
|
if (blobData != null) {
|
|
1643
2125
|
await this.db.runAsync(
|
|
1644
2126
|
`INSERT INTO ${this.prefix}entries (id, entity_id, title, body, tags, confidence, source_type, source_hash, source_ref, created_at, updated_at, last_accessed_at, access_count, deleted_at, embedding_blob) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1645
2127
|
[fact.id, entityId, fact.title, fact.body, tagsJson, fact.confidence, fact.source_type, fact.source_hash, fact.source_ref, fact.created_at, safeUpdatedAt, fact.last_accessed_at, fact.access_count, fact.deleted_at, blobData]
|
|
1646
2128
|
);
|
|
1647
|
-
factsWithPreservedBlob.
|
|
2129
|
+
factsWithPreservedBlob.set(fact.id, blobData);
|
|
1648
2130
|
if (!fact.deleted_at) preservedBlobDims.add(blobData.byteLength / 4);
|
|
1649
2131
|
} else {
|
|
1650
2132
|
await this.db.runAsync(
|
|
@@ -1654,6 +2136,7 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1654
2136
|
}
|
|
1655
2137
|
existingFactsById.set(fact.id, { id: fact.id, entity_id: entityId, updated_at: safeUpdatedAt });
|
|
1656
2138
|
upsertedFactIds.add(fact.id);
|
|
2139
|
+
if (fact.deleted_at) upsertedDeletedFactIds.add(fact.id);
|
|
1657
2140
|
}
|
|
1658
2141
|
}
|
|
1659
2142
|
const taskIds = bundle.tasks.map((task) => task.id);
|
|
@@ -1709,12 +2192,34 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1709
2192
|
if (!fact.deleted_at && upsertedFactIds.has(fact.id) && !factsWithPreservedBlob.has(fact.id)) {
|
|
1710
2193
|
await this.embedFact({
|
|
1711
2194
|
id: fact.id,
|
|
2195
|
+
entity_id: entityId,
|
|
2196
|
+
// Use authoritative entityId from dump key, not fact.entity_id
|
|
1712
2197
|
title: fact.title,
|
|
1713
2198
|
body: fact.body,
|
|
1714
2199
|
tags: Array.isArray(fact.tags) || typeof fact.tags === "string" ? fact.tags : []
|
|
1715
2200
|
});
|
|
1716
2201
|
}
|
|
1717
2202
|
}
|
|
2203
|
+
for (const fact of bundle.facts) {
|
|
2204
|
+
const blobData = factsWithPreservedBlob.get(fact.id);
|
|
2205
|
+
if (blobData && !fact.deleted_at && upsertedFactIds.has(fact.id)) {
|
|
2206
|
+
try {
|
|
2207
|
+
const float32Vector = new Float32Array(blobData.buffer, blobData.byteOffset, blobData.byteLength / 4);
|
|
2208
|
+
await this._notifyEmbeddingPersisted(entityId, fact.id, float32Vector);
|
|
2209
|
+
} catch (hookErr) {
|
|
2210
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed for preserved-blob fact ${fact.id}:`, hookErr);
|
|
2211
|
+
}
|
|
2212
|
+
}
|
|
2213
|
+
}
|
|
2214
|
+
for (const factId of softDeletedFactIds) {
|
|
2215
|
+
if (!upsertedFactIds.has(factId) || upsertedDeletedFactIds.has(factId)) {
|
|
2216
|
+
try {
|
|
2217
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
2218
|
+
} catch (hookErr) {
|
|
2219
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted(vector=null) hook failed for soft-deleted fact ${factId}:`, hookErr);
|
|
2220
|
+
}
|
|
2221
|
+
}
|
|
2222
|
+
}
|
|
1718
2223
|
try {
|
|
1719
2224
|
const canonicalRow = await this.db.getFirstAsync(
|
|
1720
2225
|
`SELECT value FROM ${this.prefix}meta WHERE key = 'embedding_dimension'`
|
|
@@ -1785,7 +2290,17 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1785
2290
|
const now = Date.now();
|
|
1786
2291
|
let deletedEntries = 0;
|
|
1787
2292
|
let deletedTasks = 0;
|
|
2293
|
+
const deletedEntryIds = [];
|
|
1788
2294
|
if (params.clearAll) {
|
|
2295
|
+
const newDeletions = await this.db.getAllAsync(
|
|
2296
|
+
`SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NULL`,
|
|
2297
|
+
[entityId]
|
|
2298
|
+
);
|
|
2299
|
+
const alreadySoftDeleted = await this.db.getAllAsync(
|
|
2300
|
+
`SELECT id FROM ${this.prefix}entries WHERE entity_id = ? AND deleted_at IS NOT NULL`,
|
|
2301
|
+
[entityId]
|
|
2302
|
+
);
|
|
2303
|
+
deletedEntryIds.push(...newDeletions.map((e) => e.id), ...alreadySoftDeleted.map((e) => e.id));
|
|
1789
2304
|
const [entriesRes, tasksRes] = await Promise.all([
|
|
1790
2305
|
this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId]),
|
|
1791
2306
|
this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE entity_id = ? AND deleted_at IS NULL`, [now, now, entityId])
|
|
@@ -1803,6 +2318,27 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1803
2318
|
if (params.sourceRef !== void 0 && !sourceRef) throw new Error("Invalid sourceRef");
|
|
1804
2319
|
const sourceHash = params.sourceHash !== void 0 ? normalizeSourceHash(params.sourceHash) : null;
|
|
1805
2320
|
if (params.sourceHash !== void 0 && !sourceHash) throw new Error("Invalid sourceHash (must be 64-char hex string)");
|
|
2321
|
+
if (params.entryId) {
|
|
2322
|
+
const entry = await this.db.getFirstAsync(
|
|
2323
|
+
`SELECT id FROM ${this.prefix}entries WHERE id = ? AND entity_id = ?`,
|
|
2324
|
+
[params.entryId, entityId]
|
|
2325
|
+
);
|
|
2326
|
+
if (entry) deletedEntryIds.push(entry.id);
|
|
2327
|
+
}
|
|
2328
|
+
if (sourceRef || sourceHash) {
|
|
2329
|
+
let q = `SELECT id FROM ${this.prefix}entries WHERE entity_id = ?`;
|
|
2330
|
+
const args = [entityId];
|
|
2331
|
+
if (sourceRef) {
|
|
2332
|
+
q += ` AND source_ref = ?`;
|
|
2333
|
+
args.push(sourceRef);
|
|
2334
|
+
}
|
|
2335
|
+
if (sourceHash) {
|
|
2336
|
+
q += ` AND source_hash = ?`;
|
|
2337
|
+
args.push(sourceHash);
|
|
2338
|
+
}
|
|
2339
|
+
const entriesToDelete = await this.db.getAllAsync(q, args);
|
|
2340
|
+
deletedEntryIds.push(...entriesToDelete.map((e) => e.id));
|
|
2341
|
+
}
|
|
1806
2342
|
const entryPromise = params.entryId ? this.db.runAsync(`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.entryId, entityId]) : null;
|
|
1807
2343
|
const taskPromise = params.taskId ? this.db.runAsync(`UPDATE ${this.prefix}tasks SET deleted_at = ?, updated_at = ? WHERE id = ? AND entity_id = ? AND deleted_at IS NULL`, [now, now, params.taskId, entityId]) : null;
|
|
1808
2344
|
let refPromise = null;
|
|
@@ -1830,6 +2366,31 @@ The following document anchors are provided for contradiction detection only. Do
|
|
|
1830
2366
|
}
|
|
1831
2367
|
await this.rebuildMiniSearchIndex(entityId);
|
|
1832
2368
|
this.vectorCache.delete(entityId);
|
|
2369
|
+
const uniqueDeletedIds = Array.from(new Set(deletedEntryIds));
|
|
2370
|
+
for (const factId of uniqueDeletedIds) {
|
|
2371
|
+
try {
|
|
2372
|
+
await this._notifyEmbeddingPersistedOrThrow(entityId, factId, null);
|
|
2373
|
+
} catch (hookErr) {
|
|
2374
|
+
const isTimeout = hookErr?.[HOOK_TIMEOUT_MARKER] === true;
|
|
2375
|
+
if (isTimeout) {
|
|
2376
|
+
throw new Error(
|
|
2377
|
+
`forget(${entityId}/${factId}) failed: ${hookErr.message}`
|
|
2378
|
+
);
|
|
2379
|
+
}
|
|
2380
|
+
const errMsg = hookErr?.message ?? "";
|
|
2381
|
+
const isValidationError = errMsg.startsWith("Invalid deletionHookTimeoutMs");
|
|
2382
|
+
if (isValidationError) {
|
|
2383
|
+
throw new Error(
|
|
2384
|
+
`forget(${entityId}/${factId}) failed: ${errMsg}`,
|
|
2385
|
+
{ cause: hookErr }
|
|
2386
|
+
);
|
|
2387
|
+
}
|
|
2388
|
+
throw new Error(
|
|
2389
|
+
`forget(${entityId}/${factId}) failed: ANN cleanup hook rejected`,
|
|
2390
|
+
{ cause: this._sanitizeRankerError(hookErr) }
|
|
2391
|
+
);
|
|
2392
|
+
}
|
|
2393
|
+
}
|
|
1833
2394
|
return { deleted: { entries: deletedEntries, tasks: deletedTasks } };
|
|
1834
2395
|
} finally {
|
|
1835
2396
|
this.activeMaintenanceJobs.delete(forgetKey);
|
|
@@ -1899,7 +2460,15 @@ ${chunk}`;
|
|
|
1899
2460
|
}
|
|
1900
2461
|
const now = Date.now();
|
|
1901
2462
|
const insertedFacts = [];
|
|
2463
|
+
const deletedSourceFactIds = [];
|
|
1902
2464
|
await this.db.withTransactionAsync(async () => {
|
|
2465
|
+
const existingSourceFacts = await this.db.getAllAsync(
|
|
2466
|
+
`SELECT id FROM ${this.prefix}entries WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
2467
|
+
[sourceRef, entityId]
|
|
2468
|
+
);
|
|
2469
|
+
for (const row of existingSourceFacts) {
|
|
2470
|
+
deletedSourceFactIds.push(row.id);
|
|
2471
|
+
}
|
|
1903
2472
|
await this.db.runAsync(
|
|
1904
2473
|
`UPDATE ${this.prefix}entries SET deleted_at = ?, updated_at = ? WHERE source_ref = ? AND entity_id = ? AND deleted_at IS NULL`,
|
|
1905
2474
|
[now, now, sourceRef, entityId]
|
|
@@ -1911,11 +2480,19 @@ ${chunk}`;
|
|
|
1911
2480
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
1912
2481
|
[id, entityId, fact.title, fact.body, JSON.stringify(fact.tags), fact.confidence, "user_document", sourceHash, sourceRef, now, now]
|
|
1913
2482
|
);
|
|
1914
|
-
insertedFacts.push({ id, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
2483
|
+
insertedFacts.push({ id, entity_id: entityId, title: fact.title, body: fact.body, tags: JSON.stringify(fact.tags) });
|
|
1915
2484
|
}
|
|
1916
2485
|
});
|
|
1917
2486
|
await this.rebuildMiniSearchIndex(entityId);
|
|
1918
2487
|
this.vectorCache.delete(entityId);
|
|
2488
|
+
const uniqueDeletedSourceFactIds = Array.from(new Set(deletedSourceFactIds));
|
|
2489
|
+
for (const factId of uniqueDeletedSourceFactIds) {
|
|
2490
|
+
try {
|
|
2491
|
+
await this._notifyEmbeddingPersisted(entityId, factId, null);
|
|
2492
|
+
} catch (hookErr) {
|
|
2493
|
+
console.warn(`[WikiMemory] onEmbeddingPersisted hook failed during ingest for ${factId}:`, hookErr);
|
|
2494
|
+
}
|
|
2495
|
+
}
|
|
1919
2496
|
for (const fact of insertedFacts) {
|
|
1920
2497
|
await this.embedFact(fact);
|
|
1921
2498
|
}
|
|
@@ -2160,6 +2737,7 @@ function createWiki(db, options) {
|
|
|
2160
2737
|
return new WikiMemory(db, options);
|
|
2161
2738
|
}
|
|
2162
2739
|
|
|
2740
|
+
exports.PrunePartialFailureError = PrunePartialFailureError;
|
|
2163
2741
|
exports.WikiBusyError = WikiBusyError;
|
|
2164
2742
|
exports.WikiMemory = WikiMemory;
|
|
2165
2743
|
exports.createWiki = createWiki;
|