@equationalapplications/core-llm-wiki 4.2.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +23 -8
- package/dist/index.d.ts +23 -8
- package/dist/index.js +274 -126
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +274 -126
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -203,6 +203,40 @@ function parseEmbedding(blob, text) {
|
|
|
203
203
|
return null;
|
|
204
204
|
}
|
|
205
205
|
|
|
206
|
+
// src/readOptions.ts
|
|
207
|
+
/**
 * Normalize the `entityId` argument into a de-duplicated list of IDs.
 *
 * @param {*} entityId - A single entity ID, or an array of entity IDs.
 * @returns {Array} The IDs in first-seen order with repeats removed. A
 *   non-array input is wrapped into a one-element array; an empty array
 *   stays empty.
 */
function normalizeEntityIds(entityId) {
  const input = Array.isArray(entityId) ? entityId : [entityId];
  // A Set iterates in insertion order, so spreading it keeps the first
  // occurrence of every ID and silently drops later duplicates.
  return [...new Set(input)];
}
|
|
218
|
+
/**
 * Build a clean weight table covering exactly the requested entity IDs.
 *
 * @param {Iterable<string>} entityIds - Entity IDs that need a weight.
 * @param {Object|null|undefined} tierWeights - Caller-supplied weights keyed
 *   by entity ID.
 * @returns {Object|undefined} `undefined` when no weights were supplied
 *   (`undefined` or `null`); otherwise a prototype-less object with one entry
 *   per ID in `entityIds`. Missing or non-finite weights become 1 and
 *   negative weights are clamped to 0.
 */
function sanitizeTierWeights(entityIds, tierWeights) {
  // `== null` covers both undefined and null, so an explicit
  // `tierWeights: null` is treated as "not provided" instead of throwing
  // on the property lookup below.
  if (tierWeights == null) return void 0;
  // No prototype: entity IDs are arbitrary strings and must never collide
  // with inherited keys such as "constructor" or "__proto__".
  const sanitized = /* @__PURE__ */ Object.create(null);
  for (const entityId of entityIds) {
    const raw = tierWeights[entityId];
    // Number.isFinite is false for undefined, NaN, ±Infinity and every
    // non-number value, so all of those fall back to the neutral weight 1.
    sanitized[entityId] = Number.isFinite(raw) ? Math.max(0, raw) : 1;
  }
  return sanitized;
}
|
|
231
|
+
/**
 * Scale a retrieval score by the weight assigned to the row's entity.
 *
 * @param {number} score - The unweighted score.
 * @param {string} entityId - Entity the scored row belongs to.
 * @param {Object|undefined} sanitizedTierWeights - Weight table keyed by
 *   entity ID; an absent table or absent entry means a weight of 1.
 * @returns {number} `-Infinity` when the entity's weight is exactly 0,
 *   otherwise `score` multiplied by the weight.
 */
function applyTierWeight(score, entityId, sanitizedTierWeights) {
  const multiplier = sanitizedTierWeights?.[entityId] ?? 1;
  // A zero weight returns -Infinity rather than the plain product
  // (0, -0 or NaN depending on the score).
  return multiplier === 0 ? -Infinity : score * multiplier;
}
|
|
236
|
+
/**
 * Decide whether a read result should carry metadata.
 *
 * @param {*} entityId - The `entityId` argument exactly as the caller passed it.
 * @returns {boolean} `true` only when the caller passed an array (including
 *   an empty one); any non-array value yields `false`.
 */
function shouldExposeReadMetadata(entityId) {
  const calledWithArray = Array.isArray(entityId);
  return calledWithArray;
}
|
|
239
|
+
|
|
206
240
|
// src/WikiMemory.ts
|
|
207
241
|
var HOOK_TIMEOUT_MARKER = /* @__PURE__ */ Symbol("WikiMemoryHookTimeout");
|
|
208
242
|
function parseJsonResponse(text) {
|
|
@@ -968,6 +1002,25 @@ After running the migration SQL, restart your application.`
|
|
|
968
1002
|
}
|
|
969
1003
|
async read(entityId, query, options) {
|
|
970
1004
|
const config = this.options.config;
|
|
1005
|
+
const entityIds = normalizeEntityIds(entityId);
|
|
1006
|
+
const sanitizedTierWeights = sanitizeTierWeights(entityIds, options?.tierWeights);
|
|
1007
|
+
const exposeMetadata = shouldExposeReadMetadata(entityId);
|
|
1008
|
+
if (entityIds.length === 0) {
|
|
1009
|
+
const empty = { facts: [], tasks: [], events: [] };
|
|
1010
|
+
if (exposeMetadata) {
|
|
1011
|
+
empty.metadata = { query, entityIds: [] };
|
|
1012
|
+
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) empty.metadata.tierWeights = sanitizedTierWeights;
|
|
1013
|
+
}
|
|
1014
|
+
return empty;
|
|
1015
|
+
}
|
|
1016
|
+
const MAX_ENTITY_IDS = 100;
|
|
1017
|
+
if (entityIds.length > MAX_ENTITY_IDS) {
|
|
1018
|
+
throw new RangeError(`read() accepts at most ${MAX_ENTITY_IDS} entity IDs; received ${entityIds.length}`);
|
|
1019
|
+
}
|
|
1020
|
+
const nullByteId = entityIds.find((id) => id.includes("\0"));
|
|
1021
|
+
if (nullByteId !== void 0) {
|
|
1022
|
+
throw new TypeError(`entity_id values must not contain the null byte (\\x00); got "${nullByteId}"`);
|
|
1023
|
+
}
|
|
971
1024
|
const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
|
|
972
1025
|
const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
|
|
973
1026
|
const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
|
|
@@ -978,13 +1031,15 @@ After running the migration SQL, restart your application.`
|
|
|
978
1031
|
const embedFn = this.options.llmProvider.embed;
|
|
979
1032
|
const trimmedQuery = query.trim();
|
|
980
1033
|
let facts = [];
|
|
1034
|
+
let scoreByFactId;
|
|
981
1035
|
if (maxResults === 0) ; else if (trimmedQuery) {
|
|
982
1036
|
let usedEmbed = false;
|
|
983
|
-
|
|
1037
|
+
const scoredEntityIds = this._filterScoredEntities(entityIds, sanitizedTierWeights, options?.includeZeroWeightEntities);
|
|
1038
|
+
if (scoredEntityIds.length === 0) {
|
|
1039
|
+
usedEmbed = true;
|
|
1040
|
+
} else if (!skipEmbed && embedFn) {
|
|
984
1041
|
let rankerShouldRethrow = false;
|
|
985
1042
|
let pendingRankerFallbackError;
|
|
986
|
-
let usedKeywordFallback = false;
|
|
987
|
-
let scoredAlreadySortedAndLimited = false;
|
|
988
1043
|
try {
|
|
989
1044
|
const queryVec = await embedFn(trimmedQuery);
|
|
990
1045
|
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
@@ -1003,13 +1058,14 @@ After running the migration SQL, restart your application.`
|
|
|
1003
1058
|
);
|
|
1004
1059
|
}
|
|
1005
1060
|
}
|
|
1061
|
+
const mismatchScope = this._entityInClause(entityIds);
|
|
1006
1062
|
const mismatchedCount = await this.db.getFirstAsync(
|
|
1007
1063
|
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
1008
|
-
WHERE
|
|
1064
|
+
WHERE ${mismatchScope.clause} AND deleted_at IS NULL
|
|
1009
1065
|
AND embedding_blob IS NOT NULL
|
|
1010
1066
|
AND (CAST(length(embedding_blob) AS INTEGER) % 4 = 0)
|
|
1011
1067
|
AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?`,
|
|
1012
|
-
[
|
|
1068
|
+
[...mismatchScope.params, queryVec.length]
|
|
1013
1069
|
);
|
|
1014
1070
|
if (mismatchedCount && mismatchedCount.cnt > 0) {
|
|
1015
1071
|
throw new Error(
|
|
@@ -1018,12 +1074,13 @@ After running the migration SQL, restart your application.`
|
|
|
1018
1074
|
}
|
|
1019
1075
|
const useRanker = Boolean(this.options.vectorRanker);
|
|
1020
1076
|
let candidateRows;
|
|
1021
|
-
let populateCache =
|
|
1077
|
+
let populateCache = entityIds.length === 1;
|
|
1022
1078
|
let miniSearchScores;
|
|
1023
1079
|
if (effectivePreFilterLimit !== void 0) {
|
|
1024
1080
|
populateCache = false;
|
|
1081
|
+
const entityIdSet = new Set(scoredEntityIds);
|
|
1025
1082
|
const preResults = this.miniSearch.search(trimmedQuery, {
|
|
1026
|
-
filter: (r) => r.entity_id
|
|
1083
|
+
filter: (r) => entityIdSet.has(r.entity_id),
|
|
1027
1084
|
combineWith: "OR"
|
|
1028
1085
|
});
|
|
1029
1086
|
if (preResults.length === 0) {
|
|
@@ -1041,7 +1098,7 @@ After running the migration SQL, restart your application.`
|
|
|
1041
1098
|
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
1042
1099
|
const placeholders = idChunk.map(() => "?").join(",");
|
|
1043
1100
|
const chunkRows = await this.db.getAllAsync(
|
|
1044
|
-
`SELECT id, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1101
|
+
`SELECT id, entity_id, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1045
1102
|
idChunk
|
|
1046
1103
|
);
|
|
1047
1104
|
rows.push(...chunkRows);
|
|
@@ -1053,7 +1110,7 @@ After running the migration SQL, restart your application.`
|
|
|
1053
1110
|
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
1054
1111
|
const placeholders = idChunk.map(() => "?").join(",");
|
|
1055
1112
|
const chunkRows = await this.db.getAllAsync(
|
|
1056
|
-
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1113
|
+
`SELECT id, entity_id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1057
1114
|
idChunk
|
|
1058
1115
|
);
|
|
1059
1116
|
rows.push(...chunkRows);
|
|
@@ -1068,19 +1125,22 @@ After running the migration SQL, restart your application.`
|
|
|
1068
1125
|
}
|
|
1069
1126
|
} else {
|
|
1070
1127
|
if (useRanker) {
|
|
1128
|
+
const entityScope = this._entityInClause(scoredEntityIds);
|
|
1071
1129
|
candidateRows = await this.db.getAllAsync(
|
|
1072
|
-
`SELECT id, updated_at, access_count FROM ${this.prefix}entries WHERE
|
|
1073
|
-
|
|
1130
|
+
`SELECT id, entity_id, updated_at, access_count FROM ${this.prefix}entries WHERE ${entityScope.clause} AND deleted_at IS NULL`,
|
|
1131
|
+
entityScope.params
|
|
1074
1132
|
);
|
|
1075
1133
|
} else {
|
|
1134
|
+
const entityScope = this._entityInClause(scoredEntityIds);
|
|
1076
1135
|
candidateRows = await this.db.getAllAsync(
|
|
1077
|
-
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE
|
|
1078
|
-
|
|
1136
|
+
`SELECT id, entity_id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE ${entityScope.clause} AND deleted_at IS NULL`,
|
|
1137
|
+
entityScope.params
|
|
1079
1138
|
);
|
|
1080
1139
|
}
|
|
1081
1140
|
if (weight !== void 0 && weight < 1) {
|
|
1141
|
+
const entityIdSet = new Set(scoredEntityIds);
|
|
1082
1142
|
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
1083
|
-
filter: (r) => r.entity_id
|
|
1143
|
+
filter: (r) => entityIdSet.has(r.entity_id),
|
|
1084
1144
|
combineWith: "OR"
|
|
1085
1145
|
});
|
|
1086
1146
|
const maxMsScore = Math.max(1, msResults[0]?.score ?? 1);
|
|
@@ -1090,33 +1150,45 @@ After running the migration SQL, restart your application.`
|
|
|
1090
1150
|
if (candidateRows === null) {
|
|
1091
1151
|
usedEmbed = true;
|
|
1092
1152
|
} else {
|
|
1153
|
+
const entityCacheKey = entityIds.length === 1 ? entityIds[0] : entityIds.join("\0");
|
|
1093
1154
|
let scored;
|
|
1094
1155
|
if (useRanker) {
|
|
1095
|
-
const
|
|
1156
|
+
const candidateRowsByEntity = /* @__PURE__ */ new Map();
|
|
1157
|
+
for (const row of candidateRows) {
|
|
1158
|
+
const rows = candidateRowsByEntity.get(row.entity_id) ?? [];
|
|
1159
|
+
rows.push(row);
|
|
1160
|
+
candidateRowsByEntity.set(row.entity_id, rows);
|
|
1161
|
+
}
|
|
1096
1162
|
try {
|
|
1097
|
-
const
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
scored = scored.map((s) => {
|
|
1115
|
-
const meta = metaMap.get(s.id);
|
|
1116
|
-
return { ...s, updated_at: meta?.updated_at ?? null, access_count: meta?.access_count ?? null };
|
|
1117
|
-
});
|
|
1118
|
-
}
|
|
1163
|
+
const rankerResultsByEntity = await Promise.all(
|
|
1164
|
+
scoredEntityIds.filter((id) => (candidateRowsByEntity.get(id)?.length ?? 0) > 0).map(async (scopedEntityId) => {
|
|
1165
|
+
const rowsForEntity = candidateRowsByEntity.get(scopedEntityId) ?? [];
|
|
1166
|
+
const candidateIds = effectivePreFilterLimit !== void 0 ? rowsForEntity.map((row) => row.id) : void 0;
|
|
1167
|
+
const ranked = await this._rankWithVectorRanker({
|
|
1168
|
+
entityId: scopedEntityId,
|
|
1169
|
+
queryVec,
|
|
1170
|
+
candidateIds,
|
|
1171
|
+
candidateRows: rowsForEntity,
|
|
1172
|
+
weight,
|
|
1173
|
+
miniSearchScores,
|
|
1174
|
+
limit: Math.max(maxResults * 2, maxResults + 50)
|
|
1175
|
+
});
|
|
1176
|
+
return ranked.map((row) => ({ ...row, entity_id: scopedEntityId }));
|
|
1177
|
+
})
|
|
1178
|
+
);
|
|
1179
|
+
scored = rankerResultsByEntity.flat();
|
|
1119
1180
|
const scoredIds = new Set(scored.map((s) => s.id));
|
|
1181
|
+
const metadataById = new Map(
|
|
1182
|
+
candidateRows.filter((row) => scoredIds.has(row.id)).map((row) => [row.id, row])
|
|
1183
|
+
);
|
|
1184
|
+
scored = scored.map((row) => {
|
|
1185
|
+
const metadata = metadataById.get(row.id);
|
|
1186
|
+
return {
|
|
1187
|
+
...row,
|
|
1188
|
+
updated_at: metadata?.updated_at ?? null,
|
|
1189
|
+
access_count: metadata?.access_count ?? null
|
|
1190
|
+
};
|
|
1191
|
+
});
|
|
1120
1192
|
const isHybrid = weight !== void 0 && weight < 1;
|
|
1121
1193
|
const maxBackfill = isHybrid ? maxResults : Math.max(0, maxResults - scored.length);
|
|
1122
1194
|
if (maxBackfill > 0) {
|
|
@@ -1194,6 +1266,7 @@ After running the migration SQL, restart your application.`
|
|
|
1194
1266
|
for (const { row, kwScore } of topK) {
|
|
1195
1267
|
scored.push({
|
|
1196
1268
|
id: row.id,
|
|
1269
|
+
entity_id: row.entity_id,
|
|
1197
1270
|
score: (1 - weight) * kwScore,
|
|
1198
1271
|
updated_at: row.updated_at,
|
|
1199
1272
|
access_count: row.access_count
|
|
@@ -1203,7 +1276,7 @@ After running the migration SQL, restart your application.`
|
|
|
1203
1276
|
const omitted = [];
|
|
1204
1277
|
for (const row of candidateRows) {
|
|
1205
1278
|
if (scoredIds.has(row.id)) continue;
|
|
1206
|
-
omitted.push({ id: row.id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
|
|
1279
|
+
omitted.push({ id: row.id, entity_id: row.entity_id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
|
|
1207
1280
|
}
|
|
1208
1281
|
if (omitted.length > 0) {
|
|
1209
1282
|
this._tieBreakSort(omitted);
|
|
@@ -1231,8 +1304,8 @@ After running the migration SQL, restart your application.`
|
|
|
1231
1304
|
const idChunk = rowIds.slice(i, i + chunkSize);
|
|
1232
1305
|
const placeholders = idChunk.map(() => "?").join(",");
|
|
1233
1306
|
const embeddingRows = await this.db.getAllAsync(
|
|
1234
|
-
`SELECT id, embedding_blob, embedding FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND
|
|
1235
|
-
|
|
1307
|
+
`SELECT id, embedding_blob, embedding FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1308
|
+
idChunk
|
|
1236
1309
|
);
|
|
1237
1310
|
for (const row of embeddingRows) {
|
|
1238
1311
|
embeddingsMap.set(row.id, { embedding_blob: row.embedding_blob, embedding: row.embedding });
|
|
@@ -1245,38 +1318,37 @@ After running the migration SQL, restart your application.`
|
|
|
1245
1318
|
}));
|
|
1246
1319
|
}
|
|
1247
1320
|
scored = await this._rankWithJsCosine({
|
|
1248
|
-
entityId,
|
|
1321
|
+
entityId: entityCacheKey,
|
|
1249
1322
|
queryVec,
|
|
1250
1323
|
candidateRows: fallbackRows,
|
|
1251
1324
|
weight,
|
|
1252
1325
|
miniSearchScores,
|
|
1253
1326
|
populateCache,
|
|
1254
|
-
limit:
|
|
1327
|
+
limit: fallbackRows.length,
|
|
1328
|
+
skipSort: true
|
|
1329
|
+
// read() re-sorts after applying tier weights
|
|
1255
1330
|
});
|
|
1256
|
-
scoredAlreadySortedAndLimited = true;
|
|
1257
1331
|
} else if (policy === "keyword") {
|
|
1332
|
+
const scoredEntityIdSet = new Set(scoredEntityIds);
|
|
1258
1333
|
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
1259
|
-
filter: (r) => r.entity_id
|
|
1334
|
+
filter: (r) => scoredEntityIdSet.has(r.entity_id),
|
|
1260
1335
|
combineWith: "OR"
|
|
1261
1336
|
});
|
|
1262
|
-
const
|
|
1263
|
-
const
|
|
1264
|
-
const
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
}
|
|
1270
|
-
scored = topResults.map((r) => {
|
|
1271
|
-
const meta = candidateMap.get(r.id);
|
|
1337
|
+
const keywordOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
1338
|
+
const topResults = msResults.slice(0, keywordOversampledLimit);
|
|
1339
|
+
const topResultIds = new Set(topResults.map((r) => r.id));
|
|
1340
|
+
const candidateMap = new Map(candidateRows.filter((r) => topResultIds.has(r.id)).map((row) => [row.id, row]));
|
|
1341
|
+
scored = topResults.map((result) => {
|
|
1342
|
+
const metadata = candidateMap.get(result.id);
|
|
1343
|
+
const entityForScore = metadata?.entity_id ?? result.entity_id ?? "";
|
|
1272
1344
|
return {
|
|
1273
|
-
id:
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1345
|
+
id: result.id,
|
|
1346
|
+
entity_id: entityForScore,
|
|
1347
|
+
score: result.score ?? 0,
|
|
1348
|
+
access_count: metadata?.access_count ?? null,
|
|
1349
|
+
updated_at: metadata?.updated_at ?? null
|
|
1277
1350
|
};
|
|
1278
1351
|
});
|
|
1279
|
-
usedKeywordFallback = true;
|
|
1280
1352
|
} else {
|
|
1281
1353
|
scored = [];
|
|
1282
1354
|
}
|
|
@@ -1288,46 +1360,44 @@ After running the migration SQL, restart your application.`
|
|
|
1288
1360
|
}
|
|
1289
1361
|
}
|
|
1290
1362
|
} else {
|
|
1363
|
+
const jsCosineNeedsTierSort = sanitizedTierWeights !== void 0 && Object.values(sanitizedTierWeights).some((w) => w !== 1);
|
|
1291
1364
|
scored = await this._rankWithJsCosine({
|
|
1292
|
-
entityId,
|
|
1365
|
+
entityId: entityCacheKey,
|
|
1293
1366
|
queryVec,
|
|
1294
1367
|
candidateRows,
|
|
1295
1368
|
weight,
|
|
1296
1369
|
miniSearchScores,
|
|
1297
1370
|
populateCache,
|
|
1298
|
-
limit: maxResults
|
|
1371
|
+
limit: jsCosineNeedsTierSort ? candidateRows.length : maxResults,
|
|
1372
|
+
skipSort: jsCosineNeedsTierSort
|
|
1373
|
+
// read() re-sorts after applying tier weights
|
|
1299
1374
|
});
|
|
1300
|
-
scoredAlreadySortedAndLimited = true;
|
|
1301
1375
|
}
|
|
1302
1376
|
if (scored.length > 0) {
|
|
1303
|
-
|
|
1304
|
-
|
|
1377
|
+
scored = scored.map((row) => ({
|
|
1378
|
+
...row,
|
|
1379
|
+
score: applyTierWeight(row.score, row.entity_id, sanitizedTierWeights)
|
|
1380
|
+
}));
|
|
1381
|
+
this._tieBreakSort(scored);
|
|
1382
|
+
const selectedScored = scored.slice(0, maxResults);
|
|
1383
|
+
const topIds = selectedScored.map((s) => s.id);
|
|
1384
|
+
if (exposeMetadata && trimmedQuery) {
|
|
1385
|
+
scoreByFactId = new Map(selectedScored.map((s) => [s.id, Number.isFinite(s.score) ? s.score : 0]));
|
|
1305
1386
|
}
|
|
1306
|
-
const topIds = (scoredAlreadySortedAndLimited ? scored : scored.slice(0, maxResults)).map((s) => s.id);
|
|
1307
1387
|
if (topIds.length > 0) {
|
|
1308
|
-
const
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
const
|
|
1312
|
-
const placeholders = idChunk.map(() => "?").join(",");
|
|
1313
|
-
const chunkRows = await this.db.getAllAsync(
|
|
1314
|
-
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1315
|
-
[...idChunk, entityId]
|
|
1316
|
-
);
|
|
1317
|
-
fullRows.push(...chunkRows);
|
|
1318
|
-
}
|
|
1319
|
-
const byId = new Map(fullRows.map((r) => [r.id, r]));
|
|
1320
|
-
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
1321
|
-
if (facts.length < topIds.length) {
|
|
1322
|
-
const missingIds = topIds.filter((id) => !byId.has(id));
|
|
1388
|
+
const facts2 = await this._hydrateFactsByIds(topIds, entityIds);
|
|
1389
|
+
if (facts2.length < topIds.length) {
|
|
1390
|
+
const hydrationById = new Set(facts2.map((f) => f.id));
|
|
1391
|
+
const missingIds = topIds.filter((id) => !hydrationById.has(id));
|
|
1323
1392
|
const missingCount = missingIds.length;
|
|
1324
1393
|
const sample = missingIds.slice(0, 5);
|
|
1325
1394
|
const sampleSuffix = sample.length > 0 ? ` Missing ID sample: ${sample.join(", ")}${missingIds.length > sample.length ? ", ..." : ""}.` : "";
|
|
1326
1395
|
const error = new Error(
|
|
1327
|
-
`Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs
|
|
1396
|
+
`Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs. Rows may have been concurrently soft-deleted or filtered by deleted_at during hydration, or vector ranker output may include IDs that do not exist in requested entities.` + sampleSuffix
|
|
1328
1397
|
);
|
|
1329
1398
|
this.options.onRetrievalFallback?.(error);
|
|
1330
1399
|
}
|
|
1400
|
+
facts = facts2;
|
|
1331
1401
|
}
|
|
1332
1402
|
if (pendingRankerFallbackError) {
|
|
1333
1403
|
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
@@ -1354,26 +1424,28 @@ After running the migration SQL, restart your application.`
|
|
|
1354
1424
|
this.options.onRetrievalFallback?.(error);
|
|
1355
1425
|
}
|
|
1356
1426
|
}
|
|
1357
|
-
if (!usedEmbed) {
|
|
1427
|
+
if (!usedEmbed && scoredEntityIds.length > 0) {
|
|
1428
|
+
const fallbackEntityIdSet = new Set(scoredEntityIds);
|
|
1429
|
+
const fallbackOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
1358
1430
|
const results = this.miniSearch.search(trimmedQuery, {
|
|
1359
|
-
filter: (r) => r.entity_id
|
|
1431
|
+
filter: (r) => fallbackEntityIdSet.has(r.entity_id),
|
|
1360
1432
|
combineWith: "OR"
|
|
1361
1433
|
});
|
|
1362
|
-
const
|
|
1434
|
+
const candidates = results.slice(0, fallbackOversampledLimit).map((r) => ({
|
|
1435
|
+
id: r.id,
|
|
1436
|
+
entity_id: r.entity_id,
|
|
1437
|
+
score: applyTierWeight(r.score ?? 0, r.entity_id, sanitizedTierWeights),
|
|
1438
|
+
updated_at: null,
|
|
1439
|
+
access_count: null
|
|
1440
|
+
}));
|
|
1441
|
+
this._tieBreakSort(candidates);
|
|
1442
|
+
const topCandidates = candidates.slice(0, maxResults);
|
|
1443
|
+
const topIds = topCandidates.map((c) => c.id);
|
|
1363
1444
|
if (topIds.length > 0) {
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
const idChunk = topIds.slice(i, i + kwChunkSize);
|
|
1368
|
-
const placeholders = idChunk.map(() => "?").join(",");
|
|
1369
|
-
const chunkRows = await this.db.getAllAsync(
|
|
1370
|
-
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1371
|
-
[...idChunk, entityId]
|
|
1372
|
-
);
|
|
1373
|
-
kwRows.push(...chunkRows);
|
|
1445
|
+
facts = await this._hydrateFactsByIds(topIds, entityIds);
|
|
1446
|
+
if (exposeMetadata) {
|
|
1447
|
+
scoreByFactId = new Map(topCandidates.map((c) => [c.id, Number.isFinite(c.score) ? c.score : 0]));
|
|
1374
1448
|
}
|
|
1375
|
-
const byId = new Map(kwRows.map((r) => [r.id, r]));
|
|
1376
|
-
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
1377
1449
|
}
|
|
1378
1450
|
}
|
|
1379
1451
|
if (facts.length > 0) {
|
|
@@ -1392,37 +1464,66 @@ After running the migration SQL, restart your application.`
|
|
|
1392
1464
|
}
|
|
1393
1465
|
}
|
|
1394
1466
|
} else {
|
|
1395
|
-
|
|
1467
|
+
const entityScope = this._entityInClause(entityIds);
|
|
1468
|
+
const rawFacts = await this.db.getAllAsync(
|
|
1396
1469
|
`SELECT * FROM ${this.prefix}entries
|
|
1397
|
-
WHERE
|
|
1470
|
+
WHERE ${entityScope.clause} AND deleted_at IS NULL
|
|
1398
1471
|
ORDER BY updated_at DESC
|
|
1399
1472
|
LIMIT ?`,
|
|
1400
|
-
[
|
|
1473
|
+
[...entityScope.params, maxResults]
|
|
1401
1474
|
);
|
|
1475
|
+
facts = rawFacts.map((f) => {
|
|
1476
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
1477
|
+
return {
|
|
1478
|
+
...rest,
|
|
1479
|
+
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
1480
|
+
};
|
|
1481
|
+
});
|
|
1402
1482
|
}
|
|
1403
1483
|
const [tasks, events] = await Promise.all([
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1484
|
+
(async () => {
|
|
1485
|
+
const entityScope = this._entityInClause(entityIds);
|
|
1486
|
+
const tasksLimit = entityIds.length === 1 ? void 0 : Math.min(20 * entityIds.length, 200);
|
|
1487
|
+
return this.db.getAllAsync(
|
|
1488
|
+
`SELECT * FROM ${this.prefix}tasks
|
|
1489
|
+
WHERE ${entityScope.clause} AND status IN ('pending', 'in_progress') AND deleted_at IS NULL
|
|
1490
|
+
ORDER BY priority DESC, created_at ASC${tasksLimit !== void 0 ? "\n LIMIT ?" : ""}`,
|
|
1491
|
+
tasksLimit !== void 0 ? [...entityScope.params, tasksLimit] : entityScope.params
|
|
1492
|
+
);
|
|
1493
|
+
})(),
|
|
1494
|
+
(async () => {
|
|
1495
|
+
const entityScope = this._entityInClause(entityIds);
|
|
1496
|
+
const eventsLimit = Math.min(10 * entityIds.length, 100);
|
|
1497
|
+
return this.db.getAllAsync(
|
|
1498
|
+
`SELECT * FROM ${this.prefix}events
|
|
1499
|
+
WHERE ${entityScope.clause}
|
|
1500
|
+
ORDER BY created_at DESC
|
|
1501
|
+
LIMIT ?`,
|
|
1502
|
+
[...entityScope.params, eventsLimit]
|
|
1503
|
+
);
|
|
1504
|
+
})()
|
|
1417
1505
|
]);
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1506
|
+
let factScores;
|
|
1507
|
+
if (exposeMetadata && trimmedQuery && scoreByFactId) {
|
|
1508
|
+
factScores = Object.fromEntries(facts.map((fact) => [fact.id, scoreByFactId.get(fact.id) ?? 0]));
|
|
1509
|
+
}
|
|
1510
|
+
const bundle = { facts, tasks, events: events.reverse() };
|
|
1511
|
+
if (exposeMetadata) {
|
|
1512
|
+
bundle.metadata = { query, entityIds };
|
|
1513
|
+
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) bundle.metadata.tierWeights = sanitizedTierWeights;
|
|
1514
|
+
if (factScores && Object.keys(factScores).length > 0) bundle.factScores = factScores;
|
|
1515
|
+
}
|
|
1516
|
+
return bundle;
|
|
1517
|
+
}
|
|
1518
|
+
/**
|
|
1519
|
+
* Returns entity IDs that will participate in scored retrieval.
|
|
1520
|
+
* Excludes zero-weight entities unless includeZeroWeightEntities is true.
|
|
1521
|
+
*/
|
|
1522
|
+
_filterScoredEntities(entityIds, sanitizedTierWeights, includeZeroWeightEntities) {
|
|
1523
|
+
return entityIds.filter((id) => {
|
|
1524
|
+
const w = sanitizedTierWeights?.[id] ?? 1;
|
|
1525
|
+
return includeZeroWeightEntities === true || w !== 0;
|
|
1424
1526
|
});
|
|
1425
|
-
return { facts: parsedFacts, tasks, events: events.reverse() };
|
|
1426
1527
|
}
|
|
1427
1528
|
/**
|
|
1428
1529
|
* Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
|
|
@@ -1436,13 +1537,48 @@ After running the migration SQL, restart your application.`
|
|
|
1436
1537
|
*/
|
|
1437
1538
|
_compareScoredRows(a, b) {
|
|
1438
1539
|
const scoreDiff = b.score - a.score;
|
|
1439
|
-
if (scoreDiff !== 0) return scoreDiff;
|
|
1540
|
+
if (!Number.isNaN(scoreDiff) && scoreDiff !== 0) return scoreDiff;
|
|
1440
1541
|
const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
|
|
1441
1542
|
if (accessCountDiff !== 0) return accessCountDiff;
|
|
1442
1543
|
const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
|
|
1443
1544
|
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
1444
1545
|
return a.id.localeCompare(b.id);
|
|
1445
1546
|
}
|
|
1547
|
+
/**
|
|
1548
|
+
* Build SQL IN clause with placeholders for multiple entity IDs.
|
|
1549
|
+
*/
|
|
1550
|
+
_entityInClause(entityIds) {
|
|
1551
|
+
if (entityIds.length === 0) return { clause: "1=0", params: [] };
|
|
1552
|
+
const placeholders = entityIds.map(() => "?").join(",");
|
|
1553
|
+
return { clause: `entity_id IN (${placeholders})`, params: [...entityIds] };
|
|
1554
|
+
}
|
|
1555
|
+
/**
|
|
1556
|
+
* Hydrate full facts by ID. Pass scopedEntityIds to restrict to requested namespaces in SQL
|
|
1557
|
+
* (defense-in-depth against a rogue VectorRanker returning cross-entity IDs).
|
|
1558
|
+
*/
|
|
1559
|
+
async _hydrateFactsByIds(ids, scopedEntityIds) {
|
|
1560
|
+
const fullRows = [];
|
|
1561
|
+
const chunkSize = 500;
|
|
1562
|
+
const entityClause = scopedEntityIds && scopedEntityIds.length > 0 ? ` AND entity_id IN (${scopedEntityIds.map(() => "?").join(",")})` : "";
|
|
1563
|
+
const entityParams = scopedEntityIds && scopedEntityIds.length > 0 ? [...scopedEntityIds] : [];
|
|
1564
|
+
for (let i = 0; i < ids.length; i += chunkSize) {
|
|
1565
|
+
const idChunk = ids.slice(i, i + chunkSize);
|
|
1566
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1567
|
+
const chunkRows = await this.db.getAllAsync(
|
|
1568
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders})${entityClause} AND deleted_at IS NULL`,
|
|
1569
|
+
[...idChunk, ...entityParams]
|
|
1570
|
+
);
|
|
1571
|
+
fullRows.push(...chunkRows);
|
|
1572
|
+
}
|
|
1573
|
+
const byId = new Map(fullRows.map((row) => [row.id, row]));
|
|
1574
|
+
return ids.map((id) => byId.get(id)).filter((fact) => fact !== void 0).map((fact) => {
|
|
1575
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = fact;
|
|
1576
|
+
return {
|
|
1577
|
+
...rest,
|
|
1578
|
+
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
1579
|
+
};
|
|
1580
|
+
});
|
|
1581
|
+
}
|
|
1446
1582
|
/**
|
|
1447
1583
|
* Strip potentially sensitive data from ranker errors before exposing to host callbacks.
|
|
1448
1584
|
* Preserves error type for debugging but removes message/stack that may contain credentials.
|
|
@@ -1467,7 +1603,7 @@ After running the migration SQL, restart your application.`
|
|
|
1467
1603
|
*/
|
|
1468
1604
|
async _rankWithJsCosine(args) {
|
|
1469
1605
|
const queryVec = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
|
|
1470
|
-
const { entityId, candidateRows, weight, miniSearchScores, populateCache, limit } = args;
|
|
1606
|
+
const { entityId, candidateRows, weight, miniSearchScores, populateCache, limit, skipSort } = args;
|
|
1471
1607
|
let entityCache = this.vectorCache.get(entityId);
|
|
1472
1608
|
const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
1473
1609
|
if (tooLarge && entityCache) {
|
|
@@ -1498,7 +1634,13 @@ After running the migration SQL, restart your application.`
|
|
|
1498
1634
|
} else {
|
|
1499
1635
|
score = -2;
|
|
1500
1636
|
}
|
|
1501
|
-
return {
|
|
1637
|
+
return {
|
|
1638
|
+
id: row.id,
|
|
1639
|
+
entity_id: row.entity_id,
|
|
1640
|
+
score,
|
|
1641
|
+
updated_at: row.updated_at,
|
|
1642
|
+
access_count: row.access_count
|
|
1643
|
+
};
|
|
1502
1644
|
});
|
|
1503
1645
|
if (canCache && entityCache && entityCache.size > 0) {
|
|
1504
1646
|
if (!this.vectorCache.has(entityId)) {
|
|
@@ -1509,7 +1651,7 @@ After running the migration SQL, restart your application.`
|
|
|
1509
1651
|
this.vectorCache.set(entityId, entityCache);
|
|
1510
1652
|
}
|
|
1511
1653
|
}
|
|
1512
|
-
this._tieBreakSort(scored);
|
|
1654
|
+
if (!skipSort) this._tieBreakSort(scored);
|
|
1513
1655
|
return scored.slice(0, limit);
|
|
1514
1656
|
}
|
|
1515
1657
|
/**
|
|
@@ -1518,7 +1660,7 @@ After running the migration SQL, restart your application.`
|
|
|
1518
1660
|
* Returns scored results ready for hybrid blending and tie-break sorting.
|
|
1519
1661
|
*/
|
|
1520
1662
|
async _rankWithVectorRanker(args) {
|
|
1521
|
-
const { entityId, candidateIds, weight, miniSearchScores, limit } = args;
|
|
1663
|
+
const { entityId, candidateIds, candidateRows, weight, miniSearchScores, limit } = args;
|
|
1522
1664
|
const ranker = this.options.vectorRanker;
|
|
1523
1665
|
if (!ranker) {
|
|
1524
1666
|
throw new Error("vectorRanker not configured");
|
|
@@ -1530,7 +1672,7 @@ After running the migration SQL, restart your application.`
|
|
|
1530
1672
|
candidateIds,
|
|
1531
1673
|
limit
|
|
1532
1674
|
});
|
|
1533
|
-
const allowedIds =
|
|
1675
|
+
const allowedIds = new Set(candidateRows.map((row) => row.id));
|
|
1534
1676
|
const seen = /* @__PURE__ */ new Set();
|
|
1535
1677
|
const normalized = [];
|
|
1536
1678
|
for (const r of rankerResults) {
|
|
@@ -1541,13 +1683,19 @@ After running the migration SQL, restart your application.`
|
|
|
1541
1683
|
seen.add(r.id);
|
|
1542
1684
|
normalized.push(r);
|
|
1543
1685
|
}
|
|
1686
|
+
const entityIdByCandidateId = new Map(candidateRows.map((row) => [row.id, row.entity_id]));
|
|
1544
1687
|
const scored = normalized.map((r) => {
|
|
1545
1688
|
let score = r.semanticScore;
|
|
1546
1689
|
if (weight !== void 0) {
|
|
1547
1690
|
const kwScore = miniSearchScores?.get(r.id) ?? 0;
|
|
1548
1691
|
score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
|
|
1549
1692
|
}
|
|
1550
|
-
return {
|
|
1693
|
+
return {
|
|
1694
|
+
id: r.id,
|
|
1695
|
+
entity_id: entityIdByCandidateId.get(r.id),
|
|
1696
|
+
// allowedIds filter above guarantees membership
|
|
1697
|
+
score
|
|
1698
|
+
};
|
|
1551
1699
|
});
|
|
1552
1700
|
return scored;
|
|
1553
1701
|
}
|