@equationalapplications/core-llm-wiki 4.2.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +23 -8
- package/dist/index.d.ts +23 -8
- package/dist/index.js +274 -126
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +274 -126
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -209,6 +209,40 @@ function parseEmbedding(blob, text) {
|
|
|
209
209
|
return null;
|
|
210
210
|
}
|
|
211
211
|
|
|
212
|
+
// src/readOptions.ts
|
|
213
|
+
/**
 * Normalize the `entityId` argument of `read()` into a de-duplicated list.
 *
 * A non-array value is wrapped in a single-element array. Duplicates are
 * dropped (Set membership semantics) while the first-occurrence order is
 * preserved. The result is always a new array; the input is never mutated.
 *
 * @param {*} entityId - A single entity ID or an array of entity IDs.
 * @returns {Array} Unique entity IDs in first-seen order.
 */
function normalizeEntityIds(entityId) {
  const requested = Array.isArray(entityId) ? entityId : [entityId];
  // A Set iterates in insertion order, so spreading it keeps first occurrences.
  return [...new Set(requested)];
}
|
|
224
|
+
/**
 * Build a sanitized weight table covering exactly the requested entity IDs.
 *
 * - Returns `undefined` when no weights were supplied (`undefined` or `null`),
 *   so callers can tell "no tier weighting" apart from "all weights are 1".
 * - A missing or non-finite entry (NaN, ±Infinity, non-number) becomes 1.
 * - A negative entry is clamped to 0.
 *
 * The result has a null prototype, so entity IDs such as "constructor" or
 * "__proto__" are stored as ordinary keys.
 *
 * @param {string[]} entityIds - Entity IDs to produce weights for.
 * @param {Object<string, number>|null|undefined} tierWeights - Caller-supplied weights.
 * @returns {Object<string, number>|undefined} Sanitized weights, or `undefined`.
 */
function sanitizeTierWeights(entityIds, tierWeights) {
  // `== null` matches both undefined and null; a null option previously crashed
  // on the property lookup below instead of being treated as "not supplied".
  if (tierWeights == null) return void 0;
  const sanitized = /* @__PURE__ */ Object.create(null);
  for (const entityId of entityIds) {
    const raw = tierWeights[entityId];
    // Number.isFinite is false for undefined and for every non-number value.
    sanitized[entityId] = Number.isFinite(raw) ? Math.max(0, raw) : 1;
  }
  return sanitized;
}
|
|
237
|
+
/**
 * Scale a relevance score by the tier weight of the entity it belongs to.
 *
 * An entity with no entry (or when no weight table is given) uses weight 1.
 * A weight of exactly 0 yields `-Infinity` rather than 0. Any other weight
 * is applied by plain multiplication, so a negative score moves toward zero
 * when the weight is below 1 and away from zero when it is above 1.
 *
 * @param {number} score - Unweighted score.
 * @param {string} entityId - Entity the scored row belongs to.
 * @param {Object<string, number>|undefined} sanitizedTierWeights - Sanitized weight table.
 * @returns {number} Weighted score, or `-Infinity` for zero-weight entities.
 */
function applyTierWeight(score, entityId, sanitizedTierWeights) {
  const tierWeight = sanitizedTierWeights?.[entityId] ?? 1;
  return tierWeight === 0 ? -Infinity : score * tierWeight;
}
|
|
242
|
+
/**
 * Decide whether a `read()` result should carry metadata.
 *
 * Metadata is exposed only when the caller passed an array of entity IDs
 * (an empty array counts); any non-array argument yields `false`.
 *
 * @param {*} entityId - The raw `entityId` argument as given by the caller.
 * @returns {boolean} `true` when `entityId` is an array.
 */
function shouldExposeReadMetadata(entityId) {
  const calledWithArray = Array.isArray(entityId);
  return calledWithArray;
}
|
|
245
|
+
|
|
212
246
|
// src/WikiMemory.ts
|
|
213
247
|
var HOOK_TIMEOUT_MARKER = /* @__PURE__ */ Symbol("WikiMemoryHookTimeout");
|
|
214
248
|
function parseJsonResponse(text) {
|
|
@@ -974,6 +1008,25 @@ After running the migration SQL, restart your application.`
|
|
|
974
1008
|
}
|
|
975
1009
|
async read(entityId, query, options) {
|
|
976
1010
|
const config = this.options.config;
|
|
1011
|
+
const entityIds = normalizeEntityIds(entityId);
|
|
1012
|
+
const sanitizedTierWeights = sanitizeTierWeights(entityIds, options?.tierWeights);
|
|
1013
|
+
const exposeMetadata = shouldExposeReadMetadata(entityId);
|
|
1014
|
+
if (entityIds.length === 0) {
|
|
1015
|
+
const empty = { facts: [], tasks: [], events: [] };
|
|
1016
|
+
if (exposeMetadata) {
|
|
1017
|
+
empty.metadata = { query, entityIds: [] };
|
|
1018
|
+
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) empty.metadata.tierWeights = sanitizedTierWeights;
|
|
1019
|
+
}
|
|
1020
|
+
return empty;
|
|
1021
|
+
}
|
|
1022
|
+
const MAX_ENTITY_IDS = 100;
|
|
1023
|
+
if (entityIds.length > MAX_ENTITY_IDS) {
|
|
1024
|
+
throw new RangeError(`read() accepts at most ${MAX_ENTITY_IDS} entity IDs; received ${entityIds.length}`);
|
|
1025
|
+
}
|
|
1026
|
+
const nullByteId = entityIds.find((id) => id.includes("\0"));
|
|
1027
|
+
if (nullByteId !== void 0) {
|
|
1028
|
+
throw new TypeError(`entity_id values must not contain the null byte (\\x00); got "${nullByteId}"`);
|
|
1029
|
+
}
|
|
977
1030
|
const rawMaxResults = options?.maxResults ?? config?.maxResults ?? config?.maxFtsResults ?? 10;
|
|
978
1031
|
const maxResults = Number.isFinite(rawMaxResults) ? Math.max(0, Math.trunc(rawMaxResults)) : 10;
|
|
979
1032
|
const rawPreFilterLimit = options?.preFilterLimit === null ? void 0 : options?.preFilterLimit ?? config?.preFilterLimit;
|
|
@@ -984,13 +1037,15 @@ After running the migration SQL, restart your application.`
|
|
|
984
1037
|
const embedFn = this.options.llmProvider.embed;
|
|
985
1038
|
const trimmedQuery = query.trim();
|
|
986
1039
|
let facts = [];
|
|
1040
|
+
let scoreByFactId;
|
|
987
1041
|
if (maxResults === 0) ; else if (trimmedQuery) {
|
|
988
1042
|
let usedEmbed = false;
|
|
989
|
-
|
|
1043
|
+
const scoredEntityIds = this._filterScoredEntities(entityIds, sanitizedTierWeights, options?.includeZeroWeightEntities);
|
|
1044
|
+
if (scoredEntityIds.length === 0) {
|
|
1045
|
+
usedEmbed = true;
|
|
1046
|
+
} else if (!skipEmbed && embedFn) {
|
|
990
1047
|
let rankerShouldRethrow = false;
|
|
991
1048
|
let pendingRankerFallbackError;
|
|
992
|
-
let usedKeywordFallback = false;
|
|
993
|
-
let scoredAlreadySortedAndLimited = false;
|
|
994
1049
|
try {
|
|
995
1050
|
const queryVec = await embedFn(trimmedQuery);
|
|
996
1051
|
if (queryVec.length === 0 || !queryVec.every((v) => typeof v === "number" && isFinite(v))) {
|
|
@@ -1009,13 +1064,14 @@ After running the migration SQL, restart your application.`
|
|
|
1009
1064
|
);
|
|
1010
1065
|
}
|
|
1011
1066
|
}
|
|
1067
|
+
const mismatchScope = this._entityInClause(entityIds);
|
|
1012
1068
|
const mismatchedCount = await this.db.getFirstAsync(
|
|
1013
1069
|
`SELECT COUNT(*) AS cnt FROM ${this.prefix}entries
|
|
1014
|
-
WHERE
|
|
1070
|
+
WHERE ${mismatchScope.clause} AND deleted_at IS NULL
|
|
1015
1071
|
AND embedding_blob IS NOT NULL
|
|
1016
1072
|
AND (CAST(length(embedding_blob) AS INTEGER) % 4 = 0)
|
|
1017
1073
|
AND (CAST(length(embedding_blob) AS INTEGER) / 4) != ?`,
|
|
1018
|
-
[
|
|
1074
|
+
[...mismatchScope.params, queryVec.length]
|
|
1019
1075
|
);
|
|
1020
1076
|
if (mismatchedCount && mismatchedCount.cnt > 0) {
|
|
1021
1077
|
throw new Error(
|
|
@@ -1024,12 +1080,13 @@ After running the migration SQL, restart your application.`
|
|
|
1024
1080
|
}
|
|
1025
1081
|
const useRanker = Boolean(this.options.vectorRanker);
|
|
1026
1082
|
let candidateRows;
|
|
1027
|
-
let populateCache =
|
|
1083
|
+
let populateCache = entityIds.length === 1;
|
|
1028
1084
|
let miniSearchScores;
|
|
1029
1085
|
if (effectivePreFilterLimit !== void 0) {
|
|
1030
1086
|
populateCache = false;
|
|
1087
|
+
const entityIdSet = new Set(scoredEntityIds);
|
|
1031
1088
|
const preResults = this.miniSearch.search(trimmedQuery, {
|
|
1032
|
-
filter: (r) => r.entity_id
|
|
1089
|
+
filter: (r) => entityIdSet.has(r.entity_id),
|
|
1033
1090
|
combineWith: "OR"
|
|
1034
1091
|
});
|
|
1035
1092
|
if (preResults.length === 0) {
|
|
@@ -1047,7 +1104,7 @@ After running the migration SQL, restart your application.`
|
|
|
1047
1104
|
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
1048
1105
|
const placeholders = idChunk.map(() => "?").join(",");
|
|
1049
1106
|
const chunkRows = await this.db.getAllAsync(
|
|
1050
|
-
`SELECT id, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1107
|
+
`SELECT id, entity_id, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1051
1108
|
idChunk
|
|
1052
1109
|
);
|
|
1053
1110
|
rows.push(...chunkRows);
|
|
@@ -1059,7 +1116,7 @@ After running the migration SQL, restart your application.`
|
|
|
1059
1116
|
const idChunk = topKIds.slice(i, i + inClauseChunkSize);
|
|
1060
1117
|
const placeholders = idChunk.map(() => "?").join(",");
|
|
1061
1118
|
const chunkRows = await this.db.getAllAsync(
|
|
1062
|
-
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1119
|
+
`SELECT id, entity_id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1063
1120
|
idChunk
|
|
1064
1121
|
);
|
|
1065
1122
|
rows.push(...chunkRows);
|
|
@@ -1074,19 +1131,22 @@ After running the migration SQL, restart your application.`
|
|
|
1074
1131
|
}
|
|
1075
1132
|
} else {
|
|
1076
1133
|
if (useRanker) {
|
|
1134
|
+
const entityScope = this._entityInClause(scoredEntityIds);
|
|
1077
1135
|
candidateRows = await this.db.getAllAsync(
|
|
1078
|
-
`SELECT id, updated_at, access_count FROM ${this.prefix}entries WHERE
|
|
1079
|
-
|
|
1136
|
+
`SELECT id, entity_id, updated_at, access_count FROM ${this.prefix}entries WHERE ${entityScope.clause} AND deleted_at IS NULL`,
|
|
1137
|
+
entityScope.params
|
|
1080
1138
|
);
|
|
1081
1139
|
} else {
|
|
1140
|
+
const entityScope = this._entityInClause(scoredEntityIds);
|
|
1082
1141
|
candidateRows = await this.db.getAllAsync(
|
|
1083
|
-
`SELECT id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE
|
|
1084
|
-
|
|
1142
|
+
`SELECT id, entity_id, embedding_blob, embedding, updated_at, access_count FROM ${this.prefix}entries WHERE ${entityScope.clause} AND deleted_at IS NULL`,
|
|
1143
|
+
entityScope.params
|
|
1085
1144
|
);
|
|
1086
1145
|
}
|
|
1087
1146
|
if (weight !== void 0 && weight < 1) {
|
|
1147
|
+
const entityIdSet = new Set(scoredEntityIds);
|
|
1088
1148
|
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
1089
|
-
filter: (r) => r.entity_id
|
|
1149
|
+
filter: (r) => entityIdSet.has(r.entity_id),
|
|
1090
1150
|
combineWith: "OR"
|
|
1091
1151
|
});
|
|
1092
1152
|
const maxMsScore = Math.max(1, msResults[0]?.score ?? 1);
|
|
@@ -1096,33 +1156,45 @@ After running the migration SQL, restart your application.`
|
|
|
1096
1156
|
if (candidateRows === null) {
|
|
1097
1157
|
usedEmbed = true;
|
|
1098
1158
|
} else {
|
|
1159
|
+
const entityCacheKey = entityIds.length === 1 ? entityIds[0] : entityIds.join("\0");
|
|
1099
1160
|
let scored;
|
|
1100
1161
|
if (useRanker) {
|
|
1101
|
-
const
|
|
1162
|
+
const candidateRowsByEntity = /* @__PURE__ */ new Map();
|
|
1163
|
+
for (const row of candidateRows) {
|
|
1164
|
+
const rows = candidateRowsByEntity.get(row.entity_id) ?? [];
|
|
1165
|
+
rows.push(row);
|
|
1166
|
+
candidateRowsByEntity.set(row.entity_id, rows);
|
|
1167
|
+
}
|
|
1102
1168
|
try {
|
|
1103
|
-
const
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
scored = scored.map((s) => {
|
|
1121
|
-
const meta = metaMap.get(s.id);
|
|
1122
|
-
return { ...s, updated_at: meta?.updated_at ?? null, access_count: meta?.access_count ?? null };
|
|
1123
|
-
});
|
|
1124
|
-
}
|
|
1169
|
+
const rankerResultsByEntity = await Promise.all(
|
|
1170
|
+
scoredEntityIds.filter((id) => (candidateRowsByEntity.get(id)?.length ?? 0) > 0).map(async (scopedEntityId) => {
|
|
1171
|
+
const rowsForEntity = candidateRowsByEntity.get(scopedEntityId) ?? [];
|
|
1172
|
+
const candidateIds = effectivePreFilterLimit !== void 0 ? rowsForEntity.map((row) => row.id) : void 0;
|
|
1173
|
+
const ranked = await this._rankWithVectorRanker({
|
|
1174
|
+
entityId: scopedEntityId,
|
|
1175
|
+
queryVec,
|
|
1176
|
+
candidateIds,
|
|
1177
|
+
candidateRows: rowsForEntity,
|
|
1178
|
+
weight,
|
|
1179
|
+
miniSearchScores,
|
|
1180
|
+
limit: Math.max(maxResults * 2, maxResults + 50)
|
|
1181
|
+
});
|
|
1182
|
+
return ranked.map((row) => ({ ...row, entity_id: scopedEntityId }));
|
|
1183
|
+
})
|
|
1184
|
+
);
|
|
1185
|
+
scored = rankerResultsByEntity.flat();
|
|
1125
1186
|
const scoredIds = new Set(scored.map((s) => s.id));
|
|
1187
|
+
const metadataById = new Map(
|
|
1188
|
+
candidateRows.filter((row) => scoredIds.has(row.id)).map((row) => [row.id, row])
|
|
1189
|
+
);
|
|
1190
|
+
scored = scored.map((row) => {
|
|
1191
|
+
const metadata = metadataById.get(row.id);
|
|
1192
|
+
return {
|
|
1193
|
+
...row,
|
|
1194
|
+
updated_at: metadata?.updated_at ?? null,
|
|
1195
|
+
access_count: metadata?.access_count ?? null
|
|
1196
|
+
};
|
|
1197
|
+
});
|
|
1126
1198
|
const isHybrid = weight !== void 0 && weight < 1;
|
|
1127
1199
|
const maxBackfill = isHybrid ? maxResults : Math.max(0, maxResults - scored.length);
|
|
1128
1200
|
if (maxBackfill > 0) {
|
|
@@ -1200,6 +1272,7 @@ After running the migration SQL, restart your application.`
|
|
|
1200
1272
|
for (const { row, kwScore } of topK) {
|
|
1201
1273
|
scored.push({
|
|
1202
1274
|
id: row.id,
|
|
1275
|
+
entity_id: row.entity_id,
|
|
1203
1276
|
score: (1 - weight) * kwScore,
|
|
1204
1277
|
updated_at: row.updated_at,
|
|
1205
1278
|
access_count: row.access_count
|
|
@@ -1209,7 +1282,7 @@ After running the migration SQL, restart your application.`
|
|
|
1209
1282
|
const omitted = [];
|
|
1210
1283
|
for (const row of candidateRows) {
|
|
1211
1284
|
if (scoredIds.has(row.id)) continue;
|
|
1212
|
-
omitted.push({ id: row.id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
|
|
1285
|
+
omitted.push({ id: row.id, entity_id: row.entity_id, score: -2, updated_at: row.updated_at, access_count: row.access_count });
|
|
1213
1286
|
}
|
|
1214
1287
|
if (omitted.length > 0) {
|
|
1215
1288
|
this._tieBreakSort(omitted);
|
|
@@ -1237,8 +1310,8 @@ After running the migration SQL, restart your application.`
|
|
|
1237
1310
|
const idChunk = rowIds.slice(i, i + chunkSize);
|
|
1238
1311
|
const placeholders = idChunk.map(() => "?").join(",");
|
|
1239
1312
|
const embeddingRows = await this.db.getAllAsync(
|
|
1240
|
-
`SELECT id, embedding_blob, embedding FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND
|
|
1241
|
-
|
|
1313
|
+
`SELECT id, embedding_blob, embedding FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND deleted_at IS NULL`,
|
|
1314
|
+
idChunk
|
|
1242
1315
|
);
|
|
1243
1316
|
for (const row of embeddingRows) {
|
|
1244
1317
|
embeddingsMap.set(row.id, { embedding_blob: row.embedding_blob, embedding: row.embedding });
|
|
@@ -1251,38 +1324,37 @@ After running the migration SQL, restart your application.`
|
|
|
1251
1324
|
}));
|
|
1252
1325
|
}
|
|
1253
1326
|
scored = await this._rankWithJsCosine({
|
|
1254
|
-
entityId,
|
|
1327
|
+
entityId: entityCacheKey,
|
|
1255
1328
|
queryVec,
|
|
1256
1329
|
candidateRows: fallbackRows,
|
|
1257
1330
|
weight,
|
|
1258
1331
|
miniSearchScores,
|
|
1259
1332
|
populateCache,
|
|
1260
|
-
limit:
|
|
1333
|
+
limit: fallbackRows.length,
|
|
1334
|
+
skipSort: true
|
|
1335
|
+
// read() re-sorts after applying tier weights
|
|
1261
1336
|
});
|
|
1262
|
-
scoredAlreadySortedAndLimited = true;
|
|
1263
1337
|
} else if (policy === "keyword") {
|
|
1338
|
+
const scoredEntityIdSet = new Set(scoredEntityIds);
|
|
1264
1339
|
const msResults = this.miniSearch.search(trimmedQuery, {
|
|
1265
|
-
filter: (r) => r.entity_id
|
|
1340
|
+
filter: (r) => scoredEntityIdSet.has(r.entity_id),
|
|
1266
1341
|
combineWith: "OR"
|
|
1267
1342
|
});
|
|
1268
|
-
const
|
|
1269
|
-
const
|
|
1270
|
-
const
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
}
|
|
1276
|
-
scored = topResults.map((r) => {
|
|
1277
|
-
const meta = candidateMap.get(r.id);
|
|
1343
|
+
const keywordOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
1344
|
+
const topResults = msResults.slice(0, keywordOversampledLimit);
|
|
1345
|
+
const topResultIds = new Set(topResults.map((r) => r.id));
|
|
1346
|
+
const candidateMap = new Map(candidateRows.filter((r) => topResultIds.has(r.id)).map((row) => [row.id, row]));
|
|
1347
|
+
scored = topResults.map((result) => {
|
|
1348
|
+
const metadata = candidateMap.get(result.id);
|
|
1349
|
+
const entityForScore = metadata?.entity_id ?? result.entity_id ?? "";
|
|
1278
1350
|
return {
|
|
1279
|
-
id:
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1351
|
+
id: result.id,
|
|
1352
|
+
entity_id: entityForScore,
|
|
1353
|
+
score: result.score ?? 0,
|
|
1354
|
+
access_count: metadata?.access_count ?? null,
|
|
1355
|
+
updated_at: metadata?.updated_at ?? null
|
|
1283
1356
|
};
|
|
1284
1357
|
});
|
|
1285
|
-
usedKeywordFallback = true;
|
|
1286
1358
|
} else {
|
|
1287
1359
|
scored = [];
|
|
1288
1360
|
}
|
|
@@ -1294,46 +1366,44 @@ After running the migration SQL, restart your application.`
|
|
|
1294
1366
|
}
|
|
1295
1367
|
}
|
|
1296
1368
|
} else {
|
|
1369
|
+
const jsCosineNeedsTierSort = sanitizedTierWeights !== void 0 && Object.values(sanitizedTierWeights).some((w) => w !== 1);
|
|
1297
1370
|
scored = await this._rankWithJsCosine({
|
|
1298
|
-
entityId,
|
|
1371
|
+
entityId: entityCacheKey,
|
|
1299
1372
|
queryVec,
|
|
1300
1373
|
candidateRows,
|
|
1301
1374
|
weight,
|
|
1302
1375
|
miniSearchScores,
|
|
1303
1376
|
populateCache,
|
|
1304
|
-
limit: maxResults
|
|
1377
|
+
limit: jsCosineNeedsTierSort ? candidateRows.length : maxResults,
|
|
1378
|
+
skipSort: jsCosineNeedsTierSort
|
|
1379
|
+
// read() re-sorts after applying tier weights
|
|
1305
1380
|
});
|
|
1306
|
-
scoredAlreadySortedAndLimited = true;
|
|
1307
1381
|
}
|
|
1308
1382
|
if (scored.length > 0) {
|
|
1309
|
-
|
|
1310
|
-
|
|
1383
|
+
scored = scored.map((row) => ({
|
|
1384
|
+
...row,
|
|
1385
|
+
score: applyTierWeight(row.score, row.entity_id, sanitizedTierWeights)
|
|
1386
|
+
}));
|
|
1387
|
+
this._tieBreakSort(scored);
|
|
1388
|
+
const selectedScored = scored.slice(0, maxResults);
|
|
1389
|
+
const topIds = selectedScored.map((s) => s.id);
|
|
1390
|
+
if (exposeMetadata && trimmedQuery) {
|
|
1391
|
+
scoreByFactId = new Map(selectedScored.map((s) => [s.id, Number.isFinite(s.score) ? s.score : 0]));
|
|
1311
1392
|
}
|
|
1312
|
-
const topIds = (scoredAlreadySortedAndLimited ? scored : scored.slice(0, maxResults)).map((s) => s.id);
|
|
1313
1393
|
if (topIds.length > 0) {
|
|
1314
|
-
const
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
const
|
|
1318
|
-
const placeholders = idChunk.map(() => "?").join(",");
|
|
1319
|
-
const chunkRows = await this.db.getAllAsync(
|
|
1320
|
-
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1321
|
-
[...idChunk, entityId]
|
|
1322
|
-
);
|
|
1323
|
-
fullRows.push(...chunkRows);
|
|
1324
|
-
}
|
|
1325
|
-
const byId = new Map(fullRows.map((r) => [r.id, r]));
|
|
1326
|
-
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
1327
|
-
if (facts.length < topIds.length) {
|
|
1328
|
-
const missingIds = topIds.filter((id) => !byId.has(id));
|
|
1394
|
+
const facts2 = await this._hydrateFactsByIds(topIds, entityIds);
|
|
1395
|
+
if (facts2.length < topIds.length) {
|
|
1396
|
+
const hydrationById = new Set(facts2.map((f) => f.id));
|
|
1397
|
+
const missingIds = topIds.filter((id) => !hydrationById.has(id));
|
|
1329
1398
|
const missingCount = missingIds.length;
|
|
1330
1399
|
const sample = missingIds.slice(0, 5);
|
|
1331
1400
|
const sampleSuffix = sample.length > 0 ? ` Missing ID sample: ${sample.join(", ")}${missingIds.length > sample.length ? ", ..." : ""}.` : "";
|
|
1332
1401
|
const error = new Error(
|
|
1333
|
-
`Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs
|
|
1402
|
+
`Phase 2 fact hydration returned ${missingCount} fewer row(s) than ranked IDs. Rows may have been concurrently soft-deleted or filtered by deleted_at during hydration, or vector ranker output may include IDs that do not exist in requested entities.` + sampleSuffix
|
|
1334
1403
|
);
|
|
1335
1404
|
this.options.onRetrievalFallback?.(error);
|
|
1336
1405
|
}
|
|
1406
|
+
facts = facts2;
|
|
1337
1407
|
}
|
|
1338
1408
|
if (pendingRankerFallbackError) {
|
|
1339
1409
|
this.options.onRetrievalFallback?.(pendingRankerFallbackError);
|
|
@@ -1360,26 +1430,28 @@ After running the migration SQL, restart your application.`
|
|
|
1360
1430
|
this.options.onRetrievalFallback?.(error);
|
|
1361
1431
|
}
|
|
1362
1432
|
}
|
|
1363
|
-
if (!usedEmbed) {
|
|
1433
|
+
if (!usedEmbed && scoredEntityIds.length > 0) {
|
|
1434
|
+
const fallbackEntityIdSet = new Set(scoredEntityIds);
|
|
1435
|
+
const fallbackOversampledLimit = Math.max(maxResults * 2, maxResults + 50);
|
|
1364
1436
|
const results = this.miniSearch.search(trimmedQuery, {
|
|
1365
|
-
filter: (r) => r.entity_id
|
|
1437
|
+
filter: (r) => fallbackEntityIdSet.has(r.entity_id),
|
|
1366
1438
|
combineWith: "OR"
|
|
1367
1439
|
});
|
|
1368
|
-
const
|
|
1440
|
+
const candidates = results.slice(0, fallbackOversampledLimit).map((r) => ({
|
|
1441
|
+
id: r.id,
|
|
1442
|
+
entity_id: r.entity_id,
|
|
1443
|
+
score: applyTierWeight(r.score ?? 0, r.entity_id, sanitizedTierWeights),
|
|
1444
|
+
updated_at: null,
|
|
1445
|
+
access_count: null
|
|
1446
|
+
}));
|
|
1447
|
+
this._tieBreakSort(candidates);
|
|
1448
|
+
const topCandidates = candidates.slice(0, maxResults);
|
|
1449
|
+
const topIds = topCandidates.map((c) => c.id);
|
|
1369
1450
|
if (topIds.length > 0) {
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
const idChunk = topIds.slice(i, i + kwChunkSize);
|
|
1374
|
-
const placeholders = idChunk.map(() => "?").join(",");
|
|
1375
|
-
const chunkRows = await this.db.getAllAsync(
|
|
1376
|
-
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders}) AND entity_id = ? AND deleted_at IS NULL`,
|
|
1377
|
-
[...idChunk, entityId]
|
|
1378
|
-
);
|
|
1379
|
-
kwRows.push(...chunkRows);
|
|
1451
|
+
facts = await this._hydrateFactsByIds(topIds, entityIds);
|
|
1452
|
+
if (exposeMetadata) {
|
|
1453
|
+
scoreByFactId = new Map(topCandidates.map((c) => [c.id, Number.isFinite(c.score) ? c.score : 0]));
|
|
1380
1454
|
}
|
|
1381
|
-
const byId = new Map(kwRows.map((r) => [r.id, r]));
|
|
1382
|
-
facts = topIds.map((id) => byId.get(id)).filter((f) => f !== void 0);
|
|
1383
1455
|
}
|
|
1384
1456
|
}
|
|
1385
1457
|
if (facts.length > 0) {
|
|
@@ -1398,37 +1470,66 @@ After running the migration SQL, restart your application.`
|
|
|
1398
1470
|
}
|
|
1399
1471
|
}
|
|
1400
1472
|
} else {
|
|
1401
|
-
|
|
1473
|
+
const entityScope = this._entityInClause(entityIds);
|
|
1474
|
+
const rawFacts = await this.db.getAllAsync(
|
|
1402
1475
|
`SELECT * FROM ${this.prefix}entries
|
|
1403
|
-
WHERE
|
|
1476
|
+
WHERE ${entityScope.clause} AND deleted_at IS NULL
|
|
1404
1477
|
ORDER BY updated_at DESC
|
|
1405
1478
|
LIMIT ?`,
|
|
1406
|
-
[
|
|
1479
|
+
[...entityScope.params, maxResults]
|
|
1407
1480
|
);
|
|
1481
|
+
facts = rawFacts.map((f) => {
|
|
1482
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = f;
|
|
1483
|
+
return {
|
|
1484
|
+
...rest,
|
|
1485
|
+
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
1486
|
+
};
|
|
1487
|
+
});
|
|
1408
1488
|
}
|
|
1409
1489
|
const [tasks, events] = await Promise.all([
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1490
|
+
(async () => {
|
|
1491
|
+
const entityScope = this._entityInClause(entityIds);
|
|
1492
|
+
const tasksLimit = entityIds.length === 1 ? void 0 : Math.min(20 * entityIds.length, 200);
|
|
1493
|
+
return this.db.getAllAsync(
|
|
1494
|
+
`SELECT * FROM ${this.prefix}tasks
|
|
1495
|
+
WHERE ${entityScope.clause} AND status IN ('pending', 'in_progress') AND deleted_at IS NULL
|
|
1496
|
+
ORDER BY priority DESC, created_at ASC${tasksLimit !== void 0 ? "\n LIMIT ?" : ""}`,
|
|
1497
|
+
tasksLimit !== void 0 ? [...entityScope.params, tasksLimit] : entityScope.params
|
|
1498
|
+
);
|
|
1499
|
+
})(),
|
|
1500
|
+
(async () => {
|
|
1501
|
+
const entityScope = this._entityInClause(entityIds);
|
|
1502
|
+
const eventsLimit = Math.min(10 * entityIds.length, 100);
|
|
1503
|
+
return this.db.getAllAsync(
|
|
1504
|
+
`SELECT * FROM ${this.prefix}events
|
|
1505
|
+
WHERE ${entityScope.clause}
|
|
1506
|
+
ORDER BY created_at DESC
|
|
1507
|
+
LIMIT ?`,
|
|
1508
|
+
[...entityScope.params, eventsLimit]
|
|
1509
|
+
);
|
|
1510
|
+
})()
|
|
1423
1511
|
]);
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1512
|
+
let factScores;
|
|
1513
|
+
if (exposeMetadata && trimmedQuery && scoreByFactId) {
|
|
1514
|
+
factScores = Object.fromEntries(facts.map((fact) => [fact.id, scoreByFactId.get(fact.id) ?? 0]));
|
|
1515
|
+
}
|
|
1516
|
+
const bundle = { facts, tasks, events: events.reverse() };
|
|
1517
|
+
if (exposeMetadata) {
|
|
1518
|
+
bundle.metadata = { query, entityIds };
|
|
1519
|
+
if (sanitizedTierWeights && Object.keys(sanitizedTierWeights).length > 0) bundle.metadata.tierWeights = sanitizedTierWeights;
|
|
1520
|
+
if (factScores && Object.keys(factScores).length > 0) bundle.factScores = factScores;
|
|
1521
|
+
}
|
|
1522
|
+
return bundle;
|
|
1523
|
+
}
|
|
1524
|
+
/**
|
|
1525
|
+
* Returns entity IDs that will participate in scored retrieval.
|
|
1526
|
+
* Excludes zero-weight entities unless includeZeroWeightEntities is true.
|
|
1527
|
+
*/
|
|
1528
|
+
_filterScoredEntities(entityIds, sanitizedTierWeights, includeZeroWeightEntities) {
|
|
1529
|
+
return entityIds.filter((id) => {
|
|
1530
|
+
const w = sanitizedTierWeights?.[id] ?? 1;
|
|
1531
|
+
return includeZeroWeightEntities === true || w !== 0;
|
|
1430
1532
|
});
|
|
1431
|
-
return { facts: parsedFacts, tasks, events: events.reverse() };
|
|
1432
1533
|
}
|
|
1433
1534
|
/**
|
|
1434
1535
|
* Stable tie-break sort: score desc → access_count desc → updated_at desc → id asc.
|
|
@@ -1442,13 +1543,48 @@ After running the migration SQL, restart your application.`
|
|
|
1442
1543
|
*/
|
|
1443
1544
|
_compareScoredRows(a, b) {
|
|
1444
1545
|
const scoreDiff = b.score - a.score;
|
|
1445
|
-
if (scoreDiff !== 0) return scoreDiff;
|
|
1546
|
+
if (!Number.isNaN(scoreDiff) && scoreDiff !== 0) return scoreDiff;
|
|
1446
1547
|
const accessCountDiff = (b.access_count ?? 0) - (a.access_count ?? 0);
|
|
1447
1548
|
if (accessCountDiff !== 0) return accessCountDiff;
|
|
1448
1549
|
const updatedAtDiff = (b.updated_at ?? 0) - (a.updated_at ?? 0);
|
|
1449
1550
|
if (updatedAtDiff !== 0) return updatedAtDiff;
|
|
1450
1551
|
return a.id.localeCompare(b.id);
|
|
1451
1552
|
}
|
|
1553
|
+
/**
|
|
1554
|
+
* Build SQL IN clause with placeholders for multiple entity IDs.
|
|
1555
|
+
*/
|
|
1556
|
+
_entityInClause(entityIds) {
|
|
1557
|
+
if (entityIds.length === 0) return { clause: "1=0", params: [] };
|
|
1558
|
+
const placeholders = entityIds.map(() => "?").join(",");
|
|
1559
|
+
return { clause: `entity_id IN (${placeholders})`, params: [...entityIds] };
|
|
1560
|
+
}
|
|
1561
|
+
/**
|
|
1562
|
+
* Hydrate full facts by ID. Pass scopedEntityIds to restrict to requested namespaces in SQL
|
|
1563
|
+
* (defense-in-depth against a rogue VectorRanker returning cross-entity IDs).
|
|
1564
|
+
*/
|
|
1565
|
+
async _hydrateFactsByIds(ids, scopedEntityIds) {
|
|
1566
|
+
const fullRows = [];
|
|
1567
|
+
const chunkSize = 500;
|
|
1568
|
+
const entityClause = scopedEntityIds && scopedEntityIds.length > 0 ? ` AND entity_id IN (${scopedEntityIds.map(() => "?").join(",")})` : "";
|
|
1569
|
+
const entityParams = scopedEntityIds && scopedEntityIds.length > 0 ? [...scopedEntityIds] : [];
|
|
1570
|
+
for (let i = 0; i < ids.length; i += chunkSize) {
|
|
1571
|
+
const idChunk = ids.slice(i, i + chunkSize);
|
|
1572
|
+
const placeholders = idChunk.map(() => "?").join(",");
|
|
1573
|
+
const chunkRows = await this.db.getAllAsync(
|
|
1574
|
+
`SELECT * FROM ${this.prefix}entries WHERE id IN (${placeholders})${entityClause} AND deleted_at IS NULL`,
|
|
1575
|
+
[...idChunk, ...entityParams]
|
|
1576
|
+
);
|
|
1577
|
+
fullRows.push(...chunkRows);
|
|
1578
|
+
}
|
|
1579
|
+
const byId = new Map(fullRows.map((row) => [row.id, row]));
|
|
1580
|
+
return ids.map((id) => byId.get(id)).filter((fact) => fact !== void 0).map((fact) => {
|
|
1581
|
+
const { embedding: _embedding, embedding_blob: _blob, ...rest } = fact;
|
|
1582
|
+
return {
|
|
1583
|
+
...rest,
|
|
1584
|
+
tags: typeof rest.tags === "string" ? JSON.parse(rest.tags) : rest.tags
|
|
1585
|
+
};
|
|
1586
|
+
});
|
|
1587
|
+
}
|
|
1452
1588
|
/**
|
|
1453
1589
|
* Strip potentially sensitive data from ranker errors before exposing to host callbacks.
|
|
1454
1590
|
* Preserves error type for debugging but removes message/stack that may contain credentials.
|
|
@@ -1473,7 +1609,7 @@ After running the migration SQL, restart your application.`
|
|
|
1473
1609
|
*/
|
|
1474
1610
|
async _rankWithJsCosine(args) {
|
|
1475
1611
|
const queryVec = args.queryVec instanceof Float32Array ? args.queryVec.slice() : Array.from(args.queryVec);
|
|
1476
|
-
const { entityId, candidateRows, weight, miniSearchScores, populateCache, limit } = args;
|
|
1612
|
+
const { entityId, candidateRows, weight, miniSearchScores, populateCache, limit, skipSort } = args;
|
|
1477
1613
|
let entityCache = this.vectorCache.get(entityId);
|
|
1478
1614
|
const tooLarge = populateCache && candidateRows.length > _WikiMemory.MAX_VECTOR_CACHE_FACTS_PER_ENTITY;
|
|
1479
1615
|
if (tooLarge && entityCache) {
|
|
@@ -1504,7 +1640,13 @@ After running the migration SQL, restart your application.`
|
|
|
1504
1640
|
} else {
|
|
1505
1641
|
score = -2;
|
|
1506
1642
|
}
|
|
1507
|
-
return {
|
|
1643
|
+
return {
|
|
1644
|
+
id: row.id,
|
|
1645
|
+
entity_id: row.entity_id,
|
|
1646
|
+
score,
|
|
1647
|
+
updated_at: row.updated_at,
|
|
1648
|
+
access_count: row.access_count
|
|
1649
|
+
};
|
|
1508
1650
|
});
|
|
1509
1651
|
if (canCache && entityCache && entityCache.size > 0) {
|
|
1510
1652
|
if (!this.vectorCache.has(entityId)) {
|
|
@@ -1515,7 +1657,7 @@ After running the migration SQL, restart your application.`
|
|
|
1515
1657
|
this.vectorCache.set(entityId, entityCache);
|
|
1516
1658
|
}
|
|
1517
1659
|
}
|
|
1518
|
-
this._tieBreakSort(scored);
|
|
1660
|
+
if (!skipSort) this._tieBreakSort(scored);
|
|
1519
1661
|
return scored.slice(0, limit);
|
|
1520
1662
|
}
|
|
1521
1663
|
/**
|
|
@@ -1524,7 +1666,7 @@ After running the migration SQL, restart your application.`
|
|
|
1524
1666
|
* Returns scored results ready for hybrid blending and tie-break sorting.
|
|
1525
1667
|
*/
|
|
1526
1668
|
async _rankWithVectorRanker(args) {
|
|
1527
|
-
const { entityId, candidateIds, weight, miniSearchScores, limit } = args;
|
|
1669
|
+
const { entityId, candidateIds, candidateRows, weight, miniSearchScores, limit } = args;
|
|
1528
1670
|
const ranker = this.options.vectorRanker;
|
|
1529
1671
|
if (!ranker) {
|
|
1530
1672
|
throw new Error("vectorRanker not configured");
|
|
@@ -1536,7 +1678,7 @@ After running the migration SQL, restart your application.`
|
|
|
1536
1678
|
candidateIds,
|
|
1537
1679
|
limit
|
|
1538
1680
|
});
|
|
1539
|
-
const allowedIds =
|
|
1681
|
+
const allowedIds = new Set(candidateRows.map((row) => row.id));
|
|
1540
1682
|
const seen = /* @__PURE__ */ new Set();
|
|
1541
1683
|
const normalized = [];
|
|
1542
1684
|
for (const r of rankerResults) {
|
|
@@ -1547,13 +1689,19 @@ After running the migration SQL, restart your application.`
|
|
|
1547
1689
|
seen.add(r.id);
|
|
1548
1690
|
normalized.push(r);
|
|
1549
1691
|
}
|
|
1692
|
+
const entityIdByCandidateId = new Map(candidateRows.map((row) => [row.id, row.entity_id]));
|
|
1550
1693
|
const scored = normalized.map((r) => {
|
|
1551
1694
|
let score = r.semanticScore;
|
|
1552
1695
|
if (weight !== void 0) {
|
|
1553
1696
|
const kwScore = miniSearchScores?.get(r.id) ?? 0;
|
|
1554
1697
|
score = weight * Math.max(0, r.semanticScore) + (1 - weight) * kwScore;
|
|
1555
1698
|
}
|
|
1556
|
-
return {
|
|
1699
|
+
return {
|
|
1700
|
+
id: r.id,
|
|
1701
|
+
entity_id: entityIdByCandidateId.get(r.id),
|
|
1702
|
+
// allowedIds filter above guarantees membership
|
|
1703
|
+
score
|
|
1704
|
+
};
|
|
1557
1705
|
});
|
|
1558
1706
|
return scored;
|
|
1559
1707
|
}
|