@agenr/agenr-plugin 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-MEHOGUZE.js → chunk-6T5RXGIR.js} +989 -70
- package/dist/{chunk-Y2BC7RCE.js → chunk-7TDALVPY.js} +1434 -305
- package/dist/{chunk-XD3446YW.js → chunk-DGV6D6Q3.js} +2 -21
- package/dist/chunk-IMQIJPIP.js +886 -0
- package/dist/chunk-MJIB6J5S.js +3059 -0
- package/dist/index.js +1466 -124
- package/openclaw.plugin.json +86 -2
- package/package.json +1 -1
|
@@ -1,3 +1,176 @@
|
|
|
1
|
+
// src/core/recall/cross-encoder.ts
|
|
2
|
+
// Shortlist size resolveTopK falls back to when the caller's topK is not a positive finite number.
var DEFAULT_CROSS_ENCODER_TOP_K = 10;
|
|
3
|
+
// Blend weight resolveAlpha falls back to; the rerank computes alpha * crossEncoderScore + (1 - alpha) * priorScore.
var DEFAULT_CROSS_ENCODER_ALPHA = 0.6;
|
|
4
|
+
/**
 * Re-scores the leading candidates with a cross-encoder and blends the result
 * into each candidate's existing score.
 *
 * Only the first `k` candidates (the shortlist) are sent to `options.port.rank`;
 * the remaining tail keeps its scores and stays behind the shortlist in its
 * original order. Every failure mode returns a passthrough result
 * (`applied: false`, scores untouched) instead of throwing.
 *
 * @param {object} options
 * @param {Array<{id: *, text: *, candidate: *, score: number}>} options.candidates
 *   Candidates in their current rank order.
 * @param {string} options.query Query text; trimmed before use.
 * @param {{rank: Function}} [options.port] Cross-encoder provider. `rank(query, docs)`
 *   is expected to resolve to an array of `{ id, score }` entries.
 * @param {number} [options.topK] Shortlist size (see resolveTopK).
 * @param {number} [options.alpha] Blend weight (see resolveAlpha).
 * @param {boolean} [options.disabled] When exactly `true`, skips the stage.
 * @returns {Promise<{applied: boolean, k: number, alpha: number, latencyMs: number,
 *   degradedReason?: string, candidates: Array<object>, rescoredIds: Array}>}
 */
async function applyCrossEncoderRerank(options) {
  const candidates = [...options.candidates];
  const k = resolveTopK(options.topK, candidates.length);
  const alpha = resolveAlpha(options.alpha);
  const startedAt = Date.now();

  // Shared "nothing changed" result used by every degraded path.
  const passthrough = (degradedReason) => ({
    applied: false,
    k,
    alpha,
    latencyMs: elapsedMs(startedAt),
    ...(degradedReason ? { degradedReason } : {}),
    candidates: candidates.map((candidate) => ({
      candidate: candidate.candidate,
      score: candidate.score,
    })),
    rescoredIds: [],
  });

  if (options.disabled === true) {
    return passthrough("disabled");
  }
  if (!options.port) {
    return passthrough("not_configured");
  }
  if (candidates.length === 0) {
    return passthrough("no_candidates");
  }

  const shortlist = candidates.slice(0, k);
  const tail = candidates.slice(k);
  // A missing or non-string query degrades like an empty one instead of throwing on `.trim()`.
  const query = typeof options.query === "string" ? options.query.trim() : "";
  if (query.length === 0 || shortlist.length === 0) {
    return passthrough("no_candidates");
  }

  let scores;
  try {
    scores = await options.port.rank(
      query,
      shortlist.map((candidate) => ({ id: candidate.id, text: candidate.text })),
    );
  } catch {
    // Deliberate best-effort: a provider failure must not fail the caller.
    return passthrough("provider_error");
  }
  if (!Array.isArray(scores)) {
    return passthrough("provider_error");
  }

  // Keep only well-formed entries; provider scores are clamped into [0, 1].
  const scoreById = /* @__PURE__ */ new Map();
  for (const entry of scores) {
    if (!entry || typeof entry.id !== "string" || typeof entry.score !== "number" || !Number.isFinite(entry.score)) {
      continue;
    }
    scoreById.set(entry.id, clampUnit(entry.score));
  }
  if (scoreById.size === 0) {
    return passthrough("provider_error");
  }

  const rescoredIds = [];
  // `position` records the shortlist index once so the tie-break below does
  // not have to search the shortlist on every comparison.
  const rescoredShortlist = shortlist.map((candidate, position) => {
    const crossEncoderScore = scoreById.get(candidate.id);
    if (crossEncoderScore === void 0) {
      // The provider skipped this candidate: keep its prior score.
      return {
        candidate: candidate.candidate,
        score: candidate.score,
        position,
      };
    }
    const nextScore = clampUnit(alpha * crossEncoderScore + (1 - alpha) * candidate.score);
    if (nextScore !== candidate.score) {
      rescoredIds.push(candidate.id);
    }
    return {
      candidate: candidate.candidate,
      score: nextScore,
      crossEncoderScore,
      position,
    };
  });

  // Highest blended score first; equal scores keep their shortlist order.
  const reorderedShortlist = [...rescoredShortlist].sort((left, right) => {
    if (left.score !== right.score) {
      return right.score - left.score;
    }
    return left.position - right.position;
  });

  const shortlistOutput = reorderedShortlist.map(({ candidate, score, crossEncoderScore }) => ({
    candidate,
    score,
    ...(typeof crossEncoderScore === "number" ? { crossEncoderScore } : {}),
  }));
  const tailOutput = tail.map((candidate) => ({
    candidate: candidate.candidate,
    score: candidate.score,
  }));

  return {
    applied: true,
    k,
    alpha,
    latencyMs: elapsedMs(startedAt),
    candidates: [...shortlistOutput, ...tailOutput],
    rescoredIds,
  };
}
|
|
105
|
+
/**
 * Resolves how many leading candidates form the rerank shortlist.
 *
 * @param {*} value Requested size; anything other than a positive finite number selects the default.
 * @param {number} total Number of available candidates.
 * @returns {number} 0 when there are no candidates, otherwise a size in [1, total].
 */
function resolveTopK(value, total) {
  if (total <= 0) {
    return 0;
  }
  let requested = DEFAULT_CROSS_ENCODER_TOP_K;
  if (typeof value === "number" && Number.isFinite(value) && value > 0) {
    requested = Math.floor(value);
  }
  // A fractional request below 1 floors to 0, so the lower bound is applied last.
  const capped = Math.min(total, requested);
  return Math.max(1, capped);
}
|
|
112
|
+
/**
 * Resolves the cross-encoder blend weight.
 *
 * @param {*} value Requested weight.
 * @returns {number} The default weight for non-numeric or non-finite input, otherwise the value clamped by clampUnit.
 */
function resolveAlpha(value) {
  const isUsableNumber = typeof value === "number" && Number.isFinite(value);
  return isUsableNumber ? clampUnit(value) : DEFAULT_CROSS_ENCODER_ALPHA;
}
|
|
118
|
+
/**
 * Clamps a value into the closed interval [0, 1].
 *
 * @param {*} value Value to clamp.
 * @returns {number} 0 for non-finite or non-positive input, 1 for input at or above 1, otherwise the value itself.
 */
function clampUnit(value) {
  if (Number.isFinite(value) && value > 0) {
    return value < 1 ? value : 1;
  }
  return 0;
}
|
|
124
|
+
/**
 * Milliseconds elapsed since `startedAt`, never negative.
 *
 * @param {number} startedAt A `Date.now()` timestamp.
 * @returns {number} Elapsed milliseconds; 0 when `startedAt` is in the future.
 */
function elapsedMs(startedAt) {
  const delta = Date.now() - startedAt;
  return Math.max(0, delta);
}
|
|
127
|
+
|
|
128
|
+
// src/core/recall/fusion.ts
|
|
129
|
+
// Default rank constant for reciprocal rank fusion; also the fallback sanitizeRankConstant uses for invalid input.
var DEFAULT_RRF_RANK_CONSTANT = 60;
|
|
130
|
+
// NOTE(review): presumably the rank constant used when the fused pool is small; the consumer is not visible here - confirm.
var DEFAULT_RRF_SMALL_POOL_RANK_CONSTANT = 8;
|
|
131
|
+
// NOTE(review): presumably the pool-size threshold that selects the small-pool rank constant - confirm against its consumer.
var SMALL_POOL_RRF_POOL_SIZE = 4;
|
|
132
|
+
/**
 * Fuses several ranked id lists with reciprocal rank fusion and normalizes
 * the fused scores into [0, 1].
 *
 * Each non-empty channel contributes `1 / (rank + k)` per id, where `rank` is
 * the zero-based position after dropping `undefined` entries and repeats
 * within that channel. Scores are divided by `channelCount * (1 / k)`, the
 * score of an id ranked first in every non-empty channel.
 *
 * @param {Array<Array>} channels Ranked id lists, best first.
 * @param {number} [rankConstant] Rank constant `k`; invalid values fall back via sanitizeRankConstant.
 * @returns {Map<*, number>} Fused score per id (empty when every channel is empty).
 */
function rrfFuse(channels, rankConstant = DEFAULT_RRF_RANK_CONSTANT) {
  const offset = sanitizeRankConstant(rankConstant);
  const fused = /* @__PURE__ */ new Map();
  const populated = channels.filter((ranks) => ranks.length > 0);
  if (populated.length === 0) {
    return fused;
  }

  populated.forEach((ranks) => {
    const distinct = /* @__PURE__ */ new Set();
    for (const id of ranks) {
      if (id === void 0 || distinct.has(id)) {
        continue;
      }
      // The number of distinct ids already seen is this id's compacted rank.
      const contribution = 1 / (distinct.size + offset);
      distinct.add(id);
      fused.set(id, (fused.get(id) ?? 0) + contribution);
    }
  });

  const ceiling = populated.length * (1 / offset);
  for (const [id, total] of fused) {
    fused.set(id, clampUnit2(total / ceiling));
  }
  return fused;
}
|
|
158
|
+
/**
 * Convenience wrapper that fuses exactly two channels: vector ranks and lexical ranks.
 *
 * @param {Array} vectorRanks Ids ranked by the vector channel.
 * @param {Array} lexicalRanks Ids ranked by the lexical channel.
 * @param {number} [rankConstant] Rank constant forwarded to rrfFuse.
 * @returns {Map<*, number>} Whatever rrfFuse returns for the two channels.
 */
function rrfFuseVectorLexical(vectorRanks, lexicalRanks, rankConstant = DEFAULT_RRF_RANK_CONSTANT) {
  const channels = [vectorRanks, lexicalRanks];
  return rrfFuse(channels, rankConstant);
}
|
|
161
|
+
/**
 * Validates a reciprocal-rank-fusion rank constant.
 *
 * @param {*} value Candidate rank constant.
 * @returns {number} The value when it is a positive finite number, otherwise the default rank constant.
 */
function sanitizeRankConstant(value) {
  return Number.isFinite(value) && value > 0 ? value : DEFAULT_RRF_RANK_CONSTANT;
}
|
|
167
|
+
/**
 * Clamps a fused score into [0, 1].
 *
 * @param {*} value Value to clamp.
 * @returns {number} 0 for non-finite or non-positive input, otherwise the value capped at 1.
 */
function clampUnit2(value) {
  const isPositiveFinite = Number.isFinite(value) && value > 0;
  if (!isPositiveFinite) {
    return 0;
  }
  return Math.min(1, value);
}
|
|
173
|
+
|
|
1
174
|
// src/core/recall/lexical.ts
|
|
2
175
|
var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
3
176
|
"the",
|
|
@@ -205,7 +378,7 @@ function recencyScore(createdAt, expiry, now = /* @__PURE__ */ new Date()) {
|
|
|
205
378
|
}
|
|
206
379
|
const halfLifeDays = expiry === "permanent" ? 365 : 30;
|
|
207
380
|
const daysOld = Math.max(0, (nowDate.getTime() - createdDate.getTime()) / DAY_IN_MILLISECONDS);
|
|
208
|
-
return
|
|
381
|
+
return clampUnit3(Math.pow(0.5, daysOld / halfLifeDays));
|
|
209
382
|
}
|
|
210
383
|
function gaussianRecency(createdAt, aroundDate, radiusDays) {
|
|
211
384
|
const createdDate = asValidDate(createdAt);
|
|
@@ -218,27 +391,19 @@ function gaussianRecency(createdAt, aroundDate, radiusDays) {
|
|
|
218
391
|
return createdDate.getTime() === anchorDate.getTime() ? 1 : 0;
|
|
219
392
|
}
|
|
220
393
|
const daysDelta = Math.abs(createdDate.getTime() - anchorDate.getTime()) / DAY_IN_MILLISECONDS;
|
|
221
|
-
return
|
|
394
|
+
return clampUnit3(Math.exp(-0.5 * (daysDelta / normalizedRadius) ** 2));
|
|
222
395
|
}
|
|
223
396
|
function importanceScore(importance) {
|
|
224
397
|
const clampedImportance = clampRange(sanitizeNonNegative(importance), 1, 10);
|
|
225
|
-
return
|
|
226
|
-
}
|
|
227
|
-
function combinedRelevance(vectorSim, lexical) {
|
|
228
|
-
const normalizedVector = clampUnit(sanitizeNonNegative(vectorSim));
|
|
229
|
-
const normalizedLexical = clampUnit(sanitizeNonNegative(lexical));
|
|
230
|
-
if (normalizedVector > 0 && normalizedLexical > 0) {
|
|
231
|
-
return clampUnit(normalizedVector * 0.6 + normalizedLexical * 0.4);
|
|
232
|
-
}
|
|
233
|
-
return Math.max(normalizedVector, normalizedLexical);
|
|
398
|
+
return clampUnit3(IMPORTANCE_FLOOR + (clampedImportance - 1) / 9 * (1 - IMPORTANCE_FLOOR));
|
|
234
399
|
}
|
|
235
400
|
function scoreCandidate(params) {
|
|
236
|
-
const vector =
|
|
237
|
-
const lexical =
|
|
238
|
-
const recency =
|
|
239
|
-
const importance =
|
|
240
|
-
const relevance =
|
|
241
|
-
const score =
|
|
401
|
+
const vector = clampUnit3(sanitizeNonNegative(params.vectorSim));
|
|
402
|
+
const lexical = clampUnit3(sanitizeNonNegative(params.lexical));
|
|
403
|
+
const recency = clampUnit3(sanitizeNonNegative(params.recency));
|
|
404
|
+
const importance = clampUnit3(sanitizeNonNegative(params.importance));
|
|
405
|
+
const relevance = clampUnit3(sanitizeNonNegative(params.relevance));
|
|
406
|
+
const score = clampUnit3(relevance * RELEVANCE_WEIGHT + recency * RECENCY_WEIGHT + importance * IMPORTANCE_WEIGHT);
|
|
242
407
|
return {
|
|
243
408
|
score,
|
|
244
409
|
scores: {
|
|
@@ -268,17 +433,269 @@ function cosineSimilarity(left, right) {
|
|
|
268
433
|
if (leftNorm <= 0 || rightNorm <= 0) {
|
|
269
434
|
return 0;
|
|
270
435
|
}
|
|
271
|
-
return
|
|
436
|
+
return clampUnit3(dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm)));
|
|
272
437
|
}
|
|
273
438
|
var asValidDate = (value) => {
|
|
274
439
|
const date = value instanceof Date ? new Date(value.getTime()) : new Date(value);
|
|
275
440
|
return Number.isNaN(date.getTime()) ? null : date;
|
|
276
441
|
};
|
|
277
|
-
var
|
|
442
|
+
// Clamps a score into [0, 1]; sanitizeNonNegative first maps non-numeric, non-finite and negative input to 0.
var clampUnit3 = (value) => clampRange(sanitizeNonNegative(value), 0, 1);
|
|
278
443
|
var clampRange = (value, min, max) => Math.min(max, Math.max(min, value));
|
|
279
444
|
var sanitizeFinite = (value) => typeof value === "number" && Number.isFinite(value) ? value : 0;
|
|
280
445
|
var sanitizeNonNegative = (value) => Math.max(0, sanitizeFinite(value));
|
|
281
446
|
|
|
447
|
+
// src/core/recall/mmr.ts
|
|
448
|
+
// Default relevance weight for diversification: score = lambda * relevance - (1 - lambda) * maxPairwiseSimilarity.
var DEFAULT_MMR_LAMBDA = 0.7;
|
|
449
|
+
// Default pool gate: diversification is skipped when the candidate count is at or below this size.
var DEFAULT_MMR_MIN_POOL_SIZE = 4;
|
|
450
|
+
// Pairwise similarity at or above which a demoted candidate is counted as a near-duplicate in trace output.
var NEAR_DUPLICATE_SIMILARITY = 0.95;
|
|
451
|
+
/**
 * Reorders candidates to trade relevance against redundancy.
 *
 * Each embedded candidate is scored once as
 * `lambda * relevance - (1 - lambda) * maxPairwiseSimilarity`, where the
 * similarity is the highest similarity to any other embedded candidate.
 * Embedded candidates are sorted by that score (ties keep input order) and
 * candidates without a usable embedding are appended in input order.
 *
 * The stage is skipped (`applied: false`, input order kept) when the pool is
 * at or below the minimum pool size, when there is no query vector, or when
 * fewer than two candidates have embeddings.
 *
 * @param {object} options
 * @param {Array<{id: *, embedding?: Array<number>, relevance?: number}>} options.candidates
 *   Candidates in their current rank order.
 * @param {ArrayLike<number>} options.queryVector Query embedding.
 * @param {number} [options.lambda] Relevance weight; clamped into [0, 1].
 * @param {number} [options.limit] Maximum number of ids to return (see resolveLimit).
 * @param {number} [options.minPoolSize] Pool gate (see resolveMinPoolSize).
 * @returns {{applied: boolean, lambda: number, orderedIds: Array,
 *   droppedDuplicateCount: number, reorderedIds: Array}}
 */
function maximalMarginalRelevance(options) {
  const lambda = clampUnit4(sanitizeNumber(options.lambda, DEFAULT_MMR_LAMBDA));
  const inputIds = options.candidates.map((candidate) => candidate.id);
  const limit = resolveLimit(options.limit, inputIds.length);
  const minPoolSize = resolveMinPoolSize(options.minPoolSize);

  // Single pass: split candidates by whether they carry a usable embedding.
  const embeddedCandidates = [];
  const unembeddedIds = [];
  for (const candidate of options.candidates) {
    if (hasUsableEmbedding(candidate.embedding)) {
      embeddedCandidates.push(candidate);
    } else {
      unembeddedIds.push(candidate.id);
    }
  }

  const poolBelowGate = minPoolSize > 0 && options.candidates.length <= minPoolSize;
  // A missing query vector skips the stage instead of throwing on `.length`.
  const hasQueryVector = (options.queryVector?.length ?? 0) > 0;
  const canApplyMmr = !poolBelowGate && hasQueryVector && embeddedCandidates.length >= 2;
  if (!canApplyMmr) {
    return {
      applied: false,
      lambda,
      orderedIds: sliceOrDefault(inputIds, limit),
      droppedDuplicateCount: 0,
      reorderedIds: [],
    };
  }

  // A caller-provided relevance wins; otherwise fall back to query similarity.
  const relevanceById = /* @__PURE__ */ new Map();
  for (const candidate of embeddedCandidates) {
    const overriddenRelevance = sanitizeUnit(candidate.relevance);
    const derivedRelevance = overriddenRelevance ?? cosineSimilarity(options.queryVector, candidate.embedding ?? []);
    relevanceById.set(candidate.id, derivedRelevance);
  }

  const pairwiseMaxById = computePairwiseMaxSimilarity(embeddedCandidates);
  const mmrScoreById = /* @__PURE__ */ new Map();
  for (const candidate of embeddedCandidates) {
    const relevance = relevanceById.get(candidate.id) ?? 0;
    const maxPairwise = pairwiseMaxById.get(candidate.id) ?? 0;
    mmrScoreById.set(candidate.id, lambda * relevance - (1 - lambda) * maxPairwise);
  }

  // First input position per id, looked up in O(1) by the tie-break below.
  const firstInputIndexById = /* @__PURE__ */ new Map();
  inputIds.forEach((id, index) => {
    if (!firstInputIndexById.has(id)) {
      firstInputIndexById.set(id, index);
    }
  });

  const rankedEmbeddedIds = [...embeddedCandidates]
    .sort((left, right) => {
      const leftScore = mmrScoreById.get(left.id) ?? 0;
      const rightScore = mmrScoreById.get(right.id) ?? 0;
      if (leftScore !== rightScore) {
        return rightScore - leftScore;
      }
      return (firstInputIndexById.get(left.id) ?? -1) - (firstInputIndexById.get(right.id) ?? -1);
    })
    .map((candidate) => candidate.id);

  const orderedIds = sliceOrDefault([...rankedEmbeddedIds, ...unembeddedIds], limit);
  // Ids whose input position holds a different id in the output.
  const reorderedIds = inputIds.filter((id, index) => orderedIds[index] !== id);
  const droppedDuplicateCount = countDroppedDuplicates(rankedEmbeddedIds, inputIds, pairwiseMaxById);
  return {
    applied: true,
    lambda,
    orderedIds,
    droppedDuplicateCount,
    reorderedIds,
  };
}
|
|
501
|
+
/**
 * For every embedded candidate, finds its highest cosine similarity to any
 * other embedded candidate.
 *
 * @param {Array<{id: *, embedding?: Array<number>}>} embeddedCandidates
 * @returns {Map<*, number>} Peak similarity per candidate id; 0 when no pair exceeds it.
 */
function computePairwiseMaxSimilarity(embeddedCandidates) {
  const peakById = /* @__PURE__ */ new Map(embeddedCandidates.map((candidate) => [candidate.id, 0]));
  const raisePeak = (id, similarity) => {
    if (similarity > (peakById.get(id) ?? 0)) {
      peakById.set(id, similarity);
    }
  };

  // Each unordered pair is compared once and the result applied to both sides.
  for (const [outer, left] of embeddedCandidates.entries()) {
    for (let inner = outer + 1; inner < embeddedCandidates.length; inner += 1) {
      const right = embeddedCandidates[inner];
      const similarity = cosineSimilarity(left.embedding ?? [], right.embedding ?? []);
      raisePeak(left.id, similarity);
      raisePeak(right.id, similarity);
    }
  }
  return peakById;
}
|
|
521
|
+
/**
 * Counts embedded candidates that were demoted and are also near-duplicates.
 *
 * A candidate counts when its rank among the diversified embedded ids is
 * worse than its input position and its peak pairwise similarity is at or
 * above NEAR_DUPLICATE_SIMILARITY.
 *
 * @param {Array} rankedEmbeddedIds Embedded ids in diversified order.
 * @param {Array} inputIds All ids in input order.
 * @param {Map<*, number>} pairwiseMaxById Peak pairwise similarity per id.
 * @returns {number} Number of demoted near-duplicates.
 */
function countDroppedDuplicates(rankedEmbeddedIds, inputIds, pairwiseMaxById) {
  // For a repeated id the later input position wins.
  const inputRankById = /* @__PURE__ */ new Map(inputIds.map((id, index) => [id, index]));
  const demotedNearDuplicates = rankedEmbeddedIds.filter((id, mmrRank) => {
    const inputRank = inputRankById.get(id);
    const wasDemoted = inputRank !== void 0 && mmrRank > inputRank;
    return wasDemoted && (pairwiseMaxById.get(id) ?? 0) >= NEAR_DUPLICATE_SIMILARITY;
  });
  return demotedNearDuplicates.length;
}
|
|
537
|
+
/**
 * Returns a copy of `ids`, truncated to `limit` entries when a limit applies.
 *
 * @param {Array} ids Ids to copy.
 * @param {number|null} limit Maximum length, or `null` for no limit.
 * @returns {Array} A new array; the input is never returned directly.
 */
function sliceOrDefault(ids, limit) {
  const keepEverything = limit === null || limit >= ids.length;
  return keepEverything ? [...ids] : ids.slice(0, limit);
}
|
|
543
|
+
/**
 * Resolves the optional result limit for diversification.
 *
 * @param {*} value Requested limit.
 * @param {number} totalCandidates Number of candidates available.
 * @returns {number|null} `null` (no limit) unless `value` is a positive finite
 *   number; otherwise its floor, capped at `totalCandidates`.
 */
function resolveLimit(value, totalCandidates) {
  if (Number.isFinite(value) && value > 0) {
    return Math.min(totalCandidates, Math.floor(value));
  }
  return null;
}
|
|
549
|
+
/**
 * Tells whether a candidate carries an embedding that can be compared.
 *
 * @param {*} embedding Candidate embedding.
 * @returns {boolean} `true` only for a non-empty plain array.
 */
function hasUsableEmbedding(embedding) {
  if (!Array.isArray(embedding)) {
    return false;
  }
  return embedding.length !== 0;
}
|
|
552
|
+
/**
 * Clamps a value into [0, 1] for the diversification stage.
 *
 * @param {*} value Value to clamp.
 * @returns {number} 0 for non-finite or non-positive input, 1 at or above 1, otherwise the value.
 */
function clampUnit4(value) {
  if (!Number.isFinite(value)) {
    return 0;
  }
  if (value <= 0) {
    return 0;
  }
  if (value >= 1) {
    return 1;
  }
  return value;
}
|
|
558
|
+
/**
 * Resolves the pool-size gate for diversification.
 *
 * @param {*} value Requested gate.
 * @returns {number} The floor of `value` when it is a finite number at or
 *   above 0 (0 disables the gate); otherwise the default gate.
 */
function resolveMinPoolSize(value) {
  const isValidGate = typeof value === "number" && Number.isFinite(value) && value >= 0;
  return isValidGate ? Math.floor(value) : DEFAULT_MMR_MIN_POOL_SIZE;
}
|
|
567
|
+
/**
 * Returns `value` when it is a finite number, otherwise `fallback`.
 *
 * @param {*} value Candidate number.
 * @param {*} fallback Returned for non-numeric or non-finite input.
 * @returns {*} `value` or `fallback`.
 */
function sanitizeNumber(value, fallback) {
  const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
  return isFiniteNumber ? value : fallback;
}
|
|
573
|
+
/**
 * Normalizes an optional relevance override.
 *
 * @param {*} value Candidate override.
 * @returns {number|null} `null` when no finite number was supplied, otherwise the value clamped by clampUnit4.
 */
function sanitizeUnit(value) {
  const hasOverride = typeof value === "number" && Number.isFinite(value);
  return hasOverride ? clampUnit4(value) : null;
}
|
|
579
|
+
|
|
580
|
+
// src/core/recall/neighborhood.ts
|
|
581
|
+
// NOTE(review): presumably the default cap on neighborhood expansion candidates; the consumer is not visible here - confirm.
var DEFAULT_NEIGHBORHOOD_BUDGET = 24;
|
|
582
|
+
// Default number of top-scoring candidates selectStrongSeeds considers as seeds.
var DEFAULT_STRONG_SEED_TOP_N = 3;
|
|
583
|
+
// Default minimum lead the best candidate needs over the first candidate outside the top N for seeds to be selected.
var DEFAULT_STRONG_SEED_SCORE_GAP = 0.05;
|
|
584
|
+
// Default score bonus seededRerank adds to a candidate that shares lineage with a seed.
var DEFAULT_SEEDED_RERANK_WEIGHT = 0.03;
|
|
585
|
+
/**
 * Picks the candidates that clearly lead the pool, to be used as seeds.
 *
 * Seeds are returned only when the best score is positive and leads the
 * first candidate outside the top N by at least the gap floor. Among the top
 * N, a candidate qualifies when its score is within the gap floor of the
 * leader and not below that outside candidate.
 *
 * @param {Array<{score: number}>} candidates Scored candidates (not mutated).
 * @param {{topN?: number, scoreGapFloor?: number}} [options]
 * @returns {Array} Selected seeds, highest score first; possibly empty.
 */
function selectStrongSeeds(candidates, options = {}) {
  if (candidates.length === 0) {
    return [];
  }
  const seedLimit = Math.max(1, sanitizeInteger(options.topN, DEFAULT_STRONG_SEED_TOP_N));
  const gapFloor = sanitizeUnit2(options.scoreGapFloor ?? DEFAULT_STRONG_SEED_SCORE_GAP);
  const byScoreDesc = candidates.slice().sort((a, b) => b.score - a.score);
  const [leader] = byScoreDesc;
  if (!leader || leader.score <= 0) {
    return [];
  }

  // Score of the first candidate outside the top N (0 when there is none).
  const outsideScore = byScoreDesc[seedLimit]?.score ?? 0;
  const lead = leader.score - outsideScore;
  if (lead < gapFloor) {
    return [];
  }

  const cutoff = Math.max(leader.score - gapFloor, outsideScore);
  const topSlice = byScoreDesc.slice(0, seedLimit);
  return topSlice.filter((entry) => entry.score >= cutoff);
}
|
|
603
|
+
/**
 * Adds a small score bonus to candidates that share lineage with a seed.
 *
 * Seeds themselves are never boosted. The output keeps the input order;
 * boosted candidates are shallow copies with a new clamped `score`, all
 * others are passed through by reference.
 *
 * @param {Array<{id: *, score: number}>} candidates Candidates to inspect.
 * @param {Array<{id: *}>} seeds Seeds chosen for this pool.
 * @param {(candidate: object, seed: object) => boolean} sharesLineage Lineage predicate.
 * @param {{weight?: number}} [options] Bonus size; a weight of 0 disables boosting.
 * @returns {{candidates: Array, boostedIds: Array}} New candidate array plus the ids that were boosted.
 */
function seededRerank(candidates, seeds, sharesLineage, options = {}) {
  const boostedIds = [];
  const unchanged = () => ({ candidates: [...candidates], boostedIds });

  if (candidates.length === 0 || seeds.length === 0) {
    return unchanged();
  }
  const weight = sanitizeUnit2(options.weight ?? DEFAULT_SEEDED_RERANK_WEIGHT);
  if (weight <= 0) {
    return unchanged();
  }

  const seedIds = /* @__PURE__ */ new Set();
  for (const seed of seeds) {
    seedIds.add(seed.id);
  }

  const reranked = candidates.map((candidate) => {
    const isSeed = seedIds.has(candidate.id);
    const relatedSeed = isSeed
      ? void 0
      : seeds.find((seed) => seed.id !== candidate.id && sharesLineage(candidate, seed));
    if (!relatedSeed) {
      return candidate;
    }
    boostedIds.push(candidate.id);
    return { ...candidate, score: clampUnit5(candidate.score + weight) };
  });
  return { candidates: reranked, boostedIds };
}
|
|
629
|
+
/**
 * Lineage predicate for entries.
 *
 * Two distinct entries are related when they share a non-empty claim key,
 * when either one is recorded as superseded by the other, or when their
 * subjects share a topic prefix.
 *
 * @param {{id: *, claim_key?: string, superseded_by?: *, subject?: string}} candidate
 * @param {{id: *, claim_key?: string, superseded_by?: *, subject?: string}} seed
 * @returns {boolean} Whether the candidate is a lineage relative of the seed.
 */
function sharesEntryLineage(candidate, seed) {
  if (candidate.id === seed.id) {
    return false;
  }
  const sameClaim = Boolean(candidate.claim_key && seed.claim_key) && candidate.claim_key === seed.claim_key;
  const linkedBySupersession = candidate.superseded_by === seed.id || seed.superseded_by === candidate.id;
  if (sameClaim || linkedBySupersession) {
    return true;
  }
  return sharesTopicPrefix(candidate.subject, seed.subject);
}
|
|
641
|
+
/**
 * Lineage predicate for episodes.
 *
 * Two distinct episodes are related when they come from the same source
 * record (equal `source` and a defined, equal `sourceId`) or carry the same
 * non-empty transcript hash.
 *
 * @param {{id: *, source?: *, sourceId?: *, transcriptHash?: string}} candidate
 * @param {{id: *, source?: *, sourceId?: *, transcriptHash?: string}} seed
 * @returns {boolean} Whether the candidate is a lineage relative of the seed.
 */
function sharesEpisodeLineage(candidate, seed) {
  if (candidate.id === seed.id) {
    return false;
  }
  const sameSourceRecord =
    candidate.sourceId !== void 0 && candidate.source === seed.source && candidate.sourceId === seed.sourceId;
  const sameTranscript =
    Boolean(candidate.transcriptHash) &&
    Boolean(seed.transcriptHash) &&
    candidate.transcriptHash === seed.transcriptHash;
  return sameSourceRecord || sameTranscript;
}
|
|
653
|
+
/**
 * Lineage predicate for procedures.
 *
 * Two distinct procedures are related when they carry the same procedure key.
 * A missing or empty key never matches, so records without a key are not
 * treated as relatives of each other. This mirrors how sharesEntryLineage
 * guards its claim key.
 *
 * @param {{id: *, procedure_key?: string}} candidate
 * @param {{id: *, procedure_key?: string}} seed
 * @returns {boolean} Whether the candidate is a lineage relative of the seed.
 */
function sharesProcedureLineage(candidate, seed) {
  if (candidate.id === seed.id) {
    return false;
  }
  // Without this guard, `undefined === undefined` would relate every pair of keyless procedures.
  return Boolean(candidate.procedure_key) && candidate.procedure_key === seed.procedure_key;
}
|
|
659
|
+
// Minimum number of leading subject tokens two subjects must share to count as the same topic.
var TOPIC_PREFIX_SHARED_MIN = 2;
|
|
660
|
+
// Minimum fraction of the shorter subject's tokens that the shared prefix must cover.
var TOPIC_PREFIX_COVERAGE_MIN = 0.6;
|
|
661
|
+
/**
 * Tells whether two subjects start with the same topic tokens.
 *
 * The subjects are tokenized and compared position by position from the
 * start. They match when the common prefix has at least
 * TOPIC_PREFIX_SHARED_MIN tokens and covers at least
 * TOPIC_PREFIX_COVERAGE_MIN of the shorter token list.
 *
 * @param {*} leftSubject First subject, passed to `tokenize`.
 * @param {*} rightSubject Second subject, passed to `tokenize`.
 * @returns {boolean} Whether the subjects share a topic prefix.
 */
function sharesTopicPrefix(leftSubject, rightSubject) {
  const leftTokens = tokenize(leftSubject);
  const rightTokens = tokenize(rightSubject);
  if (leftTokens.length === 0 || rightTokens.length === 0) {
    return false;
  }

  const span = Math.min(leftTokens.length, rightTokens.length);
  let agreeing = 0;
  while (agreeing < span && leftTokens[agreeing] === rightTokens[agreeing]) {
    agreeing += 1;
  }

  if (agreeing < TOPIC_PREFIX_SHARED_MIN) {
    return false;
  }
  return agreeing / span >= TOPIC_PREFIX_COVERAGE_MIN;
}
|
|
680
|
+
/**
 * Clamps a boosted score into [0, 1].
 *
 * @param {*} value Value to clamp.
 * @returns {number} 0 for non-finite or non-positive input, 1 at or above 1, otherwise the value.
 */
function clampUnit5(value) {
  const outOfRangeLow = !Number.isFinite(value) || value <= 0;
  return outOfRangeLow ? 0 : value >= 1 ? 1 : value;
}
|
|
686
|
+
/**
 * Normalizes a weight or floor option into [0, 1].
 *
 * @param {*} value Candidate option value.
 * @returns {number} 0 for non-numeric, non-finite or negative input, otherwise the value capped at 1.
 */
function sanitizeUnit2(value) {
  const isUsable = typeof value === "number" && Number.isFinite(value) && !(value < 0);
  return isUsable ? Math.min(1, value) : 0;
}
|
|
692
|
+
/**
 * Normalizes a count option to a positive integer.
 *
 * @param {*} value Candidate count.
 * @param {*} fallback Returned when `value` is not a finite number of at least 1.
 * @returns {*} The floor of `value`, or `fallback`.
 */
function sanitizeInteger(value, fallback) {
  const isCount = typeof value === "number" && Number.isFinite(value) && value >= 1;
  return isCount ? Math.floor(value) : fallback;
}
|
|
698
|
+
|
|
282
699
|
// src/core/claim-key.ts
|
|
283
700
|
var UNKNOWN_SEGMENT = "unknown";
|
|
284
701
|
var SELF_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set(["i", "me", "myself", "the_user", "user", "we", "our_team", "the_project", "this_project"]);
|
|
@@ -887,11 +1304,14 @@ function createNoopRecallTraceSink() {
|
|
|
887
1304
|
}
|
|
888
1305
|
|
|
889
1306
|
// src/core/recall/search.ts
|
|
1307
|
+
var HISTORICAL_NEIGHBORHOOD_FAMILIES = ["supersession_chain", "claim_key_sibling", "topic_family"];
|
|
890
1308
|
var MIN_VECTOR_ONLY_EVIDENCE = 0.3;
|
|
891
1309
|
var HISTORICAL_STATE_FLAT_RECENCY = 0.5;
|
|
892
1310
|
var HISTORICAL_PREDECESSOR_BOOST = 0.08;
|
|
893
1311
|
var HISTORICAL_RETIRED_PREDECESSOR_BOOST = 0.06;
|
|
894
1312
|
var HISTORICAL_OLDER_STATE_BOOST = 0.08;
|
|
1313
|
+
var HISTORICAL_LINEAGE_GAP_MARGIN = 0.02;
|
|
1314
|
+
var HISTORICAL_LINEAGE_MAX_BONUS = 0.45;
|
|
895
1315
|
var HISTORICAL_TOPIC_SHARED_PREFIX_MIN = 2;
|
|
896
1316
|
var HISTORICAL_TOPIC_PREFIX_OF_CANDIDATE_MIN = 0.6;
|
|
897
1317
|
var CLAIM_KEY_TENTATIVE_CURRENT_PENALTY = 0.08;
|
|
@@ -899,6 +1319,28 @@ var CLAIM_KEY_REDUNDANT_TRUSTED_SLOT_PENALTY = 0.05;
|
|
|
899
1319
|
var CLAIM_KEY_REDUNDANT_TRUSTED_SLOT_MAX_PENALTY = 0.15;
|
|
900
1320
|
var QUERY_EMBEDDING_FAILURE_NOTICE = "Embeddings failed during recall, so Agenr fell back to lexical-only entry ranking.";
|
|
901
1321
|
var VECTOR_SEARCH_FAILURE_NOTICE = "Vector search failed during recall, so Agenr continued with lexical entry candidates only.";
|
|
1322
|
+
var ENTITY_ATTRIBUTE_IDENTITY_WRAPPERS = /* @__PURE__ */ new Set(["identity", "profile", "bio", "biography", "summary"]);
|
|
1323
|
+
var WEAK_QUERY_GROUNDING_TOKENS = /* @__PURE__ */ new Set([
|
|
1324
|
+
"earlier",
|
|
1325
|
+
"last",
|
|
1326
|
+
"mention",
|
|
1327
|
+
"mentioned",
|
|
1328
|
+
"number",
|
|
1329
|
+
"order",
|
|
1330
|
+
"remember",
|
|
1331
|
+
"remind",
|
|
1332
|
+
"reminder",
|
|
1333
|
+
"run",
|
|
1334
|
+
"runs",
|
|
1335
|
+
"thing",
|
|
1336
|
+
"time",
|
|
1337
|
+
"use",
|
|
1338
|
+
"uses",
|
|
1339
|
+
"using"
|
|
1340
|
+
]);
|
|
1341
|
+
var WEAKLY_GROUNDED_REMINDER_PATTERN = /\b(earlier|last time|mention(?:ed)?|remember|remind(?:er)?)\b/iu;
|
|
1342
|
+
var MIN_VECTOR_WITHOUT_GROUNDED_LEXICAL_SUPPORT = 0.45;
|
|
1343
|
+
var GROUNDING_SORT_MAX_SCORE_GAP = 0.03;
|
|
902
1344
|
async function recall(query, ports, options = {}) {
|
|
903
1345
|
const text = query.text.trim();
|
|
904
1346
|
const limit = normalizeLimit(query.limit);
|
|
@@ -970,39 +1412,47 @@ async function recall(query, ports, options = {}) {
|
|
|
970
1412
|
];
|
|
971
1413
|
summary.degraded.lexicalOnly = summary.degraded.active && queryEmbedding.length === 0;
|
|
972
1414
|
const mergeStartedAt = Date.now();
|
|
973
|
-
const
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
rankingProfile: query.rankingProfile
|
|
1415
|
+
const mergeOutcome = mergeCandidates(vectorCandidates, ftsCandidates);
|
|
1416
|
+
const neighborhoodEnabled = options.rankingPolicy?.neighborhood !== "disabled";
|
|
1417
|
+
const expansionRanks = neighborhoodEnabled ? await expandEntryNeighborhood(mergeOutcome.merged, queryEmbedding, ports, {
|
|
1418
|
+
rankingProfile: query.rankingProfile,
|
|
1419
|
+
neighborhoodTrace: summary.neighborhood
|
|
1420
|
+
}) : [];
|
|
1421
|
+
const relevanceByEntryId = resolveEntryRelevance({
|
|
1422
|
+
vectorRanks: mergeOutcome.vectorRanks,
|
|
1423
|
+
ftsRanks: mergeOutcome.ftsRanks,
|
|
1424
|
+
expansionRanks,
|
|
1425
|
+
policy: options.rankingPolicy,
|
|
1426
|
+
trace: summary.rrf
|
|
977
1427
|
});
|
|
978
|
-
summary.candidateCounts.merged =
|
|
979
|
-
summary.timings.mergeCandidatesMs =
|
|
1428
|
+
summary.candidateCounts.merged = mergeOutcome.merged.size;
|
|
1429
|
+
summary.timings.mergeCandidatesMs = elapsedMs2(mergeStartedAt);
|
|
980
1430
|
const scoreStartedAt = Date.now();
|
|
981
|
-
const
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
asOfDate,
|
|
986
|
-
aroundDate,
|
|
987
|
-
aroundRadius: query.aroundRadius,
|
|
988
|
-
rankingProfile: query.rankingProfile
|
|
989
|
-
})
|
|
990
|
-
),
|
|
991
|
-
{
|
|
1431
|
+
const historicallyBoosted = applyHistoricalLineageBoosts(
|
|
1432
|
+
Array.from(mergeOutcome.merged.values()).map(
|
|
1433
|
+
(candidate) => scoreMergedCandidate(candidate, text, queryEmbedding, relevanceByEntryId.get(candidate.entry.id) ?? 0, {
|
|
1434
|
+
asOfDate,
|
|
992
1435
|
aroundDate,
|
|
1436
|
+
aroundRadius: query.aroundRadius,
|
|
993
1437
|
rankingProfile: query.rankingProfile
|
|
994
|
-
}
|
|
995
|
-
summary.claimKey,
|
|
996
|
-
slotPolicyConfig
|
|
1438
|
+
})
|
|
997
1439
|
),
|
|
1440
|
+
{
|
|
1441
|
+
aroundDate,
|
|
1442
|
+
rankingProfile: query.rankingProfile
|
|
1443
|
+
},
|
|
998
1444
|
summary.claimKey,
|
|
999
1445
|
slotPolicyConfig
|
|
1000
|
-
)
|
|
1001
|
-
|
|
1446
|
+
);
|
|
1447
|
+
const rerankedCandidates = neighborhoodEnabled ? applySeededEntryRerank(historicallyBoosted, summary.neighborhood) : historicallyBoosted;
|
|
1448
|
+
const shaped = applyClaimKeyResultShaping(rerankedCandidates, summary.claimKey, slotPolicyConfig).sort((left, right) => right.score - left.score);
|
|
1449
|
+
const diversified = applyMmrDiversification(shaped, queryEmbedding, options.rankingPolicy, summary.mmr);
|
|
1450
|
+
const scored = await applyEntryCrossEncoderRerank(diversified, text, ports.crossEncoder, options.rankingPolicy, summary.crossEncoder);
|
|
1451
|
+
summary.timings.scoreCandidatesMs = elapsedMs2(scoreStartedAt);
|
|
1002
1452
|
const thresholdStartedAt = Date.now();
|
|
1003
|
-
const thresholded = scored.filter((result) => hasSufficientReturnEvidence(result) && result.score >= threshold);
|
|
1453
|
+
const thresholded = scored.filter((result) => hasSufficientReturnEvidence(result, query) && result.score >= threshold);
|
|
1004
1454
|
summary.candidateCounts.thresholdQualified = thresholded.length;
|
|
1005
|
-
summary.timings.thresholdMs =
|
|
1455
|
+
summary.timings.thresholdMs = elapsedMs2(thresholdStartedAt);
|
|
1006
1456
|
if (thresholded.length === 0) {
|
|
1007
1457
|
reportTrace(resolveNoResultReason(summary, scored.length === 0 ? "no_candidates" : "below_threshold"));
|
|
1008
1458
|
return [];
|
|
@@ -1010,8 +1460,8 @@ async function recall(query, ports, options = {}) {
|
|
|
1010
1460
|
const budgetStartedAt = Date.now();
|
|
1011
1461
|
const budgeted = budget === null ? thresholded : applyBudget(thresholded, budget);
|
|
1012
1462
|
summary.candidateCounts.budgetAccepted = budgeted.length;
|
|
1013
|
-
summary.timings.budgetMs = budget === null ? 0 :
|
|
1014
|
-
const ranked = budgeted.slice(0, limit);
|
|
1463
|
+
summary.timings.budgetMs = budget === null ? 0 : elapsedMs2(budgetStartedAt);
|
|
1464
|
+
const ranked = sortAcceptedCandidates(budgeted.slice(0, limit), text, query.rankingProfile);
|
|
1015
1465
|
summary.candidateCounts.finalRanked = ranked.length;
|
|
1016
1466
|
if (ranked.length === 0) {
|
|
1017
1467
|
reportTrace("limit_zero");
|
|
@@ -1034,7 +1484,7 @@ async function recall(query, ports, options = {}) {
|
|
|
1034
1484
|
];
|
|
1035
1485
|
});
|
|
1036
1486
|
summary.candidateCounts.returned = results.length;
|
|
1037
|
-
summary.timings.shapeResultsMs =
|
|
1487
|
+
summary.timings.shapeResultsMs = elapsedMs2(shapeStartedAt);
|
|
1038
1488
|
if (results.length === 0) {
|
|
1039
1489
|
reportTrace(resolveNoResultReason(summary, "hydrate_missing"));
|
|
1040
1490
|
return [];
|
|
@@ -1089,6 +1539,36 @@ function buildRecallTraceSummary(params) {
|
|
|
1089
1539
|
trustPenalized: 0,
|
|
1090
1540
|
redundancyPenalized: 0
|
|
1091
1541
|
},
|
|
1542
|
+
rrf: {
|
|
1543
|
+
applied: false,
|
|
1544
|
+
channelCount: 0,
|
|
1545
|
+
rankConstant: DEFAULT_RRF_RANK_CONSTANT,
|
|
1546
|
+
fusedCandidateCount: 0,
|
|
1547
|
+
maxFusedScore: 0
|
|
1548
|
+
},
|
|
1549
|
+
neighborhood: {
|
|
1550
|
+
expansionRequested: false,
|
|
1551
|
+
expansionAvailable: false,
|
|
1552
|
+
familiesRequested: [],
|
|
1553
|
+
includeRetired: false,
|
|
1554
|
+
seedIds: [],
|
|
1555
|
+
expansionCandidates: 0,
|
|
1556
|
+
strongSeedIds: [],
|
|
1557
|
+
rerankBoostedIds: []
|
|
1558
|
+
},
|
|
1559
|
+
mmr: {
|
|
1560
|
+
applied: false,
|
|
1561
|
+
lambda: DEFAULT_MMR_LAMBDA,
|
|
1562
|
+
droppedDuplicateCount: 0,
|
|
1563
|
+
reorderedIds: []
|
|
1564
|
+
},
|
|
1565
|
+
crossEncoder: {
|
|
1566
|
+
applied: false,
|
|
1567
|
+
k: 0,
|
|
1568
|
+
alpha: DEFAULT_CROSS_ENCODER_ALPHA,
|
|
1569
|
+
latencyMs: 0,
|
|
1570
|
+
rescoredIds: []
|
|
1571
|
+
},
|
|
1092
1572
|
timings: {
|
|
1093
1573
|
mergeCandidatesMs: 0,
|
|
1094
1574
|
scoreCandidatesMs: 0,
|
|
@@ -1131,12 +1611,13 @@ function resolveNoResultReason(summary, reason) {
|
|
|
1131
1611
|
}
|
|
1132
1612
|
return reason;
|
|
1133
1613
|
}
|
|
1134
|
-
function scoreMergedCandidate(candidate, queryText, queryEmbedding, params) {
|
|
1614
|
+
function scoreMergedCandidate(candidate, queryText, queryEmbedding, rrfScore, params) {
|
|
1135
1615
|
const vector = candidate.vectorSim ?? cosineSimilarity(candidate.entry.embedding ?? [], queryEmbedding);
|
|
1136
1616
|
const lexical = computeLexicalScore(queryText, candidate.entry.subject, candidate.entry.content);
|
|
1137
1617
|
const recency = resolveRecencyScore(candidate.entry, params);
|
|
1138
1618
|
const importance = importanceScore(candidate.entry.importance);
|
|
1139
1619
|
const scored = scoreCandidate({
|
|
1620
|
+
relevance: rrfScore,
|
|
1140
1621
|
vectorSim: vector,
|
|
1141
1622
|
lexical,
|
|
1142
1623
|
recency,
|
|
@@ -1147,28 +1628,139 @@ function scoreMergedCandidate(candidate, queryText, queryEmbedding, params) {
|
|
|
1147
1628
|
score: scored.score,
|
|
1148
1629
|
scores: {
|
|
1149
1630
|
...scored.scores,
|
|
1631
|
+
// `rrf` mirrors `relevance` and makes the reciprocal rank fusion source
|
|
1632
|
+
// explicit for trace summaries and cross-stage reasoning in later phases.
|
|
1633
|
+
rrf: scored.scores.relevance,
|
|
1150
1634
|
historicalLineage: 0,
|
|
1635
|
+
neighborhoodBoost: 0,
|
|
1151
1636
|
claimKeyTrustPenalty: 0,
|
|
1152
1637
|
claimKeyRedundancyPenalty: 0
|
|
1153
1638
|
}
|
|
1154
1639
|
};
|
|
1155
1640
|
}
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1641
|
+
/**
 * Derives per-entry relevance scores from the ranked retrieval channels and
 * records the outcome on `params.trace`.
 *
 * With `policy.rrf === "disabled"` a single channel is used (vector ranks when
 * non-empty, otherwise FTS ranks) and each id is scored as `1 / position`,
 * keeping the first occurrence of a repeated id. Otherwise the vector, FTS and
 * expansion channels are handed to `rrfFuse` with the rank constant chosen by
 * `resolveRrfRankConstant`.
 *
 * @param {object} params - `{ vectorRanks, ftsRanks, expansionRanks, policy, trace }`.
 * @returns {Map} Id-to-score map (the fallback map, or whatever `rrfFuse` returns).
 */
function resolveEntryRelevance(params) {
  const { vectorRanks, ftsRanks, expansionRanks, policy, trace } = params;
  const peakScore = (scores) => (scores.size > 0 ? Math.max(...scores.values()) : 0);

  if (policy?.rrf !== "disabled") {
    const channels = [vectorRanks, ftsRanks, expansionRanks].map((ranks) => Array.from(ranks));
    const activeChannelCount = channels.filter((channel) => channel.length > 0).length;
    // The rank constant depends on how many distinct ids take part in the fusion.
    const distinctIds = new Set(channels.flat());
    const rankConstant = resolveRrfRankConstant(policy, distinctIds.size);
    trace.rankConstant = rankConstant;
    const fused = rrfFuse(channels, rankConstant);
    trace.applied = fused.size > 0;
    trace.channelCount = activeChannelCount;
    trace.fusedCandidateCount = fused.size;
    trace.maxFusedScore = peakScore(fused);
    return fused;
  }

  // Fusion disabled: score a single channel by reciprocal position.
  const fallbackChannel = vectorRanks.length > 0 ? vectorRanks : ftsRanks;
  const fallback = new Map();
  let position = 0;
  for (const id of fallbackChannel) {
    position += 1;
    if (!fallback.has(id)) {
      fallback.set(id, 1 / position);
    }
  }
  trace.applied = false;
  trace.channelCount = fallbackChannel.length > 0 ? 1 : 0;
  trace.rankConstant = resolveRrfRankConstant(policy, fallback.size);
  trace.fusedCandidateCount = fallback.size;
  trace.maxFusedScore = peakScore(fallback);
  return fallback;
}
|
|
1675
|
+
/**
 * Picks the reciprocal-rank-fusion rank constant for a fused pool.
 *
 * Precedence for a small pool (finite, positive, at most
 * `SMALL_POOL_RRF_POOL_SIZE`): a valid `policy.rrfSmallPoolRankConstant`,
 * then a valid `policy.rrfRankConstant`, then
 * `DEFAULT_RRF_SMALL_POOL_RANK_CONSTANT`. For any other pool size: a valid
 * `policy.rrfRankConstant`, else `DEFAULT_RRF_RANK_CONSTANT`.
 * "Valid" means a finite number greater than zero.
 *
 * @param {object|undefined} policy - Optional ranking policy.
 * @param {number} fusedPoolSize - Number of distinct ids being fused.
 * @returns {number} The rank constant to use.
 */
function resolveRrfRankConstant(policy, fusedPoolSize) {
  const isPositiveFinite = (value) => typeof value === "number" && Number.isFinite(value) && value > 0;

  const configuredGeneral = policy?.rrfRankConstant;
  const generalIsExplicit = isPositiveFinite(configuredGeneral);
  const generalConstant = generalIsExplicit ? configuredGeneral : DEFAULT_RRF_RANK_CONSTANT;

  const poolIsSmall = Number.isFinite(fusedPoolSize) && fusedPoolSize > 0 && fusedPoolSize <= SMALL_POOL_RRF_POOL_SIZE;
  if (poolIsSmall) {
    const configuredSmall = policy?.rrfSmallPoolRankConstant;
    if (isPositiveFinite(configuredSmall)) {
      return configuredSmall;
    }
    // An explicit general constant outranks the built-in small-pool default.
    if (!generalIsExplicit) {
      return DEFAULT_RRF_SMALL_POOL_RANK_CONSTANT;
    }
  }
  return generalConstant;
}
|
|
1692
|
+
/**
 * Asks the optional `ports.expandNeighborhood` port for entries related to the
 * already-merged candidates and adds the new ones to `mergedCandidates`.
 *
 * Expansion only runs when there is at least one merged candidate, the port is
 * present, and `params.rankingProfile` is "historical_state". Entries already
 * in `mergedCandidates` are ignored; the rest are ordered by cosine similarity
 * to the query (descending), then by id.
 *
 * @param {Map} mergedCandidates - Id-to-candidate map; mutated with the new entries.
 * @param {number[]} queryEmbedding - Query embedding passed to `cosineSimilarity`.
 * @param {object} ports - Port bag; `expandNeighborhood` is optional.
 * @param {object} params - Reads `neighborhoodTrace` and `rankingProfile`.
 * @returns {Promise<string[]>} Ids of the newly added entries, in ranked order.
 */
async function expandEntryNeighborhood(mergedCandidates, queryEmbedding, ports, params) {
  const trace = params.neighborhoodTrace;
  trace.expansionAvailable = Boolean(ports.expandNeighborhood);

  if (mergedCandidates.size === 0) {
    return [];
  }
  if (!ports.expandNeighborhood) {
    return [];
  }
  if (params.rankingProfile !== "historical_state") {
    return [];
  }

  const families = HISTORICAL_NEIGHBORHOOD_FAMILIES;
  const includeRetired = true;
  const seedIds = Array.from(mergedCandidates.keys());
  trace.expansionRequested = true;
  trace.familiesRequested = [...families];
  trace.includeRetired = includeRetired;
  trace.seedIds = seedIds;

  const request = {
    seedIds,
    budget: DEFAULT_NEIGHBORHOOD_BUDGET,
    families,
    includeRetired
  };
  const expanded = await ports.expandNeighborhood(request);

  // Keep only entries that are not candidates yet, scored against the query.
  const ranked = [];
  for (const entry of expanded) {
    if (mergedCandidates.has(entry.id)) {
      continue;
    }
    ranked.push({
      entry,
      vectorSim: cosineSimilarity(entry.embedding ?? [], queryEmbedding)
    });
  }
  ranked.sort((left, right) => right.vectorSim - left.vectorSim || left.entry.id.localeCompare(right.entry.id));

  const addedIds = [];
  for (const { entry, vectorSim } of ranked) {
    mergedCandidates.set(entry.id, { entry, vectorSim });
    addedIds.push(entry.id);
  }
  trace.expansionCandidates = ranked.length;
  return addedIds;
}
|
|
1724
|
+
/**
 * Boosts candidates that share lineage with the strongest-scoring seeds.
 *
 * Seeds are chosen by `selectStrongSeeds`; `seededRerank` then rescales the
 * pool using `sharesEntryLineage` as the relatedness test. Only positive score
 * changes are applied, and the increase is added to
 * `scores.neighborhoodBoost`. Candidates keep their input order.
 *
 * @param {Array} candidates - Scored candidates (`{ entry, score, scores }`).
 * @param {object} trace - Receives `strongSeedIds` and `rerankBoostedIds`.
 * @returns {Array} The same array when nothing applies, otherwise a new array
 *   in which boosted candidates are replaced by updated copies.
 */
function applySeededEntryRerank(candidates, trace) {
  if (candidates.length === 0) {
    return candidates;
  }
  const toPayload = (candidate) => ({
    id: candidate.entry.id,
    score: candidate.score,
    entry: candidate.entry
  });

  const seedOptions = {
    topN: DEFAULT_STRONG_SEED_TOP_N,
    scoreGapFloor: DEFAULT_STRONG_SEED_SCORE_GAP
  };
  const seeds = selectStrongSeeds(candidates.map((candidate) => toPayload(candidate)), seedOptions);
  if (seeds.length === 0) {
    return candidates;
  }
  trace.strongSeedIds = seeds.map((seed) => seed.id);

  const sharesLineage = (candidate, seed) => sharesEntryLineage(candidate.entry, seed.entry);
  const payloads = candidates.map((candidate) => toPayload(candidate));
  const reranked = seededRerank(payloads, seeds, sharesLineage, {
    weight: DEFAULT_SEEDED_RERANK_WEIGHT
  });
  trace.rerankBoostedIds = reranked.boostedIds;

  const rerankedScores = new Map();
  for (const { id, score } of reranked.candidates) {
    rerankedScores.set(id, score);
  }

  return candidates.map((candidate) => {
    const nextScore = rerankedScores.get(candidate.entry.id) ?? candidate.score;
    const delta = nextScore - candidate.score;
    // Only boosts are applied; an unchanged or lowered score keeps the original object.
    if (delta <= 0) {
      return candidate;
    }
    const scores = {
      ...candidate.scores,
      neighborhoodBoost: candidate.scores.neighborhoodBoost + delta
    };
    return { ...candidate, score: nextScore, scores };
  });
}
|
|
1173
1765
|
function resolveRecencyScore(entry, params) {
|
|
1174
1766
|
if (params.asOfDate) {
|
|
@@ -1214,8 +1806,9 @@ function applyHistoricalLineageBoosts(candidates, params, claimKeyTrace, slotPol
|
|
|
1214
1806
|
return candidates;
|
|
1215
1807
|
}
|
|
1216
1808
|
const entries = candidates.map((candidate) => candidate.entry);
|
|
1809
|
+
const scoresById = new Map(candidates.map((candidate) => [candidate.entry.id, candidate.score]));
|
|
1217
1810
|
return candidates.map((candidate) => {
|
|
1218
|
-
const decision = resolveHistoricalLineageBonus(candidate.entry, entries, params.aroundDate, slotPolicyConfig);
|
|
1811
|
+
const decision = resolveHistoricalLineageBonus(candidate.entry, entries, scoresById, candidate.score, params.aroundDate, slotPolicyConfig);
|
|
1219
1812
|
if (decision.tentativeLineageSuppressed) {
|
|
1220
1813
|
claimKeyTrace.tentativeLineageSuppressed += 1;
|
|
1221
1814
|
}
|
|
@@ -1234,10 +1827,12 @@ function applyHistoricalLineageBoosts(candidates, params, claimKeyTrace, slotPol
|
|
|
1234
1827
|
};
|
|
1235
1828
|
});
|
|
1236
1829
|
}
|
|
1237
|
-
function resolveHistoricalLineageBonus(entry, entries, aroundDate, slotPolicyConfig) {
|
|
1238
|
-
|
|
1830
|
+
function resolveHistoricalLineageBonus(entry, entries, scoresById, candidateScore, aroundDate, slotPolicyConfig) {
|
|
1831
|
+
const directSuccessor = entries.find((peer) => peer.id !== entry.id && entry.superseded_by === peer.id);
|
|
1832
|
+
if (directSuccessor) {
|
|
1833
|
+
const successorScore = scoresById.get(directSuccessor.id) ?? 0;
|
|
1239
1834
|
return {
|
|
1240
|
-
bonus: HISTORICAL_PREDECESSOR_BOOST,
|
|
1835
|
+
bonus: shapeHistoricalLineageBonus(HISTORICAL_PREDECESSOR_BOOST, candidateScore, successorScore),
|
|
1241
1836
|
tentativeLineageSuppressed: false
|
|
1242
1837
|
};
|
|
1243
1838
|
}
|
|
@@ -1248,6 +1843,8 @@ function resolveHistoricalLineageBonus(entry, entries, aroundDate, slotPolicyCon
|
|
|
1248
1843
|
};
|
|
1249
1844
|
}
|
|
1250
1845
|
let tentativeLineageSuppressed = false;
|
|
1846
|
+
let bestPeerScore = 0;
|
|
1847
|
+
let peerMatched = false;
|
|
1251
1848
|
for (const peer of entries) {
|
|
1252
1849
|
if (peer.id === entry.id || !isPotentialCurrentPeer(peer) || createdAtMs(entry.created_at) >= createdAtMs(peer.created_at)) {
|
|
1253
1850
|
continue;
|
|
@@ -1260,16 +1857,29 @@ function resolveHistoricalLineageBonus(entry, entries, aroundDate, slotPolicyCon
|
|
|
1260
1857
|
if (relation === null) {
|
|
1261
1858
|
continue;
|
|
1262
1859
|
}
|
|
1860
|
+
peerMatched = true;
|
|
1861
|
+
const peerScore = scoresById.get(peer.id) ?? 0;
|
|
1862
|
+
if (peerScore > bestPeerScore) {
|
|
1863
|
+
bestPeerScore = peerScore;
|
|
1864
|
+
}
|
|
1865
|
+
}
|
|
1866
|
+
if (!peerMatched) {
|
|
1263
1867
|
return {
|
|
1264
|
-
bonus:
|
|
1868
|
+
bonus: 0,
|
|
1265
1869
|
tentativeLineageSuppressed
|
|
1266
1870
|
};
|
|
1267
1871
|
}
|
|
1872
|
+
const base = entry.retired ? HISTORICAL_RETIRED_PREDECESSOR_BOOST : HISTORICAL_OLDER_STATE_BOOST;
|
|
1268
1873
|
return {
|
|
1269
|
-
bonus:
|
|
1874
|
+
bonus: shapeHistoricalLineageBonus(base, candidateScore, bestPeerScore),
|
|
1270
1875
|
tentativeLineageSuppressed
|
|
1271
1876
|
};
|
|
1272
1877
|
}
|
|
1878
|
+
/**
 * Sizes a historical-lineage bonus.
 *
 * When the successor outscores the candidate, the bonus is raised to the score
 * gap plus `HISTORICAL_LINEAGE_GAP_MARGIN`; it is never lower than `base` and
 * never higher than `HISTORICAL_LINEAGE_MAX_BONUS`.
 *
 * @param {number} base - Minimum bonus for this lineage relation.
 * @param {number} candidateScore - Current score of the candidate.
 * @param {number} successorScore - Score of the peer it is compared against.
 * @returns {number} The bonus to apply.
 */
function shapeHistoricalLineageBonus(base, candidateScore, successorScore) {
  const gap = successorScore - candidateScore;
  let needed = 0;
  if (gap > 0) {
    needed = gap + HISTORICAL_LINEAGE_GAP_MARGIN;
  }
  const uncapped = Math.max(base, needed);
  return Math.min(HISTORICAL_LINEAGE_MAX_BONUS, uncapped);
}
|
|
1273
1883
|
/**
 * Tells whether an entry is neither retired nor superseded.
 *
 * @param {object} entry - Entry with optional `retired` and `superseded_by`.
 * @returns {boolean} True when `retired` is falsy and `superseded_by` is undefined.
 */
function isPotentialCurrentPeer(entry) {
  if (entry.retired) {
    return false;
  }
  return entry.superseded_by === undefined;
}
|
|
@@ -1328,6 +1938,102 @@ function countSharedPrefixTokens(leftTokens, rightTokens) {
|
|
|
1328
1938
|
}
|
|
1329
1939
|
return sharedPrefixCount;
|
|
1330
1940
|
}
|
|
1941
|
+
/**
 * Reorders candidates with `maximalMarginalRelevance` and records the outcome
 * on `trace`.
 *
 * Skipped (input returned unchanged) when there are fewer than two candidates,
 * when `policy.mmr` is "disabled", or when the reorder reports it was not
 * applied. Ids in the returned ordering that are unknown to the input are
 * ignored.
 *
 * @param {Array} candidates - Scored candidates (`{ entry, score, ... }`).
 * @param {number[]} queryEmbedding - Passed to the reorder as `queryVector`.
 * @param {object|undefined} policy - Optional ranking policy.
 * @param {object} trace - Receives `applied`, `lambda`, `droppedDuplicateCount`, `reorderedIds`.
 * @returns {Array} Candidates in their final order.
 */
function applyMmrDiversification(candidates, queryEmbedding, policy, trace) {
  const mmrDisabled = policy?.mmr === "disabled";
  if (candidates.length < 2 || mmrDisabled) {
    trace.applied = false;
    trace.lambda = resolveMmrLambda(policy);
    return candidates;
  }

  const mmrCandidates = candidates.map((candidate) => {
    const { id, embedding } = candidate.entry;
    const item = { id, relevance: candidate.score };
    // The embedding key is only present when the entry actually carries one.
    if (embedding) {
      item.embedding = embedding;
    }
    return item;
  });
  const lambda = resolveMmrLambda(policy);
  const minPoolSize = resolveMmrMinPoolSize(policy);
  const reorder = maximalMarginalRelevance({
    queryVector: queryEmbedding,
    candidates: mmrCandidates,
    lambda,
    minPoolSize
  });

  trace.applied = reorder.applied;
  trace.lambda = reorder.lambda;
  trace.droppedDuplicateCount = reorder.droppedDuplicateCount;
  trace.reorderedIds = reorder.reorderedIds;
  if (!reorder.applied) {
    return candidates;
  }

  const candidatesById = new Map();
  for (const candidate of candidates) {
    candidatesById.set(candidate.entry.id, candidate);
  }
  const ordered = [];
  for (const id of reorder.orderedIds) {
    const candidate = candidatesById.get(id);
    if (candidate) {
      ordered.push(candidate);
    }
  }
  return ordered;
}
|
|
1970
|
+
/**
 * Runs the cross-encoder rerank stage over entry candidates and copies the
 * stage result onto `trace`.
 *
 * Each candidate is sent with its id, passage text and current score. A
 * candidate is returned untouched when it received no numeric cross-encoder
 * score and its score did not change; otherwise a copy is returned with the
 * new score and, when available, `scores.crossEncoder`.
 *
 * @param {Array} candidates - Scored candidates (`{ entry, score, scores }`).
 * @param {string} query - Query text forwarded to the reranker.
 * @param {object|undefined} crossEncoder - Reranker port (may be absent).
 * @param {object|undefined} policy - Optional ranking policy.
 * @param {object} trace - Receives `applied`, `k`, `alpha`, `latencyMs`,
 *   `rescoredIds` and, when present, `degradedReason`.
 * @returns {Promise<Array>} Candidates in the order returned by the rerank stage.
 */
async function applyEntryCrossEncoderRerank(candidates, query, crossEncoder, policy, trace) {
  const rerankInput = candidates.map((candidate) => ({
    id: candidate.entry.id,
    text: buildCrossEncoderPassageText(candidate.entry),
    score: candidate.score,
    candidate
  }));
  const result = await applyCrossEncoderRerank({
    query,
    candidates: rerankInput,
    port: crossEncoder,
    disabled: policy?.crossEncoder === "disabled",
    topK: policy?.crossEncoderTopK ?? DEFAULT_CROSS_ENCODER_TOP_K,
    alpha: policy?.crossEncoderAlpha ?? DEFAULT_CROSS_ENCODER_ALPHA
  });

  trace.applied = result.applied;
  trace.k = result.k;
  trace.alpha = result.alpha;
  trace.latencyMs = result.latencyMs;
  trace.rescoredIds = [...result.rescoredIds];
  // Drop a stale reason left on the trace when this run reported none.
  if (!result.degradedReason) {
    delete trace.degradedReason;
  } else {
    trace.degradedReason = result.degradedReason;
  }

  return result.candidates.map((reranked) => {
    const original = reranked.candidate;
    const hasModelScore = typeof reranked.crossEncoderScore === "number";
    if (!hasModelScore && reranked.score === original.score) {
      return original;
    }
    const scores = { ...original.scores };
    if (hasModelScore) {
      scores.crossEncoder = reranked.crossEncoderScore;
    }
    return { ...original, score: reranked.score, scores };
  });
}
|
|
2010
|
+
/**
 * Builds the passage text sent to the cross-encoder for one entry.
 *
 * Subject and content are trimmed; the non-empty parts are joined with a blank
 * line between them. If both are empty the result is an empty string.
 *
 * @param {{subject: string, content: string}} entry - Entry to render.
 * @returns {string} Passage text.
 */
function buildCrossEncoderPassageText(entry) {
  const subject = entry.subject.trim();
  const content = entry.content.trim();
  return [subject, content].filter((part) => part.length > 0).join("\n\n");
}
|
|
2023
|
+
/**
 * Reads the MMR lambda from the policy.
 *
 * @param {object|undefined} policy - Optional ranking policy.
 * @returns {number} `policy.mmrLambda` clamped to [0, 1] when it is a finite
 *   number, otherwise `DEFAULT_MMR_LAMBDA`.
 */
function resolveMmrLambda(policy) {
  const configured = policy?.mmrLambda;
  const isUsable = typeof configured === "number" && Number.isFinite(configured);
  if (!isUsable) {
    return DEFAULT_MMR_LAMBDA;
  }
  const atLeastZero = Math.max(0, configured);
  return Math.min(1, atLeastZero);
}
|
|
2030
|
+
/**
 * Reads the minimum pool size for MMR from the policy.
 *
 * @param {object|undefined} policy - Optional ranking policy.
 * @returns {number} `policy.mmrMinPoolSize` rounded down when it is a finite,
 *   non-negative number, otherwise `DEFAULT_MMR_MIN_POOL_SIZE`.
 */
function resolveMmrMinPoolSize(policy) {
  const configured = policy?.mmrMinPoolSize;
  const isUsable = typeof configured === "number" && Number.isFinite(configured) && !(configured < 0);
  return isUsable ? Math.floor(configured) : DEFAULT_MMR_MIN_POOL_SIZE;
}
|
|
1331
2037
|
function applyClaimKeyResultShaping(candidates, claimKeyTrace, slotPolicyConfig) {
|
|
1332
2038
|
if (candidates.length === 0) {
|
|
1333
2039
|
return candidates;
|
|
@@ -1399,21 +2105,85 @@ function resolveTrustedSlotRedundancyPenalty(entryId, trustedSlotRankById) {
|
|
|
1399
2105
|
/**
 * Clamps a recall score to the closed interval [0, 1].
 *
 * @param {number} value - Raw score.
 * @returns {number} The score limited to [0, 1]; NaN stays NaN.
 */
function clampRecallScore(value) {
  const atLeastZero = Math.max(0, value);
  return Math.min(1, atLeastZero);
}
|
|
1402
|
-
function hasSufficientReturnEvidence(candidate) {
|
|
2108
|
+
/**
 * Decides whether a scored candidate has enough evidence to be returned.
 *
 * - "entity_attribute" profile: delegated to `hasEntityAttributeEvidence`.
 * - Positive lexical score: accepted when the entry shares a grounding token
 *   with the query, otherwise only when the vector score reaches
 *   `MIN_VECTOR_WITHOUT_GROUNDED_LEXICAL_SUPPORT`.
 * - No lexical score: rejected for weakly grounded reminder queries that lack
 *   grounded support, otherwise accepted when the vector score reaches
 *   `MIN_VECTOR_ONLY_EVIDENCE`.
 *
 * @param {object} candidate - Scored candidate (`{ entry, scores }`).
 * @param {object} query - Reads `rankingProfile`, `queryShape` and `text`.
 * @returns {boolean} Whether the candidate may be returned.
 */
function hasSufficientReturnEvidence(candidate, query) {
  if (query.rankingProfile === "entity_attribute") {
    return hasEntityAttributeEvidence(candidate.entry, query.queryShape);
  }

  const isGrounded = hasGroundedLexicalSupport(candidate.entry, query.text);
  const { scores } = candidate;

  if (scores.lexical > 0) {
    return isGrounded ? true : scores.vector >= MIN_VECTOR_WITHOUT_GROUNDED_LEXICAL_SUPPORT;
  }

  const isUngroundedReminder = isWeaklyGroundedReminderQuery(query.text) && !isGrounded;
  return isUngroundedReminder ? false : scores.vector >= MIN_VECTOR_ONLY_EVIDENCE;
}
|
|
2124
|
+
/**
 * Checks whether an entry contains at least one of the query's grounding
 * tokens, comparing canonicalized tokens.
 *
 * @param {{subject: string, content: string}} entry - Entry to inspect.
 * @param {string} queryText - Raw query text.
 * @returns {boolean} False when the query yields no grounding tokens or none
 *   of them occur in the entry's subject or content.
 */
function hasGroundedLexicalSupport(entry, queryText) {
  const groundingTokens = getGroundingTokens(queryText);
  if (groundingTokens.length === 0) {
    return false;
  }

  const entryTokens = new Set();
  for (const token of tokenize(`${entry.subject} ${entry.content}`)) {
    entryTokens.add(canonicalizeRecallToken(token));
  }

  for (const token of groundingTokens) {
    if (entryTokens.has(token)) {
      return true;
    }
  }
  return false;
}
|
|
2132
|
+
/**
 * Tests the query text against `WEAKLY_GROUNDED_REMINDER_PATTERN`.
 *
 * @param {string} queryText - Raw query text.
 * @returns {boolean} Whether the pattern matches.
 */
function isWeaklyGroundedReminderQuery(queryText) {
  const matchesReminderPattern = WEAKLY_GROUNDED_REMINDER_PATTERN.test(queryText);
  return matchesReminderPattern;
}
|
|
2135
|
+
/**
 * Decides whether an entry is acceptable evidence for an entity/attribute
 * query.
 *
 * Accepts when any of the following holds:
 * 1. identity queries: the normalized subject equals the entity or is the
 *    entity followed by a known identity wrapper;
 * 2. the normalized entity phrase appears in subject or content, and either
 *    the entity has two or more tokens or at least one attribute token matches;
 * 3. at least two entity tokens and one attribute token match.
 *
 * @param {{subject: string, content: string}} entry - Entry to inspect.
 * @param {object|undefined} queryShape - Parsed query shape; anything whose
 *   `kind` is not "entity_attribute" yields false.
 * @returns {boolean} Whether the entry carries enough evidence.
 */
function hasEntityAttributeEvidence(entry, queryShape) {
  if (queryShape?.kind !== "entity_attribute") {
    return false;
  }

  const subjectText = normalizeEntityAttributeText(entry.subject);
  const contentText = normalizeEntityAttributeText(entry.content);
  const tokenPool = new Set(tokenize(`${entry.subject} ${entry.content}`));
  const entityHits = countTokenMatches(queryShape.entityTokens, tokenPool);
  const attributeHits = countTokenMatches(queryShape.attributeTokens, tokenPool);
  const entityName = queryShape.normalizedEntity;

  const isIdentityMatch = queryShape.attributeKind === "identity"
    && (subjectText === entityName || isIdentityWrapperSubject(subjectText, entityName));
  if (isIdentityMatch) {
    return true;
  }

  const mentionsEntity = containsNormalizedPhrase(subjectText, entityName)
    || containsNormalizedPhrase(contentText, entityName);
  if (mentionsEntity && (queryShape.entityTokens.length >= 2 || attributeHits >= 1)) {
    return true;
  }

  return entityHits >= 2 && attributeHits >= 1;
}
|
|
2154
|
+
/**
 * Counts how many of the expected tokens are present in a token set.
 * Repeated expected tokens are counted each time they occur.
 *
 * @param {Iterable<string>} expectedTokens - Tokens to look for.
 * @param {{has: function(string): boolean}} availableTokens - Set-like lookup.
 * @returns {number} Number of expected tokens found.
 */
function countTokenMatches(expectedTokens, availableTokens) {
  return [...expectedTokens].reduce(
    (hits, token) => (availableTokens.has(token) ? hits + 1 : hits),
    0
  );
}
|
|
2163
|
+
/**
 * Checks whether a subject is exactly the entity followed by a single space
 * and one of the `ENTITY_ATTRIBUTE_IDENTITY_WRAPPERS`.
 *
 * @param {string} normalizedSubject - Normalized entry subject.
 * @param {string} normalizedEntity - Normalized entity name from the query.
 * @returns {boolean} Whether the subject is an identity wrapper of the entity.
 */
function isIdentityWrapperSubject(normalizedSubject, normalizedEntity) {
  for (const wrapper of Array.from(ENTITY_ATTRIBUTE_IDENTITY_WRAPPERS)) {
    if (normalizedSubject === `${normalizedEntity} ${wrapper}`) {
      return true;
    }
  }
  return false;
}
|
|
2166
|
+
/**
 * Substring test that treats an empty phrase as "not contained".
 *
 * @param {string} normalizedText - Text to search in.
 * @param {string} normalizedPhrase - Phrase to search for.
 * @returns {boolean} True when the phrase is non-empty and occurs in the text.
 */
function containsNormalizedPhrase(normalizedText, normalizedPhrase) {
  if (!(normalizedPhrase.length > 0)) {
    return false;
  }
  return normalizedText.includes(normalizedPhrase);
}
|
|
2169
|
+
/**
 * Normalizes text for entity/attribute comparisons: whitespace runs become a
 * single space, the ends are trimmed, then NFKC normalization and locale-aware
 * lower-casing are applied, in that order.
 *
 * @param {string} text - Raw text.
 * @returns {string} Normalized text.
 */
function normalizeEntityAttributeText(text) {
  const singleSpaced = text.replace(/\s+/gu, " ");
  const trimmed = singleSpaced.trim();
  const composed = trimmed.normalize("NFKC");
  return composed.toLocaleLowerCase();
}
|
|
1408
2172
|
function mergeCandidates(vectorCandidates, ftsCandidates) {
|
|
1409
2173
|
const merged = /* @__PURE__ */ new Map();
|
|
2174
|
+
const vectorRanks = [];
|
|
2175
|
+
const ftsRanks = [];
|
|
1410
2176
|
for (const candidate of vectorCandidates) {
|
|
2177
|
+
if (!merged.has(candidate.entry.id)) {
|
|
2178
|
+
vectorRanks.push(candidate.entry.id);
|
|
2179
|
+
}
|
|
1411
2180
|
merged.set(candidate.entry.id, {
|
|
1412
2181
|
entry: candidate.entry,
|
|
1413
2182
|
vectorSim: candidate.vectorSim
|
|
1414
2183
|
});
|
|
1415
2184
|
}
|
|
1416
2185
|
for (const candidate of ftsCandidates) {
|
|
2186
|
+
ftsRanks.push(candidate.entry.id);
|
|
1417
2187
|
const existing = merged.get(candidate.entry.id);
|
|
1418
2188
|
if (existing) {
|
|
1419
2189
|
existing.entry = existing.entry.embedding ? existing.entry : candidate.entry;
|
|
@@ -1423,7 +2193,11 @@ function mergeCandidates(vectorCandidates, ftsCandidates) {
|
|
|
1423
2193
|
entry: candidate.entry
|
|
1424
2194
|
});
|
|
1425
2195
|
}
|
|
1426
|
-
return
|
|
2196
|
+
return {
|
|
2197
|
+
merged,
|
|
2198
|
+
vectorRanks,
|
|
2199
|
+
ftsRanks
|
|
2200
|
+
};
|
|
1427
2201
|
}
|
|
1428
2202
|
function buildEntryFilters(types, tags, since, until) {
|
|
1429
2203
|
const filters = {};
|
|
@@ -1457,6 +2231,134 @@ function applyBudget(results, budget) {
|
|
|
1457
2231
|
}
|
|
1458
2232
|
return accepted;
|
|
1459
2233
|
}
|
|
2234
|
+
/**
 * Orders accepted candidates for output.
 *
 * For the "historical_state" and "entity_attribute" profiles the order is by
 * score (descending) with input position as tie-breaker. For other profiles,
 * pairs whose scores differ by more than `GROUNDING_SORT_MAX_SCORE_GAP`, or
 * where either side carries structural score shaping, are ordered by score;
 * remaining pairs are ordered by phrase matches, grounding coverage, lexical
 * score, overall score, vector score and finally input position.
 *
 * @param {Array} candidates - Scored candidates; the input array is not mutated.
 * @param {string} queryText - Raw query text used to derive grounding tokens.
 * @param {string} rankingProfile - Active ranking profile.
 * @returns {Array} New array of the same candidate objects in sorted order.
 */
function sortAcceptedCandidates(candidates, queryText, rankingProfile) {
  const unwrap = ({ candidate }) => candidate;

  if (rankingProfile === "historical_state" || rankingProfile === "entity_attribute") {
    const indexed = candidates.map((candidate, index) => ({ candidate, index }));
    indexed.sort((left, right) => right.candidate.score - left.candidate.score || left.index - right.index);
    return indexed.map(unwrap);
  }

  const groundingTokens = getGroundingTokens(queryText);
  const decorated = candidates.map((candidate, index) => ({
    candidate,
    index,
    grounding: computeGroundingSupport(candidate.entry, groundingTokens)
  }));

  decorated.sort((left, right) => {
    const first = left.candidate;
    const second = right.candidate;
    const scoreGap = Math.abs(first.score - second.score);
    const scoreDecides = scoreGap > GROUNDING_SORT_MAX_SCORE_GAP
      || hasStructuralScoreShaping(first)
      || hasStructuralScoreShaping(second);
    if (scoreDecides) {
      return first.score !== second.score ? second.score - first.score : left.index - right.index;
    }

    // Near-tied scores: prefer better grounding, then fall back through the score parts.
    const tieBreakers = [
      [right.grounding.phraseMatches, left.grounding.phraseMatches],
      [right.grounding.coverage, left.grounding.coverage],
      [second.scores.lexical, first.scores.lexical],
      [second.score, first.score],
      [second.scores.vector, first.scores.vector]
    ];
    for (const [rightValue, leftValue] of tieBreakers) {
      if (leftValue !== rightValue) {
        return rightValue - leftValue;
      }
    }
    return left.index - right.index;
  });

  return decorated.map(unwrap);
}
|
|
2269
|
+
/**
 * Reports whether any structural adjustment contributed to a candidate's
 * score: historical lineage, neighborhood boost, or either claim-key penalty.
 *
 * @param {object} candidate - Scored candidate with a `scores` breakdown.
 * @returns {boolean} True when at least one of those components is above zero.
 */
function hasStructuralScoreShaping(candidate) {
  const { scores } = candidate;
  const structuralParts = [
    scores.historicalLineage,
    scores.neighborhoodBoost,
    scores.claimKeyTrustPenalty,
    scores.claimKeyRedundancyPenalty
  ];
  return structuralParts.some((part) => part > 0);
}
|
|
2272
|
+
/**
 * Extracts the query tokens used for grounding checks.
 *
 * Tokens listed in `WEAK_QUERY_GROUNDING_TOKENS` are dropped (checked before
 * canonicalization); the rest are canonicalized and de-duplicated, keeping
 * first-occurrence order.
 *
 * @param {string} queryText - Raw query text.
 * @returns {string[]} Canonical grounding tokens.
 */
function getGroundingTokens(queryText) {
  // A Set keeps insertion order, so it de-duplicates and orders in one go.
  const canonicalTokens = new Set();
  for (const token of tokenize(queryText)) {
    if (!WEAK_QUERY_GROUNDING_TOKENS.has(token)) {
      canonicalTokens.add(canonicalizeRecallToken(token));
    }
  }
  return [...canonicalTokens];
}
|
|
2288
|
+
/**
 * Maps a token to a canonical form so that known synonyms and simple English
 * plural forms compare equal in grounding checks.
 *
 * Steps, in order:
 * 1. NFKC-normalize and lower-case the token.
 * 2. Apply the explicit aliases ("database(s)" -> "db", "resolv*" forms ->
 *    "resolve", "branches" -> "branch", "prefixes" -> "prefix").
 * 3. "-ies" (length > 4) -> "-y".
 * 4. Words ending in "ss" are kept as they are, so "class" is not cut to
 *    "clas" and matches the stem produced for "classes".
 * 5. "-es" (length > 4): the full "es" is removed only when the remaining stem
 *    ends in s, x, z, o, ch or sh (the endings that take an "-es" plural);
 *    otherwise just the "s" is removed, so "files" -> "file" rather than "fil".
 * 6. Any other trailing "s" (length > 3) is removed.
 *
 * @param {string} token - Raw token.
 * @returns {string} Canonical token.
 */
function canonicalizeRecallToken(token) {
  const normalized = token.normalize("NFKC").toLocaleLowerCase();

  if (normalized === "db" || normalized === "database" || normalized === "databases") {
    return "db";
  }
  if (normalized === "resolve" || normalized === "resolves" || normalized === "resolved" || normalized === "resolving" || normalized === "resolution") {
    return "resolve";
  }
  if (normalized === "branches") {
    return "branch";
  }
  if (normalized === "prefix" || normalized === "prefixes") {
    return "prefix";
  }

  if (normalized.endsWith("ies") && normalized.length > 4) {
    return `${normalized.slice(0, -3)}y`;
  }
  // "class", "access", ... are singular; stripping their final "s" would make
  // them differ from the stem of their own plural ("classes" -> "class").
  if (normalized.endsWith("ss")) {
    return normalized;
  }
  if (normalized.endsWith("es") && normalized.length > 4) {
    const stem = normalized.slice(0, -2);
    const takesEsPlural = ["s", "x", "z", "o", "ch", "sh"].some((ending) => stem.endsWith(ending));
    // For e-final words ("file", "name", "service") the plural only adds "s".
    return takesEsPlural ? stem : normalized.slice(0, -1);
  }
  if (normalized.endsWith("s") && normalized.length > 3) {
    return normalized.slice(0, -1);
  }
  return normalized;
}
|
|
2313
|
+
/**
 * Measures how well an entry is grounded in the query's grounding tokens.
 *
 * @param {{subject: string, content: string}} entry - Entry to inspect.
 * @param {string[]} groundingTokens - Canonical grounding tokens of the query.
 * @returns {{phraseMatches: number, coverage: number}} `phraseMatches` is the
 *   result of `countCanonicalPhraseMatches`; `coverage` is the fraction of
 *   grounding tokens present in subject or content. Both are 0 when there are
 *   no grounding tokens.
 */
function computeGroundingSupport(entry, groundingTokens) {
  if (groundingTokens.length === 0) {
    return { phraseMatches: 0, coverage: 0 };
  }

  const canonicalTokensOf = (text) => tokenize(text).map((token) => canonicalizeRecallToken(token));
  const subjectTokens = canonicalTokensOf(entry.subject);
  const contentTokens = canonicalTokensOf(entry.content);
  const vocabulary = new Set(subjectTokens.concat(contentTokens));

  let matchedCount = 0;
  for (const token of groundingTokens) {
    if (vocabulary.has(token)) {
      matchedCount += 1;
    }
  }

  const phraseMatches = countCanonicalPhraseMatches(groundingTokens, subjectTokens, contentTokens);
  return {
    phraseMatches,
    coverage: matchedCount / groundingTokens.length
  };
}
|
|
2329
|
+
/**
 * Counts the distinct contiguous query phrases (two or more tokens) that occur
 * as a consecutive run in either the subject tokens or the content tokens.
 *
 * @param {string[]} queryTokens - Canonical query tokens, in query order.
 * @param {string[]} subjectTokens - Canonical tokens of the entry subject.
 * @param {string[]} contentTokens - Canonical tokens of the entry content.
 * @returns {number} Number of distinct matching phrases; 0 for queries with
 *   fewer than two tokens.
 */
function countCanonicalPhraseMatches(queryTokens, subjectTokens, contentTokens) {
  const tokenCount = queryTokens.length;
  if (tokenCount < 2) {
    return 0;
  }

  const matchedPhrases = new Set();
  for (let size = 2; size <= tokenCount; size += 1) {
    const lastStart = tokenCount - size;
    for (let start = 0; start <= lastStart; start += 1) {
      const phraseTokens = queryTokens.slice(start, start + size);
      const foundInSubject = hasCanonicalConsecutivePhrase(subjectTokens, phraseTokens);
      if (foundInSubject || hasCanonicalConsecutivePhrase(contentTokens, phraseTokens)) {
        matchedPhrases.add(phraseTokens.join(" "));
      }
    }
  }
  return matchedPhrases.size;
}
|
|
2344
|
+
/**
 * Checks whether `needle` occurs as a consecutive run inside `haystack`.
 *
 * @param {string[]} haystack - Token sequence to search in.
 * @param {string[]} needle - Token sequence to look for.
 * @returns {boolean} False for an empty needle or a needle longer than the
 *   haystack; otherwise whether a matching run exists.
 */
function hasCanonicalConsecutivePhrase(haystack, needle) {
  const span = needle.length;
  if (span === 0 || haystack.length < span) {
    return false;
  }

  const lastStart = haystack.length - span;
  for (let start = 0; start <= lastStart; start += 1) {
    // Advance while tokens agree; reaching `span` means the whole needle matched.
    let offset = 0;
    while (offset < span && haystack[start + offset] === needle[offset]) {
      offset += 1;
    }
    if (offset === span) {
      return true;
    }
  }
  return false;
}
|
|
1460
2362
|
/**
 * Rough token estimate for an entry: combined subject and content length
 * divided by four.
 *
 * @param {{subject: string, content: string}} entry - Entry to size.
 * @returns {number} Estimated token count (may be fractional).
 */
function estimateTokens(entry) {
  const { subject, content } = entry;
  const characterCount = subject.length + content.length;
  return characterCount / 4;
}
|
|
@@ -1487,7 +2389,7 @@ function normalizeAroundRadius(value) {
|
|
|
1487
2389
|
}
|
|
1488
2390
|
return value;
|
|
1489
2391
|
}
|
|
1490
|
-
function
|
|
2392
|
+
/**
 * Milliseconds elapsed since `startedAt`, never negative.
 *
 * @param {number} startedAt - Start timestamp as returned by `Date.now()`.
 * @returns {number} Elapsed milliseconds, floored at 0.
 */
function elapsedMs2(startedAt) {
  const elapsed = Date.now() - startedAt;
  return Math.max(0, elapsed);
}
|
|
1493
2395
|
|
|
@@ -1495,7 +2397,6 @@ export {
|
|
|
1495
2397
|
recencyScore,
|
|
1496
2398
|
gaussianRecency,
|
|
1497
2399
|
importanceScore,
|
|
1498
|
-
combinedRelevance,
|
|
1499
2400
|
scoreCandidate,
|
|
1500
2401
|
cosineSimilarity,
|
|
1501
2402
|
normalizeClaimKeySegment,
|
|
@@ -1513,5 +2414,23 @@ export {
|
|
|
1513
2414
|
inferAroundDate,
|
|
1514
2415
|
parseRelativeDate,
|
|
1515
2416
|
resolveClaimSlotPolicy,
|
|
2417
|
+
DEFAULT_CROSS_ENCODER_TOP_K,
|
|
2418
|
+
DEFAULT_CROSS_ENCODER_ALPHA,
|
|
2419
|
+
applyCrossEncoderRerank,
|
|
2420
|
+
DEFAULT_RRF_RANK_CONSTANT,
|
|
2421
|
+
rrfFuse,
|
|
2422
|
+
rrfFuseVectorLexical,
|
|
2423
|
+
DEFAULT_MMR_LAMBDA,
|
|
2424
|
+
NEAR_DUPLICATE_SIMILARITY,
|
|
2425
|
+
maximalMarginalRelevance,
|
|
2426
|
+
DEFAULT_NEIGHBORHOOD_BUDGET,
|
|
2427
|
+
DEFAULT_STRONG_SEED_TOP_N,
|
|
2428
|
+
DEFAULT_STRONG_SEED_SCORE_GAP,
|
|
2429
|
+
DEFAULT_SEEDED_RERANK_WEIGHT,
|
|
2430
|
+
selectStrongSeeds,
|
|
2431
|
+
seededRerank,
|
|
2432
|
+
sharesEntryLineage,
|
|
2433
|
+
sharesEpisodeLineage,
|
|
2434
|
+
sharesProcedureLineage,
|
|
1516
2435
|
recall
|
|
1517
2436
|
};
|