@khanglvm/outline-cli 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.test.example +2 -0
- package/AGENTS.md +107 -0
- package/CHANGELOG.md +102 -0
- package/README.md +244 -0
- package/bin/outline-agent.js +5 -0
- package/bin/outline-cli.js +13 -0
- package/package.json +25 -0
- package/scripts/generate-entry-integrity.mjs +123 -0
- package/scripts/release.mjs +353 -0
- package/src/action-gate.js +257 -0
- package/src/agent-skills.js +759 -0
- package/src/cli.js +956 -0
- package/src/config-store.js +720 -0
- package/src/entry-integrity-binding.generated.js +6 -0
- package/src/entry-integrity-manifest.generated.js +74 -0
- package/src/entry-integrity.js +112 -0
- package/src/errors.js +15 -0
- package/src/outline-client.js +237 -0
- package/src/result-store.js +183 -0
- package/src/secure-keyring.js +290 -0
- package/src/tool-arg-schemas.js +2346 -0
- package/src/tools.extended.js +3252 -0
- package/src/tools.js +1056 -0
- package/src/tools.mutation.js +1807 -0
- package/src/tools.navigation.js +2273 -0
- package/src/tools.platform.js +554 -0
- package/src/utils.js +176 -0
- package/test/action-gate.unit.test.js +157 -0
- package/test/agent-skills.unit.test.js +52 -0
- package/test/config-store.unit.test.js +89 -0
- package/test/hardening.unit.test.js +3778 -0
- package/test/live.integration.test.js +5140 -0
- package/test/profile-selection.unit.test.js +279 -0
- package/test/security.unit.test.js +113 -0
|
@@ -0,0 +1,2273 @@
|
|
|
1
|
+
import { CliError } from "./errors.js";
|
|
2
|
+
import { compactValue, ensureStringArray, mapLimit, toInteger } from "./utils.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Shape a raw document row for output at the requested verbosity.
 *
 * @param {object|null|undefined} row - Document row; falsy input yields `null`.
 * @param {string} [view="summary"] - "full" returns `row` untouched, "ids"
 *   returns only `id` and `title`, anything else returns the summary shape.
 * @param {number} [excerptChars=220] - Maximum characters of `row.text` kept in
 *   `excerpt` before "..." is appended.
 * @returns {object|null} The shaped row, or `null` when `row` is falsy.
 */
function normalizeDocumentRow(row, view = "summary", excerptChars = 220) {
  if (!row) {
    return null;
  }

  switch (view) {
    case "full":
      return row;
    case "ids": {
      const { id, title } = row;
      return { id, title };
    }
    default:
      break;
  }

  const {
    id,
    title,
    collectionId,
    parentDocumentId,
    updatedAt,
    publishedAt,
    urlId,
    emoji,
    text,
  } = row;
  const shaped = {
    id,
    title,
    collectionId,
    parentDocumentId,
    updatedAt,
    publishedAt,
    urlId,
    emoji,
  };

  // The excerpt key is only present when the row actually carries text.
  if (text) {
    shaped.excerpt = text.length > excerptChars ? `${text.slice(0, excerptChars)}...` : text;
  }

  return shaped;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Shape a search hit for output at the requested verbosity.
 *
 * A hit may wrap its document under `hit.document` or be the document itself;
 * `ranking` and `context` are always read from the hit, not the document.
 *
 * @param {object|null|undefined} hit - Search hit.
 * @param {string} [view="summary"] - "full" returns `hit` untouched, "ids"
 *   returns `id`, `title` and `ranking`, anything else the summary shape.
 * @param {number} [contextChars=220] - Maximum characters of `hit.context` kept
 *   before "..." is appended.
 * @returns {object|null} The shaped hit, or `null` when there is no document.
 */
function normalizeSearchHit(hit, view = "summary", contextChars = 220) {
  const doc = hit?.document || hit;
  if (!doc) {
    return null;
  }
  if (view === "full") {
    return hit;
  }

  // Non-numeric rankings are reported as undefined rather than NaN.
  const numericRanking = Number(hit.ranking);
  const ranking = Number.isFinite(numericRanking) ? numericRanking : undefined;

  if (view === "ids") {
    return { id: doc.id, title: doc.title, ranking };
  }

  // Only non-empty string contexts are surfaced, truncated to contextChars.
  let context;
  if (typeof hit.context === "string" && hit.context) {
    context =
      hit.context.length > contextChars
        ? `${hit.context.slice(0, contextChars)}...`
        : hit.context;
  }

  return {
    id: doc.id,
    title: doc.title,
    collectionId: doc.collectionId,
    parentDocumentId: doc.parentDocumentId,
    updatedAt: doc.updatedAt,
    publishedAt: doc.publishedAt,
    urlId: doc.urlId,
    ranking,
    context,
  };
}
|
|
71
|
+
|
|
72
|
+
/**
 * Map a raw ranking value onto the 0..1 range.
 *
 * Non-finite and negative inputs become 0, values up to 1 pass through, and
 * larger values are divided by 10 and capped at 1.
 *
 * @param {*} ranking - Raw ranking; coerced with `Number()`.
 * @returns {number} Ranking in the range 0..1.
 */
function normalizeRanking(ranking) {
  const numeric = Number(ranking);
  if (!Number.isFinite(numeric) || numeric < 0) {
    return 0;
  }
  return numeric <= 1 ? numeric : Math.min(1, numeric / 10);
}
|
|
85
|
+
|
|
86
|
+
/**
 * Split text into lower-cased word tokens.
 *
 * Tokens are maximal runs of Unicode letters, combining marks and numbers, so
 * accented and non-Latin words stay intact instead of being cut at every
 * non-ASCII character. Everything else (whitespace, punctuation, underscores)
 * acts as a separator.
 *
 * @param {*} text - Input; falsy values are treated as the empty string.
 * @returns {string[]} Tokens in order of appearance (possibly empty).
 */
function tokenize(text) {
  return String(text || "")
    .toLowerCase()
    // \p{M} keeps decomposed diacritics attached to their base letter.
    .split(/[^\p{L}\p{M}\p{N}]+/u)
    .filter(Boolean);
}
|
|
92
|
+
|
|
93
|
+
/**
 * Trim every value and return the distinct non-empty strings in first-seen order.
 *
 * @param {Iterable<*>} [values=[]] - Values to normalise; falsy entries are dropped.
 * @returns {string[]} Unique trimmed strings.
 */
function uniqueStrings(values = []) {
  const trimmed = [...values].map((value) => String(value || "").trim());
  // A Set keeps insertion order, so the first occurrence of each string wins.
  return [...new Set(trimmed.filter(Boolean))];
}
|
|
106
|
+
|
|
107
|
+
// Common English function words that tokenFrequency() skips when counting
// candidate follow-up terms.
const QUERY_STOP_WORDS = new Set([
  "a", "an", "and", "are", "as", "at",
  "be", "by",
  "for", "from",
  "how",
  "i", "in", "is", "it",
  "of", "on", "or", "our",
  "that", "the", "their", "this", "to",
  "we", "what", "when", "where", "which", "who", "why", "with",
  "you",
]);
|
|
142
|
+
|
|
143
|
+
/**
 * Convert a search ranking into a 0..1 signal.
 *
 * Non-finite or negative input yields 0; values up to 1 are returned as-is;
 * anything larger is divided by 10 and capped at 1.
 *
 * @param {*} value - Raw ranking; coerced with `Number()`.
 * @returns {number} Ranking signal in the range 0..1.
 */
function normalizeSearchRanking(value) {
  const ranking = Number(value);
  const usable = Number.isFinite(ranking) && ranking >= 0;
  if (!usable) {
    return 0;
  }
  if (ranking > 1) {
    return Math.min(1, ranking / 10);
  }
  return ranking;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Resolve a user-supplied precision mode to one of the supported names.
 *
 * @param {*} [mode="balanced"] - Requested mode; matched case-insensitively
 *   after trimming.
 * @returns {"balanced"|"precision"|"recall"} The recognised mode, or
 *   "balanced" for anything else.
 */
function normalizeResearchPrecisionMode(mode = "balanced") {
  const candidate = String(mode || "balanced").trim().toLowerCase();
  switch (candidate) {
    case "precision":
    case "recall":
      return candidate;
    default:
      return "balanced";
  }
}
|
|
164
|
+
|
|
165
|
+
/**
 * Return the tuning preset for a research precision mode.
 *
 * Each call builds a fresh object containing:
 * - `sourceWeights`: per-source multipliers used for the RRF contribution,
 * - `scoreWeights`: weights of the signals blended into the final score,
 * - `mmrLambda`: default relevance/diversity trade-off for diversification,
 * - `minScore`: default score threshold below which merged rows are dropped.
 *
 * @param {*} [mode="balanced"] - Requested mode; unrecognised values fall back
 *   to "balanced".
 * @returns {{precisionMode: string, sourceWeights: object, scoreWeights: object,
 *   mmrLambda: number, minScore: number}} The preset for the resolved mode.
 */
function getResearchModeConfig(mode = "balanced") {
  const precisionMode = normalizeResearchPrecisionMode(mode);

  switch (precisionMode) {
    case "precision":
      return {
        precisionMode,
        sourceWeights: { titles: 1.4, semantic: 0.9 },
        scoreWeights: {
          confidence: 0.48,
          rrf: 0.2,
          queryCoverage: 0.17,
          sourceCoverage: 0.1,
          recency: 0.05,
        },
        mmrLambda: 0.82,
        minScore: 0.42,
      };
    case "recall":
      return {
        precisionMode,
        sourceWeights: { titles: 1.05, semantic: 1.25 },
        scoreWeights: {
          confidence: 0.34,
          rrf: 0.34,
          queryCoverage: 0.16,
          sourceCoverage: 0.06,
          recency: 0.1,
        },
        mmrLambda: 0.66,
        minScore: 0.2,
      };
    default:
      return {
        precisionMode: "balanced",
        sourceWeights: { titles: 1.25, semantic: 1 },
        scoreWeights: {
          confidence: 0.43,
          rrf: 0.27,
          queryCoverage: 0.14,
          sourceCoverage: 0.08,
          recency: 0.08,
        },
        mmrLambda: 0.74,
        minScore: 0,
      };
  }
}
|
|
213
|
+
|
|
214
|
+
/**
 * Restrict `value` to the inclusive range [min, max].
 *
 * @param {number} value - Number to clamp.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound; applied last, so it wins if min > max.
 * @returns {number} The clamped value (NaN stays NaN).
 */
function clamp(value, min, max) {
  const raisedToMin = Math.max(min, value);
  return Math.min(max, raisedToMin);
}
|
|
217
|
+
|
|
218
|
+
/**
 * Score how recently a document was updated, as a stepped 0..1 signal.
 *
 * @param {*} updatedAt - Timestamp in any form `Date.parse` accepts.
 * @returns {number} 0 when the timestamp is missing, unparsable or parses to
 *   0; otherwise 1 / 0.9 / 0.75 / 0.55 / 0.35 for ages up to
 *   1 / 7 / 30 / 90 / 365 days, and 0.15 beyond that. Future timestamps count
 *   as age 0.
 */
function recencySignal(updatedAt) {
  const parsed = Date.parse(updatedAt || "");
  if (!Number.isFinite(parsed) || parsed === 0) {
    return 0;
  }

  const msPerDay = 24 * 3600 * 1000;
  const ageDays = Math.max(0, Date.now() - parsed) / msPerDay;

  // [maximum age in days, signal], checked from freshest to oldest.
  const tiers = [
    [1, 1],
    [7, 0.9],
    [30, 0.75],
    [90, 0.55],
    [365, 0.35],
  ];
  for (const [maxAgeDays, signal] of tiers) {
    if (ageDays <= maxAgeDays) {
      return signal;
    }
  }
  return 0.15;
}
|
|
242
|
+
|
|
243
|
+
/**
 * Jaccard similarity between the distinct token sets of two texts.
 *
 * @param {*} a - First text, tokenized with `tokenize`.
 * @param {*} b - Second text, tokenized with `tokenize`.
 * @returns {number} |A ∩ B| / |A ∪ B|, or 0 when neither text has tokens.
 */
function tokenJaccardSimilarity(a, b) {
  const left = new Set(tokenize(a));
  const right = new Set(tokenize(b));
  if (left.size === 0 && right.size === 0) {
    return 0;
  }

  const shared = [...left].filter((token) => right.has(token)).length;
  const unionSize = left.size + right.size - shared;
  return unionSize > 0 ? shared / unionSize : 0;
}
|
|
262
|
+
|
|
263
|
+
/**
 * Greedily pick up to `limit` rows, trading relevance against redundancy
 * (maximal-marginal-relevance style selection).
 *
 * Each round selects the remaining row with the highest
 * `lambda * score - (1 - lambda) * maxSimilarity`, where `maxSimilarity` is
 * the largest token-Jaccard similarity between the row's "title + queries"
 * text and that of any row already picked. Ties are broken by higher `score`,
 * then by title (`localeCompare`), then by original position.
 *
 * The similarity of every remaining row to the picked set is maintained
 * incrementally: after each pick only the similarity to the newest row is
 * computed, instead of re-comparing against every picked row in every round.
 * The selection order is the same as with full recomputation.
 *
 * @param {object[]} rows - Ranked rows; `score`, `title` and `queries` are read.
 * @param {number} limit - Maximum number of rows to return.
 * @param {number} [lambda=0.74] - Relevance weight, clamped to 0..1.
 * @returns {object[]} Selected rows in pick order (a new array).
 */
function diversifyRankedRows(rows, limit, lambda = 0.74) {
  const maxItems = Math.max(0, Math.min(limit, rows.length));
  if (maxItems <= 1) {
    return rows.slice(0, maxItems);
  }

  const safeLambda = clamp(Number(lambda), 0, 1);

  // Cache the per-row values that never change between rounds.
  const pool = rows.map((row) => ({
    row,
    relevance: Number(row.score || 0),
    title: String(row.title || ""),
    text: `${row.title || ""} ${row.queries?.join(" ") || ""}`,
    maxSimilarity: 0,
  }));
  const selected = [];

  while (selected.length < maxItems && pool.length > 0) {
    let bestIndex = 0;
    let bestScore = -Infinity;

    for (let i = 0; i < pool.length; i += 1) {
      const candidate = pool[i];
      const mmrScore =
        safeLambda * candidate.relevance - (1 - safeLambda) * candidate.maxSimilarity;

      if (mmrScore > bestScore) {
        bestScore = mmrScore;
        bestIndex = i;
        continue;
      }
      if (mmrScore === bestScore) {
        const best = pool[bestIndex];
        if (candidate.relevance > best.relevance) {
          bestIndex = i;
          continue;
        }
        if (
          candidate.relevance === best.relevance &&
          candidate.title.localeCompare(best.title) < 0
        ) {
          bestIndex = i;
        }
      }
    }

    const [picked] = pool.splice(bestIndex, 1);
    selected.push(picked.row);

    // Fold the newly picked row into each remaining row's running maximum;
    // skipped once the quota is met because no further round will read it.
    if (selected.length < maxItems) {
      for (const entry of pool) {
        const similarity = tokenJaccardSimilarity(entry.text, picked.text);
        if (similarity > entry.maxSimilarity) {
          entry.maxSimilarity = similarity;
        }
      }
    }
  }

  return selected;
}
|
|
316
|
+
|
|
317
|
+
/**
 * Collect the distinct, trimmed queries to run for a research request.
 *
 * Order is `args.queries[]` first, then `args.query`, then `args.question`;
 * blank values and repeats are dropped.
 *
 * @param {object} args - Tool arguments (`queries`, `query`, `question`).
 * @returns {string[]} Non-empty list of unique queries.
 * @throws {CliError} When no usable query text was supplied.
 */
function buildResearchQueries(args) {
  const listed = ensureStringArray(args.queries, "queries") || [];
  const candidates = [...listed, args.query, args.question];

  // A Set preserves first-seen order while discarding duplicates.
  const unique = new Set();
  for (const candidate of candidates) {
    const text = String(candidate || "").trim();
    if (text) {
      unique.add(text);
    }
  }

  if (unique.size === 0) {
    throw new CliError("search.research requires args.question, args.query, or args.queries[]");
  }

  return [...unique];
}
|
|
342
|
+
|
|
343
|
+
/**
 * Rank the tokens found in `values` by how often they occur.
 *
 * Tokens shorter than four characters, stop words and tokens already present
 * in `existingTerms` are ignored.
 *
 * @param {Iterable<*>} values - Texts to tokenize.
 * @param {Set<string>} [existingTerms=new Set()] - Tokens to exclude.
 * @returns {string[]} Tokens ordered by descending count, ties broken with
 *   `localeCompare`.
 */
function tokenFrequency(values, existingTerms = new Set()) {
  const isCandidate = (token) =>
    token.length >= 4 && !QUERY_STOP_WORDS.has(token) && !existingTerms.has(token);

  const counts = new Map();
  for (const value of values) {
    for (const token of tokenize(value).filter(isCandidate)) {
      counts.set(token, (counts.get(token) || 0) + 1);
    }
  }

  return [...counts.entries()]
    .sort(([tokenA, countA], [tokenB, countB]) =>
      countB !== countA ? countB - countA : tokenA.localeCompare(tokenB)
    )
    .map(([token]) => token);
}
|
|
362
|
+
|
|
363
|
+
/**
 * Suggest follow-up search terms drawn from the top merged results.
 *
 * Titles and evidence contexts of the first 20 merged rows are mined for
 * frequent tokens that do not already occur in any of the executed queries.
 *
 * @param {object[]} merged - Merged rows (`title`, optional `evidence[].context`).
 * @param {Iterable<string>} queries - Queries that were already run.
 * @param {number} [limit=6] - Maximum number of suggestions.
 * @returns {string[]} Up to `limit` suggested tokens, most frequent first.
 */
function buildFollowUpQueries(merged, queries, limit = 6) {
  const existingTerms = new Set([...queries].flatMap((query) => tokenize(query)));

  const texts = [];
  for (const row of merged.slice(0, 20)) {
    texts.push(row.title, ...(row.evidence || []).map((item) => item.context));
  }

  return tokenFrequency(texts, existingTerms).slice(0, limit);
}
|
|
379
|
+
|
|
380
|
+
/**
 * Shape a merged research row for output at the requested verbosity.
 *
 * @param {object} row - Merged row.
 * @param {string} view - "full" returns `row` untouched, "ids" returns `id`,
 *   `title`, `score` and `queryMatches`; anything else the summary shape.
 * @param {number} excerptChars - Maximum characters of `row.text` kept in
 *   `excerpt` before "..." is appended.
 * @param {number} [evidencePerDocument=5] - Maximum evidence entries included.
 * @returns {object} The shaped row.
 */
function shapeResearchMergedRow(row, view, excerptChars, evidencePerDocument = 5) {
  if (view === "full") {
    return row;
  }

  const { id, title, score, queryMatches } = row;
  if (view === "ids") {
    return { id, title, score, queryMatches };
  }

  const { text, evidence } = row;
  const hasEvidence = Array.isArray(evidence);
  const shaped = {
    id,
    title,
    score,
    queryMatches,
    sources: row.sources,
    ranking: row.ranking,
    collectionId: row.collectionId,
    parentDocumentId: row.parentDocumentId,
    updatedAt: row.updatedAt,
    publishedAt: row.publishedAt,
    urlId: row.urlId,
    // Counts all evidence, even when the list below is truncated.
    evidenceCount: hasEvidence ? evidence.length : 0,
  };

  if (text) {
    shaped.excerpt = text.length > excerptChars ? `${text.slice(0, excerptChars)}...` : text;
  }
  if (hasEvidence) {
    shaped.evidence = evidence.slice(0, evidencePerDocument);
  }

  return shaped;
}
|
|
419
|
+
|
|
420
|
+
/**
 * Convert a title-search row into the common research-hit shape.
 *
 * @param {string} query - Query that produced the row.
 * @param {object|null|undefined} row - Row from the title search; must have an `id`.
 * @param {number} contextChars - Not used for title hits (they carry no context).
 * @param {number} sourceRank - 1-based position of the row in its result list.
 * @returns {object|null} The research hit, or `null` when the row has no `id`.
 */
function normalizeResearchTitleHit(query, row, contextChars, sourceRank) {
  if (!row?.id) {
    return null;
  }

  const source = "titles";
  const {
    id,
    title,
    collectionId,
    parentDocumentId,
    updatedAt,
    publishedAt,
    urlId,
    text,
  } = row;
  const scoreContribution = computeConfidence({ query, title, source });

  return {
    id,
    title,
    collectionId,
    parentDocumentId,
    updatedAt,
    publishedAt,
    urlId,
    text,
    // Title hits carry neither a ranking nor a context snippet.
    ranking: undefined,
    scoreContribution,
    source,
    query,
    sourceRank,
    context: undefined,
  };
}
|
|
448
|
+
|
|
449
|
+
/**
 * Convert a `documents.search` row into the common research-hit shape.
 *
 * @param {string} query - Query that produced the row.
 * @param {object|null|undefined} row - Search row; the document is read from
 *   `row.document`, while `ranking` and `context` come from the row itself.
 * @param {number} contextChars - Maximum characters of `row.context` kept
 *   before "..." is appended.
 * @param {number} sourceRank - 1-based position of the row in its result list.
 * @returns {object|null} The research hit, or `null` when the row has no
 *   document `id`.
 */
function normalizeResearchSemanticHit(query, row, contextChars, sourceRank) {
  const doc = row?.document;
  if (!doc?.id) {
    return null;
  }

  const source = "semantic";
  const scoreContribution = computeConfidence({
    query,
    title: doc.title,
    source,
    ranking: row.ranking,
  });

  // Non-numeric rankings are reported as undefined rather than NaN.
  const numericRanking = Number(row.ranking);

  // Only non-empty string contexts are surfaced, truncated to contextChars.
  let context;
  if (typeof row.context === "string" && row.context) {
    context =
      row.context.length > contextChars
        ? `${row.context.slice(0, contextChars)}...`
        : row.context;
  }

  return {
    id: doc.id,
    title: doc.title,
    collectionId: doc.collectionId,
    parentDocumentId: doc.parentDocumentId,
    updatedAt: doc.updatedAt,
    publishedAt: doc.publishedAt,
    urlId: doc.urlId,
    text: doc.text,
    ranking: Number.isFinite(numericRanking) ? numericRanking : undefined,
    scoreContribution,
    source,
    query,
    sourceRank,
    context,
  };
}
|
|
480
|
+
|
|
481
|
+
/**
 * Merge per-query research hits into one scored, sorted row per document.
 *
 * Hits whose id is listed in `seenIds` are skipped and counted. Remaining hits
 * are grouped by `id`; each group accumulates its sources, distinct queries,
 * evidence entries, confidence statistics and a weighted reciprocal-rank
 * (RRF) sum. The final score is a weighted blend of confidence, normalised
 * RRF, query coverage, source coverage and recency, using the weights of the
 * selected precision mode. Rows scoring below `minScore` are dropped.
 *
 * @param {Iterable<object>} rawHits - Normalised hits (`id`, `source`, `query`,
 *   `sourceRank`, `scoreContribution`, ...); entries without an `id` are ignored.
 * @param {Array<string|number>} [seenIds=[]] - Ids to exclude; compared as strings.
 * @param {object} [options={}] - Merge options.
 * @param {string} [options.precisionMode] - "balanced", "precision" or "recall".
 * @param {number} [options.minScore] - Score threshold (clamped to 0..1);
 *   defaults to the mode's `minScore` when not a finite number.
 * @param {number} [options.totalQueries] - Number of executed queries (min 1).
 * @param {number} [options.enabledSourceCount] - Number of enabled sources (min 1).
 * @param {number} [options.rrfK] - RRF constant (min 1, default 60).
 * @returns {{merged: object[], skippedSeen: number, precisionMode: string,
 *   minScore: number, rrfK: number}} Sorted merged rows plus the settings applied.
 */
function mergeResearchHits(rawHits, seenIds = [], options = {}) {
  const modeConfig = getResearchModeConfig(options.precisionMode || "balanced");
  const totalQueries = Math.max(1, Number(options.totalQueries || 1));
  const enabledSourceCount = Math.max(1, Number(options.enabledSourceCount || 2));
  const rrfK = Math.max(1, toInteger(options.rrfK, 60));
  const minScore = Number.isFinite(Number(options.minScore))
    ? clamp(Number(options.minScore), 0, 1)
    : modeConfig.minScore;

  const seenSet = new Set((seenIds || []).map((id) => String(id)));
  const mergedMap = new Map();
  let skippedSeen = 0;

  for (const hit of rawHits) {
    if (!hit?.id) {
      continue;
    }
    // seenSet holds stringified ids, so the probe must be stringified too;
    // otherwise a non-string id would never be recognised as already seen.
    if (seenSet.has(String(hit.id))) {
      skippedSeen += 1;
      continue;
    }

    const existing = mergedMap.get(hit.id);
    const source = String(hit.source || "semantic");
    const sourceWeight = modeConfig.sourceWeights[source] || 1;
    const sourceRank = Number.isFinite(Number(hit.sourceRank)) ? Math.max(1, Number(hit.sourceRank)) : 1;
    // Weighted reciprocal-rank contribution of this hit.
    const rrfContribution = sourceWeight / (rrfK + sourceRank);
    const contribution = clamp(Number(hit.scoreContribution || 0), 0, 1);
    const evidenceRow = {
      query: hit.query,
      source: hit.source,
      sourceRank,
      ranking: hit.ranking,
      scoreContribution: hit.scoreContribution,
      context: hit.context,
    };

    if (!existing) {
      mergedMap.set(hit.id, {
        id: hit.id,
        title: hit.title,
        collectionId: hit.collectionId,
        parentDocumentId: hit.parentDocumentId,
        updatedAt: hit.updatedAt,
        publishedAt: hit.publishedAt,
        urlId: hit.urlId,
        text: hit.text,
        ranking: hit.ranking,
        score: contribution,
        confidenceMax: contribution,
        confidenceSum: contribution,
        confidenceCount: 1,
        rrf: rrfContribution,
        queryMatches: 1,
        sources: [hit.source],
        queries: [hit.query],
        evidence: [evidenceRow],
      });
      continue;
    }

    if (!existing.sources.includes(hit.source)) {
      existing.sources.push(hit.source);
    }
    if (!existing.queries.includes(hit.query)) {
      existing.queries.push(hit.query);
      existing.queryMatches += 1;
    }
    existing.score = Math.max(existing.score, contribution);
    existing.confidenceMax = Math.max(existing.confidenceMax || 0, contribution);
    existing.confidenceSum = Number(existing.confidenceSum || 0) + contribution;
    existing.confidenceCount = Number(existing.confidenceCount || 0) + 1;
    existing.rrf = Number(existing.rrf || 0) + rrfContribution;
    existing.evidence.push(evidenceRow);
    // Fill in fields the first hit for this document did not provide.
    if (existing.ranking === undefined && hit.ranking !== undefined) {
      existing.ranking = hit.ranking;
    }
    if (!existing.text && hit.text) {
      existing.text = hit.text;
    }
    if (!existing.updatedAt && hit.updatedAt) {
      existing.updatedAt = hit.updatedAt;
    }
  }

  const mergedBase = Array.from(mergedMap.values());
  // Normalise RRF against the best row; fall back to 1 to avoid dividing by 0.
  const maxRrf = mergedBase.reduce((max, row) => Math.max(max, Number(row.rrf || 0)), 0) || 1;
  const scoreWeights = modeConfig.scoreWeights;

  const merged = mergedBase
    .map((row) => {
      const rankingSignal = normalizeSearchRanking(row.ranking);
      const confidenceSignal = clamp(
        Math.max(
          Number(row.confidenceMax || 0),
          row.confidenceCount > 0 ? Number(row.confidenceSum || 0) / row.confidenceCount : 0
        ),
        0,
        1
      );
      const rrfSignal = clamp(Number(row.rrf || 0) / maxRrf, 0, 1);
      const queryCoverage = clamp(Number(row.queryMatches || 0) / totalQueries, 0, 1);
      const sourceCoverage = clamp(Number(row.sources?.length || 0) / enabledSourceCount, 0, 1);
      const recency = recencySignal(row.updatedAt);

      const finalScore = clamp(
        scoreWeights.confidence * Math.max(confidenceSignal, rankingSignal * 0.55) +
          scoreWeights.rrf * rrfSignal +
          scoreWeights.queryCoverage * queryCoverage +
          scoreWeights.sourceCoverage * sourceCoverage +
          scoreWeights.recency * recency,
        0,
        1
      );

      return {
        ...row,
        score: Number(finalScore.toFixed(4)),
      };
    })
    .filter((row) => row.score >= minScore);

  // Order: score, query matches, raw ranking, recency (all descending), then title.
  merged.sort((a, b) => {
    if (b.score !== a.score) {
      return b.score - a.score;
    }
    if (b.queryMatches !== a.queryMatches) {
      return b.queryMatches - a.queryMatches;
    }
    const ar = Number(a.ranking || 0);
    const br = Number(b.ranking || 0);
    if (br !== ar) {
      return br - ar;
    }
    const aTs = Number.isFinite(Date.parse(a.updatedAt || "")) ? Date.parse(a.updatedAt) : 0;
    const bTs = Number.isFinite(Date.parse(b.updatedAt || "")) ? Date.parse(b.updatedAt) : 0;
    if (bTs !== aTs) {
      return bTs - aTs;
    }
    return String(a.title || "").localeCompare(String(b.title || ""));
  });

  return {
    merged,
    skippedSeen,
    precisionMode: modeConfig.precisionMode,
    minScore,
    rrfK,
  };
}
|
|
631
|
+
|
|
632
|
+
/**
 * Run the enabled searches for one query and normalise their hits.
 *
 * The title search (`documents.search_titles`) and the semantic search
 * (`documents.search`) are both started before either is awaited; a failure
 * of either rejects the whole call.
 *
 * @param {object} ctx - Tool context; `ctx.client.call` performs the API request.
 * @param {string} query - Query text.
 * @param {object} args - Tool arguments (`includeTitleSearch`,
 *   `includeSemanticSearch`, `limitPerQuery`, `offset`, `collectionId`,
 *   `snippetMinWords`, `snippetMaxWords`).
 * @param {number} maxAttempts - Passed to `ctx.client.call` as `maxAttempts`.
 * @param {number} contextChars - Maximum context length for normalised hits.
 * @returns {Promise<{query: string, result: object, raw: object}>} Hit counts
 *   and normalised hits under `result`, untouched API rows under `raw`.
 */
async function researchSingleQuery(ctx, query, args, maxAttempts, contextChars) {
  const includeTitleSearch = args.includeTitleSearch !== false;
  const includeSemanticSearch = args.includeSemanticSearch !== false;
  const limitPerQuery = Math.max(1, toInteger(args.limitPerQuery, 8));
  const offset = Math.max(0, toInteger(args.offset, 0));

  const baseBody =
    compactValue({
      query,
      collectionId: args.collectionId,
      limit: limitPerQuery,
      offset,
    }) || {};

  // Issue one search request and tag its rows with the source they came from.
  const runSearch = (source, method, body) =>
    ctx.client.call(method, body, { maxAttempts }).then((res) => ({
      source,
      rows: Array.isArray(res.body?.data) ? res.body.data : [],
    }));

  const pending = [];
  if (includeTitleSearch) {
    pending.push(runSearch("titles", "documents.search_titles", baseBody));
  }
  if (includeSemanticSearch) {
    pending.push(
      runSearch("semantic", "documents.search", {
        ...baseBody,
        snippetMinWords: toInteger(args.snippetMinWords, 16),
        snippetMaxWords: toInteger(args.snippetMaxWords, 24),
      })
    );
  }

  const outcomes = await Promise.all(pending);
  const rowsBySource = new Map(outcomes.map(({ source, rows }) => [source, rows]));
  const titleRows = rowsBySource.get("titles") || [];
  const semanticRows = rowsBySource.get("semantic") || [];

  // sourceRank is the 1-based position within each source's own result list.
  const titleHits = titleRows
    .map((row, index) => normalizeResearchTitleHit(query, row, contextChars, index + 1))
    .filter(Boolean);
  const semanticHits = semanticRows
    .map((row, index) => normalizeResearchSemanticHit(query, row, contextChars, index + 1))
    .filter(Boolean);
  const hits = [...titleHits, ...semanticHits];

  return {
    query,
    result: {
      titleHits: titleHits.length,
      semanticHits: semanticHits.length,
      totalHits: hits.length,
      hits,
    },
    raw: {
      titleRows,
      semanticRows,
    },
  };
}
|
|
700
|
+
|
|
701
|
+
async function searchResearchTool(ctx, args) {
|
|
702
|
+
const includeTitleSearch = args.includeTitleSearch !== false;
|
|
703
|
+
const includeSemanticSearch = args.includeSemanticSearch !== false;
|
|
704
|
+
if (!includeTitleSearch && !includeSemanticSearch) {
|
|
705
|
+
throw new CliError("search.research requires at least one of includeTitleSearch/includeSemanticSearch to be true");
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
const queries = buildResearchQueries(args);
|
|
709
|
+
const maxAttempts = toInteger(args.maxAttempts, 2);
|
|
710
|
+
const contextChars = toInteger(args.contextChars, 220);
|
|
711
|
+
const excerptChars = toInteger(args.excerptChars, 220);
|
|
712
|
+
const concurrency = Math.max(1, toInteger(args.concurrency, 4));
|
|
713
|
+
const view = args.view || "summary";
|
|
714
|
+
const perQueryView =
|
|
715
|
+
args.perQueryView && ["ids", "summary", "full"].includes(String(args.perQueryView))
|
|
716
|
+
? String(args.perQueryView)
|
|
717
|
+
: view;
|
|
718
|
+
const maxDocuments = Math.max(1, toInteger(args.maxDocuments, 40));
|
|
719
|
+
const expandLimit = Math.max(1, toInteger(args.expandLimit, 8));
|
|
720
|
+
const seenIds = ensureStringArray(args.seenIds, "seenIds") || [];
|
|
721
|
+
const perQueryHitLimit = Math.max(1, toInteger(args.perQueryHitLimit, 6));
|
|
722
|
+
const evidencePerDocument = Math.max(1, toInteger(args.evidencePerDocument, 5));
|
|
723
|
+
const suggestedQueryLimit = Math.max(1, toInteger(args.suggestedQueryLimit, 6));
|
|
724
|
+
const includePerQuery = args.includePerQuery !== false;
|
|
725
|
+
const includeCoverage = args.includeCoverage !== false;
|
|
726
|
+
const includeExpanded = args.includeExpanded !== false;
|
|
727
|
+
const includeBacklinks = args.includeBacklinks === true;
|
|
728
|
+
const backlinksLimit = Math.max(1, toInteger(args.backlinksLimit, 5));
|
|
729
|
+
const backlinksConcurrency = Math.max(1, toInteger(args.backlinksConcurrency, 4));
|
|
730
|
+
|
|
731
|
+
const precisionMode = normalizeResearchPrecisionMode(args.precisionMode || "balanced");
|
|
732
|
+
const modeConfig = getResearchModeConfig(precisionMode);
|
|
733
|
+
const diversityLambda = Number.isFinite(Number(args.diversityLambda))
|
|
734
|
+
? clamp(Number(args.diversityLambda), 0, 1)
|
|
735
|
+
: modeConfig.mmrLambda;
|
|
736
|
+
const diversify = args.diversify !== false;
|
|
737
|
+
|
|
738
|
+
const perQueryRaw = await mapLimit(queries, concurrency, async (query) =>
|
|
739
|
+
researchSingleQuery(ctx, query, args, maxAttempts, contextChars)
|
|
740
|
+
);
|
|
741
|
+
|
|
742
|
+
const allHits = perQueryRaw.flatMap((item) => item.result.hits || []);
|
|
743
|
+
const mergeMeta = mergeResearchHits(allHits, seenIds, {
|
|
744
|
+
precisionMode,
|
|
745
|
+
minScore: args.minScore,
|
|
746
|
+
totalQueries: queries.length,
|
|
747
|
+
enabledSourceCount: (includeTitleSearch ? 1 : 0) + (includeSemanticSearch ? 1 : 0),
|
|
748
|
+
rrfK: args.rrfK,
|
|
749
|
+
});
|
|
750
|
+
const mergedAll = mergeMeta.merged;
|
|
751
|
+
const merged = diversify
|
|
752
|
+
? diversifyRankedRows(mergedAll, maxDocuments, diversityLambda)
|
|
753
|
+
: mergedAll.slice(0, maxDocuments);
|
|
754
|
+
|
|
755
|
+
const expandedIds = includeExpanded ? merged.slice(0, expandLimit).map((item) => item.id) : [];
|
|
756
|
+
const hydration = includeExpanded
|
|
757
|
+
? await fetchDocumentsByIds(ctx, expandedIds, {
|
|
758
|
+
maxAttempts,
|
|
759
|
+
concurrency: Math.max(1, toInteger(args.hydrateConcurrency, 4)),
|
|
760
|
+
})
|
|
761
|
+
: {
|
|
762
|
+
byId: new Map(),
|
|
763
|
+
items: [],
|
|
764
|
+
};
|
|
765
|
+
const backlinks = includeExpanded && includeBacklinks && expandedIds.length > 0
|
|
766
|
+
? await fetchBacklinksByDocumentIds(ctx, expandedIds, {
|
|
767
|
+
maxAttempts,
|
|
768
|
+
concurrency: backlinksConcurrency,
|
|
769
|
+
limit: backlinksLimit,
|
|
770
|
+
view: view === "ids" ? "ids" : "summary",
|
|
771
|
+
excerptChars,
|
|
772
|
+
})
|
|
773
|
+
: {
|
|
774
|
+
byId: new Map(),
|
|
775
|
+
items: [],
|
|
776
|
+
};
|
|
777
|
+
|
|
778
|
+
const expanded = includeExpanded
|
|
779
|
+
? expandedIds
|
|
780
|
+
.map((id) => {
|
|
781
|
+
const doc = hydration.byId.get(id);
|
|
782
|
+
if (!doc) {
|
|
783
|
+
return null;
|
|
784
|
+
}
|
|
785
|
+
const mergedRow = merged.find((row) => row.id === id);
|
|
786
|
+
if (!mergedRow) {
|
|
787
|
+
return null;
|
|
788
|
+
}
|
|
789
|
+
const backlinkRows = includeBacklinks ? backlinks.byId.get(id) || [] : undefined;
|
|
790
|
+
|
|
791
|
+
if (view === "ids") {
|
|
792
|
+
return compactValue({
|
|
793
|
+
id: doc.id,
|
|
794
|
+
title: doc.title,
|
|
795
|
+
score: mergedRow.score,
|
|
796
|
+
queryMatches: mergedRow.queryMatches,
|
|
797
|
+
backlinks: backlinkRows,
|
|
798
|
+
});
|
|
799
|
+
}
|
|
800
|
+
if (view === "full") {
|
|
801
|
+
return compactValue({
|
|
802
|
+
id: doc.id,
|
|
803
|
+
score: mergedRow.score,
|
|
804
|
+
queryMatches: mergedRow.queryMatches,
|
|
805
|
+
evidence: mergedRow.evidence,
|
|
806
|
+
document: doc,
|
|
807
|
+
backlinks: backlinkRows,
|
|
808
|
+
});
|
|
809
|
+
}
|
|
810
|
+
return compactValue({
|
|
811
|
+
id: doc.id,
|
|
812
|
+
title: doc.title,
|
|
813
|
+
score: mergedRow.score,
|
|
814
|
+
queryMatches: mergedRow.queryMatches,
|
|
815
|
+
evidence: mergedRow.evidence.slice(0, evidencePerDocument),
|
|
816
|
+
document: normalizeDocumentRow(doc, "summary", excerptChars),
|
|
817
|
+
backlinks: backlinkRows,
|
|
818
|
+
});
|
|
819
|
+
})
|
|
820
|
+
.filter(Boolean)
|
|
821
|
+
: [];
|
|
822
|
+
|
|
823
|
+
const perQuery = perQueryRaw.map((item) => {
|
|
824
|
+
const compactHits =
|
|
825
|
+
perQueryView === "full"
|
|
826
|
+
? item.result.hits.map((hit) => ({
|
|
827
|
+
...hit,
|
|
828
|
+
}))
|
|
829
|
+
: item.result.hits
|
|
830
|
+
.map((hit) =>
|
|
831
|
+
normalizeSearchHit(
|
|
832
|
+
hit.source === "semantic"
|
|
833
|
+
? {
|
|
834
|
+
document: {
|
|
835
|
+
id: hit.id,
|
|
836
|
+
title: hit.title,
|
|
837
|
+
collectionId: hit.collectionId,
|
|
838
|
+
parentDocumentId: hit.parentDocumentId,
|
|
839
|
+
updatedAt: hit.updatedAt,
|
|
840
|
+
publishedAt: hit.publishedAt,
|
|
841
|
+
urlId: hit.urlId,
|
|
842
|
+
},
|
|
843
|
+
ranking: hit.ranking,
|
|
844
|
+
context: hit.context,
|
|
845
|
+
}
|
|
846
|
+
: {
|
|
847
|
+
id: hit.id,
|
|
848
|
+
title: hit.title,
|
|
849
|
+
collectionId: hit.collectionId,
|
|
850
|
+
parentDocumentId: hit.parentDocumentId,
|
|
851
|
+
updatedAt: hit.updatedAt,
|
|
852
|
+
publishedAt: hit.publishedAt,
|
|
853
|
+
urlId: hit.urlId,
|
|
854
|
+
ranking: hit.ranking,
|
|
855
|
+
context: hit.context,
|
|
856
|
+
},
|
|
857
|
+
perQueryView,
|
|
858
|
+
contextChars
|
|
859
|
+
)
|
|
860
|
+
)
|
|
861
|
+
.filter(Boolean);
|
|
862
|
+
|
|
863
|
+
return {
|
|
864
|
+
query: item.query,
|
|
865
|
+
titleHits: item.result.titleHits,
|
|
866
|
+
semanticHits: item.result.semanticHits,
|
|
867
|
+
totalHits: item.result.totalHits,
|
|
868
|
+
hits: compactHits.slice(0, perQueryHitLimit),
|
|
869
|
+
};
|
|
870
|
+
});
|
|
871
|
+
|
|
872
|
+
const mergedOut = merged.map((row) =>
|
|
873
|
+
shapeResearchMergedRow(row, view, excerptChars, evidencePerDocument)
|
|
874
|
+
);
|
|
875
|
+
|
|
876
|
+
const nextSeenIds = [...new Set([...seenIds, ...merged.map((item) => item.id)])];
|
|
877
|
+
const suggestedQueries = buildFollowUpQueries(merged, queries, suggestedQueryLimit);
|
|
878
|
+
|
|
879
|
+
return {
|
|
880
|
+
tool: "search.research",
|
|
881
|
+
profile: ctx.profile.id,
|
|
882
|
+
queryCount: queries.length,
|
|
883
|
+
result: {
|
|
884
|
+
question: args.question,
|
|
885
|
+
queries,
|
|
886
|
+
...(includePerQuery ? { perQuery } : {}),
|
|
887
|
+
merged: mergedOut,
|
|
888
|
+
...(includeExpanded ? { expanded } : {}),
|
|
889
|
+
...(includeCoverage
|
|
890
|
+
? {
|
|
891
|
+
coverage: {
|
|
892
|
+
includeTitleSearch,
|
|
893
|
+
includeSemanticSearch,
|
|
894
|
+
precisionMode: mergeMeta.precisionMode,
|
|
895
|
+
minScoreApplied: mergeMeta.minScore,
|
|
896
|
+
rrfK: mergeMeta.rrfK,
|
|
897
|
+
diversified: diversify,
|
|
898
|
+
diversityLambda: diversify ? diversityLambda : undefined,
|
|
899
|
+
queryCount: queries.length,
|
|
900
|
+
seenInputCount: seenIds.length,
|
|
901
|
+
seenSkippedCount: mergeMeta.skippedSeen,
|
|
902
|
+
rawHitCount: allHits.length,
|
|
903
|
+
mergedCount: mergedAll.length,
|
|
904
|
+
returnedMergedCount: merged.length,
|
|
905
|
+
perQueryHitLimit,
|
|
906
|
+
evidencePerDocument,
|
|
907
|
+
expandedRequested: expandedIds.length,
|
|
908
|
+
expandedOk: hydration.items.filter((item) => item.ok).length,
|
|
909
|
+
expandedFailed: hydration.items.filter((item) => !item.ok).length,
|
|
910
|
+
backlinksRequested: includeBacklinks ? backlinks.items.length : 0,
|
|
911
|
+
backlinksOk: includeBacklinks ? backlinks.items.filter((item) => item.ok).length : 0,
|
|
912
|
+
backlinksFailed: includeBacklinks ? backlinks.items.filter((item) => !item.ok).length : 0,
|
|
913
|
+
},
|
|
914
|
+
}
|
|
915
|
+
: {}),
|
|
916
|
+
next: {
|
|
917
|
+
seenIds: nextSeenIds,
|
|
918
|
+
suggestedQueries,
|
|
919
|
+
},
|
|
920
|
+
},
|
|
921
|
+
};
|
|
922
|
+
}
|
|
923
|
+
|
|
924
|
+
/**
 * Score how well a query matches a title on a 0..1 scale.
 *
 * Both inputs are stringified, trimmed and lower-cased first. Tiers:
 *   - 1     exact match
 *   - 0.92  title starts with the query
 *   - 0.82  title contains the query
 *   - otherwise the fraction of query tokens that also occur in the title
 *
 * @param {*} query - Search text; falsy values count as empty.
 * @param {*} title - Document title; falsy values count as empty.
 * @returns {number} 0 when either side is empty or yields no tokens.
 */
function lexicalScore(query, title) {
  const needle = String(query || "").trim().toLowerCase();
  const haystack = String(title || "").trim().toLowerCase();

  if (needle === "" || haystack === "") {
    return 0;
  }
  if (needle === haystack) {
    return 1;
  }
  if (haystack.startsWith(needle)) {
    return 0.92;
  }
  if (haystack.includes(needle)) {
    return 0.82;
  }

  // No substring relationship: fall back to token overlap.
  const needleTokens = tokenize(needle);
  const haystackTokens = tokenize(haystack);
  if (needleTokens.length === 0 || haystackTokens.length === 0) {
    return 0;
  }

  const titleVocabulary = new Set(haystackTokens);
  let shared = 0;
  for (const token of needleTokens) {
    if (titleVocabulary.has(token)) {
      shared += 1;
    }
  }
  return shared / needleTokens.length;
}
|
|
949
|
+
|
|
950
|
+
/**
 * Blend lexical and ranking signals into a confidence value in [0, 1].
 *
 * Title-search hits start from a higher base and weight the lexical score
 * more; every other source weights the normalized ranking more. A
 * case-insensitive exact query/title match lifts the result to at least 0.98.
 *
 * @param {object} params
 * @param {*} params.query - Query text that produced the hit.
 * @param {*} params.title - Title of the hit document.
 * @param {string} params.source - "titles" selects the title weighting.
 * @param {*} [params.ranking] - Raw ranking, passed through normalizeRanking.
 * @returns {number} Confidence rounded to 4 decimals and clamped to [0, 1].
 */
function computeConfidence({ query, title, source, ranking }) {
  const lexical = lexicalScore(query, title);
  const semantic = normalizeRanking(ranking);
  const fold = (value) => String(value || "").trim().toLowerCase();

  let confidence =
    source === "titles"
      ? 0.45 + lexical * 0.45 + semantic * 0.1
      : 0.25 + semantic * 0.45 + lexical * 0.3;

  if (fold(query) === fold(title)) {
    confidence = Math.max(confidence, 0.98);
  }

  const rounded = Number(confidence.toFixed(4));
  return Math.max(0, Math.min(1, rounded));
}
|
|
967
|
+
|
|
968
|
+
/**
 * Parse a value as an absolute URL without throwing.
 *
 * @param {*} value - Candidate URL; falsy values are treated as "".
 * @returns {URL|null} The parsed URL, or null when the URL constructor rejects it.
 */
function safeParseUrl(value) {
  let parsed = null;
  try {
    parsed = new URL(String(value || ""));
  } catch {
    parsed = null;
  }
  return parsed;
}
|
|
975
|
+
|
|
976
|
+
/**
 * Pull a probable urlId out of a document path segment.
 *
 * Accepts either a trailing "-<id>" suffix or a segment that is entirely an
 * id, where an id is at least six ASCII letters/digits.
 *
 * @param {*} value - Path segment such as "my-title-AbC123xyz".
 * @returns {string} The id hint, or "" when nothing id-shaped is found.
 */
function maybeExtractUrlIdHint(value) {
  const segment = String(value || "").trim();
  if (segment.length === 0) {
    return "";
  }

  const suffix = /-([A-Za-z0-9]{6,})$/.exec(segment);
  if (suffix?.[1]) {
    return String(suffix[1]);
  }

  return /^[A-Za-z0-9]{6,}$/.test(segment) ? segment : "";
}
|
|
990
|
+
|
|
991
|
+
/**
 * Extract a urlId from a URL fragment containing a "d-<id>" marker.
 *
 * The marker must directly follow "#" or "/" and the id must be at least six
 * characters from letters, digits, "_" or "-". Matching is case-insensitive.
 *
 * @param {*} hashValue - Fragment text, e.g. URL#hash.
 * @returns {string} The captured id, or "" when the marker is absent.
 */
function extractHashUrlIdHint(hashValue) {
  const fragment = String(hashValue || "");
  const found = /(?:^#|[#/])d-([A-Za-z0-9_-]{6,})/i.exec(fragment);
  if (!found?.[1]) {
    return "";
  }
  return String(found[1]);
}
|
|
995
|
+
|
|
996
|
+
/**
 * Break a document/share reference URL into lookup hints.
 *
 * Recognized path shapes are ".../share/<shareId>" and ".../doc/<slug>", where
 * the slug may end in "-<urlId>". A "d-<urlId>" marker in the fragment is
 * picked up as an additional id hint.
 *
 * @param {*} rawValue - URL (or arbitrary text) supplied by the caller.
 * @param {*} profileBaseUrl - Base URL of the active profile, used only to
 *   compute `matchesProfileHost`.
 * @returns {{
 *   input: string,
 *   validUrl: boolean,
 *   host: string,
 *   path: string,
 *   shareId: string,
 *   titleQuery: string,
 *   urlIdHints: string[],
 *   matchesProfileHost: (boolean|null),
 *   fallbackQuery: string
 * }} Parsed hints. When the input is not a valid URL, `validUrl` is false and
 *   `fallbackQuery` is the trimmed input. `matchesProfileHost` is null when it
 *   cannot be determined (invalid input or unparsable profile base URL).
 */
function parseOutlineReferenceUrl(rawValue, profileBaseUrl) {
  const input = String(rawValue || "").trim();
  const profileUrl = safeParseUrl(profileBaseUrl);
  const parsed = safeParseUrl(input);
  if (!parsed) {
    return {
      input,
      validUrl: false,
      host: "",
      path: "",
      shareId: "",
      titleQuery: "",
      urlIdHints: [],
      matchesProfileHost: null,
      fallbackQuery: input,
    };
  }

  // URL#pathname is percent-encoded, so a slug such as "résumé-guide" arrives
  // as "r%C3%A9sum%C3%A9-guide". Decode before turning it into a search query;
  // malformed escapes fall back to the raw segment instead of throwing.
  const decodeSegment = (segment) => {
    try {
      return decodeURIComponent(segment);
    } catch {
      return segment;
    }
  };
  const slugToWords = (segment) => segment.replace(/[-_]+/g, " ").trim();

  const path = parsed.pathname.replace(/\/+$/, "");
  const segments = path.split("/").filter(Boolean);
  const lowerSegments = segments.map((segment) => segment.toLowerCase());

  const shareIndex = lowerSegments.indexOf("share");
  const shareId = shareIndex >= 0 && segments[shareIndex + 1] ? String(segments[shareIndex + 1]) : "";

  const docIndex = lowerSegments.indexOf("doc");
  const rawDocSegment = docIndex >= 0 && segments[docIndex + 1] ? String(segments[docIndex + 1]) : "";
  const hashUrlId = extractHashUrlIdHint(parsed.hash);
  const docUrlId = maybeExtractUrlIdHint(rawDocSegment);

  const urlIdHints = uniqueStrings([docUrlId, hashUrlId]);

  let titleQuery = "";
  if (rawDocSegment) {
    titleQuery = decodeSegment(rawDocSegment);
    // Drop the trailing "-<urlId>" so only the human-readable slug remains.
    if (docUrlId && titleQuery.endsWith(`-${docUrlId}`)) {
      titleQuery = titleQuery.slice(0, -(docUrlId.length + 1));
    }
    titleQuery = slugToWords(titleQuery);
  }

  const fallbackQuery =
    titleQuery ||
    (segments.length > 0
      ? slugToWords(decodeSegment(segments[segments.length - 1]))
      : input);

  return {
    input,
    validUrl: true,
    host: parsed.host,
    path,
    shareId,
    titleQuery,
    urlIdHints,
    matchesProfileHost: profileUrl ? parsed.host.toLowerCase() === profileUrl.host.toLowerCase() : null,
    fallbackQuery,
  };
}
|
|
1054
|
+
|
|
1055
|
+
/**
 * Comparator that orders date strings newest-first.
 *
 * Values that Date.parse cannot read (including falsy values) are treated as
 * epoch 0.
 *
 * @param {*} a - Left timestamp.
 * @param {*} b - Right timestamp.
 * @returns {number} Positive when `b` is newer than `a`, negative when older,
 *   0 when equal.
 */
function compareIsoDesc(a, b) {
  const toEpochMs = (value) => {
    const ms = Date.parse(value || "");
    return Number.isFinite(ms) ? ms : 0;
  };
  const left = toEpochMs(a);
  const right = toEpochMs(b);
  return right - left;
}
|
|
1063
|
+
|
|
1064
|
+
/**
 * Shape a URL-resolution candidate for output.
 *
 * Starts from makeCandidateView and, for every view except "ids", adds the
 * URL-specific match metadata before running the result through compactValue.
 *
 * @param {object} candidate - Merged candidate row.
 * @param {string} view - Output view name ("ids", "summary", "full", ...).
 * @param {number} excerptChars - Excerpt length passed to makeCandidateView.
 * @returns {*} The shaped candidate, or the falsy/ids view unchanged.
 */
function shapeResolveUrlCandidate(candidate, view, excerptChars) {
  const shaped = makeCandidateView(candidate, view, excerptChars);
  const wantsMatchMetadata = Boolean(shaped) && view !== "ids";
  if (!wantsMatchMetadata) {
    return shaped;
  }

  const { matchedQueries, matchingReasons, rawConfidence, urlIdHintMatched } = candidate;
  return compactValue({
    ...shaped,
    matchedQueries,
    matchingReasons,
    rawConfidence,
    urlIdHintMatched,
  });
}
|
|
1077
|
+
|
|
1078
|
+
/**
 * Wrap a document record in the candidate row shape used by the resolvers.
 *
 * @param {object} document - Document record; must carry a truthy `id`.
 * @param {object} [options]
 * @param {*} [options.confidence=1] - Confidence, coerced to a number and
 *   clamped to [0, 1].
 * @param {string} [options.source="explicit"] - Single entry for `sources`.
 * @param {*} [options.ranking] - Kept only when it coerces to a finite number.
 * @param {*} [options.context] - Optional context copied onto the row.
 * @returns {object|null} Candidate row, or null when the document has no id.
 */
function buildCandidateFromDocument(document, { confidence = 1, source = "explicit", ranking, context } = {}) {
  if (!document?.id) {
    return null;
  }

  const numericRanking = Number(ranking);
  const { title, collectionId, parentDocumentId, updatedAt, publishedAt, urlId, text } = document;

  return {
    id: String(document.id),
    title,
    collectionId,
    parentDocumentId,
    updatedAt,
    publishedAt,
    urlId,
    text,
    ranking: Number.isFinite(numericRanking) ? numericRanking : undefined,
    confidence: clamp(Number(confidence), 0, 1),
    sources: [source],
    context,
    document,
  };
}
|
|
1098
|
+
|
|
1099
|
+
/**
 * Normalize a status filter argument into an array.
 *
 * @param {string|string[]|null|undefined} statusFilter - Array (returned as
 *   is), comma-separated string, or nullish.
 * @returns {string[]|undefined} undefined for nullish input; otherwise the
 *   array, or the trimmed non-empty pieces of the string.
 * @throws {CliError} When the value is neither nullish, an array, nor a string.
 */
function normalizeStatusFilter(statusFilter) {
  if (statusFilter === undefined || statusFilter === null) {
    return undefined;
  }
  if (Array.isArray(statusFilter)) {
    return statusFilter;
  }
  if (typeof statusFilter !== "string") {
    throw new CliError("statusFilter must be string or string[]");
  }

  const statuses = [];
  for (const piece of statusFilter.split(",")) {
    const trimmed = piece.trim();
    if (trimmed) {
      statuses.push(trimmed);
    }
  }
  return statuses;
}
|
|
1114
|
+
|
|
1115
|
+
/**
 * Project a candidate row into the requested output view.
 *
 *   - "full": the candidate object itself, unchanged (same reference).
 *   - "ids":  only `id`, `title` and `confidence`.
 *   - any other view: summary fields plus an `excerpt` cut from
 *     `candidate.text` when text is present.
 *
 * @param {object|null|undefined} candidate - Candidate row to shape.
 * @param {string} view - Requested view name.
 * @param {number} excerptChars - Maximum excerpt length; longer text is cut
 *   and suffixed with "...".
 * @returns {object|null} Shaped candidate. For non-"full" views a missing
 *   candidate yields null instead of throwing, matching the falsy check in
 *   shapeResolveUrlCandidate.
 */
function makeCandidateView(candidate, view, excerptChars) {
  if (view === "full") {
    return candidate;
  }

  // The projections below read properties off the candidate; guard so a
  // missing row produces null rather than a TypeError.
  if (!candidate) {
    return null;
  }

  if (view === "ids") {
    return {
      id: candidate.id,
      title: candidate.title,
      confidence: candidate.confidence,
    };
  }

  const out = {
    id: candidate.id,
    title: candidate.title,
    confidence: candidate.confidence,
    sources: candidate.sources,
    ranking: candidate.ranking,
    collectionId: candidate.collectionId,
    parentDocumentId: candidate.parentDocumentId,
    updatedAt: candidate.updatedAt,
    publishedAt: candidate.publishedAt,
    urlId: candidate.urlId,
  };

  if (candidate.text) {
    out.excerpt =
      candidate.text.length > excerptChars
        ? `${candidate.text.slice(0, excerptChars)}...`
        : candidate.text;
  }

  return out;
}
|
|
1147
|
+
|
|
1148
|
+
/**
 * Resolve one free-text query to ranked document candidates.
 *
 * Calls "documents.search_titles" first, then "documents.search" unless
 * strict mode is on and the title search already returned hits. Hits from
 * both calls are merged per document id, scored with computeConfidence and
 * sorted by confidence, then ranking, then title.
 *
 * @param {object} ctx - Tool context; `ctx.client.call` performs the API calls.
 * @param {string} query - Query text.
 * @param {object} args - Tool arguments read here: maxAttempts, limit, strict,
 *   strictThreshold, collectionId, snippetMinWords, snippetMaxWords, view,
 *   excerptChars.
 * @returns {Promise<object>} `{ query, bestMatch, candidates, stats }`. In
 *   strict mode `bestMatch` is null unless the top candidate reaches
 *   `strictThreshold` (default 0.82).
 */
async function resolveSingleQuery(ctx, query, args) {
  const maxAttempts = toInteger(args.maxAttempts, 2);
  const limit = Math.max(1, toInteger(args.limit, 8));
  const strict = !!args.strict;

  const common = compactValue({
    query,
    collectionId: args.collectionId,
    limit,
    offset: 0,
  }) || {};

  const titleResponse = await ctx.client.call("documents.search_titles", common, { maxAttempts });
  const titleHits = Array.isArray(titleResponse.body?.data) ? titleResponse.body.data : [];

  // Strict mode trusts title hits and skips the second search call.
  let semanticHits = [];
  const skipSemantic = strict && titleHits.length > 0;
  if (!skipSemantic) {
    const semanticResponse = await ctx.client.call(
      "documents.search",
      {
        ...common,
        snippetMinWords: toInteger(args.snippetMinWords, 16),
        snippetMaxWords: toInteger(args.snippetMaxWords, 24),
      },
      { maxAttempts }
    );
    if (Array.isArray(semanticResponse?.body?.data)) {
      semanticHits = semanticResponse.body.data;
    }
  }

  // Fields copied verbatim from a document-like record onto a candidate row.
  const coreFields = (record) => ({
    id: record.id,
    title: record.title,
    collectionId: record.collectionId,
    parentDocumentId: record.parentDocumentId,
    updatedAt: record.updatedAt,
    publishedAt: record.publishedAt,
    urlId: record.urlId,
    text: record.text,
  });
  const finiteOrUndefined = (value) => (Number.isFinite(Number(value)) ? Number(value) : undefined);

  const byId = new Map();

  for (const hit of titleHits) {
    if (!hit?.id) {
      continue;
    }
    byId.set(hit.id, {
      ...coreFields(hit),
      ranking: undefined,
      confidence: computeConfidence({
        query,
        title: hit.title,
        source: "titles",
      }),
      sources: ["titles"],
      document: hit,
    });
  }

  for (const hit of semanticHits) {
    const doc = hit?.document;
    if (!doc?.id) {
      continue;
    }

    const confidence = computeConfidence({
      query,
      title: doc.title,
      source: "semantic",
      ranking: hit.ranking,
    });

    const known = byId.get(doc.id);
    if (known) {
      // Same document seen in both searches: keep the stronger confidence and
      // fill in whatever the title hit did not provide.
      known.confidence = Math.max(known.confidence, confidence);
      known.sources = [...new Set([...known.sources, "semantic"])];
      if (known.ranking === undefined && Number.isFinite(Number(hit.ranking))) {
        known.ranking = Number(hit.ranking);
      }
      if (!known.context && hit.context) {
        known.context = hit.context;
      }
      if (!known.text && doc.text) {
        known.text = doc.text;
      }
      continue;
    }

    byId.set(doc.id, {
      ...coreFields(doc),
      ranking: finiteOrUndefined(hit.ranking),
      confidence,
      sources: ["semantic"],
      context: hit.context,
      document: doc,
    });
  }

  const candidates = Array.from(byId.values()).sort((left, right) => {
    if (right.confidence !== left.confidence) {
      return right.confidence - left.confidence;
    }
    const leftRanking = Number(left.ranking || 0);
    const rightRanking = Number(right.ranking || 0);
    if (rightRanking !== leftRanking) {
      return rightRanking - leftRanking;
    }
    return String(left.title || "").localeCompare(String(right.title || ""));
  });

  const strictThreshold = Number.isFinite(Number(args.strictThreshold))
    ? Number(args.strictThreshold)
    : 0.82;

  const top = candidates[0] || null;
  const topAccepted = Boolean(top) && (!strict || top.confidence >= strictThreshold);

  const view = args.view || "summary";
  const excerptChars = toInteger(args.excerptChars, 220);

  return {
    query,
    bestMatch: topAccepted ? makeCandidateView(top, view, excerptChars) : null,
    candidates: candidates.map((item) => makeCandidateView(item, view, excerptChars)),
    stats: {
      titleHits: titleHits.length,
      semanticHits: semanticHits.length,
      candidateCount: candidates.length,
      strict,
      strictThreshold,
    },
  };
}
|
|
1290
|
+
|
|
1291
|
+
/**
 * "documents.resolve" tool: resolve one or more queries to documents.
 *
 * @param {object} ctx - Tool context (`ctx.profile.id` is echoed back).
 * @param {object} args - Accepts `queries` (string[]) or `query`, plus
 *   `concurrency`, `forceGroupedResult` and everything resolveSingleQuery reads.
 * @returns {Promise<object>} For a single query without `forceGroupedResult`,
 *   `{ tool, profile, query, result }`; otherwise
 *   `{ tool, profile, queryCount, result: { perQuery, mergedBestMatches } }`.
 * @throws {CliError} When neither `query` nor `queries` yields a query.
 */
async function documentsResolveTool(ctx, args) {
  const listed = ensureStringArray(args.queries, "queries");
  const queries = listed || (args.query ? [String(args.query)] : []);
  if (queries.length === 0) {
    throw new CliError("documents.resolve requires args.query or args.queries[]");
  }

  const concurrency = Math.max(1, toInteger(args.concurrency, 4));
  const perQuery = await mapLimit(queries, concurrency, async (query) =>
    resolveSingleQuery(ctx, query, args)
  );

  const wantsFlatResult = queries.length === 1 && !args.forceGroupedResult;
  if (wantsFlatResult) {
    const [only] = perQuery;
    return {
      tool: "documents.resolve",
      profile: ctx.profile.id,
      query: only.query,
      result: only,
    };
  }

  // Collect each query's best match, strongest first.
  const mergedBestMatches = [];
  for (const item of perQuery) {
    if (item.bestMatch) {
      mergedBestMatches.push(item.bestMatch);
    }
  }
  mergedBestMatches.sort((a, b) => Number(b.confidence || 0) - Number(a.confidence || 0));

  return {
    tool: "documents.resolve",
    profile: ctx.profile.id,
    queryCount: perQuery.length,
    result: {
      perQuery,
      mergedBestMatches,
    },
  };
}
|
|
1325
|
+
|
|
1326
|
+
/**
 * Resolve one URL reference to ranked document candidates.
 *
 * Steps:
 *   1. Parse the URL into share id, title and urlId hints.
 *   2. With `args.strictHost === true` and a host that differs from the
 *      profile host, return early with a "host_mismatch_skipped" warning.
 *   3. If a share id is present, look it up via "documents.info"; a failure is
 *      recorded as a "share_lookup_failed" warning and resolution continues.
 *   4. Run resolveSingleQuery for each hint, boost confidence for urlId/title/
 *      host agreement, and merge rows per document id.
 *   5. Sort, apply the strict threshold if requested, and trim to `limit`.
 *
 * @param {object} ctx - Tool context (`ctx.client.call`, `ctx.profile?.baseUrl`).
 * @param {string} rawUrl - URL or text to resolve.
 * @param {object} args - Tool arguments: limit, strict, strictThreshold, view,
 *   excerptChars, strictHost, maxAttempts, plus those of resolveSingleQuery.
 * @returns {Promise<object>} `{ url, parsed, bestMatch, candidates, stats, warnings }`.
 */
async function resolveSingleUrlReference(ctx, rawUrl, args) {
  const parsed = parseOutlineReferenceUrl(rawUrl, ctx.profile?.baseUrl);
  const limit = Math.max(1, toInteger(args.limit, 8));
  const strict = !!args.strict;
  const strictThreshold = Number.isFinite(Number(args.strictThreshold))
    ? Number(args.strictThreshold)
    : 0.82;
  const view = args.view || "summary";
  const excerptChars = toInteger(args.excerptChars, 220);
  const candidateMap = new Map();
  const warnings = [];
  let shareLookupAttempted = false;

  const hostRejected =
    parsed.validUrl && args.strictHost === true && parsed.matchesProfileHost === false;
  if (hostRejected) {
    warnings.push("host_mismatch_skipped");
    return {
      url: parsed.input,
      parsed,
      bestMatch: null,
      candidates: [],
      stats: {
        queryCount: 0,
        candidateCount: 0,
        strict,
        strictThreshold,
        shareLookupAttempted,
      },
      warnings,
    };
  }

  if (parsed.validUrl && parsed.shareId) {
    shareLookupAttempted = true;
    try {
      const shareInfo = await ctx.client.call(
        "documents.info",
        { shareId: parsed.shareId },
        { maxAttempts: toInteger(args.maxAttempts, 2) }
      );
      const shareCandidate = buildCandidateFromDocument(shareInfo.body?.data || null, {
        confidence: 1,
        source: "share",
        ranking: 1,
      });
      if (shareCandidate) {
        candidateMap.set(shareCandidate.id, {
          ...shareCandidate,
          rawConfidence: shareCandidate.confidence,
          matchingReasons: ["share_id_lookup"],
          matchedQueries: [],
          urlIdHintMatched: false,
        });
      }
    } catch {
      // Best effort: a failed share lookup must not block query-based resolution.
      warnings.push("share_lookup_failed");
    }
  }

  // Fold a freshly scored row into the row already stored for the same id.
  const mergeInto = (existing, nextRow, query) => {
    existing.confidence = Math.max(Number(existing.confidence || 0), nextRow.confidence);
    existing.rawConfidence = Math.max(Number(existing.rawConfidence || 0), nextRow.rawConfidence);
    existing.matchingReasons = uniqueStrings([...(existing.matchingReasons || []), ...nextRow.matchingReasons]);
    existing.matchedQueries = uniqueStrings([...(existing.matchedQueries || []), query]);
    existing.sources = uniqueStrings([...(existing.sources || []), ...(nextRow.sources || [])]);
    existing.urlIdHintMatched = existing.urlIdHintMatched || nextRow.urlIdHintMatched;
    if (existing.ranking === undefined && nextRow.ranking !== undefined) {
      existing.ranking = nextRow.ranking;
    }
    if (!existing.context && nextRow.context) {
      existing.context = nextRow.context;
    }
    if (!existing.text && nextRow.text) {
      existing.text = nextRow.text;
    }
    // Positive result means nextRow carries the newer updatedAt.
    if (compareIsoDesc(existing.updatedAt, nextRow.updatedAt) > 0) {
      existing.updatedAt = nextRow.updatedAt;
      existing.publishedAt = nextRow.publishedAt;
    }
  };

  const queryHints = uniqueStrings([parsed.titleQuery, ...parsed.urlIdHints, parsed.fallbackQuery]);
  for (const query of queryHints) {
    const resolved = await resolveSingleQuery(ctx, query, {
      ...args,
      view: "full",
      limit,
      strict: false,
    });

    for (const candidate of resolved.candidates || []) {
      const id = candidate?.id;
      if (!id) {
        continue;
      }

      const rawConfidence = clamp(Number(candidate.confidence || 0), 0, 1);
      const reasons = [];
      let confidence = rawConfidence;
      let urlIdHintMatched = false;

      if (parsed.urlIdHints.includes(String(candidate.urlId || ""))) {
        confidence = clamp(confidence + 0.24, 0, 1);
        urlIdHintMatched = true;
        reasons.push("url_id_hint");
      }

      if (parsed.titleQuery && lexicalScore(parsed.titleQuery, candidate.title) >= 0.88) {
        confidence = clamp(confidence + 0.06, 0, 1);
        reasons.push("title_hint");
      }

      if (parsed.matchesProfileHost === true) {
        confidence = clamp(confidence + 0.02, 0, 1);
      }

      const nextRow = {
        ...candidate,
        confidence: Number(confidence.toFixed(4)),
        rawConfidence,
        matchingReasons: reasons,
        matchedQueries: [query],
        urlIdHintMatched,
      };

      const existing = candidateMap.get(id);
      if (existing) {
        mergeInto(existing, nextRow, query);
      } else {
        candidateMap.set(id, nextRow);
      }
    }
  }

  // Order: confidence, then ranking, then most recently updated, then title.
  const byRelevance = (a, b) => {
    const confidenceDiff = Number(b.confidence || 0) - Number(a.confidence || 0);
    if (confidenceDiff !== 0) {
      return confidenceDiff;
    }
    const rankingDiff = Number(b.ranking || 0) - Number(a.ranking || 0);
    if (rankingDiff !== 0) {
      return rankingDiff;
    }
    const updatedCmp = compareIsoDesc(a.updatedAt, b.updatedAt);
    if (updatedCmp !== 0) {
      return updatedCmp;
    }
    return String(a.title || "").localeCompare(String(b.title || ""));
  };

  const ordered = Array.from(candidateMap.values()).sort(byRelevance);
  const eligible = strict
    ? ordered.filter((item) => Number(item.confidence || 0) >= strictThreshold)
    : ordered;
  const trimmedCandidates = eligible.slice(0, limit);
  const bestMatch = trimmedCandidates[0] || null;

  return {
    url: parsed.input,
    parsed,
    bestMatch: bestMatch ? shapeResolveUrlCandidate(bestMatch, view, excerptChars) : null,
    candidates: trimmedCandidates.map((item) => shapeResolveUrlCandidate(item, view, excerptChars)),
    stats: {
      queryCount: queryHints.length,
      candidateCount: trimmedCandidates.length,
      strict,
      strictThreshold,
      shareLookupAttempted,
    },
    warnings,
  };
}
|
|
1495
|
+
|
|
1496
|
+
/**
 * "documents.resolve_urls" tool: resolve one or more URL references.
 *
 * @param {object} ctx - Tool context (`ctx.profile.id` is echoed back).
 * @param {object} args - Accepts `urls` (string[]) or `url`, plus
 *   `concurrency`, `forceGroupedResult` and everything
 *   resolveSingleUrlReference reads. URLs are de-duplicated with uniqueStrings.
 * @returns {Promise<object>} For a single URL without `forceGroupedResult`,
 *   `{ tool, profile, url, result }`; otherwise
 *   `{ tool, profile, urlCount, result: { perUrl, mergedBestMatches } }`.
 * @throws {CliError} When neither `url` nor `urls` yields a URL.
 */
async function documentsResolveUrlsTool(ctx, args) {
  const listed = ensureStringArray(args.urls, "urls");
  const rawUrls = listed || (args.url ? [String(args.url)] : []);
  const urls = uniqueStrings(rawUrls);
  if (urls.length === 0) {
    throw new CliError("documents.resolve_urls requires args.url or args.urls[]");
  }

  const concurrency = Math.max(1, toInteger(args.concurrency, 4));
  const perUrl = await mapLimit(urls, concurrency, async (url) => resolveSingleUrlReference(ctx, url, args));

  const wantsFlatResult = urls.length === 1 && !args.forceGroupedResult;
  if (wantsFlatResult) {
    const [only] = perUrl;
    return {
      tool: "documents.resolve_urls",
      profile: ctx.profile.id,
      url: only.url,
      result: only,
    };
  }

  // Collect each URL's best match, strongest first.
  const mergedBestMatches = [];
  for (const item of perUrl) {
    if (item.bestMatch) {
      mergedBestMatches.push(item.bestMatch);
    }
  }
  mergedBestMatches.sort((a, b) => Number(b.confidence || 0) - Number(a.confidence || 0));

  return {
    tool: "documents.resolve_urls",
    profile: ctx.profile.id,
    urlCount: perUrl.length,
    result: {
      perUrl,
      mergedBestMatches,
    },
  };
}
|
|
1530
|
+
|
|
1531
|
+
/**
 * Measure how likely two candidates refer to the same document.
 *
 * Identical non-empty urlIds score 1. Otherwise the score is the larger of
 * the title token Jaccard similarity and the lexical score taken in both
 * directions.
 *
 * @param {object} a - First candidate (`urlId`, `title` are read).
 * @param {object} b - Second candidate.
 * @returns {{score: number, reason: string}} `reason` is "url_id_exact",
 *   "title_lexical" (lexical score wins or ties) or "title_similarity".
 */
function canonicalClusterSimilarity(a, b) {
  const leftUrlId = String(a?.urlId || "");
  const rightUrlId = String(b?.urlId || "");
  if (leftUrlId !== "" && leftUrlId === rightUrlId) {
    return { score: 1, reason: "url_id_exact" };
  }

  // lexicalScore is asymmetric (query vs. title), so take the better direction.
  const lexical = Math.max(lexicalScore(a?.title, b?.title), lexicalScore(b?.title, a?.title));
  const jaccard = tokenJaccardSimilarity(a?.title, b?.title);

  let reason = "title_similarity";
  if (lexical >= jaccard) {
    reason = "title_lexical";
  }
  return {
    score: Math.max(jaccard, lexical),
    reason,
  };
}
|
|
1550
|
+
|
|
1551
|
+
/**
 * Shape a canonicalization candidate for output.
 *
 * Currently a direct pass-through to makeCandidateView.
 *
 * @param {object} candidate - Candidate or cluster member row.
 * @param {string} view - Requested view name.
 * @param {number} excerptChars - Maximum excerpt length.
 * @returns {*} Whatever makeCandidateView returns for these arguments.
 */
function shapeCanonicalCandidate(candidate, view, excerptChars) {
  const shaped = makeCandidateView(candidate, view, excerptChars);
  return shaped;
}
|
|
1554
|
+
|
|
1555
|
+
async function documentsCanonicalizeCandidatesTool(ctx, args) {
|
|
1556
|
+
const queries = uniqueStrings(ensureStringArray(args.queries, "queries") || (args.query ? [String(args.query)] : []));
|
|
1557
|
+
const ids = uniqueStrings(ensureStringArray(args.ids, "ids") || []);
|
|
1558
|
+
if (queries.length === 0 && ids.length === 0) {
|
|
1559
|
+
throw new CliError("documents.canonicalize_candidates requires args.query/args.queries[] or args.ids[]");
|
|
1560
|
+
}
|
|
1561
|
+
|
|
1562
|
+
const candidateMap = new Map();
|
|
1563
|
+
const limit = Math.max(1, toInteger(args.limit, 8));
|
|
1564
|
+
const strict = !!args.strict;
|
|
1565
|
+
const strictThreshold = Number.isFinite(Number(args.strictThreshold))
|
|
1566
|
+
? Number(args.strictThreshold)
|
|
1567
|
+
: 0.82;
|
|
1568
|
+
const view = args.view || "summary";
|
|
1569
|
+
const excerptChars = toInteger(args.excerptChars, 220);
|
|
1570
|
+
const maxAttempts = toInteger(args.maxAttempts, 2);
|
|
1571
|
+
const titleSimilarityThreshold = Number.isFinite(Number(args.titleSimilarityThreshold))
|
|
1572
|
+
? clamp(Number(args.titleSimilarityThreshold), 0, 1)
|
|
1573
|
+
: 0.82;
|
|
1574
|
+
|
|
1575
|
+
if (ids.length > 0) {
|
|
1576
|
+
const hydrated = await fetchDocumentsByIds(ctx, ids, {
|
|
1577
|
+
maxAttempts,
|
|
1578
|
+
concurrency: Math.max(1, toInteger(args.hydrateConcurrency, 4)),
|
|
1579
|
+
});
|
|
1580
|
+
for (const [id, doc] of hydrated.byId.entries()) {
|
|
1581
|
+
const candidate = buildCandidateFromDocument(doc, {
|
|
1582
|
+
confidence: 1,
|
|
1583
|
+
source: "explicit",
|
|
1584
|
+
ranking: 1,
|
|
1585
|
+
});
|
|
1586
|
+
if (!candidate) {
|
|
1587
|
+
continue;
|
|
1588
|
+
}
|
|
1589
|
+
candidate.matchedQueries = [];
|
|
1590
|
+
candidate.rawConfidence = candidate.confidence;
|
|
1591
|
+
candidateMap.set(id, candidate);
|
|
1592
|
+
}
|
|
1593
|
+
}
|
|
1594
|
+
|
|
1595
|
+
const queryResults = await mapLimit(queries, Math.max(1, toInteger(args.concurrency, 4)), async (query) =>
|
|
1596
|
+
resolveSingleQuery(ctx, query, {
|
|
1597
|
+
...args,
|
|
1598
|
+
strict: false,
|
|
1599
|
+
limit,
|
|
1600
|
+
view: "full",
|
|
1601
|
+
maxAttempts,
|
|
1602
|
+
})
|
|
1603
|
+
);
|
|
1604
|
+
|
|
1605
|
+
for (const group of queryResults) {
|
|
1606
|
+
for (const candidate of group.candidates || []) {
|
|
1607
|
+
const id = candidate?.id;
|
|
1608
|
+
if (!id) {
|
|
1609
|
+
continue;
|
|
1610
|
+
}
|
|
1611
|
+
const existing = candidateMap.get(id);
|
|
1612
|
+
if (!existing) {
|
|
1613
|
+
candidateMap.set(id, {
|
|
1614
|
+
...candidate,
|
|
1615
|
+
rawConfidence: candidate.confidence,
|
|
1616
|
+
matchedQueries: [group.query],
|
|
1617
|
+
});
|
|
1618
|
+
continue;
|
|
1619
|
+
}
|
|
1620
|
+
existing.confidence = Math.max(Number(existing.confidence || 0), Number(candidate.confidence || 0));
|
|
1621
|
+
existing.rawConfidence = Math.max(Number(existing.rawConfidence || 0), Number(candidate.confidence || 0));
|
|
1622
|
+
existing.sources = uniqueStrings([...(existing.sources || []), ...(candidate.sources || [])]);
|
|
1623
|
+
existing.matchedQueries = uniqueStrings([...(existing.matchedQueries || []), group.query]);
|
|
1624
|
+
if (existing.ranking === undefined && candidate.ranking !== undefined) {
|
|
1625
|
+
existing.ranking = candidate.ranking;
|
|
1626
|
+
}
|
|
1627
|
+
if (!existing.text && candidate.text) {
|
|
1628
|
+
existing.text = candidate.text;
|
|
1629
|
+
}
|
|
1630
|
+
if (compareIsoDesc(existing.updatedAt, candidate.updatedAt) > 0) {
|
|
1631
|
+
existing.updatedAt = candidate.updatedAt;
|
|
1632
|
+
existing.publishedAt = candidate.publishedAt;
|
|
1633
|
+
}
|
|
1634
|
+
}
|
|
1635
|
+
}
|
|
1636
|
+
|
|
1637
|
+
const candidateRows = Array.from(candidateMap.values())
|
|
1638
|
+
.filter((row) => (!strict ? true : Number(row.confidence || 0) >= strictThreshold))
|
|
1639
|
+
.sort((a, b) => {
|
|
1640
|
+
const confidenceDiff = Number(b.confidence || 0) - Number(a.confidence || 0);
|
|
1641
|
+
if (confidenceDiff !== 0) {
|
|
1642
|
+
return confidenceDiff;
|
|
1643
|
+
}
|
|
1644
|
+
const updatedCmp = compareIsoDesc(a.updatedAt, b.updatedAt);
|
|
1645
|
+
if (updatedCmp !== 0) {
|
|
1646
|
+
return updatedCmp;
|
|
1647
|
+
}
|
|
1648
|
+
return String(a.title || "").localeCompare(String(b.title || ""));
|
|
1649
|
+
});
|
|
1650
|
+
|
|
1651
|
+
const clusters = [];
|
|
1652
|
+
for (const candidate of candidateRows) {
|
|
1653
|
+
let bestCluster = null;
|
|
1654
|
+
let bestScore = -1;
|
|
1655
|
+
let bestReason = "";
|
|
1656
|
+
for (const cluster of clusters) {
|
|
1657
|
+
const similarity = canonicalClusterSimilarity(candidate, cluster.canonical);
|
|
1658
|
+
if (similarity.score >= titleSimilarityThreshold && similarity.score > bestScore) {
|
|
1659
|
+
bestCluster = cluster;
|
|
1660
|
+
bestScore = similarity.score;
|
|
1661
|
+
bestReason = similarity.reason;
|
|
1662
|
+
}
|
|
1663
|
+
}
|
|
1664
|
+
|
|
1665
|
+
if (!bestCluster) {
|
|
1666
|
+
clusters.push({
|
|
1667
|
+
canonical: candidate,
|
|
1668
|
+
members: [{ ...candidate, similarity: 1, similarityReason: "seed" }],
|
|
1669
|
+
});
|
|
1670
|
+
continue;
|
|
1671
|
+
}
|
|
1672
|
+
|
|
1673
|
+
bestCluster.members.push({
|
|
1674
|
+
...candidate,
|
|
1675
|
+
similarity: Number(bestScore.toFixed(4)),
|
|
1676
|
+
similarityReason: bestReason,
|
|
1677
|
+
});
|
|
1678
|
+
}
|
|
1679
|
+
|
|
1680
|
+
for (const cluster of clusters) {
|
|
1681
|
+
cluster.members.sort((a, b) => {
|
|
1682
|
+
const aExplicit = (a.sources || []).includes("explicit");
|
|
1683
|
+
const bExplicit = (b.sources || []).includes("explicit");
|
|
1684
|
+
if (aExplicit !== bExplicit) {
|
|
1685
|
+
return aExplicit ? -1 : 1;
|
|
1686
|
+
}
|
|
1687
|
+
const confidenceDiff = Number(b.confidence || 0) - Number(a.confidence || 0);
|
|
1688
|
+
if (confidenceDiff !== 0) {
|
|
1689
|
+
return confidenceDiff;
|
|
1690
|
+
}
|
|
1691
|
+
const updatedCmp = compareIsoDesc(a.updatedAt, b.updatedAt);
|
|
1692
|
+
if (updatedCmp !== 0) {
|
|
1693
|
+
return updatedCmp;
|
|
1694
|
+
}
|
|
1695
|
+
return String(a.title || "").localeCompare(String(b.title || ""));
|
|
1696
|
+
});
|
|
1697
|
+
cluster.canonical = cluster.members[0];
|
|
1698
|
+
}
|
|
1699
|
+
|
|
1700
|
+
clusters.sort((a, b) => {
|
|
1701
|
+
const confidenceDiff = Number(b.canonical?.confidence || 0) - Number(a.canonical?.confidence || 0);
|
|
1702
|
+
if (confidenceDiff !== 0) {
|
|
1703
|
+
return confidenceDiff;
|
|
1704
|
+
}
|
|
1705
|
+
return String(a.canonical?.title || "").localeCompare(String(b.canonical?.title || ""));
|
|
1706
|
+
});
|
|
1707
|
+
|
|
1708
|
+
const canonical = clusters.map((cluster) =>
|
|
1709
|
+
compactValue({
|
|
1710
|
+
...shapeCanonicalCandidate(cluster.canonical, view, excerptChars),
|
|
1711
|
+
memberCount: cluster.members.length,
|
|
1712
|
+
duplicateIds: cluster.members.slice(1).map((member) => member.id),
|
|
1713
|
+
})
|
|
1714
|
+
);
|
|
1715
|
+
|
|
1716
|
+
const clusterRows =
|
|
1717
|
+
view === "ids"
|
|
1718
|
+
? clusters.map((cluster) => ({
|
|
1719
|
+
canonicalId: cluster.canonical.id,
|
|
1720
|
+
memberIds: cluster.members.map((member) => member.id),
|
|
1721
|
+
memberCount: cluster.members.length,
|
|
1722
|
+
}))
|
|
1723
|
+
: clusters.map((cluster) => ({
|
|
1724
|
+
canonical: shapeCanonicalCandidate(cluster.canonical, view, excerptChars),
|
|
1725
|
+
members: cluster.members.map((member) =>
|
|
1726
|
+
compactValue({
|
|
1727
|
+
...shapeCanonicalCandidate(member, view, excerptChars),
|
|
1728
|
+
similarity: member.similarity,
|
|
1729
|
+
similarityReason: member.similarityReason,
|
|
1730
|
+
})
|
|
1731
|
+
),
|
|
1732
|
+
}));
|
|
1733
|
+
|
|
1734
|
+
return {
|
|
1735
|
+
tool: "documents.canonicalize_candidates",
|
|
1736
|
+
profile: ctx.profile.id,
|
|
1737
|
+
result: {
|
|
1738
|
+
queryCount: queries.length,
|
|
1739
|
+
requestedIdCount: ids.length,
|
|
1740
|
+
candidateCount: candidateRows.length,
|
|
1741
|
+
clusterCount: clusters.length,
|
|
1742
|
+
duplicateClusterCount: clusters.filter((cluster) => cluster.members.length > 1).length,
|
|
1743
|
+
strict,
|
|
1744
|
+
strictThreshold,
|
|
1745
|
+
titleSimilarityThreshold,
|
|
1746
|
+
canonical,
|
|
1747
|
+
clusters: clusterRows,
|
|
1748
|
+
},
|
|
1749
|
+
};
|
|
1750
|
+
}
|
|
1751
|
+
|
|
1752
|
+
/**
 * Shape a single tree node for tool output.
 *
 * @param {object} doc - Document row backing this node.
 * @param {Array<object>} children - Child nodes that were already shaped.
 * @param {string} view - "full" embeds the whole row under `document`; any
 *   other value produces the compact summary shape.
 * @param {number} depth - Depth of this node (roots are built at depth 0).
 * @returns {object} The shaped node, always carrying `depth` and `children`.
 */
function shapeTreeNode(doc, children, view, depth) {
  const embedWholeDocument = view === "full";

  if (!embedWholeDocument) {
    // Summary shape: only the navigation-relevant fields plus a child counter.
    const { id, title, collectionId, parentDocumentId, updatedAt, publishedAt, urlId } = doc;
    return {
      depth,
      id,
      title,
      collectionId,
      parentDocumentId,
      updatedAt,
      publishedAt,
      urlId,
      childCount: children.length,
      children,
    };
  }

  return { depth, document: doc, children };
}
|
|
1774
|
+
|
|
1775
|
+
/**
 * Assemble a parent/child tree from a flat list of document rows.
 *
 * A row is a root when it has no `parentDocumentId` or when its parent is not
 * part of `docs`. Roots and siblings keep the order in which they appear in
 * `docs`. Children of a node sitting at `maxDepth` are not expanded; they are
 * emitted as `{ depth, id, truncated: true, children: [] }` stubs.
 *
 * Rows repeating an id that was already seen are ignored (first occurrence
 * wins), so a document listed twice is rendered once.
 *
 * Rows whose parent chain loops back on itself can never be reached from a
 * real root. One member of each such loop is promoted to an extra root
 * (appended after the real roots) so the loop and everything hanging off it
 * stays visible; the spot where the loop closes is rendered as a
 * `cycleDetected: true` stub.
 *
 * @param {object} params
 * @param {Array<object>} params.docs - Rows exposing `id`, `title` and `parentDocumentId`.
 * @param {string} params.view - Forwarded unchanged to `shapeTreeNode`.
 * @param {number} params.maxDepth - Deepest level whose children are still expanded.
 * @returns {{ tree: Array<object>, rootCount: number }} Shaped root nodes and
 *   the number of roots (promoted loop entries included).
 */
function buildTree({ docs, view, maxDepth }) {
  // First occurrence wins; Map iteration preserves insertion (= input) order.
  const byId = new Map();
  for (const doc of docs) {
    if (!byId.has(doc.id)) {
      byId.set(doc.id, doc);
    }
  }
  const uniqueDocs = [...byId.values()];

  const hasKnownParent = (doc) => Boolean(doc.parentDocumentId) && byId.has(doc.parentDocumentId);

  // parent id -> child ids, filled in input order so no later sort is needed.
  const childIds = new Map();
  for (const doc of uniqueDocs) {
    if (!hasKnownParent(doc)) {
      continue;
    }
    const siblings = childIds.get(doc.parentDocumentId);
    if (siblings) {
      siblings.push(doc.id);
    } else {
      childIds.set(doc.parentDocumentId, [doc.id]);
    }
  }

  const rootIds = uniqueDocs.filter((doc) => !hasKnownParent(doc)).map((doc) => doc.id);

  // Reachability is tracked separately from rendering because rendering stops
  // at maxDepth while the documents below that level are still attached.
  const reachable = new Set();
  const markReachableFrom = (startId) => {
    const pending = [startId];
    while (pending.length > 0) {
      const id = pending.pop();
      if (reachable.has(id)) {
        continue;
      }
      reachable.add(id);
      pending.push(...(childIds.get(id) || []));
    }
  };
  rootIds.forEach(markReachableFrom);

  // Walk up the parent chain of an unreachable row until an id repeats; the
  // repeated id is guaranteed to sit on the loop itself.
  const findLoopEntryId = (doc) => {
    const walked = new Set();
    let cursor = doc;
    while (cursor && !walked.has(cursor.id)) {
      walked.add(cursor.id);
      cursor = byId.get(cursor.parentDocumentId);
    }
    return cursor ? cursor.id : doc.id;
  };

  for (const doc of uniqueDocs) {
    if (reachable.has(doc.id)) {
      continue;
    }
    const entryId = findLoopEntryId(doc);
    rootIds.push(entryId);
    markReachableFrom(entryId);
  }

  function buildNode(docId, depth, trail) {
    const doc = byId.get(docId);
    if (!doc) {
      return null;
    }

    // The id is already on the path from the root: the parent chain loops.
    if (trail.has(docId)) {
      return {
        depth,
        id: doc.id,
        title: doc.title,
        cycleDetected: true,
        children: [],
      };
    }

    const nextTrail = new Set(trail).add(docId);

    const children = (childIds.get(docId) || [])
      .map((childId) => {
        if (depth >= maxDepth) {
          return {
            depth: depth + 1,
            id: childId,
            truncated: true,
            children: [],
          };
        }
        return buildNode(childId, depth + 1, nextTrail);
      })
      .filter(Boolean);

    return shapeTreeNode(doc, children, view, depth);
  }

  const tree = rootIds.map((id) => buildNode(id, 0, new Set())).filter(Boolean);
  return { tree, rootCount: rootIds.length };
}
|
|
1839
|
+
|
|
1840
|
+
/**
 * Handler for `collections.tree`: list a collection's documents page by page
 * and return them arranged as a parent/child tree.
 *
 * Reads from `args`: `collectionId` (required), `includeDrafts`, `maxDepth`
 * (fallback 6, floored at 0), `pageSize` (fallback 100, clamped to 1..100),
 * `maxPages` (fallback 20, at least 1), `maxAttempts` (fallback 2), `view`
 * (fallback "summary"), `sort` (fallback "index"), `direction` (fallback
 * "ASC") and `statusFilter`.
 *
 * @param {object} ctx - Tool context exposing `client.call` and `profile.id`.
 * @param {object} args - Tool arguments, see above.
 * @returns {Promise<object>} Envelope with `tool`, `profile`, `collectionId` and `result`.
 * @throws {CliError} When `args.collectionId` is missing.
 */
async function collectionsTreeTool(ctx, args) {
  const { collectionId } = args;
  if (!collectionId) {
    throw new CliError("collections.tree requires args.collectionId");
  }

  const includeDrafts = Boolean(args.includeDrafts);
  const maxDepth = Math.max(0, toInteger(args.maxDepth, 6));
  const pageSize = Math.max(1, Math.min(100, toInteger(args.pageSize, 100)));
  const maxPages = Math.max(1, toInteger(args.maxPages, 20));
  const maxAttempts = toInteger(args.maxAttempts, 2);
  const view = args.view || "summary";

  // NOTE: when maxPages is exhausted the loop simply stops; nothing in the
  // result signals that further pages may exist.
  const collected = [];
  let pagesFetched = 0;
  while (pagesFetched < maxPages) {
    const requestBody =
      compactValue({
        collectionId,
        limit: pageSize,
        offset: pagesFetched * pageSize,
        sort: args.sort || "index",
        direction: args.direction || "ASC",
        statusFilter: normalizeStatusFilter(args.statusFilter),
      }) || {};

    const response = await ctx.client.call("documents.list", requestBody, { maxAttempts });
    const data = response.body?.data;
    const pageRows = Array.isArray(data) ? data : [];
    collected.push(...pageRows);
    pagesFetched += 1;

    // A short page means the listing is exhausted.
    if (pageRows.length < pageSize) {
      break;
    }
  }

  // Without includeDrafts, rows lacking publishedAt are left out.
  const visibleRows = includeDrafts ? collected : collected.filter((row) => Boolean(row.publishedAt));
  const normalizedDocs = visibleRows.map((row) => normalizeDocumentRow(row, "full")).filter(Boolean);
  const { tree, rootCount } = buildTree({ docs: normalizedDocs, view, maxDepth });

  return {
    tool: "collections.tree",
    profile: ctx.profile.id,
    collectionId,
    result: {
      includeDrafts,
      maxDepth,
      totalDocuments: normalizedDocs.length,
      rootCount,
      tree,
    },
  };
}
|
|
1890
|
+
|
|
1891
|
+
/**
 * Fetch several documents through `documents.info`, never rejecting for a
 * single failed id.
 *
 * Falsy ids are dropped and repeated ids are fetched once. When `cache` is a
 * Map it is shared across calls: an in-flight promise is stored first and then
 * replaced by the settled item, so failed lookups are remembered as well.
 *
 * @param {object} ctx - Tool context exposing `client.call`.
 * @param {Array<string>} ids - Document ids to load.
 * @param {object} options
 * @param {number} options.maxAttempts - Forwarded to `client.call`.
 * @param {number} options.concurrency - Worker count handed to `mapLimit` (at least 1).
 * @param {Map} [options.cache] - Optional id -> item (or pending promise) cache.
 * @returns {Promise<{ byId: Map, items: Array<object> }>} `items` holds one
 *   `{ id, ok, document | error }` entry per unique id; `byId` maps the ids
 *   that yielded a document to that document.
 */
async function fetchDocumentsByIds(ctx, ids, { maxAttempts, concurrency, cache }) {
  const wantedIds = Array.from(new Set(ids.filter(Boolean)));
  const sharedCache = cache instanceof Map ? cache : null;

  // Converts both outcomes of a lookup into a plain result item.
  const loadDocument = async (id) => {
    try {
      const response = await ctx.client.call("documents.info", { id }, { maxAttempts });
      return { id, ok: true, document: response.body?.data || null };
    } catch (err) {
      return { id, ok: false, error: err?.message || String(err) };
    }
  };

  const resolveOne = async (id) => {
    if (sharedCache !== null && sharedCache.has(id)) {
      // The cached entry is either a settled item or a still-pending promise.
      return Promise.resolve(sharedCache.get(id));
    }

    const pending = loadDocument(id);
    if (sharedCache !== null) {
      sharedCache.set(id, pending);
    }

    const settled = await pending;
    if (sharedCache !== null) {
      sharedCache.set(id, settled);
    }
    return settled;
  };

  const items = await mapLimit(wantedIds, Math.max(1, concurrency), async (id) => resolveOne(id));

  const byId = new Map(
    items.filter((item) => item.ok && item.document).map((item) => [item.id, item.document])
  );

  return { byId, items };
}
|
|
1935
|
+
|
|
1936
|
+
/**
 * For each given document id, list the documents returned by `documents.list`
 * when filtered with `backlinkDocumentId`, newest `updatedAt` first.
 *
 * Falsy ids are dropped and repeated ids are queried once. A failing request
 * does not reject; it yields an item with `ok: false`, an empty `backlinks`
 * list and the error message.
 *
 * @param {object} ctx - Tool context exposing `client.call`.
 * @param {Array<string>} ids - Document ids to look up backlinks for.
 * @param {object} [options]
 * @param {number} [options.maxAttempts] - Fallback 2; forwarded to `client.call`.
 * @param {number} [options.limit] - Fallback 5, at least 1; rows requested per id.
 * @param {number} [options.concurrency] - Fallback 4, at least 1.
 * @param {string} [options.view] - Fallback "summary"; passed to `normalizeDocumentRow`.
 * @param {number} [options.excerptChars] - Fallback 180; passed to `normalizeDocumentRow`.
 * @returns {Promise<{ byId: Map, items: Array<object> }>} Per-id result items
 *   and a Map from id to its backlink rows.
 */
async function fetchBacklinksByDocumentIds(ctx, ids, options = {}) {
  const targetIds = Array.from(new Set(ids.filter(Boolean)));
  const maxAttempts = toInteger(options.maxAttempts, 2);
  const limit = Math.max(1, toInteger(options.limit, 5));
  const concurrency = Math.max(1, toInteger(options.concurrency, 4));
  const view = options.view || "summary";
  const excerptChars = toInteger(options.excerptChars, 180);

  const loadBacklinks = async (id) => {
    try {
      const request = {
        backlinkDocumentId: id,
        limit,
        offset: 0,
        sort: "updatedAt",
        direction: "DESC",
      };
      const response = await ctx.client.call("documents.list", request, { maxAttempts });
      const data = response.body?.data;
      const rows = Array.isArray(data) ? data : [];
      const backlinks = rows.map((row) => normalizeDocumentRow(row, view, excerptChars)).filter(Boolean);
      return { id, ok: true, backlinks };
    } catch (err) {
      return {
        id,
        ok: false,
        backlinks: [],
        error: err?.message || String(err),
      };
    }
  };

  const items = await mapLimit(targetIds, concurrency, async (id) => loadBacklinks(id));
  const byId = new Map(items.map((item) => [item.id, item.backlinks || []]));
  return { byId, items };
}
|
|
1979
|
+
|
|
1980
|
+
/**
 * Run one search query and hydrate its top-ranked documents.
 *
 * `args.mode === "titles"` targets `documents.search_titles`; anything else
 * targets `documents.search` and additionally sends the snippet word bounds.
 * The first `expandLimit` distinct document ids found in the hits are loaded
 * through `fetchDocumentsByIds`, sharing `hydrationCache` with other queries.
 *
 * @param {object} ctx - Tool context exposing `client.call`.
 * @param {string} query - Search text.
 * @param {object} args - Tool arguments (mode, limit, expandLimit, view, ...).
 * @param {Map} hydrationCache - Cache handed to `fetchDocumentsByIds`.
 * @returns {Promise<object>} `{ query, mode, searchCount, expandedCount, search, expanded, hydration }`.
 */
async function expandSingleQuery(ctx, query, args, hydrationCache) {
  const titlesOnly = args.mode === "titles";
  const mode = titlesOnly ? "titles" : "semantic";
  const endpoint = titlesOnly ? "documents.search_titles" : "documents.search";
  const limit = Math.max(1, toInteger(args.limit, 8));
  const expandLimit = Math.max(1, toInteger(args.expandLimit, 3));
  const maxAttempts = toInteger(args.maxAttempts, 2);

  const body =
    compactValue({
      query,
      collectionId: args.collectionId,
      documentId: args.documentId,
      userId: args.userId,
      limit,
      offset: toInteger(args.offset, 0),
      snippetMinWords: titlesOnly ? undefined : toInteger(args.snippetMinWords, 16),
      snippetMaxWords: titlesOnly ? undefined : toInteger(args.snippetMaxWords, 24),
      sort: args.sort,
      direction: args.direction,
    }) || {};

  const searchRes = await ctx.client.call(endpoint, body, { maxAttempts });
  const rawHits = searchRes.body?.data;
  const hits = Array.isArray(rawHits) ? rawHits : [];

  // A hit either wraps the document or is the document row itself.
  const hitDocumentId = (hit) => hit?.document?.id || hit?.id;

  // Distinct ids in hit order, capped at expandLimit.
  const pickedIds = new Set();
  for (const hit of hits) {
    const id = hitDocumentId(hit);
    if (id) {
      pickedIds.add(id);
    }
    if (pickedIds.size >= expandLimit) {
      break;
    }
  }
  const topIds = [...pickedIds];

  const hydrate = await fetchDocumentsByIds(ctx, topIds, {
    maxAttempts,
    concurrency: Math.max(1, toInteger(args.hydrateConcurrency, 4)),
    cache: hydrationCache,
  });

  const view = args.view || "summary";
  const contextChars = toInteger(args.contextChars, 200);

  // First hit per id, so each expanded row is paired with its best-ranked hit.
  const firstHitById = new Map();
  for (const hit of hits) {
    const id = hitDocumentId(hit);
    if (id && !firstHitById.has(id)) {
      firstHitById.set(id, hit);
    }
  }

  const clipContext = (value) => {
    if (typeof value !== "string") {
      return undefined;
    }
    return value.length > contextChars ? `${value.slice(0, contextChars)}...` : value;
  };

  const expanded = [];
  for (const id of topIds) {
    const doc = hydrate.byId.get(id);
    if (!doc) {
      // Hydration failed or returned nothing for this id.
      continue;
    }

    const hit = firstHitById.get(id) || {};
    const rankingValue = Number(hit?.ranking);
    const ranking = Number.isFinite(rankingValue) ? rankingValue : undefined;
    const context = clipContext(hit?.context);
    const document = normalizeDocumentRow(doc, view, toInteger(args.excerptChars, 200));

    if (view === "ids") {
      expanded.push({ id, title: doc.title, ranking });
    } else if (view === "full") {
      expanded.push({ id, ranking, context, document, searchHit: hit });
    } else {
      expanded.push({ id, title: doc.title, ranking, context, document });
    }
  }

  // The raw hit list stays compact even when the expanded rows are "full".
  const searchView = view === "full" ? "summary" : view;
  const compactSearchHits = hits.map((hit) => normalizeSearchHit(hit, searchView, contextChars));

  const okCount = hydrate.items.filter((item) => item.ok).length;
  const failedCount = hydrate.items.filter((item) => !item.ok).length;

  return {
    query,
    mode,
    searchCount: hits.length,
    expandedCount: expanded.length,
    search: compactSearchHits,
    expanded,
    hydration: {
      requested: topIds.length,
      ok: okCount,
      failed: failedCount,
    },
  };
}
|
|
2082
|
+
|
|
2083
|
+
/**
 * Handler for `search.expand`: run one or more queries through
 * `expandSingleQuery`, sharing a single hydration cache between them.
 *
 * With exactly one query (and no `args.forceGroupedResult`) the single query
 * result is returned directly. Otherwise the per-query results are returned
 * together with `mergedExpanded`, where each expanded document appears once
 * and lists every query that surfaced it.
 *
 * @param {object} ctx - Tool context exposing `client.call` and `profile.id`.
 * @param {object} args - Tool arguments; needs `query` or `queries`.
 * @returns {Promise<object>} Tool result envelope.
 * @throws {CliError} When neither `args.query` nor `args.queries[]` yields a query.
 */
async function searchExpandTool(ctx, args) {
  let queries = ensureStringArray(args.queries, "queries");
  if (!queries) {
    queries = args.query ? [String(args.query)] : [];
  }
  if (queries.length === 0) {
    throw new CliError("search.expand requires args.query or args.queries[]");
  }

  const hydrationCache = new Map();
  const workerCount = Math.max(1, toInteger(args.concurrency, 4));
  const perQuery = await mapLimit(queries, workerCount, async (query) =>
    expandSingleQuery(ctx, query, args, hydrationCache)
  );

  const returnUngrouped = queries.length === 1 && !args.forceGroupedResult;
  if (returnUngrouped) {
    const [only] = perQuery;
    return {
      tool: "search.expand",
      profile: ctx.profile.id,
      query: only.query,
      result: only,
    };
  }

  // Merge expanded rows across queries; the first sighting supplies the row,
  // later sightings only extend its `queries` list.
  const mergedById = new Map();
  for (const group of perQuery) {
    for (const item of group.expanded || []) {
      if (!item.id) {
        continue;
      }
      const known = mergedById.get(item.id);
      if (known) {
        known.queries = [...new Set([...(known.queries || []), group.query])];
      } else {
        mergedById.set(item.id, { ...item, queries: [group.query] });
      }
    }
  }

  return {
    tool: "search.expand",
    profile: ctx.profile.id,
    queryCount: perQuery.length,
    result: {
      perQuery,
      mergedExpanded: [...mergedById.values()],
    },
  };
}
|
|
2132
|
+
|
|
2133
|
+
/**
 * Registry of the navigation tools defined in this module, keyed by tool name.
 *
 * Every entry carries:
 * - `signature`: human-readable call signature of the tool,
 * - `description`: one-sentence summary,
 * - `usageExample`: a sample `{ tool, args }` invocation,
 * - `bestPractices`: short usage hints,
 * - `handler`: the async function implementing the tool.
 */
export const NAVIGATION_TOOLS = {
  // Fuzzy reference -> ranked document candidates.
  "documents.resolve": {
    signature:
      "documents.resolve(args: { query?: string; queries?: string[]; collectionId?: string; limit?: number; strict?: boolean; strictThreshold?: number; view?: 'ids'|'summary'|'full'; concurrency?: number; })",
    description:
      "Resolve fuzzy document references by combining title search with semantic fallback and returning confidence-ranked candidates.",
    usageExample: {
      tool: "documents.resolve",
      args: {
        queries: ["incident handbook", "oncall escalation"],
        limit: 6,
        view: "summary",
      },
    },
    bestPractices: [
      "Use `strict=true` when only near-exact matches should be auto-selected.",
      "Start with `view=ids` in planner loops, then hydrate selected IDs separately.",
      "Send multiple references in `queries[]` to reduce tool round trips.",
    ],
    handler: documentsResolveTool,
  },

  // Document/share URL -> ranked document candidates.
  "documents.resolve_urls": {
    signature:
      "documents.resolve_urls(args: { url?: string; urls?: string[]; collectionId?: string; limit?: number; strict?: boolean; strictHost?: boolean; strictThreshold?: number; view?: 'ids'|'summary'|'full'; concurrency?: number; snippetMinWords?: number; snippetMaxWords?: number; excerptChars?: number; forceGroupedResult?: boolean; maxAttempts?: number; })",
    description:
      "Resolve document URLs (doc/share links) into confidence-ranked document candidates with URL-id/host-aware boosts.",
    usageExample: {
      tool: "documents.resolve_urls",
      args: {
        urls: [
          "https://handbook.example.com/doc/event-tracking-data-A7hLXuHZJl",
          "https://handbook.example.com/doc/campaign-detail-page-GWK1uA8w35#d-GWK1uA8w35",
        ],
        strict: true,
        strictThreshold: 0.85,
        view: "summary",
      },
    },
    bestPractices: [
      "Use strictHost=true when links should belong to the currently selected profile host only.",
      "Use strict=true for automation paths that should avoid weak URL matches.",
      "Start with view=ids, then hydrate selected IDs with documents.info for low-token loops.",
    ],
    handler: documentsResolveUrlsTool,
  },

  // Candidate set -> clusters with one canonical document each.
  "documents.canonicalize_candidates": {
    signature:
      "documents.canonicalize_candidates(args: { query?: string; queries?: string[]; ids?: string[]; collectionId?: string; limit?: number; strict?: boolean; strictThreshold?: number; titleSimilarityThreshold?: number; view?: 'ids'|'summary'|'full'; concurrency?: number; hydrateConcurrency?: number; snippetMinWords?: number; snippetMaxWords?: number; excerptChars?: number; maxAttempts?: number; })",
    description:
      "Canonicalize noisy/duplicate candidate sets into stable clusters with one preferred canonical document per cluster.",
    usageExample: {
      tool: "documents.canonicalize_candidates",
      args: {
        queries: ["campaign detail", "campaign tracking event"],
        strict: true,
        titleSimilarityThreshold: 0.8,
        view: "summary",
      },
    },
    bestPractices: [
      "Feed this tool with multi-query retrieval inputs before answer generation to reduce duplicate/noisy context.",
      "Use strict=true + strictThreshold when low-confidence matches should be dropped from canonical clusters.",
      "Inspect duplicateIds/memberCount to detect ambiguous sources before applying changes.",
    ],
    handler: documentsCanonicalizeCandidatesTool,
  },

  // Collection -> parent/child document tree.
  "collections.tree": {
    signature:
      "collections.tree(args: { collectionId: string; includeDrafts?: boolean; maxDepth?: number; view?: 'summary'|'full'; pageSize?: number; maxPages?: number; })",
    description: "Build a parent/child document tree for a collection without modifying server data.",
    usageExample: {
      tool: "collections.tree",
      args: {
        collectionId: "collection-id",
        includeDrafts: false,
        maxDepth: 4,
        view: "summary",
      },
    },
    bestPractices: [
      "Keep `view=summary` and low `maxDepth` for navigation tasks to save tokens.",
      "Set includeDrafts=true only if draft pages matter for your workflow.",
      "Use output tree IDs as anchors for targeted `documents.info` calls.",
    ],
    handler: collectionsTreeTool,
  },

  // Search plus hydration of the top hits in one call.
  "search.expand": {
    signature:
      "search.expand(args: { query?: string; queries?: string[]; mode?: 'semantic'|'titles'; limit?: number; expandLimit?: number; view?: 'ids'|'summary'|'full'; concurrency?: number; hydrateConcurrency?: number; })",
    description:
      "Search and then hydrate top-ranked documents in one call, returning compact joined search+document output.",
    usageExample: {
      tool: "search.expand",
      args: {
        query: "postmortem template",
        mode: "semantic",
        limit: 8,
        expandLimit: 3,
        view: "summary",
      },
    },
    bestPractices: [
      "Use low `expandLimit` (2-5) to minimize payload while preserving answer quality.",
      "Use `queries[]` for multi-intent retrieval in one request.",
      "For multi-query runs, duplicate document hydration is automatically cached within the same tool call.",
      "Prefer `view=summary` unless a full markdown body is strictly needed.",
    ],
    handler: searchExpandTool,
  },

  // Multi-query research retrieval with reranking and follow-up support.
  "search.research": {
    signature:
      "search.research(args: { question?: string; query?: string; queries?: string[]; collectionId?: string; limitPerQuery?: number; offset?: number; includeTitleSearch?: boolean; includeSemanticSearch?: boolean; precisionMode?: 'balanced'|'precision'|'recall'; minScore?: number; diversify?: boolean; diversityLambda?: number; rrfK?: number; expandLimit?: number; maxDocuments?: number; seenIds?: string[]; view?: 'ids'|'summary'|'full'; perQueryView?: 'ids'|'summary'|'full'; perQueryHitLimit?: number; evidencePerDocument?: number; suggestedQueryLimit?: number; includePerQuery?: boolean; includeExpanded?: boolean; includeCoverage?: boolean; includeBacklinks?: boolean; backlinksLimit?: number; backlinksConcurrency?: number; concurrency?: number; hydrateConcurrency?: number; contextChars?: number; excerptChars?: number; maxAttempts?: number; })",
    description:
      "Run multi-query, multi-source research retrieval with weighted reranking, optional diversification, hydration, and follow-up cursor support for multi-turn QA.",
    usageExample: {
      tool: "search.research",
      args: {
        question: "How do we run incident communication and escalation?",
        queries: ["incident comms", "escalation matrix"],
        includeTitleSearch: true,
        includeSemanticSearch: true,
        precisionMode: "precision",
        limitPerQuery: 8,
        perQueryHitLimit: 4,
        evidencePerDocument: 3,
        expandLimit: 5,
        includeBacklinks: true,
        backlinksLimit: 3,
        view: "summary",
      },
    },
    bestPractices: [
      "Pass prior `next.seenIds` into `seenIds` for follow-up turns to avoid repetition.",
      "Use `precisionMode=precision` for answer-grade retrieval and `precisionMode=recall` for exploration.",
      "Set `perQueryView=ids` + `perQueryHitLimit` to reduce token cost while preserving traceability.",
      "Enable `includeBacklinks` when one-call context gathering is more important than raw latency.",
      "Keep `expandLimit` small and raise only when answer confidence is insufficient.",
    ],
    handler: searchResearchTool,
  },
};
|