@apmantza/greedysearch-pi 1.9.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +110 -14
- package/README.md +86 -41
- package/bin/cdp.mjs +1153 -1108
- package/bin/launch.mjs +11 -0
- package/bin/search.mjs +886 -674
- package/extractors/bing-copilot.mjs +528 -374
- package/extractors/chatgpt.mjs +436 -0
- package/extractors/common.mjs +837 -645
- package/extractors/consensus.mjs +655 -0
- package/extractors/consent.mjs +421 -388
- package/extractors/gemini.mjs +335 -217
- package/extractors/logically.mjs +567 -0
- package/extractors/selectors.mjs +3 -2
- package/extractors/semantic-scholar.mjs +219 -0
- package/index.ts +2 -1
- package/package.json +14 -6
- package/skills/greedy-search/skill.md +9 -12
- package/src/fetcher.mjs +8 -1
- package/src/formatters/results.ts +163 -128
- package/src/search/browser-lifecycle.mjs +27 -5
- package/src/search/chrome.mjs +653 -590
- package/src/search/constants.mjs +150 -39
- package/src/search/engines.mjs +114 -76
- package/src/search/fetch-source.mjs +566 -451
- package/src/search/pdf.mjs +68 -0
- package/src/search/recovery.mjs +51 -45
- package/src/search/research.mjs +2579 -0
- package/src/search/sources.mjs +77 -25
- package/src/search/synthesis-runner.mjs +142 -57
- package/src/search/synthesis.mjs +286 -246
- package/src/tools/greedy-search-handler.ts +189 -45
- package/src/tools/shared.ts +187 -186
- package/src/types.ts +110 -104
- package/test.mjs +1342 -534
|
@@ -0,0 +1,2579 @@
|
|
|
1
|
+
// src/search/research.mjs — Iterative deep-research orchestration
|
|
2
|
+
//
|
|
3
|
+
// Research mode borrows the small-loop architecture from open deep-research:
|
|
4
|
+
// plan focused queries, run broad search, extract compact learnings + follow-up
|
|
5
|
+
// directions, then produce a final report. It deliberately reuses GreedySearch's
|
|
6
|
+
// no-API browser engines and source fetchers instead of Firecrawl/OpenAI.
|
|
7
|
+
|
|
8
|
+
import { spawn } from "node:child_process";
|
|
9
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
10
|
+
import { join } from "node:path";
|
|
11
|
+
import { fileURLToPath } from "node:url";
|
|
12
|
+
import {
|
|
13
|
+
buildSourceRegistry,
|
|
14
|
+
classifySourceType,
|
|
15
|
+
computeCompositeScore,
|
|
16
|
+
mergeFetchDataIntoSources,
|
|
17
|
+
normalizeUrl,
|
|
18
|
+
trimText,
|
|
19
|
+
} from "./sources.mjs";
|
|
20
|
+
import { parseStructuredJson } from "./synthesis.mjs";
|
|
21
|
+
import { RESEARCH_ENGINES } from "./constants.mjs";
|
|
22
|
+
import { runGeminiPrompt } from "./synthesis-runner.mjs";
|
|
23
|
+
|
|
24
|
+
// Directory that contains this module. The replace() removes a leading slash
// that sits directly in front of an upper-case drive letter ("/C:/x" -> "C:/x").
const moduleDirPath = fileURLToPath(new URL(".", import.meta.url));
const __dir = moduleDirPath.replace(/^\/([A-Z]:)/, "$1");

// Location of bin/search.mjs, two directory levels above this module.
const SEARCH_BIN = join(__dir, "..", "..", "bin", "search.mjs");

// Default root for research bundles: <current working dir>/.pi/greedysearch-research.
const DEFAULT_RESEARCH_BUNDLE_ROOT = join(
  process.cwd(),
  ".pi",
  "greedysearch-research",
);
|
|
34
|
+
|
|
35
|
+
/**
 * Turn an arbitrary value into a short, hyphen-separated slug.
 *
 * The value is stringified (falsy values become "research"), lower-cased, and
 * every run of characters outside a-z / 0-9 collapses into a single hyphen.
 * The result is capped at 60 characters. A hyphen is stripped again after the
 * cut, so truncating right behind a separator never leaves a dangling "-".
 *
 * @param {unknown} value - Text to slugify.
 * @returns {string} Slug of at most 60 characters; "research" when nothing usable remains.
 */
function slugifyResearchName(value) {
  const slug = String(value || "research")
    .toLowerCase()
    .replaceAll(/[^a-z0-9]+/g, "-")
    .replaceAll(/^-|-$/g, "")
    .slice(0, 60)
    // The cut at 60 can end on a separator; runs are already collapsed, so at
    // most one trailing hyphen can remain here.
    .replace(/-$/, "");
  return slug || "research";
}
|
|
43
|
+
|
|
44
|
+
/**
 * Collect distinct, non-empty strings in first-seen order.
 *
 * Each item is stringified (falsy items become "") and passed through
 * trimText with a 1000 limit; results that are empty or already collected are
 * skipped. Collection stops once `limit` strings have been gathered (the check
 * runs after each insertion).
 *
 * @param {Iterable<unknown>|null|undefined} items - Candidate values.
 * @param {number} [limit=Infinity] - Stop after this many strings were collected.
 * @returns {string[]} Distinct strings in encounter order.
 */
function uniqueStrings(items, limit = Infinity) {
  // A Set keeps insertion order, so it serves as both the seen-filter and the output.
  const collected = new Set();
  for (const raw of items || []) {
    const text = trimText(String(raw || ""), 1000);
    if (text && !collected.has(text)) {
      collected.add(text);
      if (collected.size >= limit) break;
    }
  }
  return [...collected];
}
|
|
56
|
+
|
|
57
|
+
/**
 * Forward all arguments to fetchMultipleSources from ./fetch-source.mjs.
 * The module is imported on demand, at call time.
 *
 * @param {...unknown} args - Passed through unchanged.
 * @returns {Promise<unknown>} Whatever fetchMultipleSources returns.
 */
async function fetchMultipleResearchSources(...args) {
  const fetchSourceModule = await import("./fetch-source.mjs");
  const fetchMultipleSources = fetchSourceModule.fetchMultipleSources;
  return fetchMultipleSources(...args);
}
|
|
61
|
+
|
|
62
|
+
/**
 * Forward all arguments to writeSourcesToFiles from ./file-sources.mjs.
 * The module is imported on demand, at call time.
 *
 * @param {...unknown} args - Passed through unchanged.
 * @returns {Promise<unknown>} Whatever writeSourcesToFiles returns.
 */
async function writeResearchSourcesToFiles(...args) {
  const fileSourcesModule = await import("./file-sources.mjs");
  const writeSourcesToFiles = fileSourcesModule.writeSourcesToFiles;
  return writeSourcesToFiles(...args);
}
|
|
66
|
+
|
|
67
|
+
/**
 * Clamp user-supplied research options into their supported ranges.
 *
 * - breadth: integer in 1..5 (fallback 3)
 * - iterations: integer in 1..3 (fallback 2)
 * - maxSources: integer in 3..12 (fallback 8). When omitted (null/undefined)
 *   it is derived as max(5, breadth * iterations * 2) before clamping.
 *
 * The options object itself is optional, so calling with no argument yields
 * the defaults instead of throwing on destructuring.
 *
 * @param {{breadth?: unknown, iterations?: unknown, maxSources?: unknown}} [options]
 * @returns {{breadth: number, iterations: number, maxSources: number}}
 */
export function clampResearchOptions({
  breadth = 3,
  iterations = 2,
  maxSources,
} = {}) {
  const safeBreadth = clampInt(breadth, 1, 5, 3);
  const safeIterations = clampInt(iterations, 1, 3, 2);
  const safeMaxSources = clampInt(
    maxSources ?? Math.max(5, safeBreadth * safeIterations * 2),
    3,
    12,
    8,
  );
  return {
    breadth: safeBreadth,
    iterations: safeIterations,
    maxSources: safeMaxSources,
  };
}
|
|
86
|
+
|
|
87
|
+
/**
 * Parse a value as a base-10 integer and clamp it to [min, max].
 *
 * @param {unknown} value - Value to parse; null/undefined are treated as "".
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @param {number} fallback - Returned when parsing does not give a finite number.
 * @returns {number} The clamped integer, or `fallback`.
 */
function clampInt(value, min, max, fallback) {
  const text = String(value ?? "");
  const parsed = Number.parseInt(text, 10);
  if (Number.isFinite(parsed)) {
    const atLeastMin = Math.max(min, parsed);
    return Math.min(max, atLeastMin);
  }
  return fallback;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Build the list of research queries for a round.
 *
 * Queries come from three places, in this order of insertion:
 *  1. `plan.queries` (strings or `{ query, researchGoal }` objects),
 *  2. the original query, placed at the front when `includeOriginal` is set,
 *  3. fixed expansion queries derived from the original query, added only
 *     while fewer than `breadth` queries have been collected.
 * Duplicates and anything listed in `exclude` are dropped by addResearchQuery.
 *
 * @param {{queries?: unknown[]}|null|undefined} plan - Planner output.
 * @param {string} originalQuery - The user's query.
 * @param {number} breadth - Maximum number of queries to return.
 * @param {{expand?: boolean, includeOriginal?: boolean, exclude?: Iterable<unknown>}} [options]
 * @returns {{query: string, researchGoal: string}[]} At most `breadth` queries.
 */
export function normalizeResearchQueries(
  plan,
  originalQuery,
  breadth,
  { expand = true, includeOriginal = true, exclude = [] } = {},
) {
  // Excluded queries are compared in sanitized, lower-case form.
  const blocked = new Set();
  for (const entry of exclude) {
    blocked.add(sanitizeResearchQuery(entry).toLowerCase());
  }

  const collected = [];
  const planned = Array.isArray(plan?.queries) ? plan.queries : [];
  planned.forEach((entry) => {
    const isPlainString = typeof entry === "string";
    addResearchQuery(
      collected,
      isPlainString ? entry : entry?.query,
      isPlainString ? "" : entry?.researchGoal || "",
      { exclude: blocked },
    );
  });

  if (includeOriginal) {
    addResearchQuery(collected, originalQuery, "Original user query", {
      prepend: true,
      exclude: blocked,
    });
  }

  if (expand) {
    // [query suffix, research goal] pairs appended to the original query.
    const expansions = [
      [
        "official docs GitHub",
        "Find primary project docs, repository details, and maintainer claims.",
      ],
      [
        "benchmarks limitations compatibility",
        "Validate performance claims and uncover unsupported APIs or caveats.",
      ],
      [
        "alternatives comparison production use cases",
        "Compare against conventional headless browsers and identify when to choose it.",
      ],
      [
        "anti bot detection Cloudflare screenshots visual rendering",
        "Check automation risks, rendering gaps, screenshots, and bot-detection behavior.",
      ],
    ].map(([suffix, researchGoal]) => ({
      query: `${originalQuery} ${suffix}`,
      researchGoal,
    }));

    for (const expansion of expansions) {
      if (collected.length >= breadth) break;
      addResearchQuery(collected, expansion.query, expansion.researchGoal, {
        exclude: blocked,
      });
    }
  }

  return collected.slice(0, breadth);
}
|
|
151
|
+
|
|
152
|
+
/**
 * Add a query to `queries` (mutating it) unless it is unusable or redundant.
 *
 * The query is skipped when it is not a non-empty string, sanitizes to an
 * empty string, appears in `exclude`, or already exists in `queries`
 * (both comparisons are case-insensitive).
 *
 * @param {{query: string, researchGoal: string}[]} queries - List to extend in place.
 * @param {unknown} query - Candidate query text.
 * @param {string} [researchGoal=""] - Goal text, passed through trimText with a 320 limit.
 * @param {{prepend?: boolean, exclude?: Set<string>}} [options] - `prepend` inserts at the
 *   front; `exclude` holds lower-cased queries to reject.
 * @returns {void}
 */
function addResearchQuery(
  queries,
  query,
  researchGoal = "",
  { prepend = false, exclude = new Set() } = {},
) {
  if (typeof query !== "string" || query === "") return;

  const clean = sanitizeResearchQuery(query);
  if (!clean) return;

  const key = clean.toLowerCase();
  if (exclude.has(key)) return;
  if (queries.some((existing) => existing.query.toLowerCase() === key)) return;

  const entry = { query: clean, researchGoal: trimText(researchGoal, 320) };
  if (prepend) {
    queries.unshift(entry);
  } else {
    queries.push(entry);
  }
}
|
|
171
|
+
|
|
172
|
+
/**
 * Normalize a query: stringify it, replace markdown links by their label
 * text, then collapse whitespace runs and trim the ends.
 *
 * @param {unknown} query - Raw query value.
 * @returns {string} The cleaned query.
 */
function sanitizeResearchQuery(query) {
  const text = String(query);
  const withoutLinks = stripMarkdownLinks(text);
  return collapseWhitespace(withoutLinks);
}
|
|
175
|
+
|
|
176
|
+
/**
 * Replace markdown links `[label](http(s)://...)` with just their label.
 *
 * A bracket pair counts as a link only when the label is non-empty, "]" is
 * immediately followed by "(", a closing ")" exists, and the target (leading
 * whitespace ignored) starts with "http://" or "https://". Anything else is
 * copied through unchanged.
 *
 * @param {string} value - Text that may contain markdown links.
 * @returns {string} Text with qualifying links reduced to their labels.
 */
function stripMarkdownLinks(value) {
  // Returns the positions of "]" and ")" when a link starts at `labelStart`,
  // or null when the text at that position is not a qualifying link.
  const matchLinkAt = (labelStart) => {
    const labelEnd = value.indexOf("]", labelStart + 1);
    const hasLabel = labelEnd !== -1 && labelEnd !== labelStart + 1;
    if (!hasLabel || value[labelEnd + 1] !== "(") return null;

    const urlEnd = value.indexOf(")", labelEnd + 2);
    if (urlEnd === -1) return null;

    const target = value.slice(labelEnd + 2, urlEnd).trimStart();
    const isHttp = target.startsWith("http://") || target.startsWith("https://");
    return isHttp ? { labelEnd, urlEnd } : null;
  };

  const parts = [];
  let cursor = 0;
  while (cursor < value.length) {
    const labelStart = value.indexOf("[", cursor);
    if (labelStart === -1) {
      parts.push(value.slice(cursor));
      break;
    }

    const link = matchLinkAt(labelStart);
    if (link === null) {
      // Not a link: keep everything up to and including this "[" and resume after it.
      parts.push(value.slice(cursor, labelStart + 1));
      cursor = labelStart + 1;
    } else {
      parts.push(
        value.slice(cursor, labelStart),
        value.slice(labelStart + 1, link.labelEnd),
      );
      cursor = link.urlEnd + 1;
    }
  }
  return parts.join("");
}
|
|
213
|
+
|
|
214
|
+
/**
 * Collapse every run of spaces, tabs, newlines and carriage returns into a
 * single space, then trim the result.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} Text with collapsed whitespace and trimmed ends.
 */
function collapseWhitespace(value) {
  const blanks = new Set([" ", "\t", "\n", "\r"]);
  const pieces = [];
  let inBlankRun = false;
  for (const ch of value) {
    const isBlank = blanks.has(ch);
    if (!isBlank) {
      pieces.push(ch);
    } else if (!inBlankRun) {
      // Only the first character of a blank run emits a space.
      pieces.push(" ");
    }
    inBlankRun = isBlank;
  }
  return pieces.join("").trim();
}
|
|
228
|
+
|
|
229
|
+
/**
 * Tokenize a string into lowercase word tokens for Jaccard similarity.
 *
 * The text is lower-cased, NFD-normalized and stripped of combining marks in
 * U+0300..U+036F, so accented Latin letters fold to their base letters. It is
 * then split on anything that is not a Unicode letter, mark, number or "_".
 * Splitting is Unicode-aware on purpose: an ASCII-only `\w` split would drop
 * every non-Latin word and make unrelated non-Latin strings tokenize to the
 * same empty set. Tokens of length 1 are discarded.
 *
 * @param {unknown} value - Text to tokenize (stringified first).
 * @returns {Set<string>} Distinct tokens in first-seen order.
 */
export function tokenSet(value) {
  return new Set(
    String(value)
      .toLowerCase()
      .normalize("NFD")
      .replaceAll(/[\u0300-\u036f]/g, "")
      .split(/[^\p{L}\p{M}\p{N}_]+/u)
      .filter((t) => t.length > 1),
  );
}
|
|
242
|
+
|
|
243
|
+
/**
 * Jaccard similarity of two strings, computed over their tokenSet() tokens.
 *
 * @param {unknown} a - First text.
 * @param {unknown} b - Second text.
 * @returns {number} |A ∩ B| / |A ∪ B| in 0..1; 1 when neither text yields any token.
 */
export function jaccardSimilarity(a, b) {
  const left = tokenSet(a);
  const right = tokenSet(b);

  let shared = 0;
  left.forEach((token) => {
    if (right.has(token)) shared += 1;
  });

  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const unionSize = left.size + right.size - shared;
  return unionSize === 0 ? 1 : shared / unionSize;
}
|
|
258
|
+
|
|
259
|
+
/**
 * Decide whether a query repeats, or nearly repeats, one that was already used.
 *
 * A query is rejected when, in sanitized lower-case form, it
 *  - is contained in `usedQueries`, or
 *  - equals the original query while `roundIndex` is greater than 0, or
 *  - reaches `threshold` Jaccard similarity with any used query.
 *
 * @param {unknown} query - Candidate query.
 * @param {Set<string>} usedQueries - Previously used queries (sanitized, lower-case).
 * @param {{threshold?: number, roundIndex?: number, originalQuery?: string|null}} [options]
 * @returns {boolean} true when the query should be rejected.
 */
export function isDuplicateQuery(
  query,
  usedQueries,
  { threshold = 0.75, roundIndex = 0, originalQuery = null } = {},
) {
  const candidate = sanitizeResearchQuery(query).toLowerCase();

  if (usedQueries.has(candidate)) return true;

  const pastFirstRound = roundIndex > 0;
  if (
    originalQuery &&
    pastFirstRound &&
    candidate === sanitizeResearchQuery(originalQuery).toLowerCase()
  ) {
    return true;
  }

  return [...usedQueries].some(
    (previous) => jaccardSimilarity(candidate, previous) >= threshold,
  );
}
|
|
291
|
+
|
|
292
|
+
/**
 * Build the prompt text that asks for a quality assessment of a research run.
 *
 * The prompt contains fixed instructions, the original question, a JSON
 * summary of every round (query strings, learnings, gaps), up to 12
 * accumulated learnings, up to 8 known gaps, and an example of the JSON
 * response expected between BEGIN_JSON / END_JSON markers.
 *
 * @param {string} originalQuery - The research question.
 * @param {Array<{queries?: Array<{query?: string}>, learnings?: unknown[], gaps?: unknown[]}>} rounds
 * @param {unknown[]} allLearnings - Learnings gathered so far.
 * @param {unknown[]} allGaps - Gaps identified so far.
 * @returns {string} Newline-joined prompt.
 */
function buildQualityEvaluationPrompt(
  originalQuery,
  rounds,
  allLearnings,
  allGaps,
) {
  const pretty = (value) => JSON.stringify(value, null, 2);

  // Reduce each round to plain query strings plus its learnings and gaps.
  const roundSummaries = rounds.map((round) => {
    const queryTexts = round.queries?.map((entry) => entry.query || "") || [];
    return {
      queries: queryTexts,
      learnings: round.learnings || [],
      gaps: round.gaps || [],
    };
  });

  const instructions = [
    "You are evaluating the quality of an iterative research run.",
    "Assess coverage across: official sources, limitations/risks, benchmarks/performance, production usage, and counter-evidence.",
    "Score each dimension 0-10. Overall score 0-10.",
    "Identify remaining knowledge gaps.",
    "Propose targeted next actions (search queries or direct URL fetches) that would most improve the research.",
    "Decide whether to continue or stop.",
    "terminationReason must be one of: quality_threshold | max_rounds | no_novel_actions | insufficient_evidence.",
  ];

  const context = [
    `Original research question: ${originalQuery}`,
    `Rounds completed: ${pretty(roundSummaries)}`,
    `Accumulated learnings: ${pretty(allLearnings.slice(0, 12))}`,
    `Known gaps: ${pretty(allGaps.slice(0, 8))}`,
  ];

  // Example payload showing the response shape the model should follow.
  const exampleResponse = {
    score: 7.5,
    coverage: {
      officialSources: 8,
      limitations: 5,
      benchmarks: 7,
      productionUseCases: 6,
      counterEvidence: 4,
    },
    knowledgeGaps: ["specific gap or missing evidence"],
    shouldContinue: true,
    terminationReason: "quality_threshold",
    nextActions: [
      { type: "search", query: "targeted search query" },
      { type: "fetchUrl", url: "https://example.com/primary-doc" },
    ],
  };

  const responseFormat = [
    "Respond ONLY with JSON wrapped in BEGIN_JSON / END_JSON markers:",
    "BEGIN_JSON",
    pretty(exampleResponse),
    "END_JSON",
  ];

  return [...instructions, "", ...context, "", ...responseFormat].join("\n");
}
|
|
347
|
+
|
|
348
|
+
/**
 * Derive fallback search queries from knowledge gaps.
 *
 * Gap i is combined with research angle (i mod 6): official docs, community
 * signals, benchmarks, limitations, production usage, counter-evidence.
 * Candidates that isDuplicateQuery() rejects against `usedQueries` are
 * skipped. Generation stops once `nextBreadth` queries were produced or the
 * gaps are exhausted.
 *
 * @param {string[]} gaps - Knowledge gaps to turn into queries.
 * @param {string} originalQuery - Used only by the counter-evidence angle.
 * @param {Set<string>} usedQueries - Queries that were already run.
 * @param {number} nextBreadth - Maximum number of fallback queries.
 * @param {number} roundIndex - Forwarded to isDuplicateQuery.
 * @returns {{query: string, researchGoal: string}[]} Fallback queries.
 */
export function buildFallbackQueriesFromGaps(
  gaps,
  originalQuery,
  usedQueries,
  nextBreadth,
  roundIndex,
) {
  // Angle templates of the form "<gap> <suffix>".
  const withSuffix = (suffix) => (gap) => `${gap} ${suffix}`;
  const angles = [
    { label: "official docs", template: withSuffix("official documentation") },
    { label: "community signals", template: withSuffix("GitHub issues discussions") },
    { label: "benchmarks", template: withSuffix("benchmarks performance comparison") },
    { label: "limitations", template: withSuffix("limitations risks caveats") },
    { label: "production usage", template: withSuffix("production deployment experience") },
    {
      label: "counter-evidence",
      template: (gap) => `${originalQuery} ${gap} counter evidence`,
    },
  ];

  const picked = [];
  let position = 0;
  while (position < gaps.length && picked.length < nextBreadth) {
    const gap = gaps[position];
    const { label, template } = angles[position % angles.length];
    position += 1;

    const candidate = template(gap);
    if (isDuplicateQuery(candidate, usedQueries, { roundIndex })) continue;

    picked.push({
      query: candidate,
      researchGoal: `Gap-driven: ${gap} (${label})`,
    });
  }
  return picked;
}
|
|
400
|
+
|
|
401
|
+
/**
 * Ask Gemini to assess the research run and normalize its answer.
 *
 * Normalization rules:
 *  - score: a numeric score is clamped to 0..10; otherwise the last entry of
 *    `qualityHistory` is used, or 5 when the history is empty.
 *  - knowledgeGaps: stringified, empty entries removed, at most 6.
 *  - nextActions: at most 5, passed through as-is.
 *  - shouldContinue: the model's boolean, otherwise `score < 8`.
 *  - terminationReason: the model's value, otherwise "quality_threshold" when
 *    score >= 8.5, otherwise null.
 *
 * Any failure is logged to stderr and answered with a continue-by-default
 * assessment that carries the error message in `evaluationError`.
 *
 * @param {string} originalQuery
 * @param {object[]} rounds
 * @param {unknown[]} allLearnings
 * @param {unknown[]} allGaps
 * @param {number[]} qualityHistory - Scores of earlier evaluations.
 * @returns {Promise<{score: number, coverage: object, knowledgeGaps: string[],
 *   shouldContinue: boolean, nextActions: unknown[], terminationReason: string|null,
 *   evaluationError: string}>}
 */
async function evaluateResearchQuality(
  originalQuery,
  rounds,
  allLearnings,
  allGaps,
  qualityHistory,
) {
  // Most recent recorded score, or 5 when nothing has been recorded yet.
  const lastKnownScore = () =>
    qualityHistory.length > 0 ? qualityHistory[qualityHistory.length - 1] : 5;

  try {
    const prompt = buildQualityEvaluationPrompt(
      originalQuery,
      rounds,
      allLearnings,
      allGaps,
    );
    const rawEvaluation = await runGeminiPrompt(prompt, { timeoutMs: 120000 });
    const evaluation = parseGeminiJson(rawEvaluation, {});

    let score;
    if (typeof evaluation.score === "number") {
      score = Math.min(10, Math.max(0, evaluation.score));
    } else {
      score = lastKnownScore();
    }

    let knowledgeGaps = [];
    if (Array.isArray(evaluation.knowledgeGaps)) {
      knowledgeGaps = evaluation.knowledgeGaps
        .map((gap) => String(gap))
        .filter(Boolean)
        .slice(0, 6);
    }

    let nextActions = [];
    if (Array.isArray(evaluation.nextActions)) {
      nextActions = evaluation.nextActions.slice(0, 5);
    }

    let shouldContinue = score < 8;
    if (typeof evaluation.shouldContinue === "boolean") {
      shouldContinue = evaluation.shouldContinue;
    }

    const impliedReason = score >= 8.5 ? "quality_threshold" : null;

    return {
      score,
      coverage: evaluation.coverage || {},
      knowledgeGaps,
      shouldContinue,
      nextActions,
      terminationReason: evaluation.terminationReason || impliedReason,
      evaluationError: "",
    };
  } catch (error) {
    process.stderr.write(
      `[greedysearch] Quality evaluation failed: ${error.message}\n`,
    );
    return {
      score: lastKnownScore(),
      coverage: {},
      knowledgeGaps: [],
      shouldContinue: true,
      nextActions: [],
      terminationReason: null,
      evaluationError: error.message,
    };
  }
}
|
|
474
|
+
|
|
475
|
+
/**
 * Condense per-engine search results into a compact summary object.
 *
 * Keys starting with "_" and engines with falsy values are skipped. An engine
 * with an `error` becomes `{ status: "error", error }`; any other engine
 * becomes `{ status: "ok", answer, sources }` with the answer trimmed via
 * trimText (limit 1400) and at most 5 sources reduced to `{ title, url }`.
 *
 * @param {Record<string, any>|null|undefined} result - Search result keyed by engine.
 * @returns {Record<string, object>} Summary keyed by engine name.
 */
function summarizeEngineAnswers(result) {
  const summaries = {};
  for (const [engine, value] of Object.entries(result || {})) {
    if (engine.startsWith("_") || !value) continue;

    if (value.error) {
      summaries[engine] = { status: "error", error: String(value.error) };
      continue;
    }

    const answer = trimText(value.answer || "", 1400);
    const topSources = Array.isArray(value.sources)
      ? value.sources.slice(0, 5)
      : [];
    summaries[engine] = {
      status: "ok",
      answer,
      sources: topSources.map((entry) => ({
        title: trimText(entry.title || "", 160),
        url: entry.url || "",
      })),
    };
  }
  return summaries;
}
|
|
497
|
+
|
|
498
|
+
/**
 * Build the action-planning prompt.
 *
 * The model is asked for at most `breadth` actions of the shape
 * `{ type: "search" | "fetchUrl", query?, url?, researchGoal? }`, wrapped in
 * BEGIN_JSON / END_JSON markers. Prior learnings, known gaps and
 * already-fetched URLs are each listed as a bullet section when non-empty;
 * an empty list still leaves an empty line in the prompt.
 *
 * @param {string} query - User topic.
 * @param {number} breadth - Maximum number of actions to request.
 * @param {string[]} [learnings=[]] - Learnings to build on.
 * @param {string[]} [gaps=[]] - Knowledge gaps to target.
 * @param {string[]} [usedUrls=[]] - URLs that must not be fetched again.
 * @returns {string} Newline-joined prompt.
 */
function buildResearchActionPrompt(
  query,
  breadth,
  learnings = [],
  gaps = [],
  usedUrls = [],
) {
  // "\n<heading>\n- item\n- item" for a non-empty list, "" otherwise.
  const bulletSection = (heading, items) => {
    if (!(items.length > 0)) return "";
    const bullets = items.map((item) => `- ${item}`).join("\n");
    return `\n${heading}\n${bullets}`;
  };

  const learningSection = bulletSection("Prior learnings to build on:", learnings);
  const gapSection = bulletSection("Known knowledge gaps to target:", gaps);
  const usedUrlSection = bulletSection(
    "Already fetched URLs (do not re-fetch):",
    usedUrls,
  );

  const instructions = [
    "You are planning web research actions for a multi-engine search agent.",
    "You can plan two types of actions:",
    ' - "search": run a multi-engine SERP search query',
    ' - "fetchUrl": directly fetch a specific URL (docs page, GitHub repo, specification, etc.)',
    'Prefer "fetchUrl" when a specific primary source URL is known or obvious.',
    'Use "search" for broad discovery or when specific URLs are unknown.',
    `Return at most ${breadth} actions.`,
    "Avoid near-duplicate search queries and already-fetched URLs.",
  ];

  // Example payload showing one action of each type.
  const exampleResponse = {
    actions: [
      {
        type: "search",
        query: "specific search query",
        researchGoal: "what this action should clarify",
      },
      {
        type: "fetchUrl",
        url: "https://example.com/docs/relevant-page",
        researchGoal: "extract specific information from this page",
      },
    ],
  };

  return [
    ...instructions,
    "",
    `User topic: ${query}`,
    learningSection,
    gapSection,
    usedUrlSection,
    "",
    "Respond ONLY with JSON wrapped in BEGIN_JSON / END_JSON markers:",
    "BEGIN_JSON",
    JSON.stringify(exampleResponse, null, 2),
    "END_JSON",
  ].join("\n");
}
|
|
558
|
+
|
|
559
|
+
/**
 * Validate a planned research action and return its normalized form.
 *
 * - "search" actions need a query that is non-empty after sanitizing.
 * - "fetchUrl" actions need a url for which normalizeUrl returns a truthy value.
 * Anything else (non-objects, unknown types, missing fields) yields null.
 * The research goal is passed through trimText with a 320 limit.
 *
 * @param {unknown} action - Raw action, e.g. parsed from model output.
 * @returns {{type: "search", query: string, researchGoal: string}
 *   | {type: "fetchUrl", url: string, researchGoal: string} | null}
 */
export function validateAction(action) {
  if (!action || typeof action !== "object") return null;

  const researchGoal = trimText(action.researchGoal || "", 320);

  switch (action.type) {
    case "search": {
      const query =
        action.query == null ? "" : sanitizeResearchQuery(action.query);
      if (!query) return null;
      return { type: "search", query, researchGoal };
    }
    case "fetchUrl": {
      const url = action.url == null ? "" : normalizeUrl(action.url);
      if (!url) return null;
      return { type: "fetchUrl", url, researchGoal };
    }
    default:
      return null;
  }
}
|
|
579
|
+
|
|
580
|
+
/**
 * Execute one research action.
 *
 * - "search": records the sanitized, lower-cased query in `usedQueries`, runs
 *   runFastAllSearch and builds a source registry from the result.
 * - "fetchUrl": refuses URLs already present in `usedUrls`; otherwise fetches
 *   the page, records the URL in `usedUrls`, and wraps the fetch outcome in a
 *   single source entry. A fetch that reports an error still resolves with
 *   `ok: true`; the outcome is visible in `sources[0].fetch.ok` and `.error`.
 * - any other type: resolves with an "Unknown action type" failure.
 *
 * Exceptions raised while searching or fetching are converted into
 * `{ ok: false, action, error, sources: [] }`.
 *
 * @param {{type: string, query?: string, url?: string, researchGoal?: string}} action
 * @param {{locale?: string|null, short?: boolean, usedQueries: Set<string>,
 *   usedUrls: Set<string>, maxChars?: number}} [options]
 * @returns {Promise<{ok: boolean, action: object, result?: unknown, error?: string,
 *   sources: object[], fetchResult?: object}>}
 */
async function executeResearchAction(
  action,
  { locale = null, short = true, usedQueries, usedUrls, maxChars = 8000 } = {},
) {
  const fail = (error) => ({ ok: false, action, error, sources: [] });

  switch (action.type) {
    case "search": {
      usedQueries.add(sanitizeResearchQuery(action.query).toLowerCase());
      try {
        const result = await runFastAllSearch(action.query, { locale, short });
        const sources = buildSourceRegistry(result, action.query);
        return { ok: true, action, result, sources };
      } catch (error) {
        return fail(error.message);
      }
    }

    case "fetchUrl": {
      const normalizedUrl = normalizeUrl(action.url);
      if (usedUrls.has(normalizedUrl)) {
        return fail(`URL already fetched: ${normalizedUrl}`);
      }

      try {
        const fetched = await fetchSingleResearchSource(normalizedUrl, maxChars);
        usedUrls.add(normalizedUrl);

        // Values shared by the source entry and the returned fetch summary.
        const finalUrl = fetched.finalUrl || normalizedUrl;
        const content = fetched.content || "";
        const contentChars = fetched.contentChars || 0;
        const snippet = fetched.snippet || "";
        const status = fetched.status || null;
        const fetchError = fetched.error || "";

        const domain = getDomainFromUrl(normalizedUrl);
        const source = {
          id: "",
          canonicalUrl: finalUrl,
          displayUrl: fetched.url || normalizedUrl,
          domain,
          title: fetched.title || normalizedUrl,
          engines: ["fetch"],
          engineCount: 1,
          perEngine: {},
          sourceType: classifySourceType(domain, fetched.title || "", finalUrl),
          isOfficial: false,
          smartScore: 0,
          fetch: {
            attempted: true,
            // Counts as fetched only without an error and with more than 100 characters.
            ok: !fetched.error && contentChars > 100,
            status,
            finalUrl,
            content,
            contentChars,
            snippet,
            error: fetchError,
          },
        };

        return {
          ok: true,
          action,
          result: null,
          sources: [source],
          fetchResult: {
            id: source.id,
            url: normalizedUrl,
            finalUrl,
            title: fetched.title || "",
            content,
            contentChars,
            snippet,
            status,
            error: fetchError,
            source: fetched.source || "http",
            duration: fetched.duration || 0,
          },
        };
      } catch (error) {
        return fail(error.message);
      }
    }

    default:
      return fail(`Unknown action type: ${action.type}`);
  }
}
|
|
694
|
+
|
|
695
|
+
/**
 * Fetch one source; delegates to fetchSourceContentDirect.
 *
 * @param {string} url - URL to fetch.
 * @param {number} [maxChars] - Content length cap, forwarded as-is.
 * @returns {Promise<object>} The fetch result produced by fetchSourceContentDirect.
 */
async function fetchSingleResearchSource(url, maxChars) {
  const fetched = await fetchSourceContentDirect(url, maxChars);
  return fetched;
}
|
|
698
|
+
|
|
699
|
+
/**
 * Fetch the content of a URL, trying the GitHub API first and plain HTTP second.
 *
 * 1. GitHub: when parseGitHubUrl recognizes the URL as a repository root, a
 *    tree, or a blob whose path contains no ".", fetchGitHubContent is used.
 *    This stage is best effort; any failure falls through to HTTP.
 * 2. HTTP: fetchSourceHttp with a 10 second timeout.
 *
 * Content is shortened with trimContentHeadTail to `maxChars`.
 *
 * When both stages fail, an error result is returned instead of throwing. Its
 * `error` always starts with "HTTP fetch failed"; when the cause is known
 * (exception message or HTTP status) it is appended after ": ", and a numeric
 * HTTP status is also returned as `status`.
 *
 * @param {string} url - URL to fetch.
 * @param {number} [maxChars=8000] - Maximum number of content characters to keep.
 * @returns {Promise<{url: string, finalUrl?: string, status?: number, title: string,
 *   snippet: string, content: string, contentChars: number, source: string,
 *   duration: number, error?: string}>}
 */
async function fetchSourceContentDirect(url, maxChars = 8000) {
  const start = Date.now();

  // GitHub URL — use API for rich content
  try {
    const { parseGitHubUrl, fetchGitHubContent } = await import(
      "../github.mjs"
    );
    const parsed = parseGitHubUrl(url);
    if (
      parsed &&
      (parsed.type === "root" ||
        parsed.type === "tree" ||
        (parsed.type === "blob" && !parsed.path?.includes(".")))
    ) {
      const ghResult = await fetchGitHubContent(url);
      if (ghResult.ok) {
        const { trimContentHeadTail } = await import("../utils/content.mjs");
        const content = trimContentHeadTail(ghResult.content, maxChars);
        return {
          url,
          finalUrl: url,
          status: 200,
          title: ghResult.title,
          snippet: content.slice(0, 320),
          content,
          contentChars: content.length,
          source: "github-api",
          duration: Date.now() - start,
        };
      }
    }
  } catch {
    // Deliberate best effort: not a GitHub URL or the API failed — the HTTP
    // stage below is the fallback, so this error is not reported.
  }

  // Standard HTTP fetch. The cause of a failure is kept for the error result.
  let failureDetail = "";
  let failureStatus;
  try {
    const { fetchSourceHttp } = await import("../fetcher.mjs");
    const { trimContentHeadTail } = await import("../utils/content.mjs");
    const httpResult = await fetchSourceHttp(url, { timeoutMs: 10000 });
    if (httpResult.ok) {
      const content = trimContentHeadTail(httpResult.markdown, maxChars);
      return {
        url,
        finalUrl: httpResult.finalUrl,
        status: httpResult.status,
        title: httpResult.title,
        snippet: httpResult.excerpt,
        content,
        contentChars: content.length,
        source: "http",
        duration: Date.now() - start,
      };
    }
    // NOTE(review): assumes a non-ok result may still carry a numeric
    // `status`, as ok results do — confirm against fetchSourceHttp.
    if (typeof httpResult.status === "number") {
      failureStatus = httpResult.status;
      failureDetail = `status ${httpResult.status}`;
    }
  } catch (error) {
    failureDetail = error instanceof Error ? error.message : String(error);
  }

  return {
    url,
    title: "",
    content: "",
    contentChars: 0,
    snippet: "",
    error: failureDetail
      ? `HTTP fetch failed: ${failureDetail}`
      : "HTTP fetch failed",
    source: "error",
    duration: Date.now() - start,
    ...(failureStatus === undefined ? {} : { status: failureStatus }),
  };
}
|
|
769
|
+
|
|
770
|
+
/**
 * Extract the lower-cased hostname of a URL, without a leading "www.".
 *
 * @param {string} rawUrl - Absolute URL.
 * @returns {string} The hostname, or "" when the URL cannot be parsed.
 */
function getDomainFromUrl(rawUrl) {
  let hostname;
  try {
    hostname = new URL(rawUrl).hostname;
  } catch {
    return "";
  }
  const lowered = hostname.toLowerCase();
  return lowered.startsWith("www.") ? lowered.slice("www.".length) : lowered;
}
|
|
778
|
+
|
|
779
|
+
/**
 * Expand fetch actions that point at a GitHub repository root.
 *
 * Every "fetchUrl" action that parseGitHubUrl classifies as a repository root
 * is replaced by up to three fetch actions: the root itself plus the first
 * two supplemental paths (CONTRIBUTING / CHANGELOG / docs README on main or
 * master) that are not in `usedUrls`. The supplemental paths are guesses; no
 * check is made that they exist. A root already in `usedUrls` is dropped. All
 * other actions are passed through unchanged.
 *
 * @param {Array<{type: string, url?: string, researchGoal?: string}>} actions
 * @param {Set<string>} usedUrls - URLs that were already fetched.
 * @returns {Promise<object[]>} The expanded action list.
 */
async function normalizeGitHubFetchActions(actions, usedUrls) {
  const { parseGitHubUrl } = await import("../github.mjs");

  // Root plus at most two supplemental pages per repository.
  const maxTargetsPerRepo = 3;
  const supplementalPaths = [
    "blob/main/CONTRIBUTING.md",
    "blob/master/CONTRIBUTING.md",
    "blob/main/CHANGELOG.md",
    "blob/master/CHANGELOG.md",
    "blob/main/docs/README.md",
  ];

  const expanded = [];
  for (const action of actions) {
    const parsed =
      action.type === "fetchUrl" ? parseGitHubUrl(action.url) : null;
    if (!parsed || parsed.type !== "root") {
      expanded.push(action);
      continue;
    }

    const { owner, repo } = parsed;
    const base = `https://github.com/${owner}/${repo}`;
    if (usedUrls.has(base)) continue;

    const extras = supplementalPaths
      .map((path) => `${base}/${path}`)
      .filter((candidate) => !usedUrls.has(candidate))
      .slice(0, maxTargetsPerRepo - 1);

    const researchGoal =
      action.researchGoal || `Fetch GitHub content for ${owner}/${repo}`;
    for (const url of [base, ...extras]) {
      expanded.push({ type: "fetchUrl", url, researchGoal });
    }
  }
  return expanded;
}
|
|
841
|
+
|
|
842
|
+
/**
 * Turn a model response into a list of validated research actions.
 *
 * The structured JSON is read from `rawJson.answer`; its `actions` array is
 * passed item by item through validateAction. Valid actions are kept, in
 * order, until `breadth` of them have been collected.
 *
 * @param {{answer?: string}|null|undefined} rawJson - Model response.
 * @param {number} breadth - Maximum number of actions to keep.
 * @returns {object[]} Validated actions.
 */
export function parseActionPlan(rawJson, breadth) {
  const plan = parseStructuredJson(rawJson?.answer || "") || {};
  const candidates = Array.isArray(plan?.actions) ? plan.actions : [];

  const accepted = [];
  candidates.forEach((candidate) => {
    const action = validateAction(candidate);
    if (!action) return;
    if (accepted.length < breadth) accepted.push(action);
  });
  return accepted;
}
|
|
859
|
+
|
|
860
|
+
/**
 * Backward-compatible: convert an old query-only plan to an action list.
 *
 * Each entry may be a plain query string or an object carrying `query` and an
 * optional `researchGoal`. Entries without a usable query (empty strings,
 * `null`/`undefined`, objects lacking `query`) are skipped instead of
 * throwing, and a non-array argument yields an empty list.
 *
 * @param {Array<string|{query?: string, researchGoal?: string}>|null|undefined} queries
 * @returns {Array<{type: "search", query: string, researchGoal: string}>}
 */
export function queriesToActions(queries) {
  if (!Array.isArray(queries)) return [];

  const actions = [];
  for (const entry of queries) {
    const isPlainString = typeof entry === "string";
    // Optional chaining keeps null/undefined entries from crashing the plan.
    const query = isPlainString ? entry : entry?.query;
    if (!query) continue;
    actions.push({
      type: "search",
      query,
      researchGoal: isPlainString ? "" : entry.researchGoal || "",
    });
  }
  return actions;
}
|
|
872
|
+
|
|
873
|
+
/**
 * Compute the de-duplication key for a source.
 *
 * The first non-empty value of `finalUrl`, `canonicalUrl`, `url` is passed
 * through `normalizeUrl`; if that yields nothing, the source's `id` is used,
 * and finally the empty string.
 *
 * @param {object|null|undefined} source
 * @returns {string} Normalized URL, source id, or "".
 */
function sourceKey(source) {
  const rawUrl = source?.finalUrl || source?.canonicalUrl || source?.url || "";
  const normalized = normalizeUrl(rawUrl);
  if (normalized) return normalized;
  return source?.id || "";
}
|
|
882
|
+
|
|
883
|
+
/**
 * Build the goal-based evidence extraction prompt.
 *
 * Includes up to 12 ledger questions whose status is not "closed" and up to 6
 * fetched sources that have `content` or `snippet` and whose `sourceKey` is
 * not in `alreadyExtracted`. Source text is trimmed to 5000 characters via
 * `trimText`.
 *
 * @param {string} originalQuery - The user's research question.
 * @param {Array<object>} questions - Question ledger entries.
 * @param {Array<object>} fetchedSources - Fetched sources to draw text from.
 * @param {Set<string>} [alreadyExtracted] - Source keys to leave out.
 * @returns {string} Prompt text with a BEGIN_JSON / END_JSON response template.
 */
function buildEvidenceExtractionPrompt(
  originalQuery,
  questions,
  fetchedSources,
  alreadyExtracted = new Set(),
) {
  const ledgerView = (questions || [])
    .filter((entry) => entry.status !== "closed")
    .slice(0, 12)
    .map((entry) => ({ id: entry.id, question: entry.question }));

  const usableSources = (fetchedSources || []).filter((candidate) => {
    if (!(candidate?.content || candidate?.snippet)) return false;
    return !alreadyExtracted.has(sourceKey(candidate));
  });
  const sourceView = usableSources.slice(0, 6).map((candidate, position) => ({
    // Fall back to a positional F-id when the source carries no id.
    id: candidate.id || `F${position + 1}`,
    title: candidate.title || "",
    url: candidate.finalUrl || candidate.url || candidate.canonicalUrl || "",
    content: trimText(candidate.content || candidate.snippet || "", 5000),
  }));

  const responseTemplate = {
    extractions: [
      {
        sourceId: "S1",
        url: "https://example.com/source",
        rational: "why this source matters for the goal",
        evidence:
          "specific quoted/paraphrased evidence with numbers, dates, caveats",
        summary: "concise contribution to the research question",
        answers: [
          {
            id: "Q1",
            evidence: "brief evidence that closes the question",
          },
        ],
        newQuestions: ["new sub-question raised by this source"],
      },
    ],
  };

  const instructions = [
    "You are doing goal-based evidence extraction for an iterative research run.",
    "For each source, extract only information that helps answer the open questions.",
    "Use original wording/details where useful. Do not invent answers; leave questions open if evidence is insufficient.",
    "If a source answers one or more tracked questions, identify those question IDs explicitly.",
    "Also propose genuinely new sub-questions discovered from the evidence.",
  ];
  const context = [
    `Original research question: ${originalQuery}`,
    `Open question ledger: ${JSON.stringify(ledgerView, null, 2)}`,
    `Fetched sources: ${JSON.stringify(sourceView, null, 2)}`,
  ];
  const responseFormat = [
    "Respond ONLY with JSON wrapped in BEGIN_JSON / END_JSON markers:",
    "BEGIN_JSON",
    JSON.stringify(responseTemplate, null, 2),
    "END_JSON",
  ];

  return [...instructions, "", ...context, "", ...responseFormat].join("\n");
}
|
|
943
|
+
|
|
944
|
+
/**
 * Normalize the model's `extractions` payload into bounded evidence records.
 *
 * Each raw item is matched to a fetched source by `sourceId` first and by
 * normalized URL second. Text fields are trimmed with `trimText`, answers
 * without an id are dropped, and records that end up with no sourceId, url,
 * summary, or evidence are discarded.
 *
 * @param {{extractions?: Array<object>}} payload - Parsed model response.
 * @param {Array<object>} fetchedSources - Sources that were shown to the model.
 * @returns {Array<object>} Normalized evidence records.
 */
function normalizeEvidenceExtractions(payload, fetchedSources) {
  const rawItems = Array.isArray(payload?.extractions)
    ? payload.extractions
    : [];

  const sourcesByUrl = new Map();
  const sourcesById = new Map();
  for (const fetched of fetchedSources || []) {
    if (fetched?.id) sourcesById.set(String(fetched.id), fetched);
    const urlKey = sourceKey(fetched);
    if (urlKey) sourcesByUrl.set(urlKey, fetched);
  }

  // Every kept answer is attributed to the record's own source id.
  const toAnswers = (rawAnswers, sourceId) => {
    if (!Array.isArray(rawAnswers)) return [];
    return rawAnswers
      .map((rawAnswer) => ({
        id: String(rawAnswer?.id || ""),
        evidence: trimText(rawAnswer?.evidence || "", 500),
        sourceIds: [sourceId].filter(Boolean),
      }))
      .filter((entry) => entry.id);
  };

  const records = [];
  for (const rawItem of rawItems) {
    const matched =
      sourcesById.get(String(rawItem?.sourceId || "")) ||
      sourcesByUrl.get(normalizeUrl(rawItem?.url || "") || "");
    const sourceId = String(rawItem?.sourceId || matched?.id || "");
    const url = normalizeUrl(
      rawItem?.url || matched?.finalUrl || matched?.url || "",
    );
    const answers = toAnswers(rawItem?.answers, sourceId);

    const record = {
      sourceId,
      url,
      title: matched?.title || rawItem?.title || "",
      rational: trimText(rawItem?.rational || "", 700),
      evidence: trimText(rawItem?.evidence || "", 1600),
      summary: trimText(rawItem?.summary || "", 700),
      answers,
      newQuestions: uniqueStrings(rawItem?.newQuestions || [], 6),
    };

    if (record.sourceId || record.url || record.summary || record.evidence) {
      records.push(record);
    }
  }
  return records;
}
|
|
986
|
+
|
|
987
|
+
/**
 * Run one goal-based evidence extraction pass over not-yet-extracted sources.
 *
 * Only the first `MAX_SOURCES_PER_PROMPT` pending sources are sent to the
 * model, and only those are recorded in `extractedSourceKeys` afterwards.
 * Sources beyond the batch stay pending so a later call can process them
 * instead of being marked as extracted without ever being read.
 *
 * @param {object} params
 * @param {string} params.query - Original research question.
 * @param {Array<object>} params.questions - Question ledger.
 * @param {Array<object>} params.fetchedSources - Candidate sources.
 * @param {Set<string>} [params.extractedSourceKeys] - Keys of sources already
 *   processed; mutated in place on success.
 * @returns {Promise<{evidence: Array<object>, error: string}>} Extracted
 *   evidence, or an empty list plus the error message when the pass failed.
 */
async function extractEvidenceFromSources({
  query,
  questions,
  fetchedSources,
  extractedSourceKeys = new Set(),
}) {
  // Must stay in sync with the source cap applied inside
  // buildEvidenceExtractionPrompt (which slices to 6 sources).
  const MAX_SOURCES_PER_PROMPT = 6;

  const pending = (fetchedSources || []).filter(
    (source) =>
      (source?.content || source?.snippet) &&
      !extractedSourceKeys.has(sourceKey(source)),
  );
  if (pending.length === 0) return { evidence: [], error: "" };

  const batch = pending.slice(0, MAX_SOURCES_PER_PROMPT);
  try {
    const raw = await runGeminiPrompt(
      buildEvidenceExtractionPrompt(
        query,
        questions,
        batch,
        extractedSourceKeys,
      ),
      { timeoutMs: 120000 },
    );
    const parsed = parseGeminiJson(raw, { extractions: [] });
    const evidence = normalizeEvidenceExtractions(parsed, batch);
    // Mark only what the model actually saw.
    for (const source of batch) {
      const key = sourceKey(source);
      if (key) extractedSourceKeys.add(key);
    }
    return { evidence, error: "" };
  } catch (error) {
    // Best-effort step: report the failure to the caller instead of throwing.
    return { evidence: [], error: error?.message || String(error) };
  }
}
|
|
1020
|
+
|
|
1021
|
+
/**
 * Build the per-round "learning extraction" prompt.
 *
 * Up to 10 fetched sources with `content` or `snippet` are included, labelled
 * F1..Fn by position and trimmed to 3000 characters via `trimText`. Only the
 * last 12 evidence items are embedded.
 *
 * @param {string} originalQuery - The user's research question.
 * @param {Array} roundQueries - Queries executed in this round.
 * @param {Array} searchSummaries - Per-engine summaries for this round.
 * @param {Array<object>} fetchedSources - Fetched sources to excerpt.
 * @param {Array<object>} [questions] - Question ledger.
 * @param {Array<object>} [evidenceItems] - Previously extracted evidence.
 * @returns {string} Prompt text with a BEGIN_JSON / END_JSON response template.
 */
function buildLearningPrompt(
  originalQuery,
  roundQueries,
  searchSummaries,
  fetchedSources,
  questions = [],
  evidenceItems = [],
) {
  const withText = fetchedSources.filter(
    (candidate) => candidate?.content || candidate?.snippet,
  );
  const snippetView = withText.slice(0, 10).map((candidate, position) => ({
    id: `F${position + 1}`,
    title: candidate.title || "",
    url: candidate.finalUrl || candidate.url || "",
    snippet: trimText(candidate.content || candidate.snippet || "", 3000),
  }));

  const responseTemplate = {
    learnings: ["concise, information-dense learning"],
    answeredQuestions: [
      {
        id: "Q1",
        evidence: "brief evidence that closes this question",
        sourceIds: ["S1"],
      },
    ],
    newQuestions: ["new sub-question discovered from the evidence"],
    followUpQueries: ["specific next search query"],
    gaps: ["important uncertainty or missing evidence"],
  };

  const instructions = [
    "You are extracting compact research state from live multi-engine search results.",
    "Create dense, non-overlapping learnings with exact names, numbers, dates, limitations, and caveats where available.",
    "Also propose follow-up search queries that would most improve confidence or fill gaps.",
  ];
  const context = [
    `Original research question: ${originalQuery}`,
    `Round queries: ${JSON.stringify(roundQueries, null, 2)}`,
    `Question ledger: ${JSON.stringify(questions, null, 2)}`,
    `Extracted source evidence: ${JSON.stringify(evidenceItems.slice(-12), null, 2)}`,
    `Engine summaries: ${JSON.stringify(searchSummaries, null, 2)}`,
    `Fetched source snippets: ${JSON.stringify(snippetView, null, 2)}`,
  ];
  const responseFormat = [
    "Respond ONLY with JSON wrapped in BEGIN_JSON / END_JSON markers:",
    "BEGIN_JSON",
    JSON.stringify(responseTemplate, null, 2),
    "END_JSON",
  ];

  return [...instructions, "", ...context, "", ...responseFormat].join("\n");
}
|
|
1073
|
+
|
|
1074
|
+
/**
 * Build the final-report prompt from per-round learnings and gaps.
 *
 * Learnings and gaps are flattened across all rounds. The source registry is
 * limited to the first 12 sources; fetch details (ok flag, snippet trimmed to
 * 1200 characters, published time) are included only for sources whose
 * `fetch.attempted` is truthy. Only the last 20 evidence items are embedded.
 *
 * @param {string} originalQuery - The user's research question.
 * @param {Array<object>} rounds - Completed rounds with `learnings` / `gaps`.
 * @param {Array<object>} sources - Ranked source registry.
 * @param {Array<object>} [questions] - Question ledger.
 * @param {Array<object>} [evidenceItems] - Goal-based extracted evidence.
 * @returns {string} Prompt text with a BEGIN_JSON / END_JSON response template.
 */
function buildFinalReportPrompt(
  originalQuery,
  rounds,
  sources,
  questions = [],
  evidenceItems = [],
) {
  const allLearnings = rounds.flatMap((round) => round.learnings || []);
  const allGaps = rounds.flatMap((round) => round.gaps || []);

  // An undefined `fetch` value is omitted by JSON.stringify.
  const describeFetch = (fetchInfo) => {
    if (!fetchInfo?.attempted) return undefined;
    return {
      ok: fetchInfo.ok,
      snippet: trimText(fetchInfo.snippet || "", 1200),
      publishedTime: fetchInfo.publishedTime || "",
    };
  };
  const registry = sources.slice(0, 12).map((entry) => ({
    id: entry.id,
    title: entry.title,
    domain: entry.domain,
    url: entry.canonicalUrl,
    type: entry.sourceType,
    engines: entry.engines,
    fetch: describeFetch(entry.fetch),
  }));

  const responseTemplate = {
    answer: "markdown report with sections and inline [S1] citations",
    agreement: {
      level: "high|medium|low|mixed|conflicting",
      summary: "one-sentence confidence summary",
    },
    differences: ["notable disagreement or conflict between sources"],
    caveats: ["important caveat or qualification"],
    claims: [
      {
        claim: "specific factual statement from the research",
        support: "strong|moderate|weak|conflicting",
        sourceIds: ["S1", "S2"],
      },
    ],
    recommendedSources: ["S1", "S2"],
  };

  const role = [
    "You are writing the final research report for an iterative deep-research run.",
    "Produce a thorough markdown report organized into clear sections.",
  ];
  const rules = [
    "Use the learnings and source registry below. Every substantive claim MUST be backed by an [S1] citation.",
    'Where engines disagree, surface the conflicting claims explicitly in the "differences" array.',
    'Include a "Key Claims" structure that maps each distinct claim to its supporting source IDs.',
  ];
  const structure = [
    "Report structure:",
    "1. ## Summary — A 2-4 sentence executive summary of findings",
    "2. ## Key Findings — The main findings, organized by theme or question, each with inline citations",
    "3. ## Areas of Disagreement — Where engines or sources conflict (if any)",
    "4. ## Limitations & Caveats — Important qualifiers, gaps, or uncertainties",
  ];
  const context = [
    `Original research question: ${originalQuery}`,
    `Learnings: ${JSON.stringify(allLearnings, null, 2)}`,
    `Known gaps/caveats: ${JSON.stringify(allGaps, null, 2)}`,
    `Question ledger: ${JSON.stringify(questions, null, 2)}`,
    `Goal-based extracted evidence: ${JSON.stringify(evidenceItems.slice(-20), null, 2)}`,
    `Source registry: ${JSON.stringify(registry, null, 2)}`,
  ];
  const responseFormat = [
    "Respond ONLY with JSON wrapped in BEGIN_JSON / END_JSON markers:",
    "BEGIN_JSON",
    JSON.stringify(responseTemplate, null, 2),
    "END_JSON",
  ];

  return [
    ...role,
    "",
    ...rules,
    "",
    ...structure,
    "",
    ...context,
    "",
    ...responseFormat,
  ].join("\n");
}
|
|
1146
|
+
|
|
1147
|
+
/**
 * Build the fallback synthesis prompt that writes the final report straight
 * from previously extracted per-source evidence, with no per-round learnings.
 *
 * Per the original note, this is meant for the case where the regular
 * final-report path yields no structured learnings (for example when the
 * per-round learning prompt was rejected but goal-based extraction worked).
 *
 * Only the last 20 evidence items and the first 12 sources are embedded.
 * Question ids found in the evidence items' `answers` are listed as already
 * answered; ledger entries whose status is not "closed" are listed as open.
 *
 * @param {string} originalQuery - The user's research question.
 * @param {Array<object>} [sources] - Ranked source registry.
 * @param {Array<object>} [questions] - Question ledger.
 * @param {Array<object>} [evidenceItems] - Goal-based extracted evidence.
 * @returns {string} Prompt text with a BEGIN_JSON / END_JSON response template.
 */
function buildSynthesisFromEvidencePrompt(
  originalQuery,
  sources = [],
  questions = [],
  evidenceItems = [],
) {
  const registry = sources.slice(0, 12).map((entry) => ({
    id: entry.id,
    title: entry.title,
    domain: entry.domain,
    url: entry.canonicalUrl,
    type: entry.sourceType,
    engines: entry.engines,
  }));

  const recentEvidence = evidenceItems.slice(-20);
  const answeredIds = new Set();
  for (const record of recentEvidence) {
    for (const answer of record.answers || []) {
      if (answer?.id) answeredIds.add(answer.id);
    }
  }

  const stillOpen = (questions || [])
    .filter((entry) => entry.status !== "closed")
    .map((entry) => ({ id: entry.id, question: entry.question }));

  const responseTemplate = {
    answer: "markdown report with sections and inline [S1] citations",
    agreement: {
      level: "high|medium|low|mixed|conflicting",
      summary: "one-sentence confidence summary",
    },
    differences: ["notable disagreement or conflict between sources"],
    caveats: ["important caveat or qualification"],
    claims: [
      {
        claim: "specific factual statement supported by the evidence",
        support: "strong|moderate|weak|conflicting",
        sourceIds: ["S1", "S2"],
      },
    ],
    recommendedSources: ["S1", "S2"],
  };

  const role = [
    "You are writing the final research report from goal-based extracted evidence.",
    "Per-round learnings were not produced, but the per-source evidence extraction step succeeded.",
    "Synthesize a thorough markdown report using ONLY the evidence below. Every substantive claim MUST be backed by an [S1] citation.",
  ];
  const structure = [
    "Report structure:",
    "1. ## Summary — A 2-4 sentence executive summary of findings",
    "2. ## Key Findings — The main findings, organized by theme or question, each with inline citations",
    "3. ## Limitations & Caveats — Important qualifiers, gaps, or uncertainties",
  ];
  const context = [
    `Original research question: ${originalQuery}`,
    `Per-source extracted evidence: ${JSON.stringify(recentEvidence, null, 2)}`,
    `Source registry: ${JSON.stringify(registry, null, 2)}`,
    `Questions already answered by the evidence: ${JSON.stringify(Array.from(answeredIds))}`,
    `Questions still open after this evidence: ${JSON.stringify(stillOpen)}`,
  ];
  const responseFormat = [
    "Respond ONLY with JSON wrapped in BEGIN_JSON / END_JSON markers:",
    "BEGIN_JSON",
    JSON.stringify(responseTemplate, null, 2),
    "END_JSON",
  ];

  return [
    ...role,
    "",
    ...structure,
    "",
    ...context,
    "",
    ...responseFormat,
  ].join("\n");
}
|
|
1221
|
+
|
|
1222
|
+
/**
 * Run the search CLI (`SEARCH_BIN all --inline --stdin --fast`) in a child
 * Node process and resolve with its parsed JSON stdout.
 *
 * The query is written to the child's stdin. Stderr is accumulated for error
 * reporting, and lines accepted by `shouldForwardChildStderr` are echoed to
 * this process's stderr as they arrive.
 *
 * @param {string} query - Search query sent to the child on stdin.
 * @param {object} [options]
 * @param {string|null} [options.locale=null] - Passed as `--locale` when set.
 * @param {boolean} [options.short=true] - When false, `--full` is added.
 * @returns {Promise<object>} Parsed JSON emitted by the child.
 * @throws {Error} (as a rejection) when the child cannot be spawned, exits
 *   non-zero, prints invalid JSON, or exceeds the 140 s timeout.
 */
async function runFastAllSearch(query, { locale = null, short = true } = {}) {
  const args = [SEARCH_BIN, "all", "--inline", "--stdin", "--fast"];
  if (!short) args.push("--full");
  if (locale) args.push("--locale", locale);

  return new Promise((resolve, reject) => {
    const proc = spawn(process.execPath, args, {
      stdio: ["pipe", "pipe", "pipe"],
      env: { ...process.env, GREEDY_SEARCH_RESEARCH_CHILD: "1" },
    });

    let out = "";
    let err = "";
    let stderrBuffer = "";
    let timer = null;
    let settled = false;

    // Settle the promise once; later events (e.g. "close" after a timeout
    // kill) become no-ops.
    const settle = (finish, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      finish(value);
    };

    const forwardLine = (line) => {
      if (shouldForwardChildStderr(line)) {
        process.stderr.write(`${line}\n`);
      }
    };

    // Without an "error" listener a failed spawn is thrown as an uncaught
    // exception instead of rejecting this promise.
    proc.on("error", (spawnError) => settle(reject, spawnError));
    // A child that exits before reading stdin raises EPIPE on the pipe; the
    // "close"/"error" handlers report the real failure, so only prevent the
    // unhandled stream error here.
    proc.stdin.on("error", () => {});
    proc.stdin.write(query);
    proc.stdin.end();

    proc.stdout.on("data", (chunk) => {
      out += chunk;
    });
    proc.stderr.on("data", (chunk) => {
      err += chunk;
      stderrBuffer += chunk.toString();
      const lines = stderrBuffer.split("\n");
      // Keep the trailing partial line until its newline arrives.
      stderrBuffer = lines.pop() || "";
      for (const line of lines) forwardLine(line);
    });

    timer = setTimeout(() => {
      proc.kill();
      settle(
        reject,
        new Error(`research child search timed out for: ${query}`),
      );
    }, 140000);

    proc.on("close", (code) => {
      // Flush a final stderr line that was not newline-terminated.
      if (stderrBuffer) {
        forwardLine(stderrBuffer);
        stderrBuffer = "";
      }
      if (code !== 0) {
        settle(
          reject,
          new Error(err.trim() || `search child exited with code ${code}`),
        );
        return;
      }
      let parsed;
      try {
        parsed = JSON.parse(out.trim());
      } catch {
        settle(
          reject,
          new Error(`Invalid JSON from research child: ${out.slice(0, 200)}`),
        );
        return;
      }
      settle(resolve, parsed);
    });
  });
}
|
|
1272
|
+
|
|
1273
|
+
/**
 * Merge several source lists into one ranked registry of at most 12 sources.
 *
 * Sources are keyed by normalized canonical URL; entries without one are
 * skipped. Duplicates contribute their engines (unioned) and the higher
 * `smartScore` to the first occurrence. The result is sorted by
 * `computeCompositeScore` descending, then by domain, and re-labelled
 * S1..Sn in that order.
 *
 * @param {Array<Array<object>>} sourceLists - Per-search source lists.
 * @returns {Array<object>} Deduplicated, ranked sources with fresh `id`s.
 */
function dedupeSources(sourceLists) {
  const byCanonicalUrl = new Map();

  for (const candidate of sourceLists.flat()) {
    const canonicalUrl = normalizeUrl(candidate.canonicalUrl || candidate.url);
    if (!canonicalUrl) continue;

    if (!byCanonicalUrl.has(canonicalUrl)) {
      // Store a copy so merging never mutates the caller's source object.
      byCanonicalUrl.set(canonicalUrl, { ...candidate, canonicalUrl });
      continue;
    }

    const merged = byCanonicalUrl.get(canonicalUrl);
    const engineSet = new Set([
      ...(merged.engines || []),
      ...(candidate.engines || []),
    ]);
    merged.engines = [...engineSet];
    merged.engineCount = merged.engines.length;
    merged.smartScore = Math.max(
      merged.smartScore || 0,
      candidate.smartScore || 0,
    );
  }

  const ranked = Array.from(byCanonicalUrl.values());
  ranked.sort((left, right) => {
    const scoreGap =
      computeCompositeScore(right) - computeCompositeScore(left);
    if (scoreGap !== 0) return scoreGap;
    return (left.domain || "").localeCompare(right.domain || "");
  });

  return ranked
    .slice(0, 12)
    .map((entry, position) => ({ ...entry, id: `S${position + 1}` }));
}
|
|
1302
|
+
|
|
1303
|
+
/**
 * Decide whether a stderr line from a child search process should be echoed
 * to this process's stderr.
 *
 * A line is forwarded when it starts with one of the known progress/status
 * prefixes, or is exactly "Ready" / "Ready.". A single trailing carriage
 * return is ignored so CRLF-terminated output (the caller splits on "\n"
 * only) still matches the end-anchored "Ready" pattern.
 *
 * @param {string} line - One stderr line without its trailing newline.
 * @returns {boolean} True when the line should be forwarded.
 */
function shouldForwardChildStderr(line) {
  const text = String(line ?? "").replace(/\r$/, "");
  const forwardedPatterns = [
    /^PROGRESS:/,
    /^\[greedysearch\]/,
    /^\[(bing|perplexity|google|gemini|chatgpt|logically|semantic-scholar)\]/,
    /^GreedySearch Chrome/,
    /^Launching GreedySearch Chrome/,
    /^Headless mode/,
    /^Ready\.?$/,
  ];
  return forwardedPatterns.some((pattern) => pattern.test(text));
}
|
|
1316
|
+
|
|
1317
|
+
/**
 * Parse the structured JSON carried in a model response's `answer` field.
 *
 * @param {{answer?: string}|null|undefined} raw - Model response wrapper.
 * @param {object} [fallback={}] - Returned when `parseStructuredJson` yields a
 *   falsy result.
 * @returns {object} The parsed payload or `fallback`.
 */
function parseGeminiJson(raw, fallback = {}) {
  const answerText = raw?.answer || "";
  const structured = parseStructuredJson(answerText);
  return structured ? structured : fallback;
}
|
|
1320
|
+
|
|
1321
|
+
/**
 * Audit citations in the final answer against known sources.
 *
 * Source IDs of the form S<n> or F<n> (e.g. "[S1]", "(F2)", bare "S3") are
 * extracted from the answer text and reported exactly as written. Each cited
 * ID is resolved, in order, by:
 *   1. an exact `source.id` match,
 *   2. the same number under the other prefix (S<n> <-> F<n>), since search
 *      and fetched sources may be labelled in either namespace,
 *   3. 1-based position in `sources` (S1 -> sources[0]).
 * IDs that resolve to nothing are listed in `missing`; IDs that resolve to a
 * source without successful fetch data are listed in `unfetched`.
 *
 * @param {string} answer - Final answer text containing citations.
 * @param {Array<object>} sources - Known sources (`id`, `fetch`, `content`,
 *   `contentChars` are consulted).
 * @returns {{cited: string[], missing: string[], unfetched: string[], ok: boolean}}
 *   `ok` is true when no cited ID is missing. An empty answer or a non-array
 *   `sources` yields an empty, `ok: true` audit.
 */
export function auditCitations(answer, sources) {
  if (!answer || !Array.isArray(sources)) {
    return {
      cited: [],
      missing: [],
      unfetched: [],
      ok: true,
    };
  }

  // Record each citation once, under the prefix the author actually used, so
  // `cited` does not contain IDs that never appear in the answer.
  const citations = new Map();
  for (const match of String(answer).matchAll(/\b([SF])(\d+)\b/g)) {
    const [, prefix, digits] = match;
    citations.set(`${prefix}${digits}`, { prefix, digits });
  }

  const sourceById = new Map();
  for (const source of sources) {
    const id = source?.id;
    if (id) {
      sourceById.set(id, source);
    }
  }

  const resolveSource = ({ prefix, digits }) => {
    const exact = sourceById.get(`${prefix}${digits}`);
    if (exact) return exact;
    const otherPrefix = prefix === "S" ? "F" : "S";
    const sibling = sourceById.get(`${otherPrefix}${digits}`);
    if (sibling) return sibling;
    // Positional fallback: S1 / F1 -> sources[0].
    const index = Number.parseInt(digits, 10) - 1;
    return index >= 0 && index < sources.length ? sources[index] : undefined;
  };

  // A source counts as fetched when the fetch succeeded or it carries more
  // than 100 characters of content.
  const wasFetched = (source) =>
    Boolean(
      source.fetch?.ok ||
        (source.content && source.content.length > 100) ||
        (source.contentChars && source.contentChars > 100),
    );

  const cited = Array.from(citations.keys());
  const missing = [];
  const unfetched = [];

  for (const [id, citation] of citations) {
    const source = resolveSource(citation);
    if (!source) {
      missing.push(id);
    } else if (!wasFetched(source)) {
      unfetched.push(id);
    }
  }

  return {
    cited,
    missing,
    unfetched,
    ok: missing.length === 0,
  };
}
|
|
1403
|
+
|
|
1404
|
+
/**
 * Evaluate the deterministic "research floor": a set of boolean checks that
 * must all pass for a research run to be considered minimally complete.
 *
 * List-valued inputs that are not arrays (including an explicit `null`) are
 * treated as empty instead of throwing.
 *
 * @param {object} [params]
 * @param {Array<object>} [params.sources] - Ranked source registry.
 * @param {Array<object>} [params.fetchedSources] - Sources that were fetched.
 * @param {object} [params.synthesis] - Final synthesis (`claims` is read).
 * @param {object|null} [params.citationAudit] - Result of `auditCitations`.
 * @param {Array} [params.gaps] - Known gaps (only counted for metrics).
 * @param {Array<object>} [params.questions] - Question ledger.
 * @param {Array} [params.rounds] - Completed research rounds.
 * @param {number} [params.qualityScore=0] - Score for the run.
 * @param {number} [params.qualityThreshold=8.5] - Requested threshold; the
 *   floor check is capped at 8.
 * @param {number} [params.maxSources=8] - Source budget; drives `minFetched`.
 * @param {boolean} [params.requireCitations=true] - Enforce citation checks.
 * @param {boolean} [params.requireQuestions=true] - Enforce question closure.
 * @returns {{floorMet: boolean, checks: object, metrics: object}}
 */
export function computeResearchFloor({
  sources = [],
  fetchedSources = [],
  synthesis = {},
  citationAudit = null,
  gaps = [],
  questions = [],
  rounds = [],
  qualityScore = 0,
  qualityThreshold = 8.5,
  maxSources = 8,
  requireCitations = true,
  requireQuestions = true,
} = {}) {
  // Parameter defaults only cover `undefined`; normalize everything else once.
  const toArray = (value) => (Array.isArray(value) ? value : []);
  const sourceList = toArray(sources);
  const fetchedList = toArray(fetchedSources);
  const gapList = toArray(gaps);
  const ledger = toArray(questions);
  const roundList = toArray(rounds);

  const fetchedOk = fetchedList.filter(
    (source) =>
      source?.fetch?.ok ||
      (source?.contentChars || 0) > 100 ||
      String(source?.content || "").length > 100,
  );
  const primarySources = sourceList.filter((source) =>
    ["official-docs", "repo", "maintainer-blog", "academic"].includes(
      String(source?.sourceType || ""),
    ),
  );
  const claims = Array.isArray(synthesis?.claims) ? synthesis.claims : [];
  const citedCount = citationAudit ? citationAudit.cited?.length || 0 : 0;
  const questionStats = questionProgress(ledger);
  // Follow-up questions discovered during a run are useful handoff gaps, not a
  // reason to fail a short research run forever. The deterministic floor only
  // requires the original/root questions to close; newly-created questions stay
  // visible in STATUS.md and `gaps` for deeper follow-up rounds.
  const requiredQuestions = ledger.filter(
    (q) => !q?.createdRound || q?.reason === "Original research question",
  );
  const requiredQuestionStats = questionProgress(requiredQuestions);
  // Require between 2 and 4 successfully fetched sources.
  const minFetched = Math.min(4, Math.max(2, Number(maxSources) || 8));
  const checks = {
    roundsRun: roundList.length >= 1,
    fetchedSources: fetchedOk.length >= minFetched,
    primarySources: primarySources.length >= 1,
    qualityScore: qualityScore >= Math.min(qualityThreshold, 8),
    claimsExtracted: !requireCitations || claims.length > 0,
    citationsPresent: !requireCitations || citedCount > 0,
    citationsValid: !requireCitations || citationAudit?.ok === true,
    unfetchedCitations:
      !requireCitations || (citationAudit?.unfetched || []).length === 0,
    requiredQuestionsClosed:
      !requireQuestions || requiredQuestionStats.open === 0,
  };
  return {
    floorMet: Object.values(checks).every(Boolean),
    checks,
    metrics: {
      fetchedOk: fetchedOk.length,
      primarySources: primarySources.length,
      claims: claims.length,
      cited: citedCount,
      gaps: gapList.length,
      openQuestions: questionStats.open,
      closedQuestions: questionStats.closed,
      totalQuestions: questionStats.total,
      openRequiredQuestions: requiredQuestionStats.open,
      closedRequiredQuestions: requiredQuestionStats.closed,
      totalRequiredQuestions: requiredQuestionStats.total,
      qualityScore,
      minFetched,
    },
  };
}
|
|
1474
|
+
|
|
1475
|
+
/**
 * Return copies of the fetched sources, each guaranteed to carry an `id`.
 *
 * An existing `id` is kept. Otherwise the id of the registry source with the
 * same normalized URL is used, and failing that a positional `F<n>` label.
 *
 * @param {Array<object>} fetchedSources - Sources that were fetched.
 * @param {Array<object>} sources - Source registry whose ids are reused.
 * @returns {Array<object>} New source objects with `id` set.
 */
function annotateFetchedSourcesWithIds(fetchedSources, sources) {
  const registryIdByUrl = new Map();
  for (const registered of sources || []) {
    const registeredUrl = normalizeUrl(
      registered?.canonicalUrl || registered?.finalUrl || registered?.url,
    );
    if (registeredUrl && registered?.id) {
      registryIdByUrl.set(registeredUrl, registered.id);
    }
  }

  const annotated = [];
  (fetchedSources || []).forEach((fetched, position) => {
    const fetchedUrl = normalizeUrl(
      fetched?.finalUrl || fetched?.canonicalUrl || fetched?.url,
    );
    const id =
      fetched?.id || registryIdByUrl.get(fetchedUrl) || `F${position + 1}`;
    annotated.push({ ...fetched, id });
  });
  return annotated;
}
|
|
1493
|
+
|
|
1494
|
+
/**
 * Create a fresh question ledger seeded with the original research question.
 *
 * The query is passed through `sanitizeResearchQuery` and trimmed to 320
 * characters; the single entry is open, has id "Q1", and carries no evidence.
 *
 * @param {string} query - The user's research question.
 * @returns {Array<object>} A one-element ledger.
 */
export function createQuestionLedger(query) {
  const rootQuestion = {
    id: "Q1",
    question: trimText(sanitizeResearchQuery(query), 320),
    status: "open",
    reason: "Original research question",
    evidence: [],
    sourceIds: [],
  };
  return [rootQuestion];
}
|
|
1506
|
+
|
|
1507
|
+
/**
 * Compute the next free ledger id of the form `Q<n>`.
 *
 * The numeric part of every existing id (after an optional leading "Q"/"q")
 * is parsed; the result is one more than the largest value found. Entries
 * that are null/undefined or whose id has no numeric part are ignored.
 *
 * @param {Array<object>|null|undefined} questions - Current ledger.
 * @returns {string} "Q1" for an empty ledger, otherwise `Q<max + 1>`.
 */
function nextQuestionId(questions) {
  let highest = 0;
  for (const entry of questions || []) {
    // Optional chaining tolerates holes / null entries in the ledger.
    const numericPart = String(entry?.id || "").replace(/^Q/i, "");
    const parsed = Number.parseInt(numericPart, 10);
    if (Number.isFinite(parsed)) highest = Math.max(highest, parsed);
  }
  return `Q${highest + 1}`;
}
|
|
1515
|
+
|
|
1516
|
+
/**
 * Find the first ledger entry that matches a question text.
 *
 * The probe is sanitized via `sanitizeResearchQuery` and lower-cased. An
 * entry matches when its lower-cased question equals the probe or when
 * `jaccardSimilarity` between the entry's question and the probe is >= 0.82.
 *
 * @param {Array<object>|null|undefined} questions - Current ledger.
 * @param {string} question - Question text to look for.
 * @returns {object|undefined} The matching entry, or undefined.
 */
function findSimilarQuestion(questions, question) {
  const probe = sanitizeResearchQuery(question).toLowerCase();
  for (const entry of questions || []) {
    if (entry.question?.toLowerCase() === probe) return entry;
    if (jaccardSimilarity(entry.question || "", probe) >= 0.82) return entry;
  }
  return undefined;
}
|
|
1524
|
+
|
|
1525
|
+
/**
 * Add an open question to the ledger unless an equivalent one already exists.
 *
 * The text is sanitized and trimmed to 320 characters. When it is empty the
 * ledger is left untouched; when `findSimilarQuestion` finds a match, that
 * existing entry is returned instead of creating a duplicate.
 *
 * @param {Array<object>} questions - Ledger, mutated in place.
 * @param {string} question - Question text.
 * @param {object} [options]
 * @param {string} [options.reason=""] - Why the question was added (trimmed
 *   to 240 characters).
 * @param {number|null} [options.round=null] - Stored as `createdRound`.
 * @returns {object|null} The new or existing entry, or null for empty text.
 */
function addQuestion(questions, question, { reason = "", round = null } = {}) {
  const text = trimText(sanitizeResearchQuery(question), 320);
  if (text) {
    const duplicate = findSimilarQuestion(questions, text);
    if (duplicate) return duplicate;

    const entry = {
      id: nextQuestionId(questions),
      question: text,
      status: "open",
      reason: trimText(reason, 240),
      createdRound: round,
      evidence: [],
      sourceIds: [],
    };
    questions.push(entry);
    return entry;
  }
  return null;
}
|
|
1542
|
+
|
|
1543
|
+
/**
 * Mark a ledger question as closed and attach supporting evidence.
 *
 * The target is looked up by exact id first and by `findSimilarQuestion`
 * second. The first recorded `closedRound` is preserved on repeated closes.
 * Evidence is capped at 4 unique strings and source ids at 8.
 *
 * @param {Array<object>} questions - Ledger; the matched entry is mutated.
 * @param {string} idOrQuestion - Question id or question text.
 * @param {object} [options]
 * @param {string} [options.evidence=""] - Evidence text to append.
 * @param {Array<string>} [options.sourceIds=[]] - Source ids to append.
 * @param {number|null} [options.round=null] - Round in which it was closed.
 * @returns {object|null} The closed entry, or null when nothing matched.
 */
function closeQuestion(
  questions,
  idOrQuestion,
  { evidence = "", sourceIds = [], round = null } = {},
) {
  let match = questions.find((entry) => entry.id === idOrQuestion);
  if (!match) match = findSimilarQuestion(questions, idOrQuestion);
  if (!match) return null;

  match.status = "closed";
  if (!match.closedRound) match.closedRound = round;

  if (evidence) {
    const combinedEvidence = [...(match.evidence || []), evidence];
    match.evidence = uniqueStrings(combinedEvidence, 4);
  }
  if (Array.isArray(sourceIds)) {
    const combinedIds = [...(match.sourceIds || []), ...sourceIds];
    match.sourceIds = uniqueStrings(combinedIds, 8);
  }
  return match;
}
|
|
1564
|
+
|
|
1565
|
+
/**
 * Summarize ledger progress.
 *
 * A question counts as closed when its status is "closed" or "resolved". The
 * latter is the status given to follow-ups that are auto-resolved to cap the
 * ledger, so they must not be reported as still open. A missing or non-array
 * ledger is treated as empty.
 *
 * @param {Array<object>|null|undefined} questions - Question ledger.
 * @returns {{total: number, closed: number, open: number}}
 */
function questionProgress(questions) {
  const ledger = Array.isArray(questions) ? questions : [];
  const total = ledger.length;
  const closed = ledger.filter(
    (entry) => entry?.status === "closed" || entry?.status === "resolved",
  ).length;
  return { total, closed, open: Math.max(0, total - closed) };
}
|
|
1570
|
+
|
|
1571
|
+
/**
 * Update the question ledger after a research round.
 *
 * Steps, in order:
 *   1. Every planned action contributes a question (its research goal, or its
 *      query/URL when the goal is absent or the generic "Original user query").
 *   2. Open "Discovered gap/follow-up" questions beyond the cap are
 *      auto-resolved, oldest first. The cap runs before this round's new
 *      questions are appended in step 4.
 *   3. Questions the model reports as answered are closed. An answered item
 *      that carries only a `question` (no `id`) is added to the ledger if
 *      needed and then closed with its evidence.
 *   4. Explicit `newQuestions` become open follow-up entries.
 *
 * @param {Array<object>} questions - Ledger, mutated in place.
 * @param {object} [update]
 * @param {number} [update.roundNumber] - Current round number.
 * @param {Array<object>} [update.actions] - Executed actions or action runs.
 * @param {object} [update.learningPayload] - Parsed learning response
 *   (`answeredQuestions`, `newQuestions` are read).
 * @returns {Array<object>} The same ledger array.
 */
export function updateQuestionLedger(
  questions,
  { roundNumber, actions = [], learningPayload = {} } = {},
) {
  for (const run of actions || []) {
    const action = run?.action || run;
    const goal =
      action?.researchGoal && action.researchGoal !== "Original user query"
        ? action.researchGoal
        : action?.query || action?.url || "";
    if (goal) {
      addQuestion(questions, goal, {
        reason: "Planned research action",
        round: roundNumber,
      });
    }
  }

  // Cap the open-question ledger growth. Discovered gap/follow-up questions
  // are useful handoffs but Gemini tends to emit one per evidence slot, which
  // blows up the ledger and inflates the `requiredQuestionsClosed` floor
  // check. Keep at most MAX_OPEN_FOLLOWUPS of them across the whole run;
  // older ones are auto-resolved as "covered by later evidence" so they
  // don't block the floor forever.
  const MAX_OPEN_FOLLOWUPS = 5;
  const followupOpen = questions.filter(
    (q) => q.status === "open" && q.reason === "Discovered gap/follow-up",
  );
  if (followupOpen.length > MAX_OPEN_FOLLOWUPS) {
    const overflow = followupOpen
      .sort((a, b) => (a.createdRound || 0) - (b.createdRound || 0))
      .slice(0, followupOpen.length - MAX_OPEN_FOLLOWUPS);
    for (const q of overflow) {
      q.status = "resolved";
      q.closedRound = roundNumber;
      q.evidence = uniqueStrings(
        [...(q.evidence || []), "Auto-resolved to cap open-question ledger"],
        4,
      );
    }
  }

  const answered = Array.isArray(learningPayload?.answeredQuestions)
    ? learningPayload.answeredQuestions
    : [];
  for (const item of answered) {
    if (typeof item === "string") {
      closeQuestion(questions, item, { round: roundNumber });
      continue;
    }
    const closure = {
      evidence: item?.evidence || item?.answer || "",
      sourceIds: Array.isArray(item?.sourceIds) ? item.sourceIds : [],
      round: roundNumber,
    };
    if (item?.id) {
      closeQuestion(questions, item.id, closure);
      continue;
    }
    // No id: nothing to close unless the item names the question itself.
    if (!item?.question) continue;
    // addQuestion returns the existing entry when a similar question is
    // already tracked, so this closes either the match or the new entry.
    const tracked = addQuestion(questions, item.question, {
      reason: "Answered during learning extraction",
      round: roundNumber,
    });
    if (tracked) closeQuestion(questions, tracked.id, closure);
  }

  // Keep STATUS.md as a true question ledger, not a dump of every search query
  // or caveat. Follow-up queries and raw gaps stay in their own fields; only
  // explicit newQuestions become open ledger items.
  const newQuestions = Array.isArray(learningPayload?.newQuestions)
    ? learningPayload.newQuestions
    : [];
  for (const question of newQuestions) {
    addQuestion(questions, question, {
      reason: "Discovered gap/follow-up",
      round: roundNumber,
    });
  }

  return questions;
}
|
|
1652
|
+
|
|
1653
|
+
/**
 * Pick direct-fetch targets from known academic source domains (arXiv,
 * semanticscholar.org, DOI redirect). Returns the canonical URL plus a
 * short label for the researchGoal. Filters out anything already fetched.
 *
 * URLs containing `/pdf/` are rewritten to their `/html/` form (and a
 * trailing `.pdf` is dropped) before being returned. A source is skipped when
 * EITHER its original URL or its rewritten URL is already in `usedUrls` or
 * was already picked during this call, so the same paper is never returned
 * twice under two spellings.
 *
 * @param {Array<object>} combinedSources - Source records. The URL is read
 *   from the first non-empty of `canonicalUrl`, `finalUrl`, `url`; the label
 *   from `title`, then `id`, then the host name.
 * @param {Set<string>} [usedUrls] - URLs that were already fetched. When it
 *   is missing, nothing is treated as already fetched.
 * @returns {Array<{url: string, label: string}>} At most two targets, in the
 *   order their sources appear in `combinedSources`.
 */
function pickAcademicFetchTargets(combinedSources, usedUrls) {
	if (!Array.isArray(combinedSources) || combinedSources.length === 0)
		return [];
	const ACADEMIC_HOSTS = ["arxiv.org", "semanticscholar.org", "doi.org"];
	const MAX_TARGETS = 2;
	const seen = new Set();
	const targets = [];
	const alreadyHandled = (candidate) =>
		seen.has(candidate) || Boolean(usedUrls?.has?.(candidate));

	for (const source of combinedSources) {
		// Later sources could never survive the final cap, so stop scanning.
		if (targets.length >= MAX_TARGETS) break;

		const url = source?.canonicalUrl || source?.finalUrl || source?.url || "";
		if (!url) continue;

		let domain = "";
		try {
			domain = new URL(url).hostname.toLowerCase().replace(/^www\./, "");
		} catch {
			// Not a parseable absolute URL: it cannot be a fetch target.
			continue;
		}
		const isAcademic = ACADEMIC_HOSTS.some(
			(host) => domain === host || domain.endsWith(`.${host}`),
		);
		if (!isAcademic) continue;

		// Prefer the HTML/abs page over PDF for direct fetch — the source
		// fetcher handles both, but the HTML page gives the synthesizer
		// readable text + abstract immediately.
		const htmlUrl = url.includes("/pdf/")
			? url.replace(/\/pdf\//, "/html/").replace(/\.pdf$/i, "")
			: url;

		// The rewritten URL is what this function hands back to be fetched, so
		// presumably that is the form recorded in `usedUrls`. Checking only the
		// original URL would let a PDF link through again after its HTML form
		// had been fetched, and would emit duplicates for PDF/HTML pairs.
		if (alreadyHandled(url) || alreadyHandled(htmlUrl)) continue;
		seen.add(url);
		seen.add(htmlUrl);

		targets.push({
			url: htmlUrl,
			label: source?.title || source?.id || domain,
		});
	}
	return targets;
}
|
|
1691
|
+
|
|
1692
|
+
/**
 * Close ledger questions that the final cited synthesis answers.
 *
 * Nothing happens unless the synthesis has an answer, the citation audit
 * reports `ok === true`, and there is at least one claim and one cited id.
 * Otherwise, for every question whose status is not "closed", the claim with
 * the highest `jaccardSimilarity` to the question text is located. The
 * question is closed when its id is "Q1" or that best score is at least 0.18.
 *
 * @param {Array<object>} questions - Question ledger; entries are closed via
 *   `closeQuestion`.
 * @param {object} synthesis - Final synthesis; `answer` and `claims` are read.
 * @param {object} citationAudit - Audit result; `ok` and `cited` are read.
 * @returns {Array<object>} The same `questions` array that was passed in.
 */
function reconcileQuestionsFromSynthesis(questions, synthesis, citationAudit) {
	const auditPassed = citationAudit?.ok === true;
	if (!synthesis?.answer || !auditPassed) return questions;

	const claims = Array.isArray(synthesis.claims) ? synthesis.claims : [];
	const citedIds = Array.isArray(citationAudit.cited)
		? citationAudit.cited
		: [];
	if (!claims.length || !citedIds.length) return questions;

	// Highest-scoring claim for a question text; the first claim wins ties and
	// a claim must score strictly above zero to be selected at all.
	const matchClaim = (text) => {
		let winner = null;
		let top = 0;
		for (let i = 0; i < claims.length; i += 1) {
			const candidate = claims[i];
			const similarity = jaccardSimilarity(text, candidate.claim || "");
			if (similarity > top) {
				top = similarity;
				winner = candidate;
			}
		}
		return { winner, top };
	};

	for (const question of questions) {
		if (question.status === "closed") continue;

		const { winner, top } = matchClaim(question.question || "");
		const answered = question.id === "Q1" || top >= 0.18;
		if (!answered) continue;

		// Fall back to the first few audited citations when the matched claim
		// carries no source id list of its own.
		const sourceIds = Array.isArray(winner?.sourceIds)
			? winner.sourceIds
			: citedIds.slice(0, 4);
		closeQuestion(questions, question.id, {
			evidence: winner?.claim || "Answered in final cited synthesis",
			sourceIds,
		});
	}
	return questions;
}
|
|
1725
|
+
|
|
1726
|
+
/**
 * Render the question ledger as a Markdown checklist.
 *
 * Each question becomes one `- [x] ID: text (S1, S2)` line. The box is ticked
 * only when the status is exactly "closed", and the parenthesised source id
 * list is omitted when the question has no source ids.
 *
 * @param {Array<object>} questions - Ledger entries; `id`, `question`,
 *   `status` and `sourceIds` are read.
 * @returns {string} Newline-joined checklist, or "No tracked questions." when
 *   the ledger is empty.
 */
function renderQuestionStatus(questions) {
	if (!questions.length) return "No tracked questions.";

	const formatRow = ({ id, question, status, sourceIds }) => {
		const box = status === "closed" ? "x" : " ";
		const refs = sourceIds?.length ? ` (${sourceIds.join(", ")})` : "";
		return `- [${box}] ${id}: ${question}${refs}`;
	};

	return questions.map((entry) => formatRow(entry)).join("\n");
}
|
|
1735
|
+
|
|
1736
|
+
/**
 * Render a list of strings as Markdown bullet points.
 *
 * The items are first passed through `uniqueStrings`; each surviving entry
 * becomes one `- entry` line.
 *
 * @param {Array<string>} items - Candidate list entries.
 * @param {string} [fallback="None recorded."] - Text returned when no entry
 *   survives `uniqueStrings`.
 * @returns {string} Newline-joined bullet list, or `fallback`.
 */
function markdownList(items, fallback = "None recorded.") {
	const entries = uniqueStrings(items);
	if (!entries.length) return fallback;

	const bullets = entries.map((entry) => `- ${entry}`);
	return bullets.join("\n");
}
|
|
1742
|
+
|
|
1743
|
+
/**
 * Write one research run to disk as a bundle of Markdown reports and JSON
 * data files.
 *
 * Files written, relative to the bundle directory:
 * `STATUS.md`, `OUTLINE.md`, `reports/SUMMARY.md`, `reports/CLAIMS.md`,
 * `reports/EVIDENCE.md`, `reports/GAPS.md`, `sources/index.md` (plus whatever
 * `writeResearchSourcesToFiles` writes into `sources/`), and
 * `data/{manifest,rounds,sources,questions,evidence}.json`.
 *
 * @param {object} params
 * @param {string} params.query - Research query; shown in STATUS.md and used
 *   for the default directory name.
 * @param {Array<object>} params.rounds - Per-round records; each round's
 *   `gaps` are merged into the reported gap list.
 * @param {Array<object>} params.sources - Source registry, serialized as-is.
 * @param {Array<object>} params.fetchedSources - Fetched sources handed to
 *   `writeResearchSourcesToFiles`.
 * @param {Array<object>} [params.evidenceItems=[]] - Evidence records;
 *   `sourceId`, `url`, `rational`, `evidence` and `summary` are rendered.
 * @param {object} params.synthesis - Final synthesis; `answer`, `claims` and
 *   `caveats` are read.
 * @param {object} params.citationAudit - Stored in `manifest.json`.
 * @param {object} params.floor - Floor result; `floorMet` selects DONE vs
 *   PARTIAL and `checks` is rendered as a checklist.
 * @param {object} params.manifest - Run metadata; `terminationReason` is
 *   shown as the stop reason (defaulting to "max_rounds").
 * @param {Array<string>} [params.allGaps=[]] - Gaps accumulated over the run.
 * @param {Array<object>} [params.questions=[]] - Question ledger.
 * @param {?string} [params.outDir=null] - Bundle directory. When falsy, a
 *   timestamped directory under `DEFAULT_RESEARCH_BUNDLE_ROOT` is used.
 * @returns {Promise<{dir: string, statusPath: string, summaryPath: string,
 *   manifestPath: string, sourceCount: number, sourceFiles: Array<object>}>}
 */
async function writeResearchBundle({
	query,
	rounds,
	sources,
	fetchedSources,
	evidenceItems = [],
	synthesis,
	citationAudit,
	floor,
	manifest,
	allGaps = [],
	questions = [],
	outDir = null,
}) {
	// ISO timestamp made path-safe and cut to second precision.
	const stamp = new Date().toISOString().replaceAll(/[:.]/g, "-").slice(0, 19);
	const dir =
		outDir ||
		join(
			DEFAULT_RESEARCH_BUNDLE_ROOT,
			`${stamp}_${slugifyResearchName(query)}`,
		);
	const reportsDir = join(dir, "reports");
	const sourcesDir = join(dir, "sources");
	const dataDir = join(dir, "data");
	for (const folder of [reportsDir, sourcesDir, dataDir]) {
		mkdirSync(folder, { recursive: true });
	}

	const statusPath = join(dir, "STATUS.md");
	const summaryPath = join(reportsDir, "SUMMARY.md");
	const manifestPath = join(dataDir, "manifest.json");

	// Small local writers so every file is written the same way.
	const writeLines = (filePath, lines) =>
		writeFileSync(filePath, lines.join("\n"), "utf8");
	const writeJson = (filePath, value) =>
		writeFileSync(filePath, JSON.stringify(value, null, 2), "utf8");

	const sourceFiles = await writeResearchSourcesToFiles(
		fetchedSources,
		sourcesDir,
	);
	const gaps = uniqueStrings([
		...allGaps,
		...rounds.flatMap((round) => round.gaps || []),
	]);

	writeLines(statusPath, [
		floor.floorMet ? "STATUS: DONE" : "STATUS: PARTIAL",
		"",
		`Query: ${query}`,
		`Stop reason: ${manifest.terminationReason || "max_rounds"}`,
		"",
		"## Deterministic floor checks",
		...Object.entries(floor.checks).map(
			([name, ok]) => `- [${ok ? "x" : " "}] ${name}`,
		),
		"",
		"## Questions",
		renderQuestionStatus(questions),
		"",
		"## Open gaps",
		markdownList(gaps),
		"",
	]);

	writeLines(join(dir, "OUTLINE.md"), [
		"# Research bundle outline",
		"",
		"- `reports/SUMMARY.md` — final cited report",
		"- `reports/CLAIMS.md` — extracted claims with support/source IDs",
		"- `reports/EVIDENCE.md` — goal-based source evidence",
		"- `reports/GAPS.md` — remaining caveats and uncertainties",
		"- `sources/` — fetched source markdown files",
		"- `data/manifest.json` — machine-readable run metadata",
		"- `data/rounds.json` — per-round actions/learnings/gaps",
		"- `data/sources.json` — ranked source registry",
		"- `data/questions.json` — open/closed question ledger",
		// evidence.json is written below, so the outline has to list it too.
		"- `data/evidence.json` — extracted evidence items",
		"",
	]);

	writeFileSync(summaryPath, String(synthesis.answer || ""), "utf8");

	const claimLines =
		Array.isArray(synthesis.claims) && synthesis.claims.length
			? synthesis.claims.map((claim) => {
					const ids = Array.isArray(claim.sourceIds)
						? claim.sourceIds.join(", ")
						: "";
					return `- ${claim.claim || ""} (${claim.support || "support unknown"}${ids ? `; ${ids}` : ""})`;
				})
			: ["No structured claims were extracted."];
	writeLines(join(reportsDir, "CLAIMS.md"), [
		"# Key claims",
		"",
		...claimLines,
		"",
	]);

	// One section per evidence item with empty fields dropped. Sections are
	// joined with a blank line: the separator must be applied AFTER the
	// empty-field filter, otherwise the filter removes it and consecutive
	// sections run together.
	const evidenceBody = evidenceItems.length
		? evidenceItems
				.map((item) =>
					[
						`## ${item.sourceId || item.url || "Source"}`,
						item.url ? `<${item.url}>` : "",
						item.rational ? `**Rational:** ${item.rational}` : "",
						item.evidence ? `**Evidence:** ${item.evidence}` : "",
						item.summary ? `**Summary:** ${item.summary}` : "",
					]
						.filter(Boolean)
						.join("\n"),
				)
				.join("\n\n")
		: "No goal-based evidence was extracted.";
	writeLines(join(reportsDir, "EVIDENCE.md"), [
		"# Extracted evidence",
		"",
		evidenceBody,
		"",
	]);

	writeLines(join(reportsDir, "GAPS.md"), [
		"# Gaps and caveats",
		"",
		"## Caveats",
		markdownList(synthesis.caveats || []),
		"",
		"## Research gaps",
		markdownList(gaps),
		"",
	]);

	writeJson(manifestPath, { ...manifest, floor, citationAudit });
	writeJson(join(dataDir, "rounds.json"), rounds);
	writeJson(join(dataDir, "sources.json"), sources);
	writeJson(join(dataDir, "questions.json"), questions);
	writeJson(join(dataDir, "evidence.json"), evidenceItems);

	writeLines(join(sourcesDir, "index.md"), [
		"# Source index",
		"",
		...sourceFiles.map((source) => {
			const label = source.title || source.url;
			const url = source.finalUrl || source.url;
			const path = source.contentPath ? ` — ${source.contentPath}` : "";
			return `- ${source.id || "?"}: [${label}](${url})${path}`;
		}),
		"",
	]);

	return {
		dir,
		statusPath,
		summaryPath,
		manifestPath,
		sourceCount: sourceFiles.length,
		sourceFiles,
	};
}
|
|
1927
|
+
|
|
1928
|
+
export async function runResearchMode({
|
|
1929
|
+
query,
|
|
1930
|
+
breadth = 3,
|
|
1931
|
+
iterations = 2,
|
|
1932
|
+
maxSources,
|
|
1933
|
+
locale = null,
|
|
1934
|
+
short = false,
|
|
1935
|
+
qualityThreshold = 8.5,
|
|
1936
|
+
writeBundle = process.env.GREEDY_RESEARCH_BUNDLE !== "0",
|
|
1937
|
+
researchOutDir = null,
|
|
1938
|
+
} = {}) {
|
|
1939
|
+
const options = clampResearchOptions({ breadth, iterations, maxSources });
|
|
1940
|
+
const rounds = [];
|
|
1941
|
+
let allLearnings = [];
|
|
1942
|
+
let allGaps = [];
|
|
1943
|
+
const questions = createQuestionLedger(query);
|
|
1944
|
+
let activeActions = null;
|
|
1945
|
+
let combinedSources = [];
|
|
1946
|
+
let fetchedSources = [];
|
|
1947
|
+
let evidenceItems = [];
|
|
1948
|
+
const extractedSourceKeys = new Set();
|
|
1949
|
+
const usedQueries = new Set();
|
|
1950
|
+
const usedUrls = new Set();
|
|
1951
|
+
const qualityHistory = [];
|
|
1952
|
+
let terminationReason = "max_rounds";
|
|
1953
|
+
|
|
1954
|
+
// Manifest tracking
|
|
1955
|
+
const startedAt = new Date().toISOString();
|
|
1956
|
+
const startMs = Date.now();
|
|
1957
|
+
let totalActionsRun = 0;
|
|
1958
|
+
let totalSearches = 0;
|
|
1959
|
+
let totalFetches = 0;
|
|
1960
|
+
const engineFailures = [];
|
|
1961
|
+
|
|
1962
|
+
process.stderr.write(
|
|
1963
|
+
`[greedysearch] Research mode: breadth ${options.breadth}, iterations ${options.iterations}, qualityThreshold ${qualityThreshold}, engines ${RESEARCH_ENGINES.join(",")}, synthesizer gemini\n`,
|
|
1964
|
+
);
|
|
1965
|
+
|
|
1966
|
+
for (let roundIndex = 0; roundIndex < options.iterations; roundIndex++) {
|
|
1967
|
+
const roundNumber = roundIndex + 1;
|
|
1968
|
+
const roundBreadth = Math.max(
|
|
1969
|
+
1,
|
|
1970
|
+
Math.ceil(options.breadth / 2 ** roundIndex),
|
|
1971
|
+
);
|
|
1972
|
+
process.stderr.write(`PROGRESS:research:round-${roundNumber}:planning\n`);
|
|
1973
|
+
|
|
1974
|
+
if (!activeActions) {
|
|
1975
|
+
try {
|
|
1976
|
+
// Action-based planning: produces search + fetchUrl actions
|
|
1977
|
+
const rawPlan = await runGeminiPrompt(
|
|
1978
|
+
buildResearchActionPrompt(
|
|
1979
|
+
query,
|
|
1980
|
+
roundBreadth,
|
|
1981
|
+
allLearnings,
|
|
1982
|
+
allGaps,
|
|
1983
|
+
[...usedUrls],
|
|
1984
|
+
),
|
|
1985
|
+
{ timeoutMs: 120000 },
|
|
1986
|
+
);
|
|
1987
|
+
let planActions = parseActionPlan(rawPlan, roundBreadth);
|
|
1988
|
+
|
|
1989
|
+
// On first round, ensure the original query is included
|
|
1990
|
+
if (roundIndex === 0) {
|
|
1991
|
+
planActions.unshift({
|
|
1992
|
+
type: "search",
|
|
1993
|
+
query,
|
|
1994
|
+
researchGoal: "Original user query",
|
|
1995
|
+
});
|
|
1996
|
+
}
|
|
1997
|
+
|
|
1998
|
+
// Normalize GitHub root URLs into specific fetch targets
|
|
1999
|
+
planActions = await normalizeGitHubFetchActions(planActions, usedUrls);
|
|
2000
|
+
activeActions = planActions;
|
|
2001
|
+
} catch (error) {
|
|
2002
|
+
process.stderr.write(
|
|
2003
|
+
`[greedysearch] Action planning failed, using fallback queries: ${error.message}\n`,
|
|
2004
|
+
);
|
|
2005
|
+
// Fallback: use query-only planning
|
|
2006
|
+
const fallbackQueries = normalizeResearchQueries(
|
|
2007
|
+
null,
|
|
2008
|
+
query,
|
|
2009
|
+
roundBreadth,
|
|
2010
|
+
{
|
|
2011
|
+
includeOriginal: roundIndex === 0,
|
|
2012
|
+
exclude: usedQueries,
|
|
2013
|
+
},
|
|
2014
|
+
);
|
|
2015
|
+
activeActions = queriesToActions(fallbackQueries);
|
|
2016
|
+
}
|
|
2017
|
+
}
|
|
2018
|
+
|
|
2019
|
+
// Novelty gate: reject exact and near-duplicate search actions
|
|
2020
|
+
const noveltyFiltered = (activeActions || []).filter((action) => {
|
|
2021
|
+
if (action.type === "search") {
|
|
2022
|
+
const pass = !isDuplicateQuery(action.query, usedQueries, {
|
|
2023
|
+
roundIndex,
|
|
2024
|
+
originalQuery: query,
|
|
2025
|
+
});
|
|
2026
|
+
if (!pass) {
|
|
2027
|
+
process.stderr.write(
|
|
2028
|
+
`[greedysearch] Novelty gate rejected search: ${action.query}\n`,
|
|
2029
|
+
);
|
|
2030
|
+
}
|
|
2031
|
+
return pass;
|
|
2032
|
+
}
|
|
2033
|
+
if (action.type === "fetchUrl") {
|
|
2034
|
+
const pass = !usedUrls.has(action.url);
|
|
2035
|
+
if (!pass) {
|
|
2036
|
+
process.stderr.write(
|
|
2037
|
+
`[greedysearch] Novelty gate rejected fetch: ${action.url}\n`,
|
|
2038
|
+
);
|
|
2039
|
+
}
|
|
2040
|
+
return pass;
|
|
2041
|
+
}
|
|
2042
|
+
return false;
|
|
2043
|
+
});
|
|
2044
|
+
|
|
2045
|
+
const roundActions = noveltyFiltered.slice(0, roundBreadth);
|
|
2046
|
+
|
|
2047
|
+
// Force at least one fetchUrl per round when a known academic source
|
|
2048
|
+
// (arXiv, semantic-scholar, DOI) is present in combinedSources. The
|
|
2049
|
+
// Gemini planner occasionally emits all-search actions even when the
|
|
2050
|
+
// answer is in a single arXiv PDF; direct fetching gives the synthesizer
|
|
2051
|
+
// real PDF text and reliably passes citation audits.
|
|
2052
|
+
const academicTargets = pickAcademicFetchTargets(combinedSources, usedUrls);
|
|
2053
|
+
const hasFetch = roundActions.some((a) => a.type === "fetchUrl");
|
|
2054
|
+
if (!hasFetch && academicTargets.length > 0) {
|
|
2055
|
+
const injectTarget = academicTargets[0];
|
|
2056
|
+
roundActions.push({
|
|
2057
|
+
type: "fetchUrl",
|
|
2058
|
+
url: injectTarget.url,
|
|
2059
|
+
researchGoal: `Direct fetch of known academic source: ${injectTarget.label || injectTarget.url}`,
|
|
2060
|
+
});
|
|
2061
|
+
process.stderr.write(
|
|
2062
|
+
`[greedysearch] Forced fetchUrl for academic source: ${injectTarget.url}\n`,
|
|
2063
|
+
);
|
|
2064
|
+
}
|
|
2065
|
+
|
|
2066
|
+
const actionRuns = [];
|
|
2067
|
+
for (let i = 0; i < roundActions.length; i++) {
|
|
2068
|
+
const action = roundActions[i];
|
|
2069
|
+
process.stderr.write(
|
|
2070
|
+
`PROGRESS:research:round-${roundNumber}:action-${i + 1}/${roundActions.length}\n`,
|
|
2071
|
+
);
|
|
2072
|
+
process.stderr.write(
|
|
2073
|
+
`[greedysearch] Action ${i + 1}/${roundActions.length} [${action.type}]: ${(action.query || action.url).slice(0, 80)}\n`,
|
|
2074
|
+
);
|
|
2075
|
+
const run = await executeResearchAction(action, {
|
|
2076
|
+
locale,
|
|
2077
|
+
short,
|
|
2078
|
+
usedQueries,
|
|
2079
|
+
usedUrls,
|
|
2080
|
+
maxChars: 8000,
|
|
2081
|
+
});
|
|
2082
|
+
actionRuns.push(run);
|
|
2083
|
+
totalActionsRun++;
|
|
2084
|
+
if (action.type === "search") totalSearches++;
|
|
2085
|
+
if (action.type === "fetchUrl") totalFetches++;
|
|
2086
|
+
if (!run.ok) {
|
|
2087
|
+
engineFailures.push({
|
|
2088
|
+
round: roundNumber,
|
|
2089
|
+
type: action.type,
|
|
2090
|
+
target: action.query || action.url,
|
|
2091
|
+
error: run.error,
|
|
2092
|
+
});
|
|
2093
|
+
process.stderr.write(`[greedysearch] Action failed: ${run.error}\n`);
|
|
2094
|
+
}
|
|
2095
|
+
}
|
|
2096
|
+
|
|
2097
|
+
// Collect sources from search actions
|
|
2098
|
+
const searchActionRuns = actionRuns.filter(
|
|
2099
|
+
(r) => r.action.type === "search",
|
|
2100
|
+
);
|
|
2101
|
+
const fetchActionRuns = actionRuns.filter(
|
|
2102
|
+
(r) => r.action.type === "fetchUrl",
|
|
2103
|
+
);
|
|
2104
|
+
updateQuestionLedger(questions, { roundNumber, actions: actionRuns });
|
|
2105
|
+
|
|
2106
|
+
combinedSources = dedupeSources([
|
|
2107
|
+
combinedSources,
|
|
2108
|
+
searchActionRuns.flatMap((run) => run.sources || []),
|
|
2109
|
+
fetchActionRuns.flatMap((run) => run.sources || []),
|
|
2110
|
+
]);
|
|
2111
|
+
|
|
2112
|
+
// Merge direct fetch results into fetchedSources
|
|
2113
|
+
for (const fetchRun of fetchActionRuns) {
|
|
2114
|
+
if (fetchRun.fetchResult) {
|
|
2115
|
+
fetchedSources.push(fetchRun.fetchResult);
|
|
2116
|
+
}
|
|
2117
|
+
}
|
|
2118
|
+
fetchedSources = dedupeFetchedSources(fetchedSources);
|
|
2119
|
+
|
|
2120
|
+
// Fetch additional top-ranked sources from search results
|
|
2121
|
+
const remainingFetchBudget = Math.max(
|
|
2122
|
+
0,
|
|
2123
|
+
options.maxSources -
|
|
2124
|
+
fetchedSources.filter(
|
|
2125
|
+
(source) => source?.content || source?.contentChars > 100,
|
|
2126
|
+
).length,
|
|
2127
|
+
);
|
|
2128
|
+
if (remainingFetchBudget > 0 && combinedSources.length > 0) {
|
|
2129
|
+
process.stderr.write(`PROGRESS:research:round-${roundNumber}:fetching\n`);
|
|
2130
|
+
const fetched = await fetchMultipleResearchSources(
|
|
2131
|
+
combinedSources,
|
|
2132
|
+
Math.min(remainingFetchBudget, combinedSources.length),
|
|
2133
|
+
8000,
|
|
2134
|
+
Math.min(3, remainingFetchBudget || 1),
|
|
2135
|
+
);
|
|
2136
|
+
fetchedSources = dedupeFetchedSources([...fetchedSources, ...fetched]);
|
|
2137
|
+
combinedSources = mergeFetchDataIntoSources(
|
|
2138
|
+
combinedSources,
|
|
2139
|
+
fetchedSources,
|
|
2140
|
+
);
|
|
2141
|
+
}
|
|
2142
|
+
fetchedSources = annotateFetchedSourcesWithIds(
|
|
2143
|
+
fetchedSources,
|
|
2144
|
+
combinedSources,
|
|
2145
|
+
);
|
|
2146
|
+
|
|
2147
|
+
process.stderr.write(`PROGRESS:research:round-${roundNumber}:evidence\n`);
|
|
2148
|
+
const evidenceRun = await extractEvidenceFromSources({
|
|
2149
|
+
query,
|
|
2150
|
+
questions,
|
|
2151
|
+
fetchedSources,
|
|
2152
|
+
extractedSourceKeys,
|
|
2153
|
+
});
|
|
2154
|
+
if (evidenceRun.error) {
|
|
2155
|
+
process.stderr.write(
|
|
2156
|
+
`[greedysearch] Evidence extraction failed: ${evidenceRun.error}\n`,
|
|
2157
|
+
);
|
|
2158
|
+
}
|
|
2159
|
+
evidenceItems = [...evidenceItems, ...evidenceRun.evidence];
|
|
2160
|
+
for (const evidence of evidenceRun.evidence) {
|
|
2161
|
+
updateQuestionLedger(questions, {
|
|
2162
|
+
roundNumber,
|
|
2163
|
+
learningPayload: {
|
|
2164
|
+
answeredQuestions: evidence.answers || [],
|
|
2165
|
+
newQuestions: evidence.newQuestions || [],
|
|
2166
|
+
},
|
|
2167
|
+
});
|
|
2168
|
+
}
|
|
2169
|
+
|
|
2170
|
+
// Build round query summary for learning extraction
|
|
2171
|
+
const roundQueries = actionRuns.map((run) => ({
|
|
2172
|
+
query: run.action.query || run.action.url || "",
|
|
2173
|
+
researchGoal: run.action.researchGoal || "",
|
|
2174
|
+
}));
|
|
2175
|
+
|
|
2176
|
+
process.stderr.write(`PROGRESS:research:round-${roundNumber}:learning\n`);
|
|
2177
|
+
let learningPayload = { learnings: [], followUpQueries: [], gaps: [] };
|
|
2178
|
+
let learningError = "";
|
|
2179
|
+
try {
|
|
2180
|
+
const rawLearning = await runGeminiPrompt(
|
|
2181
|
+
buildLearningPrompt(
|
|
2182
|
+
query,
|
|
2183
|
+
roundQueries,
|
|
2184
|
+
searchActionRuns.map((run) => ({
|
|
2185
|
+
query: run.action.query,
|
|
2186
|
+
researchGoal: run.action.researchGoal,
|
|
2187
|
+
error: run.error || "",
|
|
2188
|
+
engines: summarizeEngineAnswers(run.result),
|
|
2189
|
+
})),
|
|
2190
|
+
fetchedSources,
|
|
2191
|
+
questions,
|
|
2192
|
+
evidenceItems,
|
|
2193
|
+
),
|
|
2194
|
+
{ timeoutMs: 120000 },
|
|
2195
|
+
);
|
|
2196
|
+
learningPayload = {
|
|
2197
|
+
...learningPayload,
|
|
2198
|
+
...parseGeminiJson(rawLearning, learningPayload),
|
|
2199
|
+
};
|
|
2200
|
+
} catch (error) {
|
|
2201
|
+
learningError = error.message;
|
|
2202
|
+
process.stderr.write(
|
|
2203
|
+
`[greedysearch] Learning extraction failed: ${error.message}\n`,
|
|
2204
|
+
);
|
|
2205
|
+
}
|
|
2206
|
+
|
|
2207
|
+
const learnings = Array.isArray(learningPayload.learnings)
|
|
2208
|
+
? learningPayload.learnings
|
|
2209
|
+
.map((l) => String(l))
|
|
2210
|
+
.filter(Boolean)
|
|
2211
|
+
.slice(0, 8)
|
|
2212
|
+
: [];
|
|
2213
|
+
const gaps = Array.isArray(learningPayload.gaps)
|
|
2214
|
+
? learningPayload.gaps
|
|
2215
|
+
.map((g) => String(g))
|
|
2216
|
+
.filter(Boolean)
|
|
2217
|
+
.slice(0, 6)
|
|
2218
|
+
: [];
|
|
2219
|
+
allLearnings = uniqueStrings([...allLearnings, ...learnings]);
|
|
2220
|
+
allGaps = uniqueStrings([...allGaps, ...gaps]);
|
|
2221
|
+
updateQuestionLedger(questions, {
|
|
2222
|
+
roundNumber,
|
|
2223
|
+
actions: [],
|
|
2224
|
+
learningPayload,
|
|
2225
|
+
gaps,
|
|
2226
|
+
});
|
|
2227
|
+
rounds.push({
|
|
2228
|
+
round: roundNumber,
|
|
2229
|
+
actions: actionRuns.map((run) => ({
|
|
2230
|
+
type: run.action.type,
|
|
2231
|
+
query: run.action.query || "",
|
|
2232
|
+
url: run.action.url || "",
|
|
2233
|
+
researchGoal: run.action.researchGoal || "",
|
|
2234
|
+
error: run.error || "",
|
|
2235
|
+
sourceCount: run.sources?.length || 0,
|
|
2236
|
+
})),
|
|
2237
|
+
learnings,
|
|
2238
|
+
gaps,
|
|
2239
|
+
evidence: evidenceRun.evidence,
|
|
2240
|
+
evidenceError: evidenceRun.error,
|
|
2241
|
+
learningError,
|
|
2242
|
+
});
|
|
2243
|
+
|
|
2244
|
+
// Quality evaluation
|
|
2245
|
+
process.stderr.write(`PROGRESS:research:round-${roundNumber}:evaluating\n`);
|
|
2246
|
+
const evaluation = await evaluateResearchQuality(
|
|
2247
|
+
query,
|
|
2248
|
+
rounds,
|
|
2249
|
+
allLearnings,
|
|
2250
|
+
allGaps,
|
|
2251
|
+
qualityHistory,
|
|
2252
|
+
);
|
|
2253
|
+
qualityHistory.push(evaluation.score);
|
|
2254
|
+
allGaps = uniqueStrings([...allGaps, ...(evaluation.knowledgeGaps || [])]);
|
|
2255
|
+
updateQuestionLedger(questions, {
|
|
2256
|
+
roundNumber,
|
|
2257
|
+
gaps: evaluation.knowledgeGaps || [],
|
|
2258
|
+
});
|
|
2259
|
+
const preliminaryFloor = computeResearchFloor({
|
|
2260
|
+
sources: combinedSources,
|
|
2261
|
+
fetchedSources,
|
|
2262
|
+
gaps: allGaps,
|
|
2263
|
+
questions,
|
|
2264
|
+
rounds,
|
|
2265
|
+
qualityScore: evaluation.score,
|
|
2266
|
+
qualityThreshold,
|
|
2267
|
+
maxSources: options.maxSources,
|
|
2268
|
+
requireCitations: false,
|
|
2269
|
+
requireQuestions: false,
|
|
2270
|
+
});
|
|
2271
|
+
process.stderr.write(
|
|
2272
|
+
`[greedysearch] Quality score round ${roundNumber}: ${evaluation.score.toFixed(1)} (shouldContinue: ${evaluation.shouldContinue}, floor: ${preliminaryFloor.floorMet})\n`,
|
|
2273
|
+
);
|
|
2274
|
+
|
|
2275
|
+
// Early termination is outcome-first: Gemini quality alone is not enough.
|
|
2276
|
+
// Stop early only when the score is high AND deterministic source/floor checks pass.
|
|
2277
|
+
if (
|
|
2278
|
+
evaluation.score >= qualityThreshold &&
|
|
2279
|
+
preliminaryFloor.floorMet &&
|
|
2280
|
+
(!evaluation.shouldContinue ||
|
|
2281
|
+
evaluation.terminationReason === "quality_threshold")
|
|
2282
|
+
) {
|
|
2283
|
+
terminationReason = evaluation.terminationReason || "quality_threshold";
|
|
2284
|
+
process.stderr.write(
|
|
2285
|
+
`[greedysearch] Research floor reached (score: ${evaluation.score.toFixed(1)}). Terminating early.\n`,
|
|
2286
|
+
);
|
|
2287
|
+
break;
|
|
2288
|
+
}
|
|
2289
|
+
|
|
2290
|
+
const nextBreadth = Math.max(1, Math.ceil(roundBreadth / 2));
|
|
2291
|
+
|
|
2292
|
+
// Convert learning follow-ups to search actions
|
|
2293
|
+
const followUpActions = (learningPayload.followUpQueries || [])
|
|
2294
|
+
.map((q) => ({
|
|
2295
|
+
type: "search",
|
|
2296
|
+
query: sanitizeResearchQuery(String(q)),
|
|
2297
|
+
researchGoal: "Follow-up from learning extraction",
|
|
2298
|
+
}))
|
|
2299
|
+
.filter((a) => a.query && a.query.toLowerCase() !== query.toLowerCase())
|
|
2300
|
+
.slice(0, nextBreadth);
|
|
2301
|
+
|
|
2302
|
+
// Augment with evaluator's nextActions if follow-ups are insufficient
|
|
2303
|
+
let nextActiveActions = followUpActions;
|
|
2304
|
+
if (
|
|
2305
|
+
nextActiveActions.length < nextBreadth &&
|
|
2306
|
+
evaluation.nextActions.length > 0
|
|
2307
|
+
) {
|
|
2308
|
+
const evaluatorActions = evaluation.nextActions
|
|
2309
|
+
.map((a) => validateAction(a))
|
|
2310
|
+
.filter(Boolean);
|
|
2311
|
+
const merged = [...nextActiveActions, ...evaluatorActions];
|
|
2312
|
+
nextActiveActions = merged.slice(0, nextBreadth);
|
|
2313
|
+
}
|
|
2314
|
+
|
|
2315
|
+
// Gap-driven fallback actions (search type)
|
|
2316
|
+
if (nextActiveActions.length < nextBreadth && allGaps.length > 0) {
|
|
2317
|
+
const fallbacks = buildFallbackQueriesFromGaps(
|
|
2318
|
+
allGaps,
|
|
2319
|
+
query,
|
|
2320
|
+
usedQueries,
|
|
2321
|
+
nextBreadth - nextActiveActions.length,
|
|
2322
|
+
roundIndex + 1,
|
|
2323
|
+
);
|
|
2324
|
+
const fallbackActions = fallbacks.map((f) => ({
|
|
2325
|
+
type: "search",
|
|
2326
|
+
query: f.query,
|
|
2327
|
+
researchGoal: f.researchGoal,
|
|
2328
|
+
}));
|
|
2329
|
+
nextActiveActions = [...nextActiveActions, ...fallbackActions].slice(
|
|
2330
|
+
0,
|
|
2331
|
+
nextBreadth,
|
|
2332
|
+
);
|
|
2333
|
+
if (fallbacks.length > 0) {
|
|
2334
|
+
process.stderr.write(
|
|
2335
|
+
`[greedysearch] Generated ${fallbacks.length} gap-driven fallback actions.\n`,
|
|
2336
|
+
);
|
|
2337
|
+
}
|
|
2338
|
+
}
|
|
2339
|
+
|
|
2340
|
+
// If still insufficient, re-plan from accumulated learnings
|
|
2341
|
+
activeActions =
|
|
2342
|
+
nextActiveActions.length >= nextBreadth ? nextActiveActions : null;
|
|
2343
|
+
}
|
|
2344
|
+
|
|
2345
|
+
process.stderr.write("PROGRESS:research:final-report\n");
|
|
2346
|
+
let synthesis = {
|
|
2347
|
+
answer: allLearnings.length
|
|
2348
|
+
? allLearnings.map((learning) => `- ${learning}`).join("\n")
|
|
2349
|
+
: "Research completed, but no structured learnings were extracted.",
|
|
2350
|
+
agreement: { level: "mixed", summary: "Research synthesis fallback." },
|
|
2351
|
+
differences: [],
|
|
2352
|
+
caveats: [],
|
|
2353
|
+
claims: [],
|
|
2354
|
+
recommendedSources: combinedSources.slice(0, 4).map((source) => source.id),
|
|
2355
|
+
synthesized: false,
|
|
2356
|
+
};
|
|
2357
|
+
try {
|
|
2358
|
+
const rawReport = await runGeminiPrompt(
|
|
2359
|
+
buildFinalReportPrompt(
|
|
2360
|
+
query,
|
|
2361
|
+
rounds,
|
|
2362
|
+
combinedSources,
|
|
2363
|
+
questions,
|
|
2364
|
+
evidenceItems,
|
|
2365
|
+
),
|
|
2366
|
+
{ timeoutMs: 180000 },
|
|
2367
|
+
);
|
|
2368
|
+
const parsed = parseGeminiJson(rawReport, {});
|
|
2369
|
+
const hasClaims = Array.isArray(parsed?.claims) && parsed.claims.length > 0;
|
|
2370
|
+
synthesis = {
|
|
2371
|
+
...synthesis,
|
|
2372
|
+
...parsed,
|
|
2373
|
+
rawAnswer: rawReport.answer || "",
|
|
2374
|
+
geminiSources: rawReport.sources || [],
|
|
2375
|
+
// Only mark as synthesized if Gemini actually returned structured
|
|
2376
|
+
// claims. An empty/minimal response should not block the evidence
|
|
2377
|
+
// fallback from running.
|
|
2378
|
+
synthesized: hasClaims,
|
|
2379
|
+
};
|
|
2380
|
+
} catch (error) {
|
|
2381
|
+
process.stderr.write(
|
|
2382
|
+
`[greedysearch] Final report failed: ${error.message}\n`,
|
|
2383
|
+
);
|
|
2384
|
+
synthesis.error = error.message;
|
|
2385
|
+
}
|
|
2386
|
+
|
|
2387
|
+
// Fallback: when no structured learnings were produced but per-source
|
|
2388
|
+
// evidence was extracted successfully, ask Gemini to synthesize a final
|
|
2389
|
+
// report directly from the evidence. This rescues runs whose per-round
|
|
2390
|
+
// learning prompt failed (e.g. transient Gemini input field rejection)
|
|
2391
|
+
// but whose evidence extraction step still captured real data.
|
|
2392
|
+
const hasStructuredSynthesis =
|
|
2393
|
+
synthesis.synthesized === true &&
|
|
2394
|
+
Array.isArray(synthesis.claims) &&
|
|
2395
|
+
synthesis.claims.length > 0;
|
|
2396
|
+
if (!hasStructuredSynthesis && evidenceItems.length > 0) {
|
|
2397
|
+
process.stderr.write(
|
|
2398
|
+
"[greedysearch] Falling back to evidence-based synthesis (no per-round learnings).\n",
|
|
2399
|
+
);
|
|
2400
|
+
try {
|
|
2401
|
+
const evidencePrompt = buildSynthesisFromEvidencePrompt(
|
|
2402
|
+
query,
|
|
2403
|
+
combinedSources,
|
|
2404
|
+
questions,
|
|
2405
|
+
evidenceItems,
|
|
2406
|
+
);
|
|
2407
|
+
const rawEvidenceReport = await runGeminiPrompt(evidencePrompt, {
|
|
2408
|
+
timeoutMs: 180000,
|
|
2409
|
+
});
|
|
2410
|
+
const parsedEvidence = parseGeminiJson(rawEvidenceReport, {});
|
|
2411
|
+
synthesis = {
|
|
2412
|
+
...synthesis,
|
|
2413
|
+
...parsedEvidence,
|
|
2414
|
+
rawAnswer: rawEvidenceReport.answer || synthesis.answer || "",
|
|
2415
|
+
geminiSources:
|
|
2416
|
+
rawEvidenceReport.sources || synthesis.geminiSources || [],
|
|
2417
|
+
synthesized: true,
|
|
2418
|
+
synthesisMode: "evidence_fallback",
|
|
2419
|
+
};
|
|
2420
|
+
} catch (error) {
|
|
2421
|
+
process.stderr.write(
|
|
2422
|
+
`[greedysearch] Evidence-based synthesis failed: ${error.message}\n`,
|
|
2423
|
+
);
|
|
2424
|
+
synthesis.evidenceFallbackError = error.message;
|
|
2425
|
+
}
|
|
2426
|
+
}
|
|
2427
|
+
|
|
2428
|
+
const finishedAt = new Date().toISOString();
|
|
2429
|
+
const durationMs = Date.now() - startMs;
|
|
2430
|
+
const qualityScore = qualityHistory.at(-1) || 0;
|
|
2431
|
+
fetchedSources = annotateFetchedSourcesWithIds(
|
|
2432
|
+
fetchedSources,
|
|
2433
|
+
combinedSources,
|
|
2434
|
+
);
|
|
2435
|
+
|
|
2436
|
+
// Citation audit + final question reconciliation + deterministic completion floor
|
|
2437
|
+
process.stderr.write("PROGRESS:research:audit-citations\n");
|
|
2438
|
+
const citationAudit = auditCitations(synthesis.answer || "", combinedSources);
|
|
2439
|
+
reconcileQuestionsFromSynthesis(questions, synthesis, citationAudit);
|
|
2440
|
+
const floor = computeResearchFloor({
|
|
2441
|
+
sources: combinedSources,
|
|
2442
|
+
fetchedSources,
|
|
2443
|
+
synthesis,
|
|
2444
|
+
citationAudit,
|
|
2445
|
+
gaps: allGaps,
|
|
2446
|
+
questions,
|
|
2447
|
+
rounds,
|
|
2448
|
+
qualityScore,
|
|
2449
|
+
qualityThreshold,
|
|
2450
|
+
maxSources: options.maxSources,
|
|
2451
|
+
});
|
|
2452
|
+
if (floor.floorMet && terminationReason === "max_rounds") {
|
|
2453
|
+
terminationReason = "done_floor_met";
|
|
2454
|
+
} else if (!floor.floorMet && terminationReason === "quality_threshold") {
|
|
2455
|
+
terminationReason = "max_rounds_floor_unmet";
|
|
2456
|
+
}
|
|
2457
|
+
|
|
2458
|
+
const manifest = {
|
|
2459
|
+
startedAt,
|
|
2460
|
+
finishedAt,
|
|
2461
|
+
durationMs,
|
|
2462
|
+
engines: RESEARCH_ENGINES,
|
|
2463
|
+
synthesizer: "gemini",
|
|
2464
|
+
rounds: rounds.length,
|
|
2465
|
+
actionsRun: totalActionsRun,
|
|
2466
|
+
searches: totalSearches,
|
|
2467
|
+
fetches: totalFetches,
|
|
2468
|
+
sourcesFetched: fetchedSources.filter((s) => s?.contentChars > 100).length,
|
|
2469
|
+
engineFailures,
|
|
2470
|
+
terminationReason,
|
|
2471
|
+
floorMet: floor.floorMet,
|
|
2472
|
+
};
|
|
2473
|
+
let bundle = null;
|
|
2474
|
+
let fetchedFiles;
|
|
2475
|
+
if (writeBundle) {
|
|
2476
|
+
process.stderr.write("PROGRESS:research:bundle\n");
|
|
2477
|
+
try {
|
|
2478
|
+
bundle = await writeResearchBundle({
|
|
2479
|
+
query,
|
|
2480
|
+
rounds,
|
|
2481
|
+
sources: combinedSources,
|
|
2482
|
+
fetchedSources,
|
|
2483
|
+
evidenceItems,
|
|
2484
|
+
synthesis,
|
|
2485
|
+
citationAudit,
|
|
2486
|
+
floor,
|
|
2487
|
+
manifest,
|
|
2488
|
+
allGaps,
|
|
2489
|
+
questions,
|
|
2490
|
+
outDir: researchOutDir,
|
|
2491
|
+
});
|
|
2492
|
+
fetchedFiles = bundle.sourceFiles;
|
|
2493
|
+
delete bundle.sourceFiles;
|
|
2494
|
+
} catch (error) {
|
|
2495
|
+
bundle = { error: error.message || String(error) };
|
|
2496
|
+
fetchedFiles = await writeResearchSourcesToFiles(fetchedSources);
|
|
2497
|
+
}
|
|
2498
|
+
} else {
|
|
2499
|
+
fetchedFiles = await writeResearchSourcesToFiles(fetchedSources);
|
|
2500
|
+
}
|
|
2501
|
+
|
|
2502
|
+
process.stderr.write("PROGRESS:research:done\n");
|
|
2503
|
+
|
|
2504
|
+
return {
|
|
2505
|
+
query,
|
|
2506
|
+
_research: {
|
|
2507
|
+
mode: "iterative",
|
|
2508
|
+
breadth: options.breadth,
|
|
2509
|
+
iterations: options.iterations,
|
|
2510
|
+
maxSources: options.maxSources,
|
|
2511
|
+
rounds,
|
|
2512
|
+
learnings: allLearnings,
|
|
2513
|
+
gaps: allGaps,
|
|
2514
|
+
evidence: evidenceItems,
|
|
2515
|
+
questions,
|
|
2516
|
+
questionProgress: questionProgress(questions),
|
|
2517
|
+
qualityHistory,
|
|
2518
|
+
terminationReason,
|
|
2519
|
+
qualityThreshold,
|
|
2520
|
+
floor,
|
|
2521
|
+
bundle,
|
|
2522
|
+
manifest,
|
|
2523
|
+
},
|
|
2524
|
+
_citationAudit: citationAudit,
|
|
2525
|
+
_sources: combinedSources,
|
|
2526
|
+
_fetchedSources: fetchedFiles,
|
|
2527
|
+
_synthesis: synthesis,
|
|
2528
|
+
_confidence: {
|
|
2529
|
+
sourcesCount: combinedSources.length,
|
|
2530
|
+
fetchedSourceSuccessRate:
|
|
2531
|
+
fetchedSources.length > 0
|
|
2532
|
+
? Number(
|
|
2533
|
+
(
|
|
2534
|
+
fetchedSources.filter((source) => source.contentChars > 100)
|
|
2535
|
+
.length / fetchedSources.length
|
|
2536
|
+
).toFixed(2),
|
|
2537
|
+
)
|
|
2538
|
+
: 0,
|
|
2539
|
+
agreementLevel: synthesis.agreement?.level || "mixed",
|
|
2540
|
+
floorMet: floor.floorMet,
|
|
2541
|
+
},
|
|
2542
|
+
};
|
|
2543
|
+
}
|
|
2544
|
+
|
|
2545
|
+
/**
 * Collapse fetched sources that refer to the same document.
 *
 * Two passes are made:
 *   1. Entries sharing an `id` (or, when no id is set, the same normalized
 *      `finalUrl`/`url`) are reduced to the one with the larger
 *      `contentChars`. Entries that yield no key at all are dropped.
 *   2. Among the survivors, entries whose leading text is rated as nearly
 *      identical by `jaccardSimilarity` are merged, again keeping the one
 *      with the larger `contentChars`.
 *
 * Ties always favour the entry seen first, and output order follows the
 * order in which each key first appeared.
 *
 * @param {Iterable<object>} sources - Fetched source records.
 * @returns {object[]} De-duplicated records (same object references).
 */
function dedupeFetchedSources(sources) {
  // Texts shorter than this are never treated as near-duplicates.
  const MIN_COMPARABLE_CHARS = 400;
  // Only this many leading characters take part in the similarity check.
  const COMPARE_WINDOW_CHARS = 4000;
  // Similarity score at or above which two texts count as the same document.
  const NEAR_DUPLICATE_THRESHOLD = 0.9;

  const sizeOf = (entry) => entry.contentChars || 0;
  const textOf = (entry) => String(entry.content || entry.snippet || "");

  // Pass 1: one entry per id / normalized URL, preferring the larger body.
  const bestByKey = new Map();
  for (const candidate of sources) {
    const key =
      candidate?.id ||
      normalizeUrl(candidate?.finalUrl || candidate?.url || "");
    if (!key) continue;

    const current = bestByKey.get(key);
    // Strictly-greater comparison: an equal-sized later entry never replaces
    // the one already stored.
    if (current && !(sizeOf(candidate) > sizeOf(current))) continue;
    bestByKey.set(key, candidate);
  }

  // Pass 2: merge entries whose text is nearly identical.
  const unique = [];
  for (const candidate of bestByKey.values()) {
    const candidateText = textOf(candidate);

    let matchAt = -1;
    for (let i = 0; i < unique.length; i += 1) {
      const keptText = textOf(unique[i]);
      const comparable =
        candidateText.length >= MIN_COMPARABLE_CHARS &&
        keptText.length >= MIN_COMPARABLE_CHARS;
      if (
        comparable &&
        jaccardSimilarity(
          candidateText.slice(0, COMPARE_WINDOW_CHARS),
          keptText.slice(0, COMPARE_WINDOW_CHARS),
        ) >= NEAR_DUPLICATE_THRESHOLD
      ) {
        matchAt = i;
        break;
      }
    }

    if (matchAt === -1) {
      unique.push(candidate);
    } else if (sizeOf(candidate) > sizeOf(unique[matchAt])) {
      unique[matchAt] = candidate;
    }
  }
  return unique;
}
|