@apmantza/greedysearch-pi 1.9.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,16 +6,19 @@
6
6
  // no-API browser engines and source fetchers instead of Firecrawl/OpenAI.
7
7
 
8
8
  import { spawn } from "node:child_process";
9
+ import { mkdirSync, writeFileSync } from "node:fs";
9
10
  import { join } from "node:path";
10
11
  import { fileURLToPath } from "node:url";
11
12
  import {
12
13
  buildSourceRegistry,
14
+ classifySourceType,
13
15
  computeCompositeScore,
14
16
  mergeFetchDataIntoSources,
15
17
  normalizeUrl,
16
18
  trimText,
17
19
  } from "./sources.mjs";
18
20
  import { parseStructuredJson } from "./synthesis.mjs";
21
+ import { RESEARCH_ENGINES } from "./constants.mjs";
19
22
  import { runGeminiPrompt } from "./synthesis-runner.mjs";
20
23
 
21
24
  const __dir = fileURLToPath(new URL(".", import.meta.url)).replace(
@@ -23,6 +26,33 @@ const __dir = fileURLToPath(new URL(".", import.meta.url)).replace(
23
26
  "$1",
24
27
  );
25
28
  const SEARCH_BIN = join(__dir, "..", "..", "bin", "search.mjs");
29
// Default parent directory for research bundles: <cwd>/.pi/greedysearch-research.
// process.cwd() is read once, when this module is evaluated.
const DEFAULT_RESEARCH_BUNDLE_ROOT = join(process.cwd(), ".pi", "greedysearch-research");
34
+
35
/**
 * Turn an arbitrary label into a lowercase, hyphen-separated slug of at most
 * 60 characters.
 *
 * @param {*} value - Label to slugify; falsy values are treated as "research".
 * @returns {string} The slug, or "research" when nothing usable remains.
 */
function slugifyResearchName(value) {
  const slug = String(value || "research")
    .toLowerCase()
    .replaceAll(/[^a-z0-9]+/g, "-")
    .replaceAll(/^-|-$/g, "")
    .slice(0, 60)
    // The 60-character cut can land right after a separator, so strip a
    // trailing hyphen again once the slug has been truncated.
    .replace(/-$/, "");
  return slug || "research";
}
43
+
44
/**
 * Collect the distinct, non-empty strings from `items` in first-seen order.
 * Each item is stringified and passed through trimText(…, 1000) before it is
 * compared, so de-duplication happens on the trimmed form.
 *
 * @param {Iterable<*>|null|undefined} items - Values to de-duplicate.
 * @param {number} [limit=Infinity] - Stop once this many strings were kept.
 * @returns {string[]} The kept strings.
 */
function uniqueStrings(items, limit = Infinity) {
  // A Set iterates in insertion order, so it doubles as the result list.
  const kept = new Set();
  for (const raw of items || []) {
    const text = trimText(String(raw || ""), 1000);
    if (text && !kept.has(text)) {
      kept.add(text);
      if (kept.size >= limit) break;
    }
  }
  return [...kept];
}
26
56
 
27
57
  async function fetchMultipleResearchSources(...args) {
28
58
  const { fetchMultipleSources } = await import("./fetch-source.mjs");
@@ -327,22 +357,28 @@ export function buildFallbackQueriesFromGaps(
327
357
  ) {
328
358
  const fallbacks = [];
329
359
  const angles = [
330
- { template: (g) => `${g} official documentation`, label: "official docs" },
331
360
  {
332
- template: (g) => `${g} GitHub issues discussions`,
361
+ template: (gap) => `${gap} official documentation`,
362
+ label: "official docs",
363
+ },
364
+ {
365
+ template: (gap) => `${gap} GitHub issues discussions`,
333
366
  label: "community signals",
334
367
  },
335
368
  {
336
- template: (g) => `${g} benchmarks performance comparison`,
369
+ template: (gap) => `${gap} benchmarks performance comparison`,
337
370
  label: "benchmarks",
338
371
  },
339
- { template: (g) => `${g} limitations risks caveats`, label: "limitations" },
340
372
  {
341
- template: (g) => `${g} production deployment experience`,
373
+ template: (gap) => `${gap} limitations risks caveats`,
374
+ label: "limitations",
375
+ },
376
+ {
377
+ template: (gap) => `${gap} production deployment experience`,
342
378
  label: "production usage",
343
379
  },
344
380
  {
345
- template: (g) => `${originalQuery} ${g} counter evidence`,
381
+ template: (gap) => `${originalQuery} ${gap} counter evidence`,
346
382
  label: "counter-evidence",
347
383
  },
348
384
  ];
@@ -350,7 +386,7 @@ export function buildFallbackQueriesFromGaps(
350
386
  for (let i = 0; i < gaps.length && fallbacks.length < nextBreadth; i++) {
351
387
  const gap = gaps[i];
352
388
  const angle = angles[i % angles.length];
353
- const candidate = angle.template(originalQuery, gap);
389
+ const candidate = angle.template(gap);
354
390
  if (!isDuplicateQuery(candidate, usedQueries, { roundIndex })) {
355
391
  fallbacks.push({
356
392
  query: candidate,
@@ -438,7 +474,9 @@ async function evaluateResearchQuality(
438
474
 
439
475
  function summarizeEngineAnswers(result) {
440
476
  const summaries = {};
441
- for (const engine of ["perplexity", "bing", "google"]) {
477
+ for (const engine of Object.keys(result || {}).filter(
478
+ (key) => !key.startsWith("_"),
479
+ )) {
442
480
  const value = result?.[engine];
443
481
  if (!value) continue;
444
482
  summaries[engine] = value.error
@@ -598,9 +636,10 @@ async function executeResearchAction(
598
636
  engines: ["fetch"],
599
637
  engineCount: 1,
600
638
  perEngine: {},
601
- sourceType: classifySourceTypeFromDomain(
639
+ sourceType: classifySourceType(
602
640
  domain,
603
641
  fetchResult.title || "",
642
+ fetchResult.finalUrl || normalizedUrl,
604
643
  ),
605
644
  isOfficial: false,
606
645
  smartScore: 0,
@@ -737,30 +776,6 @@ function getDomainFromUrl(rawUrl) {
737
776
  }
738
777
  }
739
778
 
740
- function classifySourceTypeFromDomain(domain, title = "") {
741
- const { matchesDomain, SOCIAL_HOSTS, COMMUNITY_HOSTS, NEWS_HOSTS } =
742
- require("./sources.mjs");
743
- const lowerTitle = title.toLowerCase();
744
-
745
- if (domain === "github.com" || domain === "gitlab.com") return "repo";
746
- if (matchesDomain(domain, SOCIAL_HOSTS)) return "social";
747
- if (matchesDomain(domain, COMMUNITY_HOSTS)) return "community";
748
- if (matchesDomain(domain, NEWS_HOSTS)) return "news";
749
- if (
750
- domain.startsWith("docs.") ||
751
- domain.startsWith("developer.") ||
752
- domain.startsWith("developers.") ||
753
- domain.startsWith("api.") ||
754
- lowerTitle.includes("documentation") ||
755
- lowerTitle.includes("docs") ||
756
- lowerTitle.includes("reference")
757
- ) {
758
- return "official-docs";
759
- }
760
- if (domain.startsWith("blog.")) return "maintainer-blog";
761
- return "website";
762
- }
763
-
764
779
  /**
765
780
  * Normalize a GitHub root/tree URL into specific fetchable pages.
766
781
  * Expands github.com/owner/repo into [README, CONTRIBUTING, CHANGELOG, key files].
@@ -855,11 +870,161 @@ export function queriesToActions(queries) {
855
870
  .filter((a) => a.query);
856
871
  }
857
872
 
873
/**
 * Derive the key used to recognise a source across rounds: the normalized
 * form of its final, canonical or plain URL (first one present), otherwise
 * its id, otherwise an empty string.
 *
 * @param {object|null|undefined} source - Source record.
 * @returns {string} Identity key, or "" when none can be derived.
 */
function sourceKey(source) {
  const rawUrl = source?.finalUrl || source?.canonicalUrl || source?.url || "";
  const normalized = normalizeUrl(rawUrl);
  if (normalized) return normalized;
  return source?.id || "";
}
882
+
883
/**
 * Build the prompt that asks the model for goal-based evidence extraction.
 *
 * The prompt embeds up to 12 questions that are not closed and up to 6
 * sources that carry text (content or snippet) and whose sourceKey is not in
 * `alreadyExtracted`. Source text is passed through trimText(…, 5000).
 *
 * @param {string} originalQuery - The user's research question.
 * @param {object[]|null|undefined} questions - Question ledger entries.
 * @param {object[]|null|undefined} fetchedSources - Fetched source records.
 * @param {Set<string>} [alreadyExtracted] - Keys of sources handled earlier.
 * @returns {string} Newline-joined prompt text.
 */
function buildEvidenceExtractionPrompt(
  originalQuery,
  questions,
  fetchedSources,
  alreadyExtracted = new Set(),
) {
  const stillOpen = (questions || []).filter(
    (entry) => entry.status !== "closed",
  );
  const openQuestions = stillOpen
    .slice(0, 12)
    .map(({ id, question }) => ({ id, question }));

  const unseenWithText = (fetchedSources || []).filter(
    (source) =>
      (source?.content || source?.snippet) &&
      !alreadyExtracted.has(sourceKey(source)),
  );
  const sourceSnippets = unseenWithText.slice(0, 6).map((source, position) => ({
    id: source.id || `F${position + 1}`,
    title: source.title || "",
    url: source.finalUrl || source.url || source.canonicalUrl || "",
    content: trimText(source.content || source.snippet || "", 5000),
  }));

  // Example of the JSON shape the model is asked to return.
  const responseShape = {
    extractions: [
      {
        sourceId: "S1",
        url: "https://example.com/source",
        rational: "why this source matters for the goal",
        evidence:
          "specific quoted/paraphrased evidence with numbers, dates, caveats",
        summary: "concise contribution to the research question",
        answers: [
          {
            id: "Q1",
            evidence: "brief evidence that closes the question",
          },
        ],
        newQuestions: ["new sub-question raised by this source"],
      },
    ],
  };

  const instructions = [
    "You are doing goal-based evidence extraction for an iterative research run.",
    "For each source, extract only information that helps answer the open questions.",
    "Use original wording/details where useful. Do not invent answers; leave questions open if evidence is insufficient.",
    "If a source answers one or more tracked questions, identify those question IDs explicitly.",
    "Also propose genuinely new sub-questions discovered from the evidence.",
  ];
  const context = [
    `Original research question: ${originalQuery}`,
    `Open question ledger: ${JSON.stringify(openQuestions, null, 2)}`,
    `Fetched sources: ${JSON.stringify(sourceSnippets, null, 2)}`,
  ];
  const responseFormat = [
    "Respond ONLY with JSON wrapped in BEGIN_JSON / END_JSON markers:",
    "BEGIN_JSON",
    JSON.stringify(responseShape, null, 2),
    "END_JSON",
  ];

  return [...instructions, "", ...context, "", ...responseFormat].join("\n");
}
943
+
944
/**
 * Clean up the model's `extractions` payload into bounded evidence records.
 *
 * Each extraction is matched to a fetched source by `sourceId` first and by
 * normalized URL second. Text fields are passed through trimText with fixed
 * limits, answers without an id are dropped, and records that carry no
 * sourceId, url, summary or evidence are discarded.
 *
 * @param {object|null|undefined} payload - Parsed model response.
 * @param {object[]|null|undefined} fetchedSources - Sources shown to the model.
 * @returns {object[]} Normalized evidence records.
 */
function normalizeEvidenceExtractions(payload, fetchedSources) {
  const sourcesById = new Map();
  const sourcesByUrl = new Map();
  for (const source of fetchedSources || []) {
    if (source?.id) sourcesById.set(String(source.id), source);
    const key = sourceKey(source);
    if (key) sourcesByUrl.set(key, source);
  }

  const extractions = Array.isArray(payload?.extractions)
    ? payload.extractions
    : [];
  const normalized = [];
  for (const item of extractions) {
    const matched =
      sourcesById.get(String(item?.sourceId || "")) ||
      sourcesByUrl.get(normalizeUrl(item?.url || "") || "");
    const sourceId = String(item?.sourceId || matched?.id || "");
    const url = normalizeUrl(
      item?.url || matched?.finalUrl || matched?.url || "",
    );

    const answers = [];
    if (Array.isArray(item?.answers)) {
      for (const answer of item.answers) {
        const id = String(answer?.id || "");
        const evidence = trimText(answer?.evidence || "", 500);
        if (id) {
          answers.push({ id, evidence, sourceIds: sourceId ? [sourceId] : [] });
        }
      }
    }

    const record = {
      sourceId,
      url,
      title: matched?.title || item?.title || "",
      rational: trimText(item?.rational || "", 700),
      evidence: trimText(item?.evidence || "", 1600),
      summary: trimText(item?.summary || "", 700),
      answers,
      newQuestions: uniqueStrings(item?.newQuestions || [], 6),
    };
    // Keep only records that identify a source or carry some extracted text.
    if (record.sourceId || record.url || record.summary || record.evidence) {
      normalized.push(record);
    }
  }
  return normalized;
}
986
+
987
/**
 * Run one goal-based evidence extraction pass over sources that carry text
 * and have not been extracted yet.
 *
 * Only the sources actually sent to the model are recorded in
 * `extractedSourceKeys`; the rest stay pending for a later call. Failures are
 * reported through the `error` field rather than thrown.
 *
 * @param {object} params
 * @param {string} params.query - Original research question.
 * @param {object[]} params.questions - Question ledger.
 * @param {object[]} params.fetchedSources - Fetched source records.
 * @param {Set<string>} [params.extractedSourceKeys] - Keys of sources already
 *   extracted; mutated in place on success.
 * @returns {Promise<{evidence: object[], error: string}>}
 */
async function extractEvidenceFromSources({
  query,
  questions,
  fetchedSources,
  extractedSourceKeys = new Set(),
}) {
  const pending = (fetchedSources || []).filter(
    (source) =>
      (source?.content || source?.snippet) &&
      !extractedSourceKeys.has(sourceKey(source)),
  );
  if (pending.length === 0) return { evidence: [], error: "" };

  // buildEvidenceExtractionPrompt embeds at most 6 sources. Restrict the batch
  // to the same 6 so sources the model never saw are not marked as extracted.
  const batch = pending.slice(0, 6);
  try {
    const raw = await runGeminiPrompt(
      buildEvidenceExtractionPrompt(
        query,
        questions,
        batch,
        extractedSourceKeys,
      ),
      { timeoutMs: 120000 },
    );
    const parsed = parseGeminiJson(raw, { extractions: [] });
    const evidence = normalizeEvidenceExtractions(parsed, batch);
    for (const source of batch) {
      const key = sourceKey(source);
      if (key) extractedSourceKeys.add(key);
    }
    return { evidence, error: "" };
  } catch (error) {
    return { evidence: [], error: error?.message || String(error) };
  }
}
1020
+
858
1021
  function buildLearningPrompt(
859
1022
  originalQuery,
860
1023
  roundQueries,
861
1024
  searchSummaries,
862
1025
  fetchedSources,
1026
+ questions = [],
1027
+ evidenceItems = [],
863
1028
  ) {
864
1029
  const sourceSnippets = fetchedSources
865
1030
  .filter((source) => source?.content || source?.snippet)
@@ -878,6 +1043,8 @@ function buildLearningPrompt(
878
1043
  "",
879
1044
  `Original research question: ${originalQuery}`,
880
1045
  `Round queries: ${JSON.stringify(roundQueries, null, 2)}`,
1046
+ `Question ledger: ${JSON.stringify(questions, null, 2)}`,
1047
+ `Extracted source evidence: ${JSON.stringify(evidenceItems.slice(-12), null, 2)}`,
881
1048
  `Engine summaries: ${JSON.stringify(searchSummaries, null, 2)}`,
882
1049
  `Fetched source snippets: ${JSON.stringify(sourceSnippets, null, 2)}`,
883
1050
  "",
@@ -886,6 +1053,14 @@ function buildLearningPrompt(
886
1053
  JSON.stringify(
887
1054
  {
888
1055
  learnings: ["concise, information-dense learning"],
1056
+ answeredQuestions: [
1057
+ {
1058
+ id: "Q1",
1059
+ evidence: "brief evidence that closes this question",
1060
+ sourceIds: ["S1"],
1061
+ },
1062
+ ],
1063
+ newQuestions: ["new sub-question discovered from the evidence"],
889
1064
  followUpQueries: ["specific next search query"],
890
1065
  gaps: ["important uncertainty or missing evidence"],
891
1066
  },
@@ -896,7 +1071,13 @@ function buildLearningPrompt(
896
1071
  ].join("\n");
897
1072
  }
898
1073
 
899
- function buildFinalReportPrompt(originalQuery, rounds, sources) {
1074
+ function buildFinalReportPrompt(
1075
+ originalQuery,
1076
+ rounds,
1077
+ sources,
1078
+ questions = [],
1079
+ evidenceItems = [],
1080
+ ) {
900
1081
  const learnings = rounds.flatMap((round) => round.learnings || []);
901
1082
  const gaps = rounds.flatMap((round) => round.gaps || []);
902
1083
  const sourceRegistry = sources.slice(0, 12).map((source) => ({
@@ -932,6 +1113,8 @@ function buildFinalReportPrompt(originalQuery, rounds, sources) {
932
1113
  `Original research question: ${originalQuery}`,
933
1114
  `Learnings: ${JSON.stringify(learnings, null, 2)}`,
934
1115
  `Known gaps/caveats: ${JSON.stringify(gaps, null, 2)}`,
1116
+ `Question ledger: ${JSON.stringify(questions, null, 2)}`,
1117
+ `Goal-based extracted evidence: ${JSON.stringify(evidenceItems.slice(-20), null, 2)}`,
935
1118
  `Source registry: ${JSON.stringify(sourceRegistry, null, 2)}`,
936
1119
  "",
937
1120
  "Respond ONLY with JSON wrapped in BEGIN_JSON / END_JSON markers:",
@@ -961,6 +1144,81 @@ function buildFinalReportPrompt(originalQuery, rounds, sources) {
961
1144
  ].join("\n");
962
1145
  }
963
1146
 
1147
/**
 * Build the fallback final-report prompt that works directly from extracted
 * per-source evidence instead of per-round learnings.
 *
 * The prompt embeds the last 20 evidence items, the first 12 sources as a
 * compact registry, the question ids those evidence items claim to answer,
 * and the ledger questions that are not closed.
 *
 * @param {string} originalQuery - The user's research question.
 * @param {object[]} [sources] - Ranked source registry.
 * @param {object[]} [questions] - Question ledger.
 * @param {object[]} [evidenceItems] - Normalized evidence records.
 * @returns {string} Newline-joined prompt text.
 */
function buildSynthesisFromEvidencePrompt(
  originalQuery,
  sources = [],
  questions = [],
  evidenceItems = [],
) {
  const sourceRegistry = sources
    .slice(0, 12)
    .map(({ id, title, domain, canonicalUrl, sourceType, engines }) => ({
      id,
      title,
      domain,
      url: canonicalUrl,
      type: sourceType,
      engines,
    }));

  const recentEvidence = evidenceItems.slice(-20);
  // Ids are collected in first-seen order; the Set removes repeats.
  const answeredIds = new Set(
    recentEvidence
      .flatMap((item) => item.answers || [])
      .map((answer) => answer?.id)
      .filter(Boolean),
  );
  const stillOpen = (questions || [])
    .filter((entry) => entry.status !== "closed")
    .map(({ id, question }) => ({ id, question }));

  // Example of the JSON shape the model is asked to return.
  const responseShape = {
    answer: "markdown report with sections and inline [S1] citations",
    agreement: {
      level: "high|medium|low|mixed|conflicting",
      summary: "one-sentence confidence summary",
    },
    differences: ["notable disagreement or conflict between sources"],
    caveats: ["important caveat or qualification"],
    claims: [
      {
        claim: "specific factual statement supported by the evidence",
        support: "strong|moderate|weak|conflicting",
        sourceIds: ["S1", "S2"],
      },
    ],
    recommendedSources: ["S1", "S2"],
  };

  const task = [
    "You are writing the final research report from goal-based extracted evidence.",
    "Per-round learnings were not produced, but the per-source evidence extraction step succeeded.",
    "Synthesize a thorough markdown report using ONLY the evidence below. Every substantive claim MUST be backed by an [S1] citation.",
  ];
  const structure = [
    "Report structure:",
    "1. ## Summary — A 2-4 sentence executive summary of findings",
    "2. ## Key Findings — The main findings, organized by theme or question, each with inline citations",
    "3. ## Limitations & Caveats — Important qualifiers, gaps, or uncertainties",
  ];
  const context = [
    `Original research question: ${originalQuery}`,
    `Per-source extracted evidence: ${JSON.stringify(recentEvidence, null, 2)}`,
    `Source registry: ${JSON.stringify(sourceRegistry, null, 2)}`,
    `Questions already answered by the evidence: ${JSON.stringify(Array.from(answeredIds))}`,
    `Questions still open after this evidence: ${JSON.stringify(stillOpen)}`,
  ];
  const responseFormat = [
    "Respond ONLY with JSON wrapped in BEGIN_JSON / END_JSON markers:",
    "BEGIN_JSON",
    JSON.stringify(responseShape, null, 2),
    "END_JSON",
  ];

  return [
    ...task,
    "",
    ...structure,
    "",
    ...context,
    "",
    ...responseFormat,
  ].join("\n");
}
1221
+
964
1222
  async function runFastAllSearch(query, { locale = null, short = true } = {}) {
965
1223
  const args = [SEARCH_BIN, "all", "--inline", "--stdin", "--fast"];
966
1224
  if (!short) args.push("--full");
@@ -1046,7 +1304,9 @@ function shouldForwardChildStderr(line) {
1046
1304
  return (
1047
1305
  /^PROGRESS:/.test(line) ||
1048
1306
  /^\[greedysearch\]/.test(line) ||
1049
- /^\[(bing|perplexity|google|gemini)\]/.test(line) ||
1307
+ /^\[(bing|perplexity|google|gemini|chatgpt|logically|semantic-scholar)\]/.test(
1308
+ line,
1309
+ ) ||
1050
1310
  /^GreedySearch Chrome/.test(line) ||
1051
1311
  /^Launching GreedySearch Chrome/.test(line) ||
1052
1312
  /^Headless mode/.test(line) ||
@@ -1141,6 +1401,530 @@ export function auditCitations(answer, sources) {
1141
1401
  };
1142
1402
  }
1143
1403
 
1404
/**
 * Evaluate the deterministic "floor" a research run must reach to count as
 * done, and report the checks and metrics behind the verdict.
 *
 * Array-valued inputs that are not arrays (for example `null`) are treated
 * as empty, because parameter defaults only apply to `undefined`.
 *
 * @param {object} [params]
 * @param {object[]} [params.sources] - Ranked source registry.
 * @param {object[]} [params.fetchedSources] - Fetched source records.
 * @param {object} [params.synthesis] - Final synthesis; `claims` is read.
 * @param {object|null} [params.citationAudit] - Audit result; `cited`,
 *   `unfetched` and `ok` are read.
 * @param {string[]} [params.gaps] - Known gaps (only counted).
 * @param {object[]} [params.questions] - Question ledger.
 * @param {object[]} [params.rounds] - Completed rounds.
 * @param {number} [params.qualityScore] - Latest quality score.
 * @param {number} [params.qualityThreshold] - Configured threshold; the floor
 *   uses at most 8.
 * @param {number} [params.maxSources] - Source budget; drives `minFetched`.
 * @param {boolean} [params.requireCitations] - Enforce citation checks.
 * @param {boolean} [params.requireQuestions] - Enforce question closure.
 * @returns {{floorMet: boolean, checks: object, metrics: object}}
 */
export function computeResearchFloor({
  sources = [],
  fetchedSources = [],
  synthesis = {},
  citationAudit = null,
  gaps = [],
  questions = [],
  rounds = [],
  qualityScore = 0,
  qualityThreshold = 8.5,
  maxSources = 8,
  requireCitations = true,
  requireQuestions = true,
} = {}) {
  const toArray = (value) => (Array.isArray(value) ? value : []);
  const sourceList = toArray(sources);
  const fetchedList = toArray(fetchedSources);
  const gapList = toArray(gaps);
  const questionList = toArray(questions);
  const roundList = toArray(rounds);

  // A fetch counts when it is flagged ok or yielded more than 100 characters.
  const fetchedOk = fetchedList.filter(
    (source) =>
      source?.fetch?.ok ||
      (source?.contentChars || 0) > 100 ||
      String(source?.content || "").length > 100,
  );
  const primarySources = sourceList.filter((source) =>
    ["official-docs", "repo", "maintainer-blog", "academic"].includes(
      String(source?.sourceType || ""),
    ),
  );
  const claims = Array.isArray(synthesis?.claims) ? synthesis.claims : [];
  const citedCount = citationAudit?.cited?.length || 0;
  const questionStats = questionProgress(questionList);
  // Follow-up questions discovered during a run are useful handoff gaps, not a
  // reason to fail a short research run forever. The deterministic floor only
  // requires the original/root questions to close; newly-created questions stay
  // visible in STATUS.md and `gaps` for deeper follow-up rounds.
  const requiredQuestions = questionList.filter(
    (q) => !q.createdRound || q.reason === "Original research question",
  );
  const requiredQuestionStats = questionProgress(requiredQuestions);
  // Between 2 and 4 successful fetches, following the source budget.
  const minFetched = Math.min(4, Math.max(2, Number(maxSources) || 8));
  const checks = {
    roundsRun: roundList.length >= 1,
    fetchedSources: fetchedOk.length >= minFetched,
    primarySources: primarySources.length >= 1,
    qualityScore: qualityScore >= Math.min(qualityThreshold, 8),
    claimsExtracted: !requireCitations || claims.length > 0,
    citationsPresent: !requireCitations || citedCount > 0,
    citationsValid: !requireCitations || citationAudit?.ok === true,
    unfetchedCitations:
      !requireCitations || (citationAudit?.unfetched || []).length === 0,
    requiredQuestionsClosed:
      !requireQuestions || requiredQuestionStats.open === 0,
  };
  return {
    floorMet: Object.values(checks).every(Boolean),
    checks,
    metrics: {
      fetchedOk: fetchedOk.length,
      primarySources: primarySources.length,
      claims: claims.length,
      cited: citedCount,
      gaps: gapList.length,
      openQuestions: questionStats.open,
      closedQuestions: questionStats.closed,
      totalQuestions: questionStats.total,
      openRequiredQuestions: requiredQuestionStats.open,
      closedRequiredQuestions: requiredQuestionStats.closed,
      totalRequiredQuestions: requiredQuestionStats.total,
      qualityScore,
      minFetched,
    },
  };
}
1474
+
1475
/**
 * Return copies of the fetched sources, each guaranteed to have an `id`.
 *
 * An existing id is kept; otherwise the id of the registry source with the
 * same normalized URL is used; otherwise a positional `F<n>` id is assigned.
 *
 * @param {object[]|null|undefined} fetchedSources - Fetched source records.
 * @param {object[]|null|undefined} sources - Registry sources carrying ids.
 * @returns {object[]} Shallow copies with `id` set.
 */
function annotateFetchedSourcesWithIds(fetchedSources, sources) {
  const idByUrl = new Map();
  for (const registered of sources || []) {
    const registryKey = normalizeUrl(
      registered?.canonicalUrl || registered?.finalUrl || registered?.url,
    );
    if (registryKey && registered?.id) idByUrl.set(registryKey, registered.id);
  }

  const withId = (fetched, position) => {
    const fetchedKey = normalizeUrl(
      fetched?.finalUrl || fetched?.canonicalUrl || fetched?.url,
    );
    const id = fetched?.id || idByUrl.get(fetchedKey) || `F${position + 1}`;
    return { ...fetched, id };
  };
  return (fetchedSources || []).map(withId);
}
1493
+
1494
/**
 * Create a fresh question ledger seeded with the original research question
 * as open entry `Q1`.
 *
 * @param {string} query - The user's research question.
 * @returns {object[]} A one-element ledger.
 */
export function createQuestionLedger(query) {
  const rootQuestion = {
    id: "Q1",
    question: trimText(sanitizeResearchQuery(query), 320),
    status: "open",
    reason: "Original research question",
    evidence: [],
    sourceIds: [],
  };
  return [rootQuestion];
}
1506
+
1507
/**
 * Compute the next free ledger id: `Q` followed by one more than the highest
 * numeric id currently in use. Ids that do not parse to a number are ignored.
 *
 * @param {object[]|null|undefined} questions - Question ledger.
 * @returns {string} The next id, `Q1` for an empty ledger.
 */
function nextQuestionId(questions) {
  const highest = (questions || []).reduce((top, entry) => {
    const numeric = Number.parseInt(
      String(entry.id || "").replace(/^Q/i, ""),
      10,
    );
    return Number.isFinite(numeric) ? Math.max(top, numeric) : top;
  }, 0);
  return `Q${highest + 1}`;
}
1515
+
1516
/**
 * Find the first ledger entry whose text equals the sanitized, lower-cased
 * question, or whose jaccardSimilarity with it is at least 0.82.
 *
 * @param {object[]|null|undefined} questions - Question ledger.
 * @param {string} question - Question text to look up.
 * @returns {object|undefined} The matching entry, or undefined.
 */
function findSimilarQuestion(questions, question) {
  const needle = sanitizeResearchQuery(question).toLowerCase();
  for (const candidate of questions || []) {
    if (candidate.question?.toLowerCase() === needle) return candidate;
    if (jaccardSimilarity(candidate.question || "", needle) >= 0.82) {
      return candidate;
    }
  }
  return undefined;
}
1524
+
1525
/**
 * Add a question to the ledger unless an equivalent one is already tracked.
 *
 * @param {object[]} questions - Question ledger; appended to in place.
 * @param {string} question - Question text.
 * @param {object} [options]
 * @param {string} [options.reason] - Why the question was added.
 * @param {number|null} [options.round] - Round in which it was created.
 * @returns {object|null} The existing or newly added entry, or null when the
 *   cleaned question text is empty.
 */
function addQuestion(questions, question, { reason = "", round = null } = {}) {
  const text = trimText(sanitizeResearchQuery(question), 320);
  if (!text) return null;

  const duplicate = findSimilarQuestion(questions, text);
  if (duplicate) return duplicate;

  const entry = {
    id: nextQuestionId(questions),
    question: text,
    status: "open",
    reason: trimText(reason, 240),
    createdRound: round,
    evidence: [],
    sourceIds: [],
  };
  questions.push(entry);
  return entry;
}
1542
+
1543
/**
 * Mark a ledger entry as closed and merge supporting evidence into it.
 *
 * The entry is looked up by exact id first, then by question similarity.
 * The first recorded `closedRound` is kept on repeated closes. Evidence is
 * capped at 4 distinct strings and source ids at 8.
 *
 * @param {object[]} questions - Question ledger; the entry is mutated.
 * @param {string} idOrQuestion - Entry id or question text.
 * @param {object} [details]
 * @param {string} [details.evidence] - Evidence text to append.
 * @param {string[]} [details.sourceIds] - Supporting source ids.
 * @param {number|null} [details.round] - Round in which it was closed.
 * @returns {object|null} The closed entry, or null when nothing matched.
 */
function closeQuestion(
  questions,
  idOrQuestion,
  { evidence = "", sourceIds = [], round = null } = {},
) {
  const byId = questions.find((entry) => entry.id === idOrQuestion);
  const target = byId || findSimilarQuestion(questions, idOrQuestion);
  if (!target) return null;

  target.status = "closed";
  target.closedRound ||= round;
  if (evidence) {
    const mergedEvidence = [...(target.evidence || []), evidence];
    target.evidence = uniqueStrings(mergedEvidence, 4);
  }
  if (Array.isArray(sourceIds)) {
    const mergedIds = [...(target.sourceIds || []), ...sourceIds];
    target.sourceIds = uniqueStrings(mergedIds, 8);
  }
  return target;
}
1564
+
1565
/**
 * Summarise a question ledger by status.
 *
 * Entries with status "resolved" (the status the ledger cap assigns to
 * overflowing follow-ups) are reported separately and are not counted as
 * open. A missing or non-array ledger is treated as empty.
 *
 * @param {object[]|null|undefined} questions - Question ledger.
 * @returns {{total: number, closed: number, open: number, resolved: number}}
 */
function questionProgress(questions) {
  const ledger = Array.isArray(questions) ? questions : [];
  const countWithStatus = (status) =>
    ledger.filter((entry) => entry?.status === status).length;

  const total = ledger.length;
  const closed = countWithStatus("closed");
  const resolved = countWithStatus("resolved");
  return {
    total,
    closed,
    open: Math.max(0, total - closed - resolved),
    resolved,
  };
}
1570
+
1571
/**
 * Fold one research round into the question ledger, in place.
 *
 * Steps, in order:
 *   1. add a question for every planned action (its research goal, query or URL);
 *   2. auto-resolve the oldest open follow-ups beyond MAX_OPEN_FOLLOWUPS;
 *   3. close the questions the learning payload reports as answered;
 *   4. add the payload's explicit `newQuestions` as open follow-ups.
 *
 * @param {object[]} questions - Question ledger; mutated and returned.
 * @param {object} [round]
 * @param {number} [round.roundNumber] - Number of the round being recorded.
 * @param {object[]} [round.actions] - Actions (or `{ action }` wrappers) run.
 * @param {object} [round.learningPayload] - Parsed learning response;
 *   `answeredQuestions` and `newQuestions` are read.
 * @returns {object[]} The same `questions` array.
 */
export function updateQuestionLedger(
  questions,
  { roundNumber, actions = [], learningPayload = {} } = {},
) {
  // Parameter defaults only cover `undefined`; tolerate explicit nulls too.
  const payload = learningPayload || {};

  for (const run of actions || []) {
    const action = run?.action || run;
    const goal =
      action?.researchGoal && action.researchGoal !== "Original user query"
        ? action.researchGoal
        : action?.query || action?.url || "";
    if (goal) {
      addQuestion(questions, goal, {
        reason: "Planned research action",
        round: roundNumber,
      });
    }
  }

  // Cap the open-question ledger growth. Discovered gap/follow-up questions
  // are useful handoffs but Gemini tends to emit one per evidence slot, which
  // blows up the ledger and inflates the `requiredQuestionsClosed` floor
  // check. Keep at most MAX_OPEN_FOLLOWUPS of them across the whole run;
  // older ones are auto-resolved as "covered by later evidence" so they
  // don't block the floor forever.
  const MAX_OPEN_FOLLOWUPS = 5;
  const followupOpen = questions.filter(
    (q) => q.status === "open" && q.reason === "Discovered gap/follow-up",
  );
  if (followupOpen.length > MAX_OPEN_FOLLOWUPS) {
    const overflow = followupOpen
      .sort((a, b) => (a.createdRound || 0) - (b.createdRound || 0))
      .slice(0, followupOpen.length - MAX_OPEN_FOLLOWUPS);
    for (const q of overflow) {
      q.status = "resolved";
      q.closedRound = roundNumber;
      q.evidence = uniqueStrings(
        [...(q.evidence || []), "Auto-resolved to cap open-question ledger"],
        4,
      );
    }
  }

  const answered = Array.isArray(payload.answeredQuestions)
    ? payload.answeredQuestions
    : [];
  for (const item of answered) {
    if (typeof item === "string") {
      closeQuestion(questions, item, { round: roundNumber });
      continue;
    }
    const closure = {
      evidence: item?.evidence || item?.answer || "",
      sourceIds: Array.isArray(item?.sourceIds) ? item.sourceIds : [],
      round: roundNumber,
    };
    if (item?.id) {
      closeQuestion(questions, item.id, closure);
      continue;
    }
    // No id and no question text: nothing identifies a ledger entry.
    if (!item?.question) continue;
    // Answer to a question identified only by its text. addQuestion returns
    // the matching tracked entry when there is one and otherwise records the
    // question, so the answer is kept either way.
    const target = addQuestion(questions, item.question, {
      reason: "Answered during learning extraction",
      round: roundNumber,
    });
    if (target) closeQuestion(questions, target.id, closure);
  }

  // Keep STATUS.md as a true question ledger, not a dump of every search query
  // or caveat. Follow-up queries and raw gaps stay in their own fields; only
  // explicit newQuestions become open ledger items.
  const newQuestions = Array.isArray(payload.newQuestions)
    ? payload.newQuestions
    : [];
  for (const question of newQuestions) {
    addQuestion(questions, question, {
      reason: "Discovered gap/follow-up",
      round: roundNumber,
    });
  }

  return questions;
}
1652
+
1653
/**
 * Pick direct-fetch targets from known academic source domains (arXiv,
 * semanticscholar.org, DOI redirect). Returns the URL to fetch plus a short
 * label for the researchGoal, skipping anything already fetched or already
 * picked. At most two targets are returned.
 *
 * @param {object[]} combinedSources - Candidate sources (`canonicalUrl`,
 *   `finalUrl` or `url` is read, plus `title`/`id` for the label).
 * @param {Set<string>} [usedUrls] - URLs that were already fetched.
 * @returns {{url: string, label: string}[]} Targets in source order.
 */
function pickAcademicFetchTargets(combinedSources, usedUrls = new Set()) {
  if (!Array.isArray(combinedSources) || combinedSources.length === 0)
    return [];
  const ACADEMIC_HOSTS = ["arxiv.org", "semanticscholar.org", "doi.org"];
  const MAX_TARGETS = 2;
  const alreadyFetched = usedUrls || new Set();
  const seen = new Set();
  const targets = [];
  for (const source of combinedSources) {
    // Later candidates cannot change the result once the limit is reached.
    if (targets.length >= MAX_TARGETS) break;
    const url = source?.canonicalUrl || source?.finalUrl || source?.url || "";
    if (!url) continue;
    let domain = "";
    try {
      domain = new URL(url).hostname.toLowerCase().replace(/^www\./, "");
    } catch {
      continue;
    }
    if (!ACADEMIC_HOSTS.some((h) => domain === h || domain.endsWith(`.${h}`))) {
      continue;
    }
    // Prefer the HTML/abs page over PDF for direct fetch — the source
    // fetcher handles both, but the HTML page gives the synthesizer
    // readable text + abstract immediately.
    const htmlUrl = url.includes("/pdf/")
      ? url.replace(/\/pdf\//, "/html/").replace(/\.pdf$/i, "")
      : url;
    // De-duplicate on the URL that will actually be fetched as well as on the
    // original, so a PDF link is skipped when its HTML variant was already
    // fetched or picked.
    if (
      alreadyFetched.has(url) ||
      alreadyFetched.has(htmlUrl) ||
      seen.has(url) ||
      seen.has(htmlUrl)
    ) {
      continue;
    }
    seen.add(url);
    seen.add(htmlUrl);
    targets.push({
      url: htmlUrl,
      label: source?.title || source?.id || domain,
    });
  }
  return targets;
}
1691
+
1692
/**
 * Close ledger questions that the final synthesis answers.
 *
 * Nothing happens unless the synthesis has an answer, the citation audit
 * passed (`ok === true`), and both claims and cited ids are present. Each
 * question that is not closed is compared with every claim; it is closed
 * when it is `Q1` or when its best jaccardSimilarity score is at least 0.18.
 *
 * @param {object[]} questions - Question ledger; entries may be mutated.
 * @param {object|null|undefined} synthesis - Final synthesis.
 * @param {object|null|undefined} citationAudit - Citation audit result.
 * @returns {object[]} The same `questions` array.
 */
function reconcileQuestionsFromSynthesis(questions, synthesis, citationAudit) {
  const hasAuditedAnswer =
    Boolean(synthesis?.answer) && citationAudit?.ok === true;
  if (!hasAuditedAnswer) return questions;

  const claims = Array.isArray(synthesis.claims) ? synthesis.claims : [];
  const citedIds = Array.isArray(citationAudit.cited)
    ? citationAudit.cited
    : [];
  if (claims.length === 0 || citedIds.length === 0) return questions;

  for (const question of questions) {
    if (question.status === "closed") continue;

    // Strictly-greater comparison: the first claim wins ties, and no claim is
    // selected when every score is 0.
    const best = claims.reduce(
      (leader, claim) => {
        const score = jaccardSimilarity(
          question.question || "",
          claim.claim || "",
        );
        return score > leader.score ? { claim, score } : leader;
      },
      { claim: null, score: 0 },
    );

    const shouldClose = question.id === "Q1" || best.score >= 0.18;
    if (!shouldClose) continue;

    const sourceIds = Array.isArray(best.claim?.sourceIds)
      ? best.claim.sourceIds
      : citedIds.slice(0, 4);
    closeQuestion(questions, question.id, {
      evidence: best.claim?.claim || "Answered in final cited synthesis",
      sourceIds,
    });
  }
  return questions;
}
1725
+
1726
/**
 * Render the question ledger as a markdown checklist, one line per question.
 * Closed questions are ticked; supporting source ids are appended in
 * parentheses when present.
 *
 * @param {object[]} questions - Question ledger.
 * @returns {string} Checklist text, or a placeholder for an empty ledger.
 */
function renderQuestionStatus(questions) {
  if (!questions.length) return "No tracked questions.";
  const lines = [];
  for (const entry of questions) {
    const mark = entry.status === "closed" ? "x" : " ";
    const cited = entry.sourceIds?.length
      ? ` (${entry.sourceIds.join(", ")})`
      : "";
    lines.push(`- [${mark}] ${entry.id}: ${entry.question}${cited}`);
  }
  return lines.join("\n");
}
1735
+
1736
/**
 * Render the distinct items as a markdown bullet list.
 *
 * @param {Iterable<*>|null|undefined} items - Values to list.
 * @param {string} [fallback="None recorded."] - Text returned when no item
 *   remains after de-duplication.
 * @returns {string} Bullet list or the fallback text.
 */
function markdownList(items, fallback = "None recorded.") {
  const distinct = uniqueStrings(items);
  if (!distinct.length) return fallback;
  const bullets = [];
  for (const entry of distinct) bullets.push(`- ${entry}`);
  return bullets.join("\n");
}
1742
+
1743
/**
 * Write a research run to disk as a self-describing bundle.
 *
 * Layout under the bundle directory:
 *   STATUS.md, OUTLINE.md,
 *   reports/{SUMMARY,CLAIMS,EVIDENCE,GAPS}.md,
 *   sources/ (files from writeResearchSourcesToFiles, plus index.md),
 *   data/{manifest,rounds,sources,questions,evidence}.json.
 *
 * The directory is `outDir` when given, otherwise a timestamped, slugified
 * folder under DEFAULT_RESEARCH_BUNDLE_ROOT. Files are written synchronously;
 * only the source export is awaited.
 *
 * @param {object} params
 * @param {string} params.query - Original research question.
 * @param {object[]} params.rounds - Per-round records (`gaps` is read).
 * @param {object[]} params.sources - Ranked source registry.
 * @param {object[]} params.fetchedSources - Fetched source records.
 * @param {object[]} [params.evidenceItems] - Normalized evidence records.
 * @param {object} params.synthesis - Final synthesis (`answer`, `claims`,
 *   `caveats` are read); a missing synthesis yields empty reports.
 * @param {object|null} params.citationAudit - Stored in the manifest.
 * @param {object} params.floor - Result of the floor evaluation.
 * @param {object} params.manifest - Run metadata.
 * @param {string[]} [params.allGaps] - Gaps collected across the run.
 * @param {object[]} [params.questions] - Question ledger.
 * @param {string|null} [params.outDir] - Explicit bundle directory.
 * @returns {Promise<{dir: string, statusPath: string, summaryPath: string,
 *   manifestPath: string, sourceCount: number, sourceFiles: object[]}>}
 */
async function writeResearchBundle({
  query,
  rounds,
  sources,
  fetchedSources,
  evidenceItems = [],
  synthesis,
  citationAudit,
  floor,
  manifest,
  allGaps = [],
  questions = [],
  outDir = null,
}) {
  // Tolerate a missing synthesis or evidence list so a failed final step
  // still yields a complete (if sparse) bundle instead of a half-written one.
  const report = synthesis || {};
  const evidenceList = evidenceItems || [];

  const writeLines = (filePath, lines) =>
    writeFileSync(filePath, lines.join("\n"), "utf8");
  const writeJson = (filePath, value) =>
    writeFileSync(filePath, JSON.stringify(value, null, 2), "utf8");

  // ISO timestamp made filename-safe and cut to seconds: 2024-01-01T12-34-56.
  const stamp = new Date().toISOString().replaceAll(/[:.]/g, "-").slice(0, 19);
  const dir =
    outDir ||
    join(
      DEFAULT_RESEARCH_BUNDLE_ROOT,
      `${stamp}_${slugifyResearchName(query)}`,
    );
  const reportsDir = join(dir, "reports");
  const sourcesDir = join(dir, "sources");
  const dataDir = join(dir, "data");
  mkdirSync(reportsDir, { recursive: true });
  mkdirSync(sourcesDir, { recursive: true });
  mkdirSync(dataDir, { recursive: true });

  const sourceFiles = await writeResearchSourcesToFiles(
    fetchedSources,
    sourcesDir,
  );
  const gaps = uniqueStrings([
    ...allGaps,
    ...rounds.flatMap((round) => round.gaps || []),
  ]);

  writeLines(join(dir, "STATUS.md"), [
    floor?.floorMet ? "STATUS: DONE" : "STATUS: PARTIAL",
    "",
    `Query: ${query}`,
    `Stop reason: ${manifest?.terminationReason || "max_rounds"}`,
    "",
    "## Deterministic floor checks",
    ...Object.entries(floor?.checks || {}).map(
      ([name, ok]) => `- [${ok ? "x" : " "}] ${name}`,
    ),
    "",
    "## Questions",
    renderQuestionStatus(questions),
    "",
    "## Open gaps",
    markdownList(gaps),
    "",
  ]);

  writeLines(join(dir, "OUTLINE.md"), [
    "# Research bundle outline",
    "",
    "- `reports/SUMMARY.md` — final cited report",
    "- `reports/CLAIMS.md` — extracted claims with support/source IDs",
    "- `reports/EVIDENCE.md` — goal-based source evidence",
    "- `reports/GAPS.md` — remaining caveats and uncertainties",
    "- `sources/` — fetched source markdown files",
    "- `data/manifest.json` — machine-readable run metadata",
    "- `data/rounds.json` — per-round actions/learnings/gaps",
    "- `data/sources.json` — ranked source registry",
    "- `data/questions.json` — open/closed question ledger",
    // Listed so the outline matches every data file written below.
    "- `data/evidence.json` — goal-based evidence extractions",
    "",
  ]);

  writeFileSync(
    join(reportsDir, "SUMMARY.md"),
    String(report.answer || ""),
    "utf8",
  );

  const claimLines =
    Array.isArray(report.claims) && report.claims.length
      ? report.claims.map((claim) => {
          const ids = Array.isArray(claim.sourceIds)
            ? claim.sourceIds.join(", ")
            : "";
          return `- ${claim.claim || ""} (${claim.support || "support unknown"}${ids ? `; ${ids}` : ""})`;
        })
      : ["No structured claims were extracted."];
  writeLines(join(reportsDir, "CLAIMS.md"), [
    "# Key claims",
    "",
    ...claimLines,
    "",
  ]);

  const evidenceSections = evidenceList.length
    ? evidenceList.map((item) =>
        [
          `## ${item.sourceId || item.url || "Source"}`,
          item.url ? `<${item.url}>` : "",
          item.rational ? `**Rational:** ${item.rational}` : "",
          item.evidence ? `**Evidence:** ${item.evidence}` : "",
          item.summary ? `**Summary:** ${item.summary}` : "",
          "",
        ]
          // Drops the fields that are absent (and the trailing blank entry).
          .filter(Boolean)
          .join("\n"),
      )
    : ["No goal-based evidence was extracted."];
  writeLines(join(reportsDir, "EVIDENCE.md"), [
    "# Extracted evidence",
    "",
    ...evidenceSections,
    "",
  ]);

  writeLines(join(reportsDir, "GAPS.md"), [
    "# Gaps and caveats",
    "",
    "## Caveats",
    markdownList(report.caveats || []),
    "",
    "## Research gaps",
    markdownList(gaps),
    "",
  ]);

  writeJson(join(dataDir, "manifest.json"), {
    ...manifest,
    floor,
    citationAudit,
  });
  writeJson(join(dataDir, "rounds.json"), rounds);
  writeJson(join(dataDir, "sources.json"), sources);
  writeJson(join(dataDir, "questions.json"), questions);
  writeJson(join(dataDir, "evidence.json"), evidenceList);

  writeLines(join(sourcesDir, "index.md"), [
    "# Source index",
    "",
    ...sourceFiles.map((source) => {
      const label = source.title || source.url;
      const url = source.finalUrl || source.url;
      const contentPath = source.contentPath ? ` — ${source.contentPath}` : "";
      return `- ${source.id || "?"}: [${label}](${url})${contentPath}`;
    }),
    "",
  ]);

  return {
    dir,
    statusPath: join(dir, "STATUS.md"),
    summaryPath: join(reportsDir, "SUMMARY.md"),
    manifestPath: join(dataDir, "manifest.json"),
    sourceCount: sourceFiles.length,
    sourceFiles,
  };
}
1927
+
1144
1928
  export async function runResearchMode({
1145
1929
  query,
1146
1930
  breadth = 3,
@@ -1149,14 +1933,19 @@ export async function runResearchMode({
1149
1933
  locale = null,
1150
1934
  short = false,
1151
1935
  qualityThreshold = 8.5,
1936
+ writeBundle = process.env.GREEDY_RESEARCH_BUNDLE !== "0",
1937
+ researchOutDir = null,
1152
1938
  } = {}) {
1153
1939
  const options = clampResearchOptions({ breadth, iterations, maxSources });
1154
1940
  const rounds = [];
1155
1941
  let allLearnings = [];
1156
1942
  let allGaps = [];
1943
+ const questions = createQuestionLedger(query);
1157
1944
  let activeActions = null;
1158
1945
  let combinedSources = [];
1159
1946
  let fetchedSources = [];
1947
+ let evidenceItems = [];
1948
+ const extractedSourceKeys = new Set();
1160
1949
  const usedQueries = new Set();
1161
1950
  const usedUrls = new Set();
1162
1951
  const qualityHistory = [];
@@ -1171,7 +1960,7 @@ export async function runResearchMode({
1171
1960
  const engineFailures = [];
1172
1961
 
1173
1962
  process.stderr.write(
1174
- `[greedysearch] Research mode: breadth ${options.breadth}, iterations ${options.iterations}, qualityThreshold ${qualityThreshold}\n`,
1963
+ `[greedysearch] Research mode: breadth ${options.breadth}, iterations ${options.iterations}, qualityThreshold ${qualityThreshold}, engines ${RESEARCH_ENGINES.join(",")}, synthesizer gemini\n`,
1175
1964
  );
1176
1965
 
1177
1966
  for (let roundIndex = 0; roundIndex < options.iterations; roundIndex++) {
@@ -1254,6 +2043,26 @@ export async function runResearchMode({
1254
2043
  });
1255
2044
 
1256
2045
  const roundActions = noveltyFiltered.slice(0, roundBreadth);
2046
+
2047
+ // Force at least one fetchUrl per round when a known academic source
2048
+ // (arXiv, semantic-scholar, DOI) is present in combinedSources. The
2049
+ // Gemini planner occasionally emits all-search actions even when the
2050
+ // answer is in a single arXiv PDF; direct fetching gives the synthesizer
2051
+ // real PDF text and reliably passes citation audits.
2052
+ const academicTargets = pickAcademicFetchTargets(combinedSources, usedUrls);
2053
+ const hasFetch = roundActions.some((a) => a.type === "fetchUrl");
2054
+ if (!hasFetch && academicTargets.length > 0) {
2055
+ const injectTarget = academicTargets[0];
2056
+ roundActions.push({
2057
+ type: "fetchUrl",
2058
+ url: injectTarget.url,
2059
+ researchGoal: `Direct fetch of known academic source: ${injectTarget.label || injectTarget.url}`,
2060
+ });
2061
+ process.stderr.write(
2062
+ `[greedysearch] Forced fetchUrl for academic source: ${injectTarget.url}\n`,
2063
+ );
2064
+ }
2065
+
1257
2066
  const actionRuns = [];
1258
2067
  for (let i = 0; i < roundActions.length; i++) {
1259
2068
  const action = roundActions[i];
@@ -1292,6 +2101,7 @@ export async function runResearchMode({
1292
2101
  const fetchActionRuns = actionRuns.filter(
1293
2102
  (r) => r.action.type === "fetchUrl",
1294
2103
  );
2104
+ updateQuestionLedger(questions, { roundNumber, actions: actionRuns });
1295
2105
 
1296
2106
  combinedSources = dedupeSources([
1297
2107
  combinedSources,
@@ -1329,6 +2139,33 @@ export async function runResearchMode({
1329
2139
  fetchedSources,
1330
2140
  );
1331
2141
  }
2142
+ fetchedSources = annotateFetchedSourcesWithIds(
2143
+ fetchedSources,
2144
+ combinedSources,
2145
+ );
2146
+
2147
+ process.stderr.write(`PROGRESS:research:round-${roundNumber}:evidence\n`);
2148
+ const evidenceRun = await extractEvidenceFromSources({
2149
+ query,
2150
+ questions,
2151
+ fetchedSources,
2152
+ extractedSourceKeys,
2153
+ });
2154
+ if (evidenceRun.error) {
2155
+ process.stderr.write(
2156
+ `[greedysearch] Evidence extraction failed: ${evidenceRun.error}\n`,
2157
+ );
2158
+ }
2159
+ evidenceItems = [...evidenceItems, ...evidenceRun.evidence];
2160
+ for (const evidence of evidenceRun.evidence) {
2161
+ updateQuestionLedger(questions, {
2162
+ roundNumber,
2163
+ learningPayload: {
2164
+ answeredQuestions: evidence.answers || [],
2165
+ newQuestions: evidence.newQuestions || [],
2166
+ },
2167
+ });
2168
+ }
1332
2169
 
1333
2170
  // Build round query summary for learning extraction
1334
2171
  const roundQueries = actionRuns.map((run) => ({
@@ -1351,6 +2188,8 @@ export async function runResearchMode({
1351
2188
  engines: summarizeEngineAnswers(run.result),
1352
2189
  })),
1353
2190
  fetchedSources,
2191
+ questions,
2192
+ evidenceItems,
1354
2193
  ),
1355
2194
  { timeoutMs: 120000 },
1356
2195
  );
@@ -1377,8 +2216,14 @@ export async function runResearchMode({
1377
2216
  .filter(Boolean)
1378
2217
  .slice(0, 6)
1379
2218
  : [];
1380
- allLearnings = [...new Set([...allLearnings, ...learnings])];
1381
- allGaps = [...new Set([...allGaps, ...gaps])];
2219
+ allLearnings = uniqueStrings([...allLearnings, ...learnings]);
2220
+ allGaps = uniqueStrings([...allGaps, ...gaps]);
2221
+ updateQuestionLedger(questions, {
2222
+ roundNumber,
2223
+ actions: [],
2224
+ learningPayload,
2225
+ gaps,
2226
+ });
1382
2227
  rounds.push({
1383
2228
  round: roundNumber,
1384
2229
  actions: actionRuns.map((run) => ({
@@ -1391,6 +2236,8 @@ export async function runResearchMode({
1391
2236
  })),
1392
2237
  learnings,
1393
2238
  gaps,
2239
+ evidence: evidenceRun.evidence,
2240
+ evidenceError: evidenceRun.error,
1394
2241
  learningError,
1395
2242
  });
1396
2243
 
@@ -1404,19 +2251,38 @@ export async function runResearchMode({
1404
2251
  qualityHistory,
1405
2252
  );
1406
2253
  qualityHistory.push(evaluation.score);
2254
+ allGaps = uniqueStrings([...allGaps, ...(evaluation.knowledgeGaps || [])]);
2255
+ updateQuestionLedger(questions, {
2256
+ roundNumber,
2257
+ gaps: evaluation.knowledgeGaps || [],
2258
+ });
2259
+ const preliminaryFloor = computeResearchFloor({
2260
+ sources: combinedSources,
2261
+ fetchedSources,
2262
+ gaps: allGaps,
2263
+ questions,
2264
+ rounds,
2265
+ qualityScore: evaluation.score,
2266
+ qualityThreshold,
2267
+ maxSources: options.maxSources,
2268
+ requireCitations: false,
2269
+ requireQuestions: false,
2270
+ });
1407
2271
  process.stderr.write(
1408
- `[greedysearch] Quality score round ${roundNumber}: ${evaluation.score.toFixed(1)} (shouldContinue: ${evaluation.shouldContinue})\n`,
2272
+ `[greedysearch] Quality score round ${roundNumber}: ${evaluation.score.toFixed(1)} (shouldContinue: ${evaluation.shouldContinue}, floor: ${preliminaryFloor.floorMet})\n`,
1409
2273
  );
1410
2274
 
1411
- // Early termination
2275
+ // Early termination is outcome-first: Gemini quality alone is not enough.
2276
+ // Stop early only when the score is high AND deterministic source/floor checks pass.
1412
2277
  if (
1413
2278
  evaluation.score >= qualityThreshold &&
2279
+ preliminaryFloor.floorMet &&
1414
2280
  (!evaluation.shouldContinue ||
1415
2281
  evaluation.terminationReason === "quality_threshold")
1416
2282
  ) {
1417
2283
  terminationReason = evaluation.terminationReason || "quality_threshold";
1418
2284
  process.stderr.write(
1419
- `[greedysearch] Quality threshold ${qualityThreshold} reached (score: ${evaluation.score.toFixed(1)}). Terminating early.\n`,
2285
+ `[greedysearch] Research floor reached (score: ${evaluation.score.toFixed(1)}). Terminating early.\n`,
1420
2286
  );
1421
2287
  break;
1422
2288
  }
@@ -1490,16 +2356,26 @@ export async function runResearchMode({
1490
2356
  };
1491
2357
  try {
1492
2358
  const rawReport = await runGeminiPrompt(
1493
- buildFinalReportPrompt(query, rounds, combinedSources),
2359
+ buildFinalReportPrompt(
2360
+ query,
2361
+ rounds,
2362
+ combinedSources,
2363
+ questions,
2364
+ evidenceItems,
2365
+ ),
1494
2366
  { timeoutMs: 180000 },
1495
2367
  );
1496
2368
  const parsed = parseGeminiJson(rawReport, {});
2369
+ const hasClaims = Array.isArray(parsed?.claims) && parsed.claims.length > 0;
1497
2370
  synthesis = {
1498
2371
  ...synthesis,
1499
2372
  ...parsed,
1500
2373
  rawAnswer: rawReport.answer || "",
1501
2374
  geminiSources: rawReport.sources || [],
1502
- synthesized: true,
2375
+ // Only mark as synthesized if Gemini actually returned structured
2376
+ // claims. An empty/minimal response should not block the evidence
2377
+ // fallback from running.
2378
+ synthesized: hasClaims,
1503
2379
  };
1504
2380
  } catch (error) {
1505
2381
  process.stderr.write(
@@ -1508,13 +2384,120 @@ export async function runResearchMode({
1508
2384
  synthesis.error = error.message;
1509
2385
  }
1510
2386
 
1511
- const fetchedFiles = await writeResearchSourcesToFiles(fetchedSources);
2387
+ // Fallback: when no structured learnings were produced but per-source
2388
+ // evidence was extracted successfully, ask Gemini to synthesize a final
2389
+ // report directly from the evidence. This rescues runs whose per-round
2390
+ // learning prompt failed (e.g. transient Gemini input field rejection)
2391
+ // but whose evidence extraction step still captured real data.
2392
+ const hasStructuredSynthesis =
2393
+ synthesis.synthesized === true &&
2394
+ Array.isArray(synthesis.claims) &&
2395
+ synthesis.claims.length > 0;
2396
+ if (!hasStructuredSynthesis && evidenceItems.length > 0) {
2397
+ process.stderr.write(
2398
+ "[greedysearch] Falling back to evidence-based synthesis (no per-round learnings).\n",
2399
+ );
2400
+ try {
2401
+ const evidencePrompt = buildSynthesisFromEvidencePrompt(
2402
+ query,
2403
+ combinedSources,
2404
+ questions,
2405
+ evidenceItems,
2406
+ );
2407
+ const rawEvidenceReport = await runGeminiPrompt(evidencePrompt, {
2408
+ timeoutMs: 180000,
2409
+ });
2410
+ const parsedEvidence = parseGeminiJson(rawEvidenceReport, {});
2411
+ synthesis = {
2412
+ ...synthesis,
2413
+ ...parsedEvidence,
2414
+ rawAnswer: rawEvidenceReport.answer || synthesis.answer || "",
2415
+ geminiSources:
2416
+ rawEvidenceReport.sources || synthesis.geminiSources || [],
2417
+ synthesized: true,
2418
+ synthesisMode: "evidence_fallback",
2419
+ };
2420
+ } catch (error) {
2421
+ process.stderr.write(
2422
+ `[greedysearch] Evidence-based synthesis failed: ${error.message}\n`,
2423
+ );
2424
+ synthesis.evidenceFallbackError = error.message;
2425
+ }
2426
+ }
2427
+
1512
2428
  const finishedAt = new Date().toISOString();
1513
2429
  const durationMs = Date.now() - startMs;
2430
+ const qualityScore = qualityHistory.at(-1) || 0;
2431
+ fetchedSources = annotateFetchedSourcesWithIds(
2432
+ fetchedSources,
2433
+ combinedSources,
2434
+ );
1514
2435
 
1515
- // Citation audit
2436
+ // Citation audit + final question reconciliation + deterministic completion floor
1516
2437
  process.stderr.write("PROGRESS:research:audit-citations\n");
1517
2438
  const citationAudit = auditCitations(synthesis.answer || "", combinedSources);
2439
+ reconcileQuestionsFromSynthesis(questions, synthesis, citationAudit);
2440
+ const floor = computeResearchFloor({
2441
+ sources: combinedSources,
2442
+ fetchedSources,
2443
+ synthesis,
2444
+ citationAudit,
2445
+ gaps: allGaps,
2446
+ questions,
2447
+ rounds,
2448
+ qualityScore,
2449
+ qualityThreshold,
2450
+ maxSources: options.maxSources,
2451
+ });
2452
+ if (floor.floorMet && terminationReason === "max_rounds") {
2453
+ terminationReason = "done_floor_met";
2454
+ } else if (!floor.floorMet && terminationReason === "quality_threshold") {
2455
+ terminationReason = "max_rounds_floor_unmet";
2456
+ }
2457
+
2458
+ const manifest = {
2459
+ startedAt,
2460
+ finishedAt,
2461
+ durationMs,
2462
+ engines: RESEARCH_ENGINES,
2463
+ synthesizer: "gemini",
2464
+ rounds: rounds.length,
2465
+ actionsRun: totalActionsRun,
2466
+ searches: totalSearches,
2467
+ fetches: totalFetches,
2468
+ sourcesFetched: fetchedSources.filter((s) => s?.contentChars > 100).length,
2469
+ engineFailures,
2470
+ terminationReason,
2471
+ floorMet: floor.floorMet,
2472
+ };
2473
+ let bundle = null;
2474
+ let fetchedFiles;
2475
+ if (writeBundle) {
2476
+ process.stderr.write("PROGRESS:research:bundle\n");
2477
+ try {
2478
+ bundle = await writeResearchBundle({
2479
+ query,
2480
+ rounds,
2481
+ sources: combinedSources,
2482
+ fetchedSources,
2483
+ evidenceItems,
2484
+ synthesis,
2485
+ citationAudit,
2486
+ floor,
2487
+ manifest,
2488
+ allGaps,
2489
+ questions,
2490
+ outDir: researchOutDir,
2491
+ });
2492
+ fetchedFiles = bundle.sourceFiles;
2493
+ delete bundle.sourceFiles;
2494
+ } catch (error) {
2495
+ bundle = { error: error.message || String(error) };
2496
+ fetchedFiles = await writeResearchSourcesToFiles(fetchedSources);
2497
+ }
2498
+ } else {
2499
+ fetchedFiles = await writeResearchSourcesToFiles(fetchedSources);
2500
+ }
1518
2501
 
1519
2502
  process.stderr.write("PROGRESS:research:done\n");
1520
2503
 
@@ -1527,21 +2510,16 @@ export async function runResearchMode({
1527
2510
  maxSources: options.maxSources,
1528
2511
  rounds,
1529
2512
  learnings: allLearnings,
2513
+ gaps: allGaps,
2514
+ evidence: evidenceItems,
2515
+ questions,
2516
+ questionProgress: questionProgress(questions),
1530
2517
  qualityHistory,
1531
2518
  terminationReason,
1532
2519
  qualityThreshold,
1533
- manifest: {
1534
- startedAt,
1535
- finishedAt,
1536
- durationMs,
1537
- rounds: rounds.length,
1538
- actionsRun: totalActionsRun,
1539
- searches: totalSearches,
1540
- fetches: totalFetches,
1541
- sourcesFetched: fetchedSources.filter((s) => s?.contentChars > 100)
1542
- .length,
1543
- engineFailures,
1544
- },
2520
+ floor,
2521
+ bundle,
2522
+ manifest,
1545
2523
  },
1546
2524
  _citationAudit: citationAudit,
1547
2525
  _sources: combinedSources,
@@ -1559,23 +2537,43 @@ export async function runResearchMode({
1559
2537
  )
1560
2538
  : 0,
1561
2539
  agreementLevel: synthesis.agreement?.level || "mixed",
2540
+ floorMet: floor.floorMet,
1562
2541
  },
1563
2542
  };
1564
2543
  }
1565
2544
 
1566
2545
  function dedupeFetchedSources(sources) {
1567
- const seen = new Map();
2546
+ const byUrl = new Map();
1568
2547
  for (const source of sources) {
1569
2548
  const key =
1570
2549
  source?.id || normalizeUrl(source?.finalUrl || source?.url || "");
1571
2550
  if (!key) continue;
1572
- const existing = seen.get(key);
2551
+ const existing = byUrl.get(key);
1573
2552
  if (
1574
2553
  !existing ||
1575
2554
  (source.contentChars || 0) > (existing.contentChars || 0)
1576
2555
  ) {
1577
- seen.set(key, source);
2556
+ byUrl.set(key, source);
2557
+ }
2558
+ }
2559
+
2560
+ const out = [];
2561
+ for (const source of byUrl.values()) {
2562
+ const content = String(source.content || source.snippet || "");
2563
+ const duplicateIndex = out.findIndex((existing) => {
2564
+ const other = String(existing.content || existing.snippet || "");
2565
+ if (content.length < 400 || other.length < 400) return false;
2566
+ return (
2567
+ jaccardSimilarity(content.slice(0, 4000), other.slice(0, 4000)) >= 0.9
2568
+ );
2569
+ });
2570
+ if (duplicateIndex === -1) {
2571
+ out.push(source);
2572
+ continue;
2573
+ }
2574
+ if ((source.contentChars || 0) > (out[duplicateIndex].contentChars || 0)) {
2575
+ out[duplicateIndex] = source;
1578
2576
  }
1579
2577
  }
1580
- return Array.from(seen.values());
2578
+ return out;
1581
2579
  }