@apmantza/greedysearch-pi 2.0.0 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,6 +20,9 @@ import {
20
20
  import { parseStructuredJson } from "./synthesis.mjs";
21
21
  import { RESEARCH_ENGINES } from "./constants.mjs";
22
22
  import { runGeminiPrompt } from "./synthesis-runner.mjs";
23
+ import { classifyResearchComplexity } from "./scale-aware.mjs";
24
+ import { runSimpleResearchMode } from "./simple-research.mjs";
25
+ import { createProgressTracker } from "./progress.mjs";
23
26
 
24
27
  const __dir = fileURLToPath(new URL(".", import.meta.url)).replace(
25
28
  /^\/([A-Z]:)/,
@@ -984,7 +987,7 @@ function normalizeEvidenceExtractions(payload, fetchedSources) {
984
987
  );
985
988
  }
986
989
 
987
- async function extractEvidenceFromSources({
990
+ export async function extractEvidenceFromSources({
988
991
  query,
989
992
  questions,
990
993
  fetchedSources,
@@ -1071,7 +1074,7 @@ function buildLearningPrompt(
1071
1074
  ].join("\n");
1072
1075
  }
1073
1076
 
1074
- function buildFinalReportPrompt(
1077
+ export function buildFinalReportPrompt(
1075
1078
  originalQuery,
1076
1079
  rounds,
1077
1080
  sources,
@@ -1151,7 +1154,7 @@ function buildFinalReportPrompt(
1151
1154
  * structured learnings (for example when Gemini's input field rejected the
1152
1155
  * per-round learning prompt but the goal-based extraction step succeeded).
1153
1156
  */
1154
- function buildSynthesisFromEvidencePrompt(
1157
+ export function buildSynthesisFromEvidencePrompt(
1155
1158
  originalQuery,
1156
1159
  sources = [],
1157
1160
  questions = [],
@@ -1401,6 +1404,142 @@ export function auditCitations(answer, sources) {
1401
1404
  };
1402
1405
  }
1403
1406
 
1407
/**
 * Pick the URL to probe for a source, preferring the post-redirect URL
 * recorded by an earlier fetch over the canonical / final / raw URL.
 *
 * @param {object} source - Source record; may be null/undefined.
 * @returns {string} The first non-empty URL field, or "" when none is set.
 */
function pickCitationUrl(source) {
  return (
    source?.fetch?.finalUrl ||
    source?.canonicalUrl ||
    source?.finalUrl ||
    source?.url ||
    ""
  );
}

/**
 * Probe a single source URL and classify it. Never rejects: every failure
 * is folded into the returned record.
 *
 * @param {object} source - Source record with an `id`.
 * @param {number} timeoutMs - Deadline covering the whole probe.
 * @returns {Promise<{id: *, url: string, status: "reachable"|"dead"|"skipped", httpStatus?: number, error?: string}>}
 */
async function probeCitationUrl(source, timeoutMs) {
  const url = pickCitationUrl(source);
  if (!url) return { id: source.id, url: "", status: "skipped" };

  // Skip unparseable and non-HTTP(S) URLs: they cannot be probed with fetch.
  let protocol;
  try {
    protocol = new URL(url).protocol;
  } catch {
    return { id: source.id, url, status: "skipped" };
  }
  if (protocol !== "http:" && protocol !== "https:") {
    return { id: source.id, url, status: "skipped" };
  }

  // One controller for the whole probe so a HEAD + GET retry still honours
  // the caller's single timeout.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  const request = (method) =>
    fetch(url, {
      method,
      redirect: "follow",
      signal: controller.signal,
      headers: {
        "User-Agent":
          "Mozilla/5.0 (compatible; GreedySearch/2.0; +https://github.com/apmantza/greedysearch-pi)",
      },
    });

  try {
    let response = await request("HEAD");
    // 405 / 501 mean the server refuses the HEAD method, not that the page
    // is gone. Retry once with GET so these are not reported as dead links.
    if (response.status === 405 || response.status === 501) {
      response = await request("GET");
      // Only the status matters; release the body (best effort).
      response.body?.cancel().catch(() => {});
    }
    const ok = response.status >= 200 && response.status < 400;
    return {
      id: source.id,
      url,
      status: ok ? "reachable" : "dead",
      httpStatus: response.status,
    };
  } catch (error) {
    return {
      id: source.id,
      url,
      status: "dead",
      error:
        error?.name === "AbortError"
          ? "timeout"
          : error?.message || String(error),
    };
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Check reachability of source URLs.
 *
 * Every source that has an `id` and at least one URL field is probed with a
 * HEAD request (falling back to GET when the server rejects HEAD with
 * 405/501). Sources are processed in sequential batches of `concurrency`.
 *
 * @param {Array<object>} sources - Source records; null/undefined is treated as empty.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=6000] - Per-URL deadline in milliseconds.
 * @param {number} [options.concurrency=4] - Batch size; values below 1 are treated as 1.
 * @returns {Promise<{reachable: object[], dead: object[], skipped: object[], ok: boolean}>}
 *   Per-URL records grouped by status; `ok` is true when no URL is dead.
 */
export async function checkCitationUrls(
  sources,
  { timeoutMs = 6000, concurrency = 4 } = {},
) {
  const safeConcurrency = Math.max(1, Math.floor(concurrency || 1));
  // Filter with the same URL resolution the probe uses, so a source that only
  // carries `fetch.finalUrl` is checked instead of being silently dropped.
  const citedSources = (sources || []).filter(
    (s) => s?.id && pickCitationUrl(s),
  );

  const reachable = [];
  const dead = [];
  const skipped = [];

  // Process in batches to avoid overwhelming the network / remote hosts.
  for (let i = 0; i < citedSources.length; i += safeConcurrency) {
    const batch = citedSources.slice(i, i + safeConcurrency);
    const results = await Promise.allSettled(
      batch.map((source) => probeCitationUrl(source, timeoutMs)),
    );

    results.forEach((result, index) => {
      // probeCitationUrl is not expected to reject; if it ever does, keep the
      // source identity rather than reporting an anonymous failure.
      const value =
        result.status === "fulfilled"
          ? result.value
          : {
              id: batch[index].id,
              url: pickCitationUrl(batch[index]),
              status: "dead",
              error: result.reason?.message || "unknown",
            };
      if (value.status === "reachable") reachable.push(value);
      else if (value.status === "dead") dead.push(value);
      else skipped.push(value);
    });
  }

  return {
    reachable,
    dead,
    skipped,
    ok: dead.length === 0,
  };
}
1516
+
1517
/**
 * Run the citation URL reachability check and report on stderr.
 *
 * Emits the `check-urls` progress marker, runs checkCitationUrls() with the
 * fixed research settings (6 s timeout, 4 at a time), and logs a one-line
 * summary when dead links are found. Any error is logged and converted to
 * `null` so the reachability check never aborts a research run.
 *
 * @param {Array<object>} combinedSources - Sources to check.
 * @returns {Promise<object|null>} The checkCitationUrls() result, or null on failure.
 */
export async function runCitationUrlCheck(combinedSources) {
  process.stderr.write("PROGRESS:research:check-urls\n");

  let outcome;
  try {
    const settings = { timeoutMs: 6000, concurrency: 4 };
    outcome = await checkCitationUrls(combinedSources, settings);
    if (!outcome.ok) {
      const deadCount = outcome.dead.length;
      process.stderr.write(
        `[greedysearch] ${deadCount} dead citation URL(s) detected\n`,
      );
    }
  } catch (error) {
    process.stderr.write(
      `[greedysearch] URL reachability check failed: ${error.message}\n`,
    );
    return null;
  }
  return outcome;
}
1542
+
1404
1543
  export function computeResearchFloor({
1405
1544
  sources = [],
1406
1545
  fetchedSources = [],
@@ -1437,7 +1576,13 @@ export function computeResearchFloor({
1437
1576
  (q) => !q.createdRound || q.reason === "Original research question",
1438
1577
  );
1439
1578
  const requiredQuestionStats = questionProgress(requiredQuestions);
1440
- const minFetched = Math.min(4, Math.max(2, Number(maxSources) || 8));
1579
+ // Scale the minimum fetched sources by the number of rounds. The
1580
+ // simple research path runs 1 round with fewer sources, so requiring
1581
+ // 2-4 sources would be too strict. Iterative research (2+ rounds)
1582
+ // gets the full minFetched requirement.
1583
+ const roundCount = (rounds || []).length;
1584
+ const baseMin = Math.min(4, Math.max(2, Number(maxSources) || 8));
1585
+ const minFetched = roundCount <= 1 ? Math.min(2, baseMin) : baseMin;
1441
1586
  const checks = {
1442
1587
  roundsRun: rounds.length >= 1,
1443
1588
  fetchedSources: fetchedOk.length >= minFetched,
@@ -1689,7 +1834,11 @@ function pickAcademicFetchTargets(combinedSources, usedUrls) {
1689
1834
  return targets.slice(0, 2);
1690
1835
  }
1691
1836
 
1692
- function reconcileQuestionsFromSynthesis(questions, synthesis, citationAudit) {
1837
+ export function reconcileQuestionsFromSynthesis(
1838
+ questions,
1839
+ synthesis,
1840
+ citationAudit,
1841
+ ) {
1693
1842
  if (!synthesis?.answer || citationAudit?.ok !== true) return questions;
1694
1843
  const claims = Array.isArray(synthesis.claims) ? synthesis.claims : [];
1695
1844
  const citedIds = Array.isArray(citationAudit.cited)
@@ -1740,7 +1889,120 @@ function markdownList(items, fallback = "None recorded.") {
1740
1889
  : fallback;
1741
1890
  }
1742
1891
 
1743
- async function writeResearchBundle({
1892
/**
 * Write a human-readable provenance sidecar (`provenance.md`) into `dir`.
 *
 * The file records run metadata (date, duration, mode, rounds), source
 * counts, the cited sources, URL reachability, verification status, and the
 * individual floor checks. The write is synchronous and file-system errors
 * propagate to the caller.
 *
 * @param {string} dir - Existing directory that receives `provenance.md`.
 * @param {object} [details]
 * @param {string} [details.query] - Original research query (used as the title).
 * @param {Array} [details.rounds] - Research rounds; only the length is used.
 * @param {Array<object>} [details.sources] - All consulted sources.
 * @param {Array<object>} [details.fetchedSources] - Fetched source records.
 * @param {object|null} [details.citationAudit] - Result with `ok`, `cited`, `missing`.
 * @param {object|null} [details.citationUrls] - Result with `reachable`, `dead`, `ok`.
 * @param {object} [details.floor] - Floor result with `floorMet` and `checks`.
 * @param {object} [details.manifest] - Run manifest (`startedAt`, `durationMs`,
 *   `terminationReason`, `rounds`).
 * @returns {void}
 */
export function writeProvenanceSidecar(
  dir,
  {
    query,
    rounds,
    sources,
    fetchedSources,
    citationAudit,
    citationUrls,
    floor,
    manifest,
  } = {},
) {
  const allSources = sources || [];
  const fetchedOk = (fetchedSources || []).filter(
    (s) => s?.contentChars > 100 || s?.fetch?.ok,
  );
  const primarySources = allSources.filter((s) =>
    ["official-docs", "repo", "maintainer-blog", "academic"].includes(
      String(s?.sourceType || ""),
    ),
  );
  const citedIds = new Set(
    Array.isArray(citationAudit?.cited) ? citationAudit.cited : [],
  );
  const citedSources = allSources.filter((s) => citedIds.has(s?.id));

  // Tolerate partially-shaped reachability results instead of throwing.
  const deadLinks = Array.isArray(citationUrls?.dead) ? citationUrls.dead : [];
  const reachableLinks = Array.isArray(citationUrls?.reachable)
    ? citationUrls.reachable
    : [];

  const lines = [
    `# Provenance: ${query}`,
    "",
    `- **Date:** ${manifest?.startedAt || new Date().toISOString()}`,
    `- **Duration:** ${manifest?.durationMs ? `${(manifest.durationMs / 1000).toFixed(1)}s` : "unknown"}`,
    `- **Mode:** ${manifest?.terminationReason === "simple_single_pass" ? "simple (single-pass)" : "iterative"}`,
    `- **Rounds:** ${manifest?.rounds || rounds?.length || 1}`,
    "",
    "## Sources",
    "",
    `- **Consulted:** ${allSources.length}`,
    `- **Fetched successfully:** ${fetchedOk.length}`,
    `- **Primary sources:** ${primarySources.length}`,
    `- **Cited in report:** ${citedSources.length}`,
    "",
  ];

  // Cited source details
  if (citedSources.length > 0) {
    lines.push("### Cited sources", "");
    for (const source of citedSources) {
      const url = source.canonicalUrl || source.finalUrl || source.url || "";
      const label = source.title || url || "untitled";
      const fetched = source.fetch?.ok ? "✓" : "✗";
      // Only emit link syntax when there is a target; `[x]()` is a broken link.
      const rendered = url ? `[${label}](${url})` : label;
      lines.push(
        `- **${source.id}:** ${rendered} (${source.sourceType || "unknown"}, fetched: ${fetched})`,
      );
    }
    lines.push("");
  }

  // URL reachability
  if (citationUrls && (reachableLinks.length > 0 || deadLinks.length > 0)) {
    lines.push("## URL reachability", "");
    if (deadLinks.length > 0) {
      lines.push("**Dead links:**", "");
      for (const d of deadLinks) {
        lines.push(
          `- ${d.id}: ${d.url} (${d.httpStatus || d.error || "unknown"})`,
        );
      }
      lines.push("");
    }
    if (reachableLinks.length > 0) {
      lines.push(
        `**Reachable:** ${reachableLinks.length}/${reachableLinks.length + deadLinks.length}`,
        "",
      );
    }
  }

  // Verification status. A missing audit is "NOT CHECKED" everywhere, so the
  // per-check line can never contradict the overall line.
  const auditChecked = Boolean(citationAudit);
  const citationsOk = citationAudit?.ok === true;
  const urlsOk = citationUrls?.ok ?? true;
  const missing = Array.isArray(citationAudit?.missing)
    ? citationAudit.missing
    : [];

  let citationsLine;
  if (!auditChecked) citationsLine = "NOT CHECKED";
  else if (citationsOk) citationsLine = "PASS";
  else if (missing.length > 0) {
    citationsLine = `FAIL — missing: ${missing.join(", ")}`;
  } else citationsLine = "FAIL";

  let urlsLine = "SKIPPED";
  if (citationUrls) {
    urlsLine = citationUrls.ok ? "PASS" : `FAIL — ${deadLinks.length} dead`;
  }

  let verificationStatus;
  if (!auditChecked) verificationStatus = "NOT CHECKED";
  else if (citationsOk && urlsOk) verificationStatus = "PASS";
  else if (!citationsOk) verificationStatus = "FAIL (missing citations)";
  else verificationStatus = "FAIL (dead links)";

  lines.push(
    "## Verification",
    "",
    `- **Citations:** ${citationsLine}`,
    `- **URL reachability:** ${urlsLine}`,
    `- **Floor:** ${floor?.floorMet ? "PASS" : "PARTIAL"}`,
    `- **Overall:** ${verificationStatus}`,
    "",
  );

  // Floor checks
  if (floor?.checks) {
    lines.push("## Floor checks", "");
    for (const [name, ok] of Object.entries(floor.checks)) {
      lines.push(`- [${ok ? "x" : " "}] ${name}`);
    }
    lines.push("");
  }

  writeFileSync(join(dir, "provenance.md"), lines.join("\n"), "utf8");
}
2004
+
2005
+ export async function writeResearchBundle({
1744
2006
  query,
1745
2007
  rounds,
1746
2008
  sources,
@@ -1752,6 +2014,7 @@ async function writeResearchBundle({
1752
2014
  manifest,
1753
2015
  allGaps = [],
1754
2016
  questions = [],
2017
+ citationUrls = null,
1755
2018
  outDir = null,
1756
2019
  }) {
1757
2020
  const stamp = new Date().toISOString().replaceAll(/[:.]/g, "-").slice(0, 19);
@@ -1807,6 +2070,7 @@ async function writeResearchBundle({
1807
2070
  "- `reports/CLAIMS.md` — extracted claims with support/source IDs",
1808
2071
  "- `reports/EVIDENCE.md` — goal-based source evidence",
1809
2072
  "- `reports/GAPS.md` — remaining caveats and uncertainties",
2073
+ "- `provenance.md` — human-readable run metadata and verification",
1810
2074
  "- `sources/` — fetched source markdown files",
1811
2075
  "- `data/manifest.json` — machine-readable run metadata",
1812
2076
  "- `data/rounds.json` — per-round actions/learnings/gaps",
@@ -1915,11 +2179,31 @@ async function writeResearchBundle({
1915
2179
  ].join("\n"),
1916
2180
  "utf8",
1917
2181
  );
2182
+
2183
+ // Provenance sidecar — human-readable run metadata (non-critical)
2184
+ try {
2185
+ writeProvenanceSidecar(dir, {
2186
+ query,
2187
+ rounds,
2188
+ sources,
2189
+ fetchedSources,
2190
+ citationAudit,
2191
+ citationUrls,
2192
+ floor,
2193
+ manifest,
2194
+ });
2195
+ } catch (sidecarError) {
2196
+ process.stderr.write(
2197
+ `[greedysearch] Provenance sidecar write failed (non-critical): ${sidecarError.message}\n`,
2198
+ );
2199
+ }
2200
+
1918
2201
  return {
1919
2202
  dir,
1920
2203
  statusPath: join(dir, "STATUS.md"),
1921
2204
  summaryPath: join(reportsDir, "SUMMARY.md"),
1922
2205
  manifestPath: join(dataDir, "manifest.json"),
2206
+ provenancePath: join(dir, "provenance.md"),
1923
2207
  sourceCount: sourceFiles.length,
1924
2208
  sourceFiles,
1925
2209
  };
@@ -1937,6 +2221,50 @@ export async function runResearchMode({
1937
2221
  researchOutDir = null,
1938
2222
  } = {}) {
1939
2223
  const options = clampResearchOptions({ breadth, iterations, maxSources });
2224
+
2225
+ // ── Scale-aware fast path ────────────────────────────────────────────────
2226
+ // When breadth and iterations are at defaults (not user-specified), classify
2227
+ // the query complexity. Simple queries bypass the iterative loop entirely
2228
+ // for ~70% faster results and lower API cost.
2229
+ const userSpecifiedBreadth = typeof breadth === "number";
2230
+ const userSpecifiedIterations = typeof iterations === "number";
2231
+ const atDefaults = !userSpecifiedBreadth && !userSpecifiedIterations;
2232
+
2233
+ if (atDefaults) {
2234
+ try {
2235
+ const classification = await classifyResearchComplexity(query);
2236
+ process.stderr.write(
2237
+ `[greedysearch] Complexity: ${classification.complexity} (${classification.reasoning})\n`,
2238
+ );
2239
+ if (classification.complexity === "simple") {
2240
+ process.stderr.write(
2241
+ `[greedysearch] Simple query detected — using fast single-pass path\n`,
2242
+ );
2243
+ return runSimpleResearchMode({
2244
+ query,
2245
+ locale,
2246
+ maxSources: Math.min(maxSources ?? 5, 5),
2247
+ qualityThreshold,
2248
+ writeBundle,
2249
+ researchOutDir,
2250
+ });
2251
+ }
2252
+ // For moderate/complex: use classifier suggestions as hints if user
2253
+ // didn't specify values. This tightens the loop for moderate queries
2254
+ // without changing the user-explicit path.
2255
+ if (!userSpecifiedBreadth) {
2256
+ options.breadth = classification.suggestedBreadth;
2257
+ }
2258
+ if (!userSpecifiedIterations) {
2259
+ options.iterations = classification.suggestedIterations;
2260
+ }
2261
+ } catch (error) {
2262
+ process.stderr.write(
2263
+ `[greedysearch] Scale classification failed, using defaults: ${error.message}\n`,
2264
+ );
2265
+ }
2266
+ }
2267
+
1940
2268
  const rounds = [];
1941
2269
  let allLearnings = [];
1942
2270
  let allGaps = [];
@@ -1959,6 +2287,18 @@ export async function runResearchMode({
1959
2287
  let totalFetches = 0;
1960
2288
  const engineFailures = [];
1961
2289
 
2290
+ // Progress bar with ETA — pre-compute totals from plan so the bar
2291
+ // reflects the full run, not just the current round. The actual
2292
+ // actions per round come from Gemini's plan; we estimate 1 fetch
2293
+ // per academic source found.
2294
+ const progressTracker = createProgressTracker({
2295
+ totalActions: options.iterations * options.breadth,
2296
+ totalRounds: options.iterations,
2297
+ totalFetches: options.iterations, // estimate: ~1 fetch per round
2298
+ silent: process.env.GREEDY_RESEARCH_QUIET === "1",
2299
+ });
2300
+ progressTracker.startRound(1);
2301
+
1962
2302
  process.stderr.write(
1963
2303
  `[greedysearch] Research mode: breadth ${options.breadth}, iterations ${options.iterations}, qualityThreshold ${qualityThreshold}, engines ${RESEARCH_ENGINES.join(",")}, synthesizer gemini\n`,
1964
2304
  );
@@ -2072,6 +2412,10 @@ export async function runResearchMode({
2072
2412
  process.stderr.write(
2073
2413
  `[greedysearch] Action ${i + 1}/${roundActions.length} [${action.type}]: ${(action.query || action.url).slice(0, 80)}\n`,
2074
2414
  );
2415
+ progressTracker.startAction(
2416
+ action.type,
2417
+ (action.query || action.url || "").slice(0, 60),
2418
+ );
2075
2419
  const run = await executeResearchAction(action, {
2076
2420
  locale,
2077
2421
  short,
@@ -2079,10 +2423,14 @@ export async function runResearchMode({
2079
2423
  usedUrls,
2080
2424
  maxChars: 8000,
2081
2425
  });
2426
+ progressTracker.endAction();
2082
2427
  actionRuns.push(run);
2083
2428
  totalActionsRun++;
2084
2429
  if (action.type === "search") totalSearches++;
2085
- if (action.type === "fetchUrl") totalFetches++;
2430
+ if (action.type === "fetchUrl") {
2431
+ totalFetches++;
2432
+ progressTracker.endFetch(run.ok);
2433
+ }
2086
2434
  if (!run.ok) {
2087
2435
  engineFailures.push({
2088
2436
  round: roundNumber,
@@ -2243,6 +2591,10 @@ export async function runResearchMode({
2243
2591
 
2244
2592
  // Quality evaluation
2245
2593
  process.stderr.write(`PROGRESS:research:round-${roundNumber}:evaluating\n`);
2594
+ progressTracker.endRound();
2595
+ if (roundNumber < options.iterations) {
2596
+ progressTracker.startRound(roundNumber + 1);
2597
+ }
2246
2598
  const evaluation = await evaluateResearchQuality(
2247
2599
  query,
2248
2600
  rounds,
@@ -2436,6 +2788,10 @@ export async function runResearchMode({
2436
2788
  // Citation audit + final question reconciliation + deterministic completion floor
2437
2789
  process.stderr.write("PROGRESS:research:audit-citations\n");
2438
2790
  const citationAudit = auditCitations(synthesis.answer || "", combinedSources);
2791
+
2792
+ // Citation URL reachability check
2793
+ const citationUrls = await runCitationUrlCheck(combinedSources);
2794
+
2439
2795
  reconcileQuestionsFromSynthesis(questions, synthesis, citationAudit);
2440
2796
  const floor = computeResearchFloor({
2441
2797
  sources: combinedSources,
@@ -2483,6 +2839,7 @@ export async function runResearchMode({
2483
2839
  evidenceItems,
2484
2840
  synthesis,
2485
2841
  citationAudit,
2842
+ citationUrls,
2486
2843
  floor,
2487
2844
  manifest,
2488
2845
  allGaps,
@@ -2500,6 +2857,7 @@ export async function runResearchMode({
2500
2857
  }
2501
2858
 
2502
2859
  process.stderr.write("PROGRESS:research:done\n");
2860
+ progressTracker.finish();
2503
2861
 
2504
2862
  return {
2505
2863
  query,
@@ -2522,6 +2880,7 @@ export async function runResearchMode({
2522
2880
  manifest,
2523
2881
  },
2524
2882
  _citationAudit: citationAudit,
2883
+ _citationUrls: citationUrls,
2525
2884
  _sources: combinedSources,
2526
2885
  _fetchedSources: fetchedFiles,
2527
2886
  _synthesis: synthesis,
@@ -0,0 +1,93 @@
1
+ // src/search/scale-aware.mjs — Complexity classification and fast-path research
2
+ //
3
+ // Before entering the full iterative loop, classify the query complexity to
4
+ // decide whether the expensive multi-round path is warranted. Simple "what is
5
+ // X" queries get a fast single-pass path; complex/multi-faceted queries get
6
+ // the full iterative treatment (possibly with adjusted breadth/iterations).
7
+
8
+ import { trimText } from "./sources.mjs";
9
+ import { runGeminiPrompt } from "./synthesis-runner.mjs";
10
+ import { parseStructuredJson } from "./synthesis.mjs";
11
+
12
+ const COMPLEXITY_PROMPT_TIMEOUT_MS = 30_000;
13
+
14
/**
 * Parse `value` as a base-10 integer and clamp it into [min, max].
 *
 * @param {*} value - Anything; null/undefined are treated as an empty string.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @param {number} fallback - Returned when no finite integer can be parsed.
 * @returns {number} The clamped integer, or `fallback`.
 */
function clampInt(value, min, max, fallback) {
  const text = String(value ?? "");
  const parsed = Number.parseInt(text, 10);
  // parseInt yields NaN for non-numeric text and Infinity for absurdly long
  // digit strings; both fall back.
  return Number.isFinite(parsed)
    ? Math.min(max, Math.max(min, parsed))
    : fallback;
}
19
+
20
/**
 * Ask Gemini to classify a research query as simple, moderate, or complex.
 *
 * The model is prompted for a JSON object between BEGIN_JSON / END_JSON
 * markers. Unknown or missing complexity values are treated as "moderate",
 * and the suggested breadth / iterations are clamped to 1-5 and 1-3. If the
 * prompt or parsing throws, the failure is logged to stderr and a "moderate"
 * default is returned instead of propagating the error.
 *
 * @param {string} query - The research query to classify.
 * @returns {Promise<{complexity: string, reasoning: string, suggestedBreadth: number, suggestedIterations: number, needsAcademicSources: boolean}>}
 */
export async function classifyResearchComplexity(query) {
  // Example payload shown to the model so it mirrors the expected shape.
  const exampleJson = JSON.stringify(
    {
      complexity: "simple",
      reasoning: "narrow factual question",
      suggestedBreadth: 1,
      suggestedIterations: 1,
      needsAcademicSources: false,
    },
    null,
    2,
  );

  const promptLines = [
    "You are a research complexity classifier.",
    "Classify the following query by research complexity.",
    "",
    "- simple: A narrow factual question (what is X, define X, how does X work).",
    " Answerable with 1-3 search queries and a short synthesis. No sub-questions.",
    "- moderate: A focused comparison, recent change, or best-practice lookup.",
    " Needs 2-4 angles but stays within one domain.",
    "- complex: Multi-faceted survey, landscape analysis, or cross-domain investigation.",
    " Benefits from parallel research directions and iterative deepening.",
    "",
    "Respond ONLY with JSON wrapped in BEGIN_JSON / END_JSON markers:",
    "BEGIN_JSON",
    exampleJson,
    "END_JSON",
    "",
    "Query: " + query,
  ];
  const prompt = promptLines.join("\n");

  try {
    const reply = await runGeminiPrompt(prompt, {
      timeoutMs: COMPLEXITY_PROMPT_TIMEOUT_MS,
    });
    const parsed = parseStructuredJson(reply?.answer || "") || {};

    // Anything outside the three known labels is treated as "moderate".
    const knownLevels = ["simple", "moderate", "complex"];
    let complexity = "moderate";
    if (knownLevels.includes(parsed.complexity)) {
      complexity = parsed.complexity;
    }

    const isSimple = complexity === "simple";
    const breadthFallback = isSimple ? 1 : 3;
    const iterationsFallback = isSimple ? 1 : 2;

    return {
      complexity,
      reasoning: trimText(parsed.reasoning || "", 200),
      suggestedBreadth: clampInt(parsed.suggestedBreadth, 1, 5, breadthFallback),
      suggestedIterations: clampInt(
        parsed.suggestedIterations,
        1,
        3,
        iterationsFallback,
      ),
      needsAcademicSources: parsed.needsAcademicSources === true,
    };
  } catch (error) {
    process.stderr.write(
      `[greedysearch] Complexity classification failed, defaulting to moderate: ${error.message}\n`,
    );
    const moderateDefault = {
      complexity: "moderate",
      reasoning: "classification failed",
      suggestedBreadth: 3,
      suggestedIterations: 2,
      needsAcademicSources: false,
    };
    return moderateDefault;
  }
}