@apmantza/greedysearch-pi 1.9.2 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/CHANGELOG.md +132 -2
  2. package/README.md +82 -47
  3. package/bin/cdp.mjs +1153 -1108
  4. package/bin/launch.mjs +9 -0
  5. package/bin/search.mjs +318 -81
  6. package/extractors/bing-copilot.mjs +48 -18
  7. package/extractors/chatgpt.mjs +553 -0
  8. package/extractors/common.mjs +213 -22
  9. package/extractors/consensus.mjs +655 -0
  10. package/extractors/consent.mjs +182 -18
  11. package/extractors/gemini.mjs +350 -217
  12. package/extractors/google-ai.mjs +129 -128
  13. package/extractors/logically.mjs +629 -0
  14. package/extractors/perplexity.mjs +547 -217
  15. package/extractors/selectors.mjs +3 -2
  16. package/extractors/semantic-scholar.mjs +219 -0
  17. package/package.json +8 -4
  18. package/skills/greedy-search/skill.md +20 -12
  19. package/src/fetcher.mjs +23 -1
  20. package/src/formatters/results.ts +185 -128
  21. package/src/search/browser-lifecycle.mjs +27 -5
  22. package/src/search/challenge-detect.mjs +205 -0
  23. package/src/search/chrome.mjs +653 -590
  24. package/src/search/constants.mjs +155 -39
  25. package/src/search/engines.mjs +114 -76
  26. package/src/search/fetch-source.mjs +566 -451
  27. package/src/search/pdf.mjs +68 -0
  28. package/src/search/progress.mjs +145 -0
  29. package/src/search/recovery.mjs +73 -45
  30. package/src/search/research.mjs +1419 -62
  31. package/src/search/scale-aware.mjs +93 -0
  32. package/src/search/simple-research.mjs +520 -0
  33. package/src/search/sources.mjs +52 -22
  34. package/src/search/synthesis-runner.mjs +105 -26
  35. package/src/search/synthesis.mjs +286 -246
  36. package/src/tools/greedy-search-handler.ts +129 -59
  37. package/src/tools/shared.ts +312 -186
  38. package/src/types.ts +110 -104
  39. package/test.mjs +537 -18
package/bin/launch.mjs CHANGED
@@ -76,6 +76,15 @@ const BASE_CHROME_FLAGS = [
76
76
  "--window-size=1920,1080",
77
77
  "--lang=en-US",
78
78
  "--force-color-profile=srgb",
79
+ // Background-tab throttling kills parallel extractions: Chrome clamps
80
+ // setTimeout to ~1Hz in unfocused tabs, so a streaming response that
81
+ // finishes in 5s solo takes 60s+ when 4 engines share one Chrome.
82
+ // The trio below restores full-speed JS in every tab. Safe for our
83
+ // anti-bot stealth — Cloudflare detects CDP/webdriver artifacts, not
84
+ // timer-throttling behavior. Same flags Playwright/Puppeteer add.
85
+ "--disable-background-timer-throttling",
86
+ "--disable-renderer-backgrounding",
87
+ "--disable-backgrounding-occluded-windows",
79
88
  ];
80
89
 
81
90
  function getChromeVersion(chromePath) {
package/bin/search.mjs CHANGED
@@ -20,7 +20,7 @@
20
20
  // node search.mjs gem "latest React features"
21
21
  // node search.mjs all "how does TCP congestion control work"
22
22
 
23
- import { existsSync, readFileSync } from "node:fs";
23
+ import { appendFileSync, existsSync, readFileSync } from "node:fs";
24
24
  // Config file for user defaults
25
25
  import { homedir } from "node:os";
26
26
  import { join } from "node:path";
@@ -33,12 +33,18 @@ import {
33
33
  openNewTab,
34
34
  touchActivity,
35
35
  } from "../src/search/chrome.mjs";
36
- import { ALL_ENGINES, ENGINES } from "../src/search/constants.mjs";
36
+ import {
37
+ ALL_ENGINES,
38
+ ENGINES,
39
+ SYNTHESIZER,
40
+ VISIBLE_RECOVERY_LOG,
41
+ } from "../src/search/constants.mjs";
37
42
  import { runExtractor } from "../src/search/engines.mjs";
38
43
  import {
39
44
  fetchMultipleSources,
40
45
  fetchTopSource,
41
46
  } from "../src/search/fetch-source.mjs";
47
+ import { waitForChallengeCleared } from "../src/search/challenge-detect.mjs";
42
48
  import { writeSourcesToFiles } from "../src/search/file-sources.mjs";
43
49
  import { writeOutput } from "../src/search/output.mjs";
44
50
  import {
@@ -51,7 +57,11 @@ import {
51
57
  mergeFetchDataIntoSources,
52
58
  } from "../src/search/sources.mjs";
53
59
  import { buildConfidence } from "../src/search/synthesis.mjs";
54
- import { synthesizeWithGemini } from "../src/search/synthesis-runner.mjs";
60
+ import {
61
+ getSynthesisStartUrl,
62
+ normalizeSynthesizer,
63
+ synthesizeResults,
64
+ } from "../src/search/synthesis-runner.mjs";
55
65
  import { normalizeQuery } from "../src/search/query.mjs";
56
66
  import { runResearchMode } from "../src/search/research.mjs";
57
67
 
@@ -69,6 +79,18 @@ function loadUserConfig() {
69
79
  return {};
70
80
  }
71
81
 
82
+ function logVisibleRecovery(event) {
83
+ try {
84
+ appendFileSync(
85
+ VISIBLE_RECOVERY_LOG,
86
+ `${JSON.stringify({ at: new Date().toISOString(), ...event })}\n`,
87
+ "utf8",
88
+ );
89
+ } catch {
90
+ // Best-effort diagnostics only. Never fail a search because logging failed.
91
+ }
92
+ }
93
+
72
94
  /** Read query/prompt from stdin (used with --stdin to avoid command-line leakage) */
73
95
  async function readStdin() {
74
96
  return new Promise((resolve) => {
@@ -89,16 +111,20 @@ async function main() {
89
111
  `${[
90
112
  'Usage: node search.mjs <engine> "<query>"',
91
113
  "",
92
- "Engines: perplexity (p), bing (b), google (g), gemini (gem), all",
114
+ "Engines: all, perplexity (p), google (g), chatgpt (gpt), gemini (gem), semantic-scholar (s2), logically (log), bing (b)",
93
115
  "",
94
116
  "Flags:",
95
- " --fast Quick mode: no source fetching or synthesis",
96
- " --synthesize Deprecated: synthesis is now default for multi-engine",
97
- " --deep-research Deprecated: source fetching is now default",
117
+ " --synthesize For engine=all: synthesize fetched sources",
118
+ " --synthesizer <engine> Synthesis engine (default from ~/.pi/greedyconfig)",
119
+ " --fast Legacy quick mode: no source fetching or synthesis",
120
+ " --depth <mode> Legacy: fast|standard|deep aliases, or research",
121
+ " --deep-research Deprecated alias for --research",
98
122
  " --research Iterative query/learnings loop (alias: --depth research)",
99
123
  " --breadth <n> Research mode query breadth, 1-5 (default: 3)",
100
124
  " --iterations <n> Research mode rounds, 1-3 (default: 2)",
101
125
  " --max-sources <n> Research mode fetched source cap, 3-12",
126
+ " --research-out-dir <dir> Write research bundle to a specific directory",
127
+ " --no-research-bundle Disable the default .pi/greedysearch-research bundle",
102
128
  " --fetch-top-source Fetch content from top source",
103
129
  " --inline Output JSON to stdout (for piping)",
104
130
  " --locale <lang> Force results language (en, de, fr, etc.)",
@@ -112,10 +138,11 @@ async function main() {
112
138
  " GREEDY_SEARCH_LOCALE Default locale (default: en)",
113
139
  "",
114
140
  "Examples:",
115
- ' node search.mjs all "Node.js streams" # Default: sources + synthesis',
116
- ' node search.mjs all "quick check" --fast # Fast: no sources/synthesis',
141
+ ' node search.mjs all "Node.js streams" # Grounded: engines + fetched sources',
142
+ ' node search.mjs all "Node.js streams" --synthesize # Add Gemini synthesis',
143
+ ' node search.mjs all "quick check" --fast # Legacy fast: no sources/synthesis',
117
144
  ' node search.mjs all "browser automation" --research --breadth 3 --iterations 2',
118
- ' node search.mjs p "what is memoization" # Single engine: fast mode',
145
+ ' node search.mjs p "what is memoization" # Single engine search',
119
146
  ].join("\n")}\n`,
120
147
  );
121
148
  process.exit(1);
@@ -129,6 +156,11 @@ async function main() {
129
156
  process.env.GREEDY_SEARCH_VISIBLE = "1";
130
157
  process.env.GREEDY_SEARCH_ALWAYS_VISIBLE = "1";
131
158
  delete process.env.GREEDY_SEARCH_HEADLESS;
159
+ } else if (process.env.GREEDY_SEARCH_VISIBLE !== "1") {
160
+ // Establish the desired mode BEFORE ensureChrome() so a stale visible
161
+ // recovery browser is switched back to headless before research planning
162
+ // and Gemini synthesis tabs are opened.
163
+ process.env.GREEDY_SEARCH_HEADLESS = "1";
132
164
  }
133
165
 
134
166
  await ensureChrome();
@@ -136,41 +168,44 @@ async function main() {
136
168
  // Track activity for headless idle timeout
137
169
  touchActivity();
138
170
 
139
- // Depth modes: fast (no synthesis/fetch), standard (synthesis+fetch 5 sources)
140
171
  const depthIdx = args.indexOf("--depth");
141
- let depth = "standard"; // DEFAULT: synthesis + source fetch
142
-
143
- if (depthIdx !== -1 && args[depthIdx + 1]) {
144
- depth = args[depthIdx + 1];
145
- } else if (args.includes("--fast")) {
146
- depth = "fast"; // Explicit fast mode requested
147
- }
148
-
149
- // For single engine (not "all"), default to fast unless explicit
172
+ const legacyDepth =
173
+ depthIdx !== -1 && args[depthIdx + 1]
174
+ ? args[depthIdx + 1].toLowerCase()
175
+ : null;
150
176
  const engineArg = args.find((a) => !a.startsWith("--"))?.toLowerCase();
151
- if (engineArg !== "all" && depthIdx === -1 && !args.includes("--fast")) {
152
- depth = "fast";
153
- }
177
+ const researchMode =
178
+ args.includes("--research") ||
179
+ args.includes("--deep-research") ||
180
+ legacyDepth === "research";
181
+ const legacyFast = args.includes("--fast") || legacyDepth === "fast";
182
+ const legacySynthesisDepth =
183
+ legacyDepth === "standard" ||
184
+ legacyDepth === "deep" ||
185
+ args.includes("--deep");
186
+ const shouldFetchSources = engineArg === "all" && !legacyFast;
187
+ const shouldSynthesize =
188
+ engineArg === "all" &&
189
+ !legacyFast &&
190
+ (args.includes("--synthesize") || legacySynthesisDepth);
191
+ const groundedSynthesis = legacyDepth === "deep" || args.includes("--deep");
154
192
 
155
- // --deep-research / --deep flags map to deep mode (backward compat)
156
193
  if (args.includes("--deep-research")) {
157
- depth = "standard";
158
194
  process.stderr.write(
159
- "[greedysearch] --deep-research is deprecated; use --depth standard (now default)\n",
195
+ "[greedysearch] --deep-research is deprecated; use --research or --depth research\n",
160
196
  );
161
197
  }
162
- if (args.includes("--deep")) {
163
- depth = "deep";
164
- }
165
- if (args.includes("--research")) {
166
- depth = "research";
167
- }
168
- if (args.includes("--synthesize")) {
198
+ if (legacySynthesisDepth) {
169
199
  process.stderr.write(
170
- "[greedysearch] --synthesize is deprecated; synthesis is now default for multi-engine\n",
200
+ "[greedysearch] depth fast|standard|deep is deprecated; use default grounded search plus --synthesize when needed\n",
171
201
  );
172
202
  }
173
203
 
204
+ const synthesizerIdx = args.indexOf("--synthesizer");
205
+ const synthesizer = normalizeSynthesizer(
206
+ synthesizerIdx === -1 ? SYNTHESIZER : args[synthesizerIdx + 1],
207
+ );
208
+
174
209
  const full = args.includes("--full");
175
210
  const short = !full;
176
211
  const fetchSource = args.includes("--fetch-top-source");
@@ -183,9 +218,10 @@ async function main() {
183
218
  iterationsIdx === -1 ? undefined : args[iterationsIdx + 1];
184
219
  const researchMaxSources =
185
220
  maxSourcesIdx === -1 ? undefined : args[maxSourcesIdx + 1];
186
- // Headless is the default — only disable if GREEDY_SEARCH_VISIBLE=1
187
- if (process.env.GREEDY_SEARCH_VISIBLE !== "1")
188
- process.env.GREEDY_SEARCH_HEADLESS = "1";
221
+ const researchOutDirIdx = args.indexOf("--research-out-dir");
222
+ const researchOutDir =
223
+ researchOutDirIdx === -1 ? undefined : args[researchOutDirIdx + 1];
224
+ const writeResearchBundle = !args.includes("--no-research-bundle");
189
225
  const outIdx = args.indexOf("--out");
190
226
  const outFile = outIdx === -1 ? null : args[outIdx + 1];
191
227
 
@@ -218,18 +254,23 @@ async function main() {
218
254
  a !== "--visible" &&
219
255
  a !== "--always-visible" &&
220
256
  a !== "--depth" &&
257
+ a !== "--synthesizer" &&
221
258
  a !== "--out" &&
222
259
  a !== "--locale" &&
223
260
  a !== "--breadth" &&
224
261
  a !== "--iterations" &&
225
262
  a !== "--max-sources" &&
263
+ a !== "--research-out-dir" &&
264
+ a !== "--no-research-bundle" &&
226
265
  a !== "--help" &&
227
266
  (depthIdx === -1 || i !== depthIdx + 1) &&
267
+ (synthesizerIdx === -1 || i !== synthesizerIdx + 1) &&
228
268
  (outIdx === -1 || i !== outIdx + 1) &&
229
269
  (localeIdx === -1 || i !== localeIdx + 1) &&
230
270
  (breadthIdx === -1 || i !== breadthIdx + 1) &&
231
271
  (iterationsIdx === -1 || i !== iterationsIdx + 1) &&
232
- (maxSourcesIdx === -1 || i !== maxSourcesIdx + 1),
272
+ (maxSourcesIdx === -1 || i !== maxSourcesIdx + 1) &&
273
+ (researchOutDirIdx === -1 || i !== researchOutDirIdx + 1),
233
274
  );
234
275
  const engine = rest[0]?.toLowerCase();
235
276
  // Read query from stdin when --stdin flag is set (avoids leaking query in process table)
@@ -241,7 +282,7 @@ async function main() {
241
282
  query = rest.slice(1).join(" ");
242
283
  }
243
284
 
244
- if (depth === "research") {
285
+ if (researchMode) {
245
286
  if (engine !== "all") {
246
287
  process.stderr.write(
247
288
  `[greedysearch] Research mode uses all engines; ignoring engine "${engine}".\n`,
@@ -254,6 +295,8 @@ async function main() {
254
295
  maxSources: researchMaxSources,
255
296
  locale,
256
297
  short,
298
+ writeBundle: writeResearchBundle,
299
+ researchOutDir,
257
300
  });
258
301
  writeOutput(out, outFile, {
259
302
  inline,
@@ -270,8 +313,11 @@ async function main() {
270
313
  // engine homepage so extractors can skip the initial navigation.
271
314
  const ENGINE_START_URLS = {
272
315
  perplexity: "https://www.perplexity.ai/",
273
- bing: "https://copilot.microsoft.com/",
274
316
  google: "https://www.google.com/",
317
+ "semantic-scholar": "https://www.semanticscholar.org/",
318
+ semanticscholar: "https://www.semanticscholar.org/",
319
+ s2: "https://www.semanticscholar.org/",
320
+ logically: "https://logically.app/research-assistant/",
275
321
  };
276
322
  const engineTabs = await Promise.all(
277
323
  ALL_ENGINES.map((e) => openNewTab(ENGINE_START_URLS[e])),
@@ -280,11 +326,10 @@ async function main() {
280
326
  await cdp(["list"]);
281
327
 
282
328
  // Time-bounded per-engine extraction so slow engines don't stall the batch.
283
- // Bing can take a little longer than Google/Perplexity under CDP contention;
284
- // keep fast mode bounded while avoiding most false recovery trips.
285
329
  const engineTimeoutFor = (engineName) => {
286
- if (depth !== "fast") return 55000;
287
- return engineName === "bing" ? 40000 : 30000;
330
+ if (!legacyFast) return 70000;
331
+ // ChatGPT needs ~25-30s solo; under CDP contention needs more headroom
332
+ return engineName === "chatgpt" ? 60000 : 35000;
288
333
  };
289
334
 
290
335
  try {
@@ -316,7 +361,19 @@ async function main() {
316
361
  if (r.status === "fulfilled") {
317
362
  out[r.value.engine] = r.value;
318
363
  } else {
319
- out[ALL_ENGINES[i]] = { error: r.reason?.message || "unknown error" };
364
+ const err = r.reason;
365
+ const msg = err?.message || "unknown error";
366
+ out[ALL_ENGINES[i]] = { error: msg };
367
+ if (err?.lastStage) {
368
+ process.stderr.write(
369
+ `[greedysearch] ${ALL_ENGINES[i]} failed at stage '${err.lastStage}': ${msg}\n`,
370
+ );
371
+ }
372
+ if (err?.partialErr) {
373
+ process.stderr.write(
374
+ `[greedysearch] ${ALL_ENGINES[i]} tail stderr:\n${err.partialErr}\n`,
375
+ );
376
+ }
320
377
  }
321
378
  }
322
379
 
@@ -331,6 +388,20 @@ async function main() {
331
388
  recoveryCandidates.length > 0 &&
332
389
  process.env.GREEDY_SEARCH_VISIBLE !== "1"
333
390
  ) {
391
+ logVisibleRecovery({
392
+ scope: "all",
393
+ phase: "start",
394
+ engines: recoveryCandidates,
395
+ reasons: Object.fromEntries(
396
+ recoveryCandidates.map((engineName) => [
397
+ engineName,
398
+ {
399
+ error: out[engineName]?.error || null,
400
+ envelope: out[engineName]?._envelope || null,
401
+ },
402
+ ]),
403
+ ),
404
+ });
334
405
  process.stderr.write(
335
406
  `[greedysearch] 🔓 Headless ${recoveryCandidates.join(", ")} search hit timeout/verification/antibot signals — retrying visible to establish cookies...\n`,
336
407
  );
@@ -431,22 +502,94 @@ async function main() {
431
502
  stillBlocked.push(...secondStillBlocked);
432
503
  }
433
504
 
505
+ logVisibleRecovery({
506
+ scope: "all",
507
+ phase: stillBlocked.length > 0 ? "needs-human" : "success",
508
+ engines: recoveryCandidates,
509
+ results: Object.fromEntries(
510
+ recoveryCandidates.map((engineName) => [
511
+ engineName,
512
+ {
513
+ mode: out[engineName]?._envelope?.mode || null,
514
+ durationMs: out[engineName]?._envelope?.durationMs || null,
515
+ lastStage: out[engineName]?._envelope?.lastStage || null,
516
+ error: out[engineName]?.error || null,
517
+ },
518
+ ]),
519
+ ),
520
+ });
521
+
434
522
  if (stillBlocked.length > 0) {
435
523
  for (const blockedEngine of stillBlocked) {
436
524
  process.stderr.write(`PROGRESS:${blockedEngine}:needs-human\n`);
437
525
  }
438
- keepVisibleForHuman = true;
439
- out._needsHumanVerification = {
440
- engines: stillBlocked,
441
- message:
442
- "Visible Chrome is open with the engine page loaded. Solve the Turnstile checkbox or other challenge in the visible window to store cookies. Cookies persist for future runs.",
443
- };
444
- process.stderr.write(
445
- `[greedysearch] 🔓 ${stillBlocked.join(", ")} still blocked — keeping visible Chrome open. Solve the challenge in the window to store cookies, then rerun.\n`,
526
+
527
+ // Poll for the user to solve any remaining challenges in
528
+ // visible Chrome. If a per-engine challenge clears, retry
529
+ // that engine's extractor on the cleared tab. Fall back to
530
+ // the existing _needsHumanVerification envelope only if the
531
+ // polling budget is exhausted.
532
+ const allPollResults = await Promise.all(
533
+ stillBlocked.map(async (blockedEngine) => {
534
+ const tab =
535
+ retryTabs[recoveryCandidates.indexOf(blockedEngine)];
536
+ const result = await waitForChallengeCleared({
537
+ tab,
538
+ engine: blockedEngine,
539
+ }).catch((pollErr) => ({
540
+ cleared: false,
541
+ reason: pollErr.message || String(pollErr),
542
+ }));
543
+ return { engine: blockedEngine, tab, ...result };
544
+ }),
545
+ );
546
+ const clearedEngines = allPollResults.filter((p) => p.cleared);
547
+ if (clearedEngines.length > 0) {
548
+ process.stderr.write(
549
+ `[greedysearch] 🔄 Auto-resuming ${clearedEngines.map((p) => p.engine).join(", ")} on cleared tabs...\n`,
550
+ );
551
+ await Promise.allSettled(
552
+ clearedEngines.map(async (p) => {
553
+ const script = ENGINES[p.engine];
554
+ try {
555
+ const result = await runExtractor(
556
+ script,
557
+ query,
558
+ p.tab,
559
+ short,
560
+ null,
561
+ locale,
562
+ );
563
+ out[p.engine] = result;
564
+ process.stderr.write(`PROGRESS:${p.engine}:done\n`);
565
+ } catch (resumeErr) {
566
+ process.stderr.write(
567
+ `[greedysearch] ⚠️ Resume extraction failed for ${p.engine}: ${resumeErr.message}\n`,
568
+ );
569
+ }
570
+ }),
571
+ );
572
+ }
573
+ const stillStillBlocked = stillBlocked.filter(
574
+ (e) => !clearedEngines.find((p) => p.engine === e),
446
575
  );
447
- // Visible Chrome stays open so the user can interact with any
448
- // Turnstile/Cloudflare challenge. Once solved, cookies are stored
449
- // in the shared profile and future headless runs will reuse them.
576
+ if (stillStillBlocked.length === 0) {
577
+ // All blocked engines cleared and resumed successfully
578
+ keepVisibleForHuman = false;
579
+ } else {
580
+ keepVisibleForHuman = true;
581
+ out._needsHumanVerification = {
582
+ engines: stillStillBlocked,
583
+ message:
584
+ "Visible Chrome is open with the engine page loaded. Solve the Turnstile checkbox or other challenge in the visible window to store cookies. Cookies persist for future runs.",
585
+ };
586
+ process.stderr.write(
587
+ `[greedysearch] 🔓 ${stillStillBlocked.join(", ")} still blocked — keeping visible Chrome open. Solve the challenge in the window to store cookies, then rerun.\n`,
588
+ );
589
+ // Visible Chrome stays open so the user can interact with any
590
+ // Turnstile/Cloudflare challenge. Once solved, cookies are stored
591
+ // in the shared profile and future headless runs will reuse them.
592
+ }
450
593
  }
451
594
  } finally {
452
595
  if (keepVisibleForHuman) {
@@ -488,18 +631,9 @@ async function main() {
488
631
  // Build a canonical source registry across all engines
489
632
  out._sources = buildSourceRegistry(out, query);
490
633
 
491
- // Pre-navigate Gemini tab in parallel with source fetch so the page
492
- // is already loaded when synthesis starts — saves ~4s of nav time.
493
- let geminiTabPromise = null;
494
- if (depth !== "fast") {
495
- geminiTabPromise = openNewTab("https://gemini.google.com/app").catch(
496
- () => null,
497
- );
498
- }
499
-
500
634
  // Source fetching: default for all "all" searches
501
635
  // Fetch all sources in a single batch (concurrency = source count).
502
- if (depth !== "fast" && out._sources.length > 0) {
636
+ if (shouldFetchSources && out._sources.length > 0) {
503
637
  process.stderr.write("PROGRESS:source-fetch:start\n");
504
638
  const fetchedSources = await fetchMultipleSources(
505
639
  out._sources,
@@ -512,29 +646,41 @@ async function main() {
512
646
  process.stderr.write("PROGRESS:source-fetch:done\n");
513
647
  }
514
648
 
515
- // Synthesize with Gemini for all non-fast modes
516
- if (depth !== "fast") {
649
+ // Optional engine-agnostic synthesis for multi-engine searches.
650
+ // Open the synthesizer tab HERE (after source fetch) instead of
651
+ // pre-opening before source fetch. Pre-opening was fragile: Chrome could
652
+ // be killed during visible recovery or idle-timeout between source fetch
653
+ // and synthesis, leaving a stale tab ID that causes "No target matching prefix".
654
+ if (shouldSynthesize) {
517
655
  process.stderr.write("PROGRESS:synthesis:start\n");
518
656
  process.stderr.write(
519
- "[greedysearch] Synthesizing results with Gemini...\n",
657
+ `[greedysearch] Synthesizing results with ${synthesizer}...\n`,
520
658
  );
659
+ let synthesisTab = null;
521
660
  try {
522
- const geminiTab = (await geminiTabPromise) ?? (await openNewTab());
523
- const synthesis = await synthesizeWithGemini(query, out, {
524
- grounded: depth === "deep",
525
- tabPrefix: geminiTab,
661
+ synthesisTab = await openNewTab(getSynthesisStartUrl(synthesizer));
662
+ const synthesis = await synthesizeResults(query, out, {
663
+ grounded: groundedSynthesis,
664
+ tabPrefix: synthesisTab,
665
+ visible: process.env.GREEDY_SEARCH_VISIBLE === "1",
666
+ synthesizer,
526
667
  });
527
668
  out._synthesis = {
528
669
  ...synthesis,
529
670
  synthesized: true,
530
671
  };
531
- await closeTab(geminiTab);
532
672
  process.stderr.write("PROGRESS:synthesis:done\n");
533
673
  } catch (e) {
534
674
  process.stderr.write(
535
675
  `[greedysearch] Synthesis failed: ${e.message}\n`,
536
676
  );
537
- out._synthesis = { error: e.message, synthesized: false };
677
+ out._synthesis = {
678
+ error: e.message,
679
+ synthesized: false,
680
+ synthesizedBy: synthesizer,
681
+ };
682
+ } finally {
683
+ if (synthesisTab) await closeTab(synthesisTab);
538
684
  }
539
685
  }
540
686
 
@@ -544,12 +690,12 @@ async function main() {
544
690
  out._topSource = await fetchTopSource(top.canonicalUrl || top.url);
545
691
  }
546
692
 
547
- // Always include confidence metrics for non-fast searches
548
- if (depth !== "fast") out._confidence = buildConfidence(out);
693
+ // Include confidence metrics for grounded multi-engine searches.
694
+ if (!legacyFast) out._confidence = buildConfidence(out);
549
695
 
550
696
  writeOutput(out, outFile, {
551
697
  inline,
552
- synthesize: depth !== "fast",
698
+ synthesize: shouldSynthesize,
553
699
  query,
554
700
  });
555
701
  return;
@@ -585,13 +731,31 @@ async function main() {
585
731
  ? "bing"
586
732
  : script.includes("perplexity")
587
733
  ? "perplexity"
588
- : null;
734
+ : script.includes("chatgpt")
735
+ ? "chatgpt"
736
+ : script.includes("semantic-scholar")
737
+ ? "semantic-scholar"
738
+ : script.includes("logically")
739
+ ? "logically"
740
+ : null;
589
741
  const canRetryVisible =
590
742
  recoveryEngine &&
591
743
  process.env.GREEDY_SEARCH_VISIBLE !== "1" &&
592
744
  isHeadlessBlockedResult(e);
593
745
 
594
746
  if (canRetryVisible) {
747
+ logVisibleRecovery({
748
+ scope: "single",
749
+ phase: "start",
750
+ engines: [recoveryEngine],
751
+ reasons: {
752
+ [recoveryEngine]: {
753
+ error: e.message || null,
754
+ envelope: e.envelope || null,
755
+ lastStage: e.lastStage || null,
756
+ },
757
+ },
758
+ });
595
759
  process.stderr.write(
596
760
  `[greedysearch] 🔓 ${recoveryEngine} blocked in headless — retrying visible to establish cookies...\n`,
597
761
  );
@@ -612,14 +776,87 @@ async function main() {
612
776
  null,
613
777
  locale,
614
778
  );
779
+ logVisibleRecovery({
780
+ scope: "single",
781
+ phase: "success",
782
+ engines: [recoveryEngine],
783
+ result: {
784
+ engine: recoveryEngine,
785
+ mode: result._envelope?.mode || null,
786
+ durationMs: result._envelope?.durationMs || null,
787
+ lastStage: result._envelope?.lastStage || null,
788
+ },
789
+ });
615
790
  if (fetchSource && result.sources?.length > 0) {
616
791
  result.topSource = await fetchTopSource(result.sources[0].url);
617
792
  }
618
793
  writeOutput(result, outFile, { inline, synthesize: false, query });
619
794
  return;
620
795
  } catch (retryErr) {
621
- // Any visible retry failure: keep Chrome open so user can solve Turnstile.
622
- // Once solved, cookies are stored in the shared profile for future headless runs.
796
+ logVisibleRecovery({
797
+ scope: "single",
798
+ phase: "needs-human",
799
+ engines: [recoveryEngine],
800
+ result: {
801
+ engine: recoveryEngine,
802
+ error: retryErr.message || String(retryErr),
803
+ envelope: retryErr.envelope || null,
804
+ },
805
+ });
806
+ // Any visible retry failure: poll for the user to solve the challenge in
807
+ // visible Chrome. If the page transitions past the challenge (cookies
808
+ // cleared, chat UI rendered, Turnstile iframe gone), automatically retry
809
+ // the extractor so the user does not need to rerun manually. Fall back
810
+ // to the existing _needsHumanVerification envelope only if the polling
811
+ // budget is exhausted.
812
+ const pollResult = await waitForChallengeCleared({
813
+ tab: retryTab,
814
+ engine: recoveryEngine,
815
+ }).catch((pollErr) => ({
816
+ cleared: false,
817
+ reason: pollErr.message || String(pollErr),
818
+ }));
819
+
820
+ if (pollResult.cleared) {
821
+ process.stderr.write(
822
+ `[greedysearch] 🔄 Auto-resuming ${recoveryEngine} extraction on the now-cleared tab...\n`,
823
+ );
824
+ try {
825
+ const result = await runExtractor(
826
+ script,
827
+ query,
828
+ retryTab,
829
+ short,
830
+ null,
831
+ locale,
832
+ );
833
+ logVisibleRecovery({
834
+ scope: "single",
835
+ phase: "success-after-poll",
836
+ engines: [recoveryEngine],
837
+ result: {
838
+ engine: recoveryEngine,
839
+ mode: result._envelope?.mode || null,
840
+ durationMs: result._envelope?.durationMs || null,
841
+ lastStage: result._envelope?.lastStage || null,
842
+ },
843
+ });
844
+ if (fetchSource && result.sources?.length > 0) {
845
+ result.topSource = await fetchTopSource(result.sources[0].url);
846
+ }
847
+ writeOutput(result, outFile, { inline, synthesize: false, query });
848
+ return;
849
+ } catch (resumeErr) {
850
+ process.stderr.write(
851
+ `[greedysearch] ⚠️ Resume extraction failed: ${resumeErr.message}\n`,
852
+ );
853
+ // Fall through to needs-human with the resume error context
854
+ }
855
+ }
856
+
857
+ // Polling timed out (or resume extraction failed) — keep Chrome open so the
858
+ // user can solve Turnstile. Once solved, cookies are stored in the shared
859
+ // profile for future headless runs.
623
860
  keepVisibleForHuman = true;
624
861
  writeOutput(
625
862
  {