@apmantza/greedysearch-pi 1.9.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/launch.mjs CHANGED
@@ -76,6 +76,15 @@ const BASE_CHROME_FLAGS = [
76
76
  "--window-size=1920,1080",
77
77
  "--lang=en-US",
78
78
  "--force-color-profile=srgb",
79
+ // Background-tab throttling kills parallel extractions: Chrome clamps
80
+ // setTimeout to ~1Hz in unfocused tabs, so a streaming response that
81
+ // finishes in 5s solo takes 60s+ when 4 engines share one Chrome.
82
+ // The trio below restores full-speed JS in every tab. Safe for our
83
+ // anti-bot stealth — Cloudflare detects CDP/webdriver artifacts, not
84
+ // timer-throttling behavior. Same flags Playwright/Puppeteer add.
85
+ "--disable-background-timer-throttling",
86
+ "--disable-renderer-backgrounding",
87
+ "--disable-backgrounding-occluded-windows",
79
88
  ];
80
89
 
81
90
  function getChromeVersion(chromePath) {
package/bin/search.mjs CHANGED
@@ -20,7 +20,7 @@
20
20
  // node search.mjs gem "latest React features"
21
21
  // node search.mjs all "how does TCP congestion control work"
22
22
 
23
- import { existsSync, readFileSync } from "node:fs";
23
+ import { appendFileSync, existsSync, readFileSync } from "node:fs";
24
24
  // Config file for user defaults
25
25
  import { homedir } from "node:os";
26
26
  import { join } from "node:path";
@@ -33,7 +33,12 @@ import {
33
33
  openNewTab,
34
34
  touchActivity,
35
35
  } from "../src/search/chrome.mjs";
36
- import { ALL_ENGINES, ENGINES } from "../src/search/constants.mjs";
36
+ import {
37
+ ALL_ENGINES,
38
+ ENGINES,
39
+ SYNTHESIZER,
40
+ VISIBLE_RECOVERY_LOG,
41
+ } from "../src/search/constants.mjs";
37
42
  import { runExtractor } from "../src/search/engines.mjs";
38
43
  import {
39
44
  fetchMultipleSources,
@@ -51,7 +56,11 @@ import {
51
56
  mergeFetchDataIntoSources,
52
57
  } from "../src/search/sources.mjs";
53
58
  import { buildConfidence } from "../src/search/synthesis.mjs";
54
- import { synthesizeWithGemini } from "../src/search/synthesis-runner.mjs";
59
+ import {
60
+ getSynthesisStartUrl,
61
+ normalizeSynthesizer,
62
+ synthesizeResults,
63
+ } from "../src/search/synthesis-runner.mjs";
55
64
  import { normalizeQuery } from "../src/search/query.mjs";
56
65
  import { runResearchMode } from "../src/search/research.mjs";
57
66
 
@@ -69,6 +78,18 @@ function loadUserConfig() {
69
78
  return {};
70
79
  }
71
80
 
81
+ function logVisibleRecovery(event) {
82
+ try {
83
+ appendFileSync(
84
+ VISIBLE_RECOVERY_LOG,
85
+ `${JSON.stringify({ at: new Date().toISOString(), ...event })}\n`,
86
+ "utf8",
87
+ );
88
+ } catch {
89
+ // Best-effort diagnostics only. Never fail a search because logging failed.
90
+ }
91
+ }
92
+
72
93
  /** Read query/prompt from stdin (used with --stdin to avoid command-line leakage) */
73
94
  async function readStdin() {
74
95
  return new Promise((resolve) => {
@@ -89,16 +110,20 @@ async function main() {
89
110
  `${[
90
111
  'Usage: node search.mjs <engine> "<query>"',
91
112
  "",
92
- "Engines: perplexity (p), bing (b), google (g), gemini (gem), all",
113
+ "Engines: all, perplexity (p), google (g), chatgpt (gpt), gemini (gem), semantic-scholar (s2), logically (log), bing (b)",
93
114
  "",
94
115
  "Flags:",
95
- " --fast Quick mode: no source fetching or synthesis",
96
- " --synthesize Deprecated: synthesis is now default for multi-engine",
97
- " --deep-research Deprecated: source fetching is now default",
116
+ " --synthesize For engine=all: synthesize fetched sources",
117
+ " --synthesizer <engine> Synthesis engine (default from ~/.pi/greedyconfig)",
118
+ " --fast Legacy quick mode: no source fetching or synthesis",
119
+ " --depth <mode> Legacy: fast|standard|deep aliases, or research",
120
+ " --deep-research Deprecated alias for --research",
98
121
  " --research Iterative query/learnings loop (alias: --depth research)",
99
122
  " --breadth <n> Research mode query breadth, 1-5 (default: 3)",
100
123
  " --iterations <n> Research mode rounds, 1-3 (default: 2)",
101
124
  " --max-sources <n> Research mode fetched source cap, 3-12",
125
+ " --research-out-dir <dir> Write research bundle to a specific directory",
126
+ " --no-research-bundle Disable the default .pi/greedysearch-research bundle",
102
127
  " --fetch-top-source Fetch content from top source",
103
128
  " --inline Output JSON to stdout (for piping)",
104
129
  " --locale <lang> Force results language (en, de, fr, etc.)",
@@ -112,10 +137,11 @@ async function main() {
112
137
  " GREEDY_SEARCH_LOCALE Default locale (default: en)",
113
138
  "",
114
139
  "Examples:",
115
- ' node search.mjs all "Node.js streams" # Default: sources + synthesis',
116
- ' node search.mjs all "quick check" --fast # Fast: no sources/synthesis',
140
+ ' node search.mjs all "Node.js streams" # Grounded: engines + fetched sources',
141
+ ' node search.mjs all "Node.js streams" --synthesize # Add Gemini synthesis',
142
+ ' node search.mjs all "quick check" --fast # Legacy fast: no sources/synthesis',
117
143
  ' node search.mjs all "browser automation" --research --breadth 3 --iterations 2',
118
- ' node search.mjs p "what is memoization" # Single engine: fast mode',
144
+ ' node search.mjs p "what is memoization" # Single engine search',
119
145
  ].join("\n")}\n`,
120
146
  );
121
147
  process.exit(1);
@@ -129,6 +155,11 @@ async function main() {
129
155
  process.env.GREEDY_SEARCH_VISIBLE = "1";
130
156
  process.env.GREEDY_SEARCH_ALWAYS_VISIBLE = "1";
131
157
  delete process.env.GREEDY_SEARCH_HEADLESS;
158
+ } else if (process.env.GREEDY_SEARCH_VISIBLE !== "1") {
159
+ // Establish the desired mode BEFORE ensureChrome() so a stale visible
160
+ // recovery browser is switched back to headless before research planning
161
+ // and Gemini synthesis tabs are opened.
162
+ process.env.GREEDY_SEARCH_HEADLESS = "1";
132
163
  }
133
164
 
134
165
  await ensureChrome();
@@ -136,41 +167,44 @@ async function main() {
136
167
  // Track activity for headless idle timeout
137
168
  touchActivity();
138
169
 
139
- // Depth modes: fast (no synthesis/fetch), standard (synthesis+fetch 5 sources)
140
170
  const depthIdx = args.indexOf("--depth");
141
- let depth = "standard"; // DEFAULT: synthesis + source fetch
142
-
143
- if (depthIdx !== -1 && args[depthIdx + 1]) {
144
- depth = args[depthIdx + 1];
145
- } else if (args.includes("--fast")) {
146
- depth = "fast"; // Explicit fast mode requested
147
- }
148
-
149
- // For single engine (not "all"), default to fast unless explicit
171
+ const legacyDepth =
172
+ depthIdx !== -1 && args[depthIdx + 1]
173
+ ? args[depthIdx + 1].toLowerCase()
174
+ : null;
150
175
  const engineArg = args.find((a) => !a.startsWith("--"))?.toLowerCase();
151
- if (engineArg !== "all" && depthIdx === -1 && !args.includes("--fast")) {
152
- depth = "fast";
153
- }
176
+ const researchMode =
177
+ args.includes("--research") ||
178
+ args.includes("--deep-research") ||
179
+ legacyDepth === "research";
180
+ const legacyFast = args.includes("--fast") || legacyDepth === "fast";
181
+ const legacySynthesisDepth =
182
+ legacyDepth === "standard" ||
183
+ legacyDepth === "deep" ||
184
+ args.includes("--deep");
185
+ const shouldFetchSources = engineArg === "all" && !legacyFast;
186
+ const shouldSynthesize =
187
+ engineArg === "all" &&
188
+ !legacyFast &&
189
+ (args.includes("--synthesize") || legacySynthesisDepth);
190
+ const groundedSynthesis = legacyDepth === "deep" || args.includes("--deep");
154
191
 
155
- // --deep-research / --deep flags map to deep mode (backward compat)
156
192
  if (args.includes("--deep-research")) {
157
- depth = "standard";
158
193
  process.stderr.write(
159
- "[greedysearch] --deep-research is deprecated; use --depth standard (now default)\n",
194
+ "[greedysearch] --deep-research is deprecated; use --research or --depth research\n",
160
195
  );
161
196
  }
162
- if (args.includes("--deep")) {
163
- depth = "deep";
164
- }
165
- if (args.includes("--research")) {
166
- depth = "research";
167
- }
168
- if (args.includes("--synthesize")) {
197
+ if (legacySynthesisDepth) {
169
198
  process.stderr.write(
170
- "[greedysearch] --synthesize is deprecated; synthesis is now default for multi-engine\n",
199
+ "[greedysearch] depth fast|standard|deep is deprecated; use default grounded search plus --synthesize when needed\n",
171
200
  );
172
201
  }
173
202
 
203
+ const synthesizerIdx = args.indexOf("--synthesizer");
204
+ const synthesizer = normalizeSynthesizer(
205
+ synthesizerIdx === -1 ? SYNTHESIZER : args[synthesizerIdx + 1],
206
+ );
207
+
174
208
  const full = args.includes("--full");
175
209
  const short = !full;
176
210
  const fetchSource = args.includes("--fetch-top-source");
@@ -183,9 +217,10 @@ async function main() {
183
217
  iterationsIdx === -1 ? undefined : args[iterationsIdx + 1];
184
218
  const researchMaxSources =
185
219
  maxSourcesIdx === -1 ? undefined : args[maxSourcesIdx + 1];
186
- // Headless is the default — only disable if GREEDY_SEARCH_VISIBLE=1
187
- if (process.env.GREEDY_SEARCH_VISIBLE !== "1")
188
- process.env.GREEDY_SEARCH_HEADLESS = "1";
220
+ const researchOutDirIdx = args.indexOf("--research-out-dir");
221
+ const researchOutDir =
222
+ researchOutDirIdx === -1 ? undefined : args[researchOutDirIdx + 1];
223
+ const writeResearchBundle = !args.includes("--no-research-bundle");
189
224
  const outIdx = args.indexOf("--out");
190
225
  const outFile = outIdx === -1 ? null : args[outIdx + 1];
191
226
 
@@ -218,18 +253,23 @@ async function main() {
218
253
  a !== "--visible" &&
219
254
  a !== "--always-visible" &&
220
255
  a !== "--depth" &&
256
+ a !== "--synthesizer" &&
221
257
  a !== "--out" &&
222
258
  a !== "--locale" &&
223
259
  a !== "--breadth" &&
224
260
  a !== "--iterations" &&
225
261
  a !== "--max-sources" &&
262
+ a !== "--research-out-dir" &&
263
+ a !== "--no-research-bundle" &&
226
264
  a !== "--help" &&
227
265
  (depthIdx === -1 || i !== depthIdx + 1) &&
266
+ (synthesizerIdx === -1 || i !== synthesizerIdx + 1) &&
228
267
  (outIdx === -1 || i !== outIdx + 1) &&
229
268
  (localeIdx === -1 || i !== localeIdx + 1) &&
230
269
  (breadthIdx === -1 || i !== breadthIdx + 1) &&
231
270
  (iterationsIdx === -1 || i !== iterationsIdx + 1) &&
232
- (maxSourcesIdx === -1 || i !== maxSourcesIdx + 1),
271
+ (maxSourcesIdx === -1 || i !== maxSourcesIdx + 1) &&
272
+ (researchOutDirIdx === -1 || i !== researchOutDirIdx + 1),
233
273
  );
234
274
  const engine = rest[0]?.toLowerCase();
235
275
  // Read query from stdin when --stdin flag is set (avoids leaking query in process table)
@@ -241,7 +281,7 @@ async function main() {
241
281
  query = rest.slice(1).join(" ");
242
282
  }
243
283
 
244
- if (depth === "research") {
284
+ if (researchMode) {
245
285
  if (engine !== "all") {
246
286
  process.stderr.write(
247
287
  `[greedysearch] Research mode uses all engines; ignoring engine "${engine}".\n`,
@@ -254,6 +294,8 @@ async function main() {
254
294
  maxSources: researchMaxSources,
255
295
  locale,
256
296
  short,
297
+ writeBundle: writeResearchBundle,
298
+ researchOutDir,
257
299
  });
258
300
  writeOutput(out, outFile, {
259
301
  inline,
@@ -270,8 +312,11 @@ async function main() {
270
312
  // engine homepage so extractors can skip the initial navigation.
271
313
  const ENGINE_START_URLS = {
272
314
  perplexity: "https://www.perplexity.ai/",
273
- bing: "https://copilot.microsoft.com/",
274
315
  google: "https://www.google.com/",
316
+ "semantic-scholar": "https://www.semanticscholar.org/",
317
+ semanticscholar: "https://www.semanticscholar.org/",
318
+ s2: "https://www.semanticscholar.org/",
319
+ logically: "https://logically.app/research-assistant/",
275
320
  };
276
321
  const engineTabs = await Promise.all(
277
322
  ALL_ENGINES.map((e) => openNewTab(ENGINE_START_URLS[e])),
@@ -280,11 +325,10 @@ async function main() {
280
325
  await cdp(["list"]);
281
326
 
282
327
  // Time-bounded per-engine extraction so slow engines don't stall the batch.
283
- // Bing can take a little longer than Google/Perplexity under CDP contention;
284
- // keep fast mode bounded while avoiding most false recovery trips.
285
328
  const engineTimeoutFor = (engineName) => {
286
- if (depth !== "fast") return 55000;
287
- return engineName === "bing" ? 40000 : 30000;
329
+ if (!legacyFast) return 70000;
330
+ // ChatGPT needs ~25-30s solo; under CDP contention needs more headroom
331
+ return engineName === "chatgpt" ? 60000 : 35000;
288
332
  };
289
333
 
290
334
  try {
@@ -316,7 +360,19 @@ async function main() {
316
360
  if (r.status === "fulfilled") {
317
361
  out[r.value.engine] = r.value;
318
362
  } else {
319
- out[ALL_ENGINES[i]] = { error: r.reason?.message || "unknown error" };
363
+ const err = r.reason;
364
+ const msg = err?.message || "unknown error";
365
+ out[ALL_ENGINES[i]] = { error: msg };
366
+ if (err?.lastStage) {
367
+ process.stderr.write(
368
+ `[greedysearch] ${ALL_ENGINES[i]} failed at stage '${err.lastStage}': ${msg}\n`,
369
+ );
370
+ }
371
+ if (err?.partialErr) {
372
+ process.stderr.write(
373
+ `[greedysearch] ${ALL_ENGINES[i]} tail stderr:\n${err.partialErr}\n`,
374
+ );
375
+ }
320
376
  }
321
377
  }
322
378
 
@@ -331,6 +387,20 @@ async function main() {
331
387
  recoveryCandidates.length > 0 &&
332
388
  process.env.GREEDY_SEARCH_VISIBLE !== "1"
333
389
  ) {
390
+ logVisibleRecovery({
391
+ scope: "all",
392
+ phase: "start",
393
+ engines: recoveryCandidates,
394
+ reasons: Object.fromEntries(
395
+ recoveryCandidates.map((engineName) => [
396
+ engineName,
397
+ {
398
+ error: out[engineName]?.error || null,
399
+ envelope: out[engineName]?._envelope || null,
400
+ },
401
+ ]),
402
+ ),
403
+ });
334
404
  process.stderr.write(
335
405
  `[greedysearch] 🔓 Headless ${recoveryCandidates.join(", ")} search hit timeout/verification/antibot signals — retrying visible to establish cookies...\n`,
336
406
  );
@@ -431,6 +501,23 @@ async function main() {
431
501
  stillBlocked.push(...secondStillBlocked);
432
502
  }
433
503
 
504
+ logVisibleRecovery({
505
+ scope: "all",
506
+ phase: stillBlocked.length > 0 ? "needs-human" : "success",
507
+ engines: recoveryCandidates,
508
+ results: Object.fromEntries(
509
+ recoveryCandidates.map((engineName) => [
510
+ engineName,
511
+ {
512
+ mode: out[engineName]?._envelope?.mode || null,
513
+ durationMs: out[engineName]?._envelope?.durationMs || null,
514
+ lastStage: out[engineName]?._envelope?.lastStage || null,
515
+ error: out[engineName]?.error || null,
516
+ },
517
+ ]),
518
+ ),
519
+ });
520
+
434
521
  if (stillBlocked.length > 0) {
435
522
  for (const blockedEngine of stillBlocked) {
436
523
  process.stderr.write(`PROGRESS:${blockedEngine}:needs-human\n`);
@@ -488,18 +575,9 @@ async function main() {
488
575
  // Build a canonical source registry across all engines
489
576
  out._sources = buildSourceRegistry(out, query);
490
577
 
491
- // Pre-navigate Gemini tab in parallel with source fetch so the page
492
- // is already loaded when synthesis starts — saves ~4s of nav time.
493
- let geminiTabPromise = null;
494
- if (depth !== "fast") {
495
- geminiTabPromise = openNewTab("https://gemini.google.com/app").catch(
496
- () => null,
497
- );
498
- }
499
-
500
578
  // Source fetching: default for all "all" searches
501
579
  // Fetch all sources in a single batch (concurrency = source count).
502
- if (depth !== "fast" && out._sources.length > 0) {
580
+ if (shouldFetchSources && out._sources.length > 0) {
503
581
  process.stderr.write("PROGRESS:source-fetch:start\n");
504
582
  const fetchedSources = await fetchMultipleSources(
505
583
  out._sources,
@@ -512,29 +590,41 @@ async function main() {
512
590
  process.stderr.write("PROGRESS:source-fetch:done\n");
513
591
  }
514
592
 
515
- // Synthesize with Gemini for all non-fast modes
516
- if (depth !== "fast") {
593
+ // Optional engine-agnostic synthesis for multi-engine searches.
594
+ // Open the synthesizer tab HERE (after source fetch) instead of
595
+ // pre-opening before source fetch. Pre-opening was fragile: Chrome could
596
+ // be killed during visible recovery or idle-timeout between source fetch
597
+ // and synthesis, leaving a stale tab ID that causes "No target matching prefix".
598
+ if (shouldSynthesize) {
517
599
  process.stderr.write("PROGRESS:synthesis:start\n");
518
600
  process.stderr.write(
519
- "[greedysearch] Synthesizing results with Gemini...\n",
601
+ `[greedysearch] Synthesizing results with ${synthesizer}...\n`,
520
602
  );
603
+ let synthesisTab = null;
521
604
  try {
522
- const geminiTab = (await geminiTabPromise) ?? (await openNewTab());
523
- const synthesis = await synthesizeWithGemini(query, out, {
524
- grounded: depth === "deep",
525
- tabPrefix: geminiTab,
605
+ synthesisTab = await openNewTab(getSynthesisStartUrl(synthesizer));
606
+ const synthesis = await synthesizeResults(query, out, {
607
+ grounded: groundedSynthesis,
608
+ tabPrefix: synthesisTab,
609
+ visible: process.env.GREEDY_SEARCH_VISIBLE === "1",
610
+ synthesizer,
526
611
  });
527
612
  out._synthesis = {
528
613
  ...synthesis,
529
614
  synthesized: true,
530
615
  };
531
- await closeTab(geminiTab);
532
616
  process.stderr.write("PROGRESS:synthesis:done\n");
533
617
  } catch (e) {
534
618
  process.stderr.write(
535
619
  `[greedysearch] Synthesis failed: ${e.message}\n`,
536
620
  );
537
- out._synthesis = { error: e.message, synthesized: false };
621
+ out._synthesis = {
622
+ error: e.message,
623
+ synthesized: false,
624
+ synthesizedBy: synthesizer,
625
+ };
626
+ } finally {
627
+ if (synthesisTab) await closeTab(synthesisTab);
538
628
  }
539
629
  }
540
630
 
@@ -544,12 +634,12 @@ async function main() {
544
634
  out._topSource = await fetchTopSource(top.canonicalUrl || top.url);
545
635
  }
546
636
 
547
- // Always include confidence metrics for non-fast searches
548
- if (depth !== "fast") out._confidence = buildConfidence(out);
637
+ // Include confidence metrics for grounded multi-engine searches.
638
+ if (!legacyFast) out._confidence = buildConfidence(out);
549
639
 
550
640
  writeOutput(out, outFile, {
551
641
  inline,
552
- synthesize: depth !== "fast",
642
+ synthesize: shouldSynthesize,
553
643
  query,
554
644
  });
555
645
  return;
@@ -585,13 +675,31 @@ async function main() {
585
675
  ? "bing"
586
676
  : script.includes("perplexity")
587
677
  ? "perplexity"
588
- : null;
678
+ : script.includes("chatgpt")
679
+ ? "chatgpt"
680
+ : script.includes("semantic-scholar")
681
+ ? "semantic-scholar"
682
+ : script.includes("logically")
683
+ ? "logically"
684
+ : null;
589
685
  const canRetryVisible =
590
686
  recoveryEngine &&
591
687
  process.env.GREEDY_SEARCH_VISIBLE !== "1" &&
592
688
  isHeadlessBlockedResult(e);
593
689
 
594
690
  if (canRetryVisible) {
691
+ logVisibleRecovery({
692
+ scope: "single",
693
+ phase: "start",
694
+ engines: [recoveryEngine],
695
+ reasons: {
696
+ [recoveryEngine]: {
697
+ error: e.message || null,
698
+ envelope: e.envelope || null,
699
+ lastStage: e.lastStage || null,
700
+ },
701
+ },
702
+ });
595
703
  process.stderr.write(
596
704
  `[greedysearch] 🔓 ${recoveryEngine} blocked in headless — retrying visible to establish cookies...\n`,
597
705
  );
@@ -612,12 +720,33 @@ async function main() {
612
720
  null,
613
721
  locale,
614
722
  );
723
+ logVisibleRecovery({
724
+ scope: "single",
725
+ phase: "success",
726
+ engines: [recoveryEngine],
727
+ result: {
728
+ engine: recoveryEngine,
729
+ mode: result._envelope?.mode || null,
730
+ durationMs: result._envelope?.durationMs || null,
731
+ lastStage: result._envelope?.lastStage || null,
732
+ },
733
+ });
615
734
  if (fetchSource && result.sources?.length > 0) {
616
735
  result.topSource = await fetchTopSource(result.sources[0].url);
617
736
  }
618
737
  writeOutput(result, outFile, { inline, synthesize: false, query });
619
738
  return;
620
739
  } catch (retryErr) {
740
+ logVisibleRecovery({
741
+ scope: "single",
742
+ phase: "needs-human",
743
+ engines: [recoveryEngine],
744
+ result: {
745
+ engine: recoveryEngine,
746
+ error: retryErr.message || String(retryErr),
747
+ envelope: retryErr.envelope || null,
748
+ },
749
+ });
621
750
  // Any visible retry failure: keep Chrome open so user can solve Turnstile.
622
751
  // Once solved, cookies are stored in the shared profile for future headless runs.
623
752
  keepVisibleForHuman = true;
@@ -41,6 +41,24 @@ const GLOBAL_VAR = "__bingClipboard";
41
41
  // Bing Copilot-specific helpers
42
42
  // ============================================================================
43
43
 
44
+ async function detectSignInWall(tab) {
45
+ // Language-agnostic: if the chat input is absent but the page hosts
46
+ // known OAuth provider endpoints, we're on the Copilot login wall.
47
+ const code = `(() => {
48
+ if (document.querySelector('#userInput')) return false;
49
+ const links = Array.from(document.querySelectorAll('a[href], button'));
50
+ const hasOAuth = links.some(el => {
51
+ const h = (el.href || el.getAttribute('formaction') || '').toLowerCase();
52
+ return h.includes('login.microsoftonline.com')
53
+ || h.includes('appleid.apple.com')
54
+ || h.includes('accounts.google.com');
55
+ });
56
+ return hasOAuth;
57
+ })()`;
58
+ const result = await cdp(["eval", tab, code]).catch(() => "false");
59
+ return result === "true";
60
+ }
61
+
44
62
  async function extractAnswer(tab, env, query = "") {
45
63
  // In headless mode: snap the accessibility tree before spending ~18s on
46
64
  // clipboard polls. Copilot loads its input fine in headless but renders
@@ -181,10 +199,15 @@ async function extractFromAccessibilityTree(tab, query = "") {
181
199
  const snap = await cdp(["snap", tab]).catch(() => "");
182
200
  if (!snap || (await detectVerificationChallenge(tab, cdp))) return "";
183
201
 
184
- const articleLines = snap
185
- .split("\n")
186
- .map((line) => line.match(/^\s*\[article\]\s+(.+)$/i)?.[1])
187
- .filter(Boolean);
202
+ // Linear article extraction — no regex. Avoids the ReDoS-prone
203
+ // /^\s*\[article\]\s+(.+)$/i pattern (SonarCloud hotspot js:S5852).
204
+ const articleLines = [];
205
+ for (const line of snap.split("\n")) {
206
+ const trimmed = line.trimStart();
207
+ if (!trimmed.toLowerCase().startsWith("[article]")) continue;
208
+ const after = trimmed.slice("[article]".length).trimStart();
209
+ if (after) articleLines.push(after);
210
+ }
188
211
  if (articleLines.length === 0) return "";
189
212
 
190
213
  const answer = pickAnswerArticle(articleLines, query);
@@ -419,12 +442,27 @@ async function main() {
419
442
  }
420
443
  }
421
444
 
445
+ // Detect sign-in wall before burning time waiting for an input that
446
+ // will never appear. Copilot now gates the chat behind Microsoft/Apple/Google
447
+ // login on fresh sessions.
448
+ if (await detectSignInWall(tab)) {
449
+ throw new Error(
450
+ "Copilot requires sign-in — please sign in with Microsoft, Apple, or Google in the visible browser window. Once signed in, cookies persist for future runs.",
451
+ );
452
+ }
453
+
422
454
  // Wait for React app to mount input (up to 15s, longer after verification)
423
455
  const inputReady = await waitForSelector(tab, S.input, 15000, 500);
424
456
  env.inputReady = inputReady;
425
457
  await new Promise((r) => setTimeout(r, jitter(300)));
426
458
 
427
459
  if (!inputReady) {
460
+ // If input still missing, double-check we didn't land on the login wall
461
+ if (await detectSignInWall(tab)) {
462
+ throw new Error(
463
+ "Copilot requires sign-in — please sign in with Microsoft, Apple, or Google in the visible browser window. Once signed in, cookies persist for future runs.",
464
+ );
465
+ }
428
466
  throw new Error(
429
467
  "Copilot input not found — verification may have failed or page is in unexpected state",
430
468
  );