@apmantza/greedysearch-pi 1.8.9 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@
10
10
  // Errors go to stderr only — stdout is always clean JSON for piping.
11
11
 
12
12
  import {
13
+ buildEnvelope,
13
14
  cdp,
14
15
  formatAnswer,
15
16
  getOrOpenTab,
@@ -36,22 +37,39 @@ const GLOBAL_VAR = "__bingClipboard";
36
37
  // Bing Copilot-specific helpers
37
38
  // ============================================================================
38
39
 
39
- async function extractAnswer(tab) {
40
+ async function extractAnswer(tab, env) {
41
+ // In headless mode: snap the accessibility tree before spending ~18s on
42
+ // clipboard polls. Copilot loads its input fine in headless but renders
43
+ // responses behind a Cloudflare-protected iframe — detecting that here
44
+ // fast-fails to the visible retry instead of burning all the poll time.
45
+ if (process.env.GREEDY_SEARCH_HEADLESS === "1") {
46
+ const snap = await cdp(["snap", tab]).catch(() => "");
47
+ if (/cloudflare|challenge|security check/i.test(snap)) {
48
+ console.error("[bing] Cloudflare challenge in snap — fast-failing to visible retry");
49
+ env.blockedBy = "cloudflare";
50
+ throw new Error("Cloudflare challenge detected — headless blocked");
51
+ }
52
+ }
53
+
40
54
  // Wait for the assistant copy button to exist. On fresh Copilot
41
55
  // sessions the answer text can render before the button handler is
42
56
  // fully hydrated. Wait for the button + a small hydration delay.
43
- await waitForCopyButton(tab, S.copyButton, { timeout: 5000 }).catch(
57
+ // 2s is enough — the CF snap check above ensures we only reach here
58
+ // on a clean response, where the button appears within ~1s.
59
+ await waitForCopyButton(tab, S.copyButton, { timeout: 2000 }).catch(
44
60
  () => null,
45
61
  );
46
62
  // Give React time to hydrate the click handler on the button
47
63
  await new Promise((r) => setTimeout(r, 800));
48
64
 
49
65
  let answer = await clickCopyAndPollClipboard(tab, 5000);
66
+ let clipboardEmpty = !answer;
50
67
 
51
68
  // Retry once if clipboard is empty (Copilot might be slow to wire the handler)
52
69
  if (!answer) {
53
70
  console.error("[bing] Clipboard empty, retrying copy/poll...");
54
71
  answer = await clickCopyAndPollClipboard(tab, 8000);
72
+ clipboardEmpty = !answer;
55
73
  }
56
74
 
57
75
  // DOM fallback: visible Copilot can render a valid response while the copy
@@ -59,17 +77,21 @@ async function extractAnswer(tab) {
59
77
  // answer from page text before treating this as a headless/iframe block.
60
78
  if (!answer) {
61
79
  answer = await extractFromVisibleDom(tab);
80
+ if (answer) env.fallbackUsed = "visibleDom";
62
81
  }
63
82
 
64
83
  // DOM fallback: if clipboard still empty, extract text directly from response DOM.
65
84
  // This handles headless mode where Copilot renders the AI reply inside nested
66
85
  // iframes (copilot.microsoft.com → copilot.fun → blob:…) and hides the copy button.
67
86
  if (!answer) {
68
- answer = await extractFromIframes(tab);
87
+ const iframeResult = await extractFromIframes(tab, env);
88
+ answer = iframeResult.answer;
89
+ if (answer) env.fallbackUsed = "iframeDom";
69
90
  }
70
91
 
71
92
  if (!answer) throw new Error("Clipboard interceptor returned empty text");
72
93
 
94
+ env.clipboardEmpty = clipboardEmpty;
73
95
  const sources = parseSourcesFromMarkdown(answer);
74
96
  return { answer: answer.trim(), sources };
75
97
  }
@@ -136,7 +158,7 @@ async function extractFromVisibleDom(tab) {
136
158
  * Returns `{ answer }`, where `answer` is the extracted text or an empty string on failure (caller falls through to error
137
159
  * which triggers the visible Chrome auto-retry in search.mjs).
138
160
  */
139
- async function extractFromIframes(mainTab) {
161
+ async function extractFromIframes(mainTab, env) {
140
162
  try {
141
163
  // Check if the AI copy button exists — if it does, we're in visible mode
142
164
  // and clipboard should have worked. This is a different issue.
@@ -145,7 +167,7 @@ async function extractFromIframes(mainTab) {
145
167
  mainTab,
146
168
  `!!document.querySelector('${S.copyButton}')`,
147
169
  ]).catch(() => "false");
148
- if (hasCopyBtn === "true") return ""; // not a headless/iframe issue
170
+ if (hasCopyBtn === "true") return { answer: "" }; // not a headless/iframe issue
149
171
 
150
172
  // Check for Cloudflare challenge in the accessibility tree.
151
173
  // If present, Copilot content is blocked entirely — no DOM extraction possible.
@@ -154,7 +176,8 @@ async function extractFromIframes(mainTab) {
154
176
  console.error(
155
177
  "[bing] Cloudflare challenge detected — content blocked in headless",
156
178
  );
157
- return ""; // Let caller throw → triggers visible auto-retry
179
+ env.blockedBy = "cloudflare";
180
+ return { answer: "" }; // Let caller throw → triggers visible auto-retry
158
181
  }
159
182
 
160
183
  console.error(
@@ -175,7 +198,7 @@ async function extractFromIframes(mainTab) {
175
198
  );
176
199
  if (!funFrame) {
177
200
  console.error("[bing] No copilot.fun iframe target found");
178
- return "";
201
+ return { answer: "" };
179
202
  }
180
203
 
181
204
  // Try to extract from the nested blob iframe (rarely succeeds due to Cloudflare)
@@ -190,7 +213,7 @@ async function extractFromIframes(mainTab) {
190
213
  console.error(
191
214
  `[bing] DOM extraction succeeded (${innerText.length} chars)`,
192
215
  );
193
- return innerText;
216
+ return { answer: innerText };
194
217
  }
195
218
 
196
219
  console.error(
@@ -199,7 +222,7 @@ async function extractFromIframes(mainTab) {
199
222
  } catch (e) {
200
223
  console.error(`[bing] DOM extraction failed: ${e.message}`);
201
224
  }
202
- return "";
225
+ return { answer: "" };
203
226
  }
204
227
 
205
228
  // ============================================================================
@@ -214,6 +237,20 @@ async function main() {
214
237
  validateQuery(args, USAGE);
215
238
 
216
239
  const { query, tabPrefix, short } = parseArgs(args);
240
+ const startTime = Date.now();
241
+ const mode =
242
+ process.env.GREEDY_SEARCH_VISIBLE === "1" ? "visible" : "headless";
243
+
244
+ // Lightweight envelope — no extra CDP calls, just tracks what we already know
245
+ const env = {
246
+ engine: "bing",
247
+ mode,
248
+ clipboardEmpty: null,
249
+ fallbackUsed: null,
250
+ blockedBy: null,
251
+ verificationResult: null,
252
+ inputReady: null,
253
+ };
217
254
 
218
255
  try {
219
256
  // Only refresh page list when creating a fresh tab (no prefix provided)
@@ -240,6 +277,7 @@ async function main() {
240
277
 
241
278
  // Handle verification challenges (Cloudflare Turnstile, Microsoft auth, etc.)
242
279
  const verifyResult = await handleVerification(tab, cdp, 10000);
280
+ env.verificationResult = verifyResult;
243
281
  if (verifyResult === "needs-human") {
244
282
  throw new Error(
245
283
  "Copilot verification required — please solve it manually in the browser window",
@@ -272,6 +310,7 @@ async function main() {
272
310
 
273
311
  // Wait for React app to mount input (up to 15s, longer after verification)
274
312
  const inputReady = await waitForSelector(tab, S.input, 15000, 500);
313
+ env.inputReady = inputReady;
275
314
  await new Promise((r) => setTimeout(r, jitter(300)));
276
315
 
277
316
  if (!inputReady) {
@@ -293,24 +332,42 @@ async function main() {
293
332
  `document.querySelector('${S.input}')?.dispatchEvent(new KeyboardEvent('keydown',{key:'Enter',bubbles:true,keyCode:13})), 'ok'`,
294
333
  ]);
295
334
 
335
+ // Post-submit: Bing's antibot sometimes appears AFTER the query is sent.
336
+ // Fire-and-forget verification check — runs in parallel with stream wait.
337
+ // Zero added latency to the critical path; if it finds and clicks the
338
+ // challenge, the stream unblocks instead of timing out at 60s.
339
+ setTimeout(() => {
340
+ handleVerification(tab, cdp, 10000)
341
+ .then((v) => {
342
+ if (v === "clicked") {
343
+ console.error("[bing] Post-submit verification clicked");
344
+ env.verificationResult = "post-submit-clicked";
345
+ }
346
+ })
347
+ .catch(() => {});
348
+ }, 2000);
349
+
296
350
  // Wait for Bing Copilot's response to finish streaming before extracting.
297
351
  await waitForStreamComplete(tab, { timeout: 60000, minLength: 50 });
298
352
 
299
- const { answer, sources } = await extractAnswer(tab);
353
+ const { answer, sources } = await extractAnswer(tab, env);
300
354
  if (!answer)
301
355
  throw new Error("No answer extracted — Copilot may not have responded");
302
356
 
303
357
  const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
304
358
  () => "",
305
359
  );
360
+ env.durationMs = Date.now() - startTime;
306
361
  outputJson({
307
362
  query,
308
363
  url: finalUrl,
309
364
  answer: formatAnswer(answer, short),
310
365
  sources,
366
+ _envelope: buildEnvelope(env),
311
367
  });
312
368
  } catch (e) {
313
- handleError(e);
369
+ env.durationMs = Date.now() - startTime;
370
+ handleError(e, buildEnvelope(env));
314
371
  }
315
372
  }
316
373
 
@@ -552,10 +552,45 @@ export function outputJson(data) {
552
552
  }
553
553
 
554
554
  /**
555
- * Handle and output error, then exit
555
+ * Build a lightweight result envelope from data already collected during extraction.
556
+ * Zero additional CDP calls — everything here is already known.
557
+ * @param {object} fields
558
+ * @returns {object}
559
+ */
560
+ export function buildEnvelope({
561
+ engine,
562
+ mode = "headless",
563
+ clipboardEmpty = null,
564
+ fallbackUsed = null,
565
+ blockedBy = null,
566
+ verificationResult = null,
567
+ inputReady = null,
568
+ durationMs = null,
569
+ } = {}) {
570
+ return {
571
+ engine,
572
+ mode,
573
+ clipboardEmpty,
574
+ fallbackUsed,
575
+ blockedBy,
576
+ verificationResult,
577
+ inputReady,
578
+ durationMs,
579
+ };
580
+ }
581
+
582
+ /**
583
+ * Handle and output error, then exit.
584
+ * If an envelope is provided, writes it to stdout as JSON so the runner
585
+ * can parse structured diagnostics even on failure.
556
586
  * @param {Error} error - Error to handle
587
+ * @param {object} [envelope] - Optional envelope object
557
588
  */
558
- export function handleError(error) {
589
+ export function handleError(error, envelope = null) {
590
+ if (envelope) {
591
+ const out = JSON.stringify({ _envelope: envelope, error: error.message });
592
+ process.stdout.write(`${out}\n`);
593
+ }
559
594
  process.stderr.write(`Error: ${error.message}\n`);
560
595
  process.exit(1);
561
596
  }