@apmantza/greedysearch-pi 1.8.9 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +503 -446
- package/bin/cdp.mjs +15 -2
- package/bin/search.mjs +679 -668
- package/extractors/bing-copilot.mjs +68 -11
- package/extractors/common.mjs +37 -2
- package/extractors/consent.mjs +388 -294
- package/extractors/gemini.mjs +217 -150
- package/extractors/perplexity.mjs +56 -7
- package/package.json +1 -1
- package/src/search/chrome.mjs +62 -1
- package/src/search/constants.mjs +1 -6
- package/src/search/engines.mjs +76 -67
- package/src/search/file-sources.mjs +46 -0
- package/src/search/query.mjs +49 -0
- package/src/search/recovery.mjs +20 -1
- package/src/search/sources.mjs +37 -21
- package/src/search/synthesis.mjs +27 -16
- package/extractors/bing-aria.mjs +0 -539
- package/extractors/google-search.mjs +0 -234
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
// Errors go to stderr only — stdout is always clean JSON for piping.
|
|
11
11
|
|
|
12
12
|
import {
|
|
13
|
+
buildEnvelope,
|
|
13
14
|
cdp,
|
|
14
15
|
formatAnswer,
|
|
15
16
|
getOrOpenTab,
|
|
@@ -36,22 +37,39 @@ const GLOBAL_VAR = "__bingClipboard";
|
|
|
36
37
|
// Bing Copilot-specific helpers
|
|
37
38
|
// ============================================================================
|
|
38
39
|
|
|
39
|
-
async function extractAnswer(tab) {
|
|
40
|
+
async function extractAnswer(tab, env) {
|
|
41
|
+
// In headless mode: snap the accessibility tree before spending ~18s on
|
|
42
|
+
// clipboard polls. Copilot loads its input fine in headless but renders
|
|
43
|
+
// responses behind a Cloudflare-protected iframe — detecting that here
|
|
44
|
+
// fast-fails to the visible retry instead of burning all the poll time.
|
|
45
|
+
if (process.env.GREEDY_SEARCH_HEADLESS === "1") {
|
|
46
|
+
const snap = await cdp(["snap", tab]).catch(() => "");
|
|
47
|
+
if (/cloudflare|challenge|security check/i.test(snap)) {
|
|
48
|
+
console.error("[bing] Cloudflare challenge in snap — fast-failing to visible retry");
|
|
49
|
+
env.blockedBy = "cloudflare";
|
|
50
|
+
throw new Error("Cloudflare challenge detected — headless blocked");
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
40
54
|
// Wait for the assistant copy button to exist. On fresh Copilot
|
|
41
55
|
// sessions the answer text can render before the button handler is
|
|
42
56
|
// fully hydrated. Wait for the button + a small hydration delay.
|
|
43
|
-
|
|
57
|
+
// 2s is enough — in headless mode the CF snap check above ensures we only reach here
|
|
58
|
+
// on a clean response, where the button appears within ~1s.
|
|
59
|
+
await waitForCopyButton(tab, S.copyButton, { timeout: 2000 }).catch(
|
|
44
60
|
() => null,
|
|
45
61
|
);
|
|
46
62
|
// Give React time to hydrate the click handler on the button
|
|
47
63
|
await new Promise((r) => setTimeout(r, 800));
|
|
48
64
|
|
|
49
65
|
let answer = await clickCopyAndPollClipboard(tab, 5000);
|
|
66
|
+
let clipboardEmpty = !answer;
|
|
50
67
|
|
|
51
68
|
// Retry once if clipboard is empty (Copilot might be slow to wire the handler)
|
|
52
69
|
if (!answer) {
|
|
53
70
|
console.error("[bing] Clipboard empty, retrying copy/poll...");
|
|
54
71
|
answer = await clickCopyAndPollClipboard(tab, 8000);
|
|
72
|
+
clipboardEmpty = !answer;
|
|
55
73
|
}
|
|
56
74
|
|
|
57
75
|
// DOM fallback: visible Copilot can render a valid response while the copy
|
|
@@ -59,17 +77,21 @@ async function extractAnswer(tab) {
|
|
|
59
77
|
// answer from page text before treating this as a headless/iframe block.
|
|
60
78
|
if (!answer) {
|
|
61
79
|
answer = await extractFromVisibleDom(tab);
|
|
80
|
+
if (answer) env.fallbackUsed = "visibleDom";
|
|
62
81
|
}
|
|
63
82
|
|
|
64
83
|
// DOM fallback: if clipboard still empty, extract text directly from response DOM.
|
|
65
84
|
// This handles headless mode where Copilot renders the AI reply inside nested
|
|
66
85
|
// iframes (copilot.microsoft.com → copilot.fun → blob:…) and hides the copy button.
|
|
67
86
|
if (!answer) {
|
|
68
|
-
|
|
87
|
+
const iframeResult = await extractFromIframes(tab, env);
|
|
88
|
+
answer = iframeResult.answer;
|
|
89
|
+
if (answer) env.fallbackUsed = "iframeDom";
|
|
69
90
|
}
|
|
70
91
|
|
|
71
92
|
if (!answer) throw new Error("Clipboard interceptor returned empty text");
|
|
72
93
|
|
|
94
|
+
env.clipboardEmpty = clipboardEmpty;
|
|
73
95
|
const sources = parseSourcesFromMarkdown(answer);
|
|
74
96
|
return { answer: answer.trim(), sources };
|
|
75
97
|
}
|
|
@@ -136,7 +158,7 @@ async function extractFromVisibleDom(tab) {
|
|
|
136
158
|
* Returns `{ answer }`, where `answer` is the extracted text or an empty string on failure (caller falls through to error
|
|
137
159
|
* which triggers the visible Chrome auto-retry in search.mjs).
|
|
138
160
|
*/
|
|
139
|
-
async function extractFromIframes(mainTab) {
|
|
161
|
+
async function extractFromIframes(mainTab, env) {
|
|
140
162
|
try {
|
|
141
163
|
// Check if the AI copy button exists — if it does, we're in visible mode
|
|
142
164
|
// and clipboard should have worked. This is a different issue.
|
|
@@ -145,7 +167,7 @@ async function extractFromIframes(mainTab) {
|
|
|
145
167
|
mainTab,
|
|
146
168
|
`!!document.querySelector('${S.copyButton}')`,
|
|
147
169
|
]).catch(() => "false");
|
|
148
|
-
if (hasCopyBtn === "true") return ""; // not a headless/iframe issue
|
|
170
|
+
if (hasCopyBtn === "true") return { answer: "" }; // not a headless/iframe issue
|
|
149
171
|
|
|
150
172
|
// Check for Cloudflare challenge in the accessibility tree.
|
|
151
173
|
// If present, Copilot content is blocked entirely — no DOM extraction possible.
|
|
@@ -154,7 +176,8 @@ async function extractFromIframes(mainTab) {
|
|
|
154
176
|
console.error(
|
|
155
177
|
"[bing] Cloudflare challenge detected — content blocked in headless",
|
|
156
178
|
);
|
|
157
|
-
|
|
179
|
+
env.blockedBy = "cloudflare";
|
|
180
|
+
return { answer: "" }; // Let caller throw → triggers visible auto-retry
|
|
158
181
|
}
|
|
159
182
|
|
|
160
183
|
console.error(
|
|
@@ -175,7 +198,7 @@ async function extractFromIframes(mainTab) {
|
|
|
175
198
|
);
|
|
176
199
|
if (!funFrame) {
|
|
177
200
|
console.error("[bing] No copilot.fun iframe target found");
|
|
178
|
-
return "";
|
|
201
|
+
return { answer: "" };
|
|
179
202
|
}
|
|
180
203
|
|
|
181
204
|
// Try to extract from the nested blob iframe (rarely succeeds due to Cloudflare)
|
|
@@ -190,7 +213,7 @@ async function extractFromIframes(mainTab) {
|
|
|
190
213
|
console.error(
|
|
191
214
|
`[bing] DOM extraction succeeded (${innerText.length} chars)`,
|
|
192
215
|
);
|
|
193
|
-
return innerText;
|
|
216
|
+
return { answer: innerText };
|
|
194
217
|
}
|
|
195
218
|
|
|
196
219
|
console.error(
|
|
@@ -199,7 +222,7 @@ async function extractFromIframes(mainTab) {
|
|
|
199
222
|
} catch (e) {
|
|
200
223
|
console.error(`[bing] DOM extraction failed: ${e.message}`);
|
|
201
224
|
}
|
|
202
|
-
return "";
|
|
225
|
+
return { answer: "" };
|
|
203
226
|
}
|
|
204
227
|
|
|
205
228
|
// ============================================================================
|
|
@@ -214,6 +237,20 @@ async function main() {
|
|
|
214
237
|
validateQuery(args, USAGE);
|
|
215
238
|
|
|
216
239
|
const { query, tabPrefix, short } = parseArgs(args);
|
|
240
|
+
const startTime = Date.now();
|
|
241
|
+
const mode =
|
|
242
|
+
process.env.GREEDY_SEARCH_VISIBLE === "1" ? "visible" : "headless";
|
|
243
|
+
|
|
244
|
+
// Lightweight envelope — no extra CDP calls, just tracks what we already know
|
|
245
|
+
const env = {
|
|
246
|
+
engine: "bing",
|
|
247
|
+
mode,
|
|
248
|
+
clipboardEmpty: null,
|
|
249
|
+
fallbackUsed: null,
|
|
250
|
+
blockedBy: null,
|
|
251
|
+
verificationResult: null,
|
|
252
|
+
inputReady: null,
|
|
253
|
+
};
|
|
217
254
|
|
|
218
255
|
try {
|
|
219
256
|
// Only refresh page list when creating a fresh tab (no prefix provided)
|
|
@@ -240,6 +277,7 @@ async function main() {
|
|
|
240
277
|
|
|
241
278
|
// Handle verification challenges (Cloudflare Turnstile, Microsoft auth, etc.)
|
|
242
279
|
const verifyResult = await handleVerification(tab, cdp, 10000);
|
|
280
|
+
env.verificationResult = verifyResult;
|
|
243
281
|
if (verifyResult === "needs-human") {
|
|
244
282
|
throw new Error(
|
|
245
283
|
"Copilot verification required — please solve it manually in the browser window",
|
|
@@ -272,6 +310,7 @@ async function main() {
|
|
|
272
310
|
|
|
273
311
|
// Wait for React app to mount input (up to 15s, longer after verification)
|
|
274
312
|
const inputReady = await waitForSelector(tab, S.input, 15000, 500);
|
|
313
|
+
env.inputReady = inputReady;
|
|
275
314
|
await new Promise((r) => setTimeout(r, jitter(300)));
|
|
276
315
|
|
|
277
316
|
if (!inputReady) {
|
|
@@ -293,24 +332,42 @@ async function main() {
|
|
|
293
332
|
`document.querySelector('${S.input}')?.dispatchEvent(new KeyboardEvent('keydown',{key:'Enter',bubbles:true,keyCode:13})), 'ok'`,
|
|
294
333
|
]);
|
|
295
334
|
|
|
335
|
+
// Post-submit: Bing's antibot sometimes appears AFTER the query is sent.
|
|
336
|
+
// Fire-and-forget verification check — runs in parallel with stream wait.
|
|
337
|
+
// Zero added latency to the critical path; if it finds and clicks the
|
|
338
|
+
// challenge, the stream unblocks instead of timing out at 60s.
|
|
339
|
+
setTimeout(() => {
|
|
340
|
+
handleVerification(tab, cdp, 10000)
|
|
341
|
+
.then((v) => {
|
|
342
|
+
if (v === "clicked") {
|
|
343
|
+
console.error("[bing] Post-submit verification clicked");
|
|
344
|
+
env.verificationResult = "post-submit-clicked";
|
|
345
|
+
}
|
|
346
|
+
})
|
|
347
|
+
.catch(() => {});
|
|
348
|
+
}, 2000);
|
|
349
|
+
|
|
296
350
|
// Wait for Bing Copilot's response to finish streaming before extracting.
|
|
297
351
|
await waitForStreamComplete(tab, { timeout: 60000, minLength: 50 });
|
|
298
352
|
|
|
299
|
-
const { answer, sources } = await extractAnswer(tab);
|
|
353
|
+
const { answer, sources } = await extractAnswer(tab, env);
|
|
300
354
|
if (!answer)
|
|
301
355
|
throw new Error("No answer extracted — Copilot may not have responded");
|
|
302
356
|
|
|
303
357
|
const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
|
|
304
358
|
() => "",
|
|
305
359
|
);
|
|
360
|
+
env.durationMs = Date.now() - startTime;
|
|
306
361
|
outputJson({
|
|
307
362
|
query,
|
|
308
363
|
url: finalUrl,
|
|
309
364
|
answer: formatAnswer(answer, short),
|
|
310
365
|
sources,
|
|
366
|
+
_envelope: buildEnvelope(env),
|
|
311
367
|
});
|
|
312
368
|
} catch (e) {
|
|
313
|
-
|
|
369
|
+
env.durationMs = Date.now() - startTime;
|
|
370
|
+
handleError(e, buildEnvelope(env));
|
|
314
371
|
}
|
|
315
372
|
}
|
|
316
373
|
|
package/extractors/common.mjs
CHANGED
|
@@ -552,10 +552,45 @@ export function outputJson(data) {
|
|
|
552
552
|
}
|
|
553
553
|
|
|
554
554
|
/**
|
|
555
|
-
*
|
|
555
|
+
* Build a lightweight result envelope from data already collected during extraction.
|
|
556
|
+
* Zero additional CDP calls — everything here is already known.
|
|
557
|
+
* @param {object} fields
|
|
558
|
+
* @returns {object}
|
|
559
|
+
*/
|
|
560
|
+
export function buildEnvelope(fields = {}) {
  // Purpose: assemble the diagnostic envelope attached to extractor output.
  // Only the known keys below are copied; unknown keys on `fields` are ignored.
  //
  // `fields` may be omitted or null. A destructuring default only covers
  // `undefined`, so null is normalised here instead of throwing a TypeError.
  const source = fields == null ? {} : fields;

  const {
    // Defaulted to null (not left undefined) so the key survives
    // JSON.stringify and the serialized envelope always has the same shape.
    engine = null,
    mode = "headless",
    clipboardEmpty = null,
    fallbackUsed = null,
    blockedBy = null,
    verificationResult = null,
    inputReady = null,
    durationMs = null,
  } = source;

  // Returns a fresh plain object; the caller's `fields` object is not mutated.
  return {
    engine,
    mode,
    clipboardEmpty,
    fallbackUsed,
    blockedBy,
    verificationResult,
    inputReady,
    durationMs,
  };
}
|
|
581
|
+
|
|
582
|
+
/**
|
|
583
|
+
* Handle and output error, then exit.
|
|
584
|
+
* If an envelope is provided, writes it to stdout as JSON so the runner
|
|
585
|
+
* can parse structured diagnostics even on failure.
|
|
556
586
|
* @param {Error} error - Error to handle
|
|
587
|
+
* @param {object} [envelope] - Optional envelope object
|
|
557
588
|
*/
|
|
558
|
-
export function handleError(error) {
|
|
589
|
+
export function handleError(error, envelope = null) {
  // Purpose: report a failure and terminate the process with exit code 1.
  //   error    - the thrown value; normally an Error, but any value is accepted
  //   envelope - optional diagnostics object; when truthy it is written to
  //              stdout as one JSON line alongside the error message
  //
  // A rejection is not guaranteed to be an Error: a thrown string, undefined
  // or null would otherwise print "undefined" or crash here with a TypeError,
  // hiding the real failure. Fall back to String(error) in those cases.
  const message =
    error != null && typeof error.message === "string"
      ? error.message
      : String(error);

  if (envelope) {
    const out = JSON.stringify({ _envelope: envelope, error: message });
    process.stdout.write(`${out}\n`);
  }

  process.stderr.write(`Error: ${message}\n`);
  // NOTE(review): process.exit() does not wait for pending asynchronous
  // stdout/stderr writes; confirm the envelope line is never truncated on
  // platforms where piped stdout is asynchronous.
  process.exit(1);
}
|