@apmantza/greedysearch-pi 1.8.7 → 1.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,529 +1,561 @@
1
- // extractors/common.mjs — shared utilities for CDP-based extractors
2
- // Extracts common patterns: cdp wrapper, tab management, clipboard interception, source parsing
3
-
4
- import { randomInt } from "node:crypto";
5
- import { spawn } from "node:child_process";
6
- import { dirname, join } from "node:path";
7
- import { fileURLToPath } from "node:url";
8
-
9
- const __dir = dirname(fileURLToPath(import.meta.url));
10
- const CDP = join(__dir, "..", "bin", "cdp.mjs");
11
-
12
- // ============================================================================
13
- // CDP wrapper
14
- // ============================================================================
15
-
/**
 * Execute a CDP command through the cdp.mjs CLI.
 *
 * Spawns `cdp.mjs` with the current Node binary, buffers stdout/stderr, and
 * settles on whichever happens first: process close, spawn failure, or timeout.
 *
 * @param {string[]} args - Command arguments
 * @param {number} [timeoutMs=30000] - Timeout in milliseconds
 * @returns {Promise<string>} Trimmed stdout of the command
 * @throws {Error} On timeout, spawn failure, or non-zero exit (the message is
 *   the trimmed stderr when there is any)
 */
export function cdp(args, timeoutMs = 30000) {
  return new Promise((resolve, reject) => {
    const proc = spawn(process.execPath, [CDP, ...args], {
      stdio: ["ignore", "pipe", "pipe"],
    });
    let out = "";
    let err = "";
    proc.stdout.on("data", (d) => (out += d));
    proc.stderr.on("data", (d) => (err += d));

    const timer = setTimeout(() => {
      proc.kill();
      reject(new Error(`cdp timeout: ${args[0]}`));
    }, timeoutMs);

    // A failed spawn emits "error"; without a listener it would be thrown as
    // an unhandled event instead of rejecting this promise.
    proc.on("error", (spawnErr) => {
      clearTimeout(timer);
      reject(spawnErr);
    });

    proc.on("close", (code) => {
      clearTimeout(timer);
      if (code === 0) resolve(out.trim());
      else reject(new Error(err.trim() || `cdp exit ${code}`));
    });
  });
}
42
-
43
- // ============================================================================
44
- // Tab management
45
- // ============================================================================
46
-
/**
 * Get an existing tab by prefix or open a new one.
 *
 * When no prefix is given, a fresh `about:blank` target is created through
 * the first listed tab, the tab list is refreshed, and stealth patches are
 * registered on the new tab without waiting for them to finish.
 *
 * @param {string|null} tabPrefix - Existing tab prefix, or null to create new
 * @returns {Promise<string>} Tab identifier (first 8 chars of the target id)
 * @throws {Error} If no tab is listed or Target.createTarget returns no targetId
 */
export async function getOrOpenTab(tabPrefix) {
  if (tabPrefix) return tabPrefix;

  // Always open a fresh tab to avoid SPA navigation issues
  const list = await cdp(["list"]);
  const anchor = list.split("\n")[0]?.slice(0, 8);
  if (!anchor) {
    throw new Error(
      "No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?",
    );
  }

  const raw = await cdp([
    "evalraw",
    anchor,
    "Target.createTarget",
    '{"url":"about:blank"}',
  ]);

  // Fail with a descriptive message rather than an opaque TypeError/SyntaxError
  let targetId;
  try {
    targetId = JSON.parse(raw)?.targetId;
  } catch {
    throw new Error(
      `Target.createTarget returned unparseable output: ${raw.slice(0, 200)}`,
    );
  }
  if (typeof targetId !== "string" || targetId === "") {
    throw new Error("Target.createTarget did not return a targetId");
  }

  await cdp(["list"]); // refresh cache
  const tid = targetId.slice(0, 8);
  // Inject stealth patches for anti-detection coverage (both headless + visible).
  // Deliberately best-effort and not awaited: a failure must not block the tab.
  injectHeadlessStealth(tid).catch(() => {});
  return tid;
}
74
-
75
- // ============================================================================
76
- // Clipboard interception (for extractors that use copy-to-clipboard)
77
- // ============================================================================
78
-
/**
 * Inject clipboard interceptor to capture text when copy buttons are clicked.
 * Each engine uses a unique global variable to avoid conflicts.
 *
 * The page script wraps `navigator.clipboard.writeText` and
 * `navigator.clipboard.write`, stores the last text/plain payload on
 * `window[globalVar]`, and then forwards to the original method.
 *
 * @param {string} tab - Tab identifier
 * @param {string} globalVar - Global variable name (e.g., '__pplxClipboard', '__geminiClipboard')
 * @returns {Promise<void>}
 * @throws {Error} If `globalVar` is not a plain JavaScript identifier
 */
export async function injectClipboardInterceptor(tab, globalVar) {
  // globalVar is spliced into page code, so only accept a bare identifier
  if (typeof globalVar !== "string" || !/^[A-Za-z_$][\w$]*$/.test(globalVar)) {
    throw new Error(`Invalid clipboard global variable name: ${globalVar}`);
  }

  // Wrapped in an IIFE so the helper bindings stay local: top-level const
  // declarations could collide if the script is evaluated twice in one page.
  const code = `
    (function() {
      window.${globalVar} = null;
      const origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
      navigator.clipboard.writeText = function(text) {
        window.${globalVar} = text;
        return origWriteText(text);
      };
      const origWrite = navigator.clipboard.write.bind(navigator.clipboard);
      navigator.clipboard.write = async function(items) {
        try {
          for (const item of items) {
            if (item.types && item.types.includes('text/plain')) {
              const blob = await item.getType('text/plain');
              window.${globalVar} = await blob.text();
              break;
            }
          }
        } catch(e) {}
        return origWrite(items);
      };
    })();
  `;
  await cdp(["eval", tab, code]);
}
109
-
110
- // ============================================================================
111
- // Headless stealth injection
112
- // ============================================================================
113
-
/**
 * Register anti-detection patches for a tab.
 * Based on production patterns from screenshotrun.com.
 *
 * The page script is installed with `Page.addScriptToEvaluateOnNewDocument`.
 * It masks automation globals, spoofs several navigator properties and WebGL
 * vendor strings, adds canvas noise, and wraps console methods.
 *
 * @param {string} tab - Tab identifier
 * @returns {Promise<void>}
 */
export async function injectHeadlessStealth(tab) {
  const code = `
    (function() {
      // ── Runtime.enable / CDP detection masking ──────────────
      try { delete window.__REBROWSER_RUNTIME_ENABLE; } catch(_) {}
      try { delete window.__REBROWSER_DEVTOOLS; } catch(_) {}
      try { delete window.__nightmare; } catch(_) {}
      try { delete window.__phantom; } catch(_) {}
      try { delete window.callPhantom; } catch(_) {}
      try { delete window._phantom; } catch(_) {}
      try { delete window.Buffer; } catch(_) {}

      Object.defineProperty(navigator, 'webdriver', { get: () => false });
      Object.defineProperty(navigator, 'plugins', {
        get: () => {
          var p = [
            { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
            { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
            { name: 'Native Client', filename: 'internal-nacl-plugin', description: '' },
          ];
          p.length = 3;
          return p;
        },
      });
      Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
      if (!window.chrome) {
        window.chrome = {
          runtime: { connect: () => {}, sendMessage: () => {}, onMessage: { addListener: () => {} } },
          loadTimes: () => ({}),
          csi: () => ({}),
        };
      }
      var origQuery = navigator.permissions?.query;
      if (origQuery) {
        navigator.permissions.query = function(params) {
          if (params.name === 'notifications') return Promise.resolve({ state: Notification.permission });
          // Must be invoked on the Permissions object; an unbound call throws "Illegal invocation"
          return origQuery.call(navigator.permissions, params);
        };
      }
      try {
        var getParam = WebGLRenderingContext.prototype.getParameter;
        WebGLRenderingContext.prototype.getParameter = function(p) {
          if (p === 37445) return 'Intel Inc.';
          if (p === 37446) return 'Intel Iris OpenGL Engine';
          return getParam.call(this, p);
        };
      } catch(_) {}
      Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 });
      Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });

      // ── Canvas fingerprint noise ─────────────────────────
      // Headless rendering engines produce slightly different canvas output
      // than headed Chrome. Subtle noise breaks hash-based fingerprinting.
      try {
        var origFill = CanvasRenderingContext2D.prototype.fillText;
        CanvasRenderingContext2D.prototype.fillText = function() {
          this.globalAlpha = 1 - (Math.random() * 0.001);
          return origFill.apply(this, arguments);
        };
      } catch(_) {}
      try {
        var origStroke = CanvasRenderingContext2D.prototype.strokeText;
        CanvasRenderingContext2D.prototype.strokeText = function() {
          this.globalAlpha = 1 - (Math.random() * 0.001);
          return origStroke.apply(this, arguments);
        };
      } catch(_) {}
      try {
        var origToDataURL = HTMLCanvasElement.prototype.toDataURL;
        HTMLCanvasElement.prototype.toDataURL = function() {
          // Noise is best-effort: a failure here must not break the page's own toDataURL call
          try {
            var ctx = this.getContext('2d');
            if (ctx) {
              // Add 1px noise pixel in corner (invisible but changes hash)
              var imgData = ctx.getImageData(0, 0, 1, 1);
              if (imgData) {
                imgData.data[0] ^= (Math.random() < 0.5 ? 1 : 0);
                ctx.putImageData(imgData, 0, 0);
              }
            }
          } catch(_) {}
          return origToDataURL.apply(this, arguments);
        };
      } catch(_) {}

      // ── CDP Runtime serialization guard ──────────────────
      // Sites detect CDP by putting a getter on Error.prototype.stack
      // and checking if console.log triggers it (only happens when
      // Runtime domain is enabled). We monkey-patch console methods to
      // replace Error arguments with fresh Errors before they reach CDP.
      try {
        var _origLog = console.log, _origError = console.error,
            _origWarn = console.warn, _origDebug = console.debug,
            _origInfo = console.info;
        var _safeArg = function(a) {
          if (a instanceof Error) {
            try { return new Error(a.message); } catch(_) { return a; }
          }
          return a;
        };
        console.log = function() { return _origLog.apply(console, Array.prototype.map.call(arguments, _safeArg)); };
        console.error = function() { return _origError.apply(console, Array.prototype.map.call(arguments, _safeArg)); };
        console.warn = function() { return _origWarn.apply(console, Array.prototype.map.call(arguments, _safeArg)); };
        console.debug = function() { return _origDebug.apply(console, Array.prototype.map.call(arguments, _safeArg)); };
        console.info = function() { return _origInfo.apply(console, Array.prototype.map.call(arguments, _safeArg)); };
      } catch(_) {}
    })();
  `;
  await cdp([
    "evalraw",
    tab,
    "Page.addScriptToEvaluateOnNewDocument",
    JSON.stringify({ source: code }),
  ]);
}
228
-
229
- // ============================================================================
230
- // Source extraction from markdown
231
- // ============================================================================
232
-
/**
 * Parse Markdown links from text to extract sources.
 *
 * Only `[title](http(s)://url)` links are collected. Results are deduplicated
 * by URL (first occurrence wins) and capped at 10 entries.
 *
 * @param {string} text - Text containing Markdown links like [title](url)
 * @returns {Array<{title: string, url: string}>} Extracted sources
 */
export function parseSourcesFromMarkdown(text) {
  // Empty, null or undefined input has no links; avoid throwing on matchAll
  if (!text) return [];

  const linkPattern = /\[([^\]]+)\]\((https?:\/\/[^\s)]+)\)/g;
  const seenUrls = new Set();
  const sources = [];
  for (const [, title, url] of text.matchAll(linkPattern)) {
    if (seenUrls.has(url)) continue;
    seenUrls.add(url);
    sources.push({ title, url });
    if (sources.length === 10) break;
  }
  return sources;
}
244
-
245
- // ============================================================================
246
- // Timing constants
247
- // ============================================================================
248
-
/**
 * Base settle/poll delays in milliseconds shared by the extractors.
 */
export const TIMING = {
  // settle after navigation
  postNav: 800,
  // settle after slower navigations (Bing, Gemini)
  postNavSlow: 1200,
  // settle after a UI click
  postClick: 300,
  // settle after typing
  postType: 300,
  // polling interval when waiting for input to appear
  inputPoll: 400,
  // polling interval when waiting for copy button
  copyPoll: 600,
  // settle after a verification challenge completes
  afterVerify: 1500,
};
258
-
259
- // ============================================================================
260
- // Copy button polling
261
- // ============================================================================
262
-
/**
 * Wait for a copy button to appear in the DOM.
 *
 * Polls with a jittered `TIMING.copyPoll` delay. A failed eval counts as
 * "not found" and polling continues until the deadline.
 *
 * @param {string} tab - Tab identifier
 * @param {string} selector - CSS selector for the copy button
 * @param {object} [options]
 * @param {number} [options.timeout=60000] - Max wait in ms
 * @param {Function} [options.onPoll] - Optional callback on each poll tick (e.g. scroll);
 *   receives the 1-based tick number, and its errors are ignored
 * @returns {Promise<void>}
 * @throws {Error} If the selector does not match before the timeout
 */
export async function waitForCopyButton(tab, selector, options = {}) {
  const { timeout = 60000, onPoll } = options;
  const deadline = Date.now() + timeout;
  // JSON.stringify yields a valid JS string literal, so selectors containing
  // quotes (e.g. [aria-label='Copy']) no longer break the evaluated expression.
  const probe = `!!document.querySelector(${JSON.stringify(selector)})`;
  let tick = 0;

  while (Date.now() < deadline) {
    await new Promise((r) => setTimeout(r, jitter(TIMING.copyPoll)));
    if (onPoll) {
      try {
        // try/await also tolerates a synchronous callback, unlike `.catch`
        await onPoll(++tick);
      } catch {
        // poll callbacks are best-effort
      }
    }
    const found = await cdp(["eval", tab, probe]).catch(() => "false");
    if (found === "true") return;
  }

  throw new Error(
    `Copy button ('${selector}') did not appear within ${timeout}ms`,
  );
}
290
-
291
- // ============================================================================
292
- // Timing jitter
293
- // ============================================================================
294
-
/**
 * Add ±20% random jitter to a timing value to avoid bot-like regularity.
 * Also floors at 50ms minimum to prevent micro-polling.
 *
 * The previous implementation used a 0.4 factor, i.e. ±40%, which contradicted
 * this contract and the ±20% in-page `_jitter` helpers.
 *
 * @param {number} ms - Base interval in milliseconds
 * @returns {number} Jittered interval, never below 50
 */
export function jitter(ms) {
  // Math.abs keeps the randomInt bounds ordered even for a negative input
  const spread = Math.floor(Math.abs(ms) * 0.2);
  // randomInt's upper bound is exclusive, hence the +1
  const offset = randomInt(-spread, spread + 1);
  return Math.max(50, Math.round(ms + offset));
}
306
-
307
- // ============================================================================
308
- // Stream completion detection
309
- // ============================================================================
310
-
/**
 * Wait for generation/streaming to complete by monitoring text length stability.
 *
 * Uses a SINGLE eval whose promise is awaited — the stability polling runs
 * entirely inside the browser context, so no CDP traffic is emitted per tick.
 *
 * @param {string} tab - Tab identifier
 * @param {object} [options] - Options
 * @param {number} [options.timeout=20000] - Maximum wait time in ms
 * @param {number} [options.interval=600] - Polling interval in ms (jittered ±20% in-page)
 * @param {number} [options.stableRounds=3] - Required stable rounds to consider complete
 * @param {string} [options.selector='document.body'] - JavaScript EXPRESSION (not a CSS
 *   selector) evaluated in the page on every tick; its `innerText` length is monitored
 * @param {number} [options.minLength=0] - Lengths below this are ignored for stability
 * @returns {Promise<number>} Final text length
 * @throws {Error} If the text never reaches `minLength` before the timeout, or the eval fails
 */
export async function waitForStreamComplete(tab, options = {}) {
  const {
    timeout = 20000,
    interval = 600,
    stableRounds = 3,
    selector = "document.body",
    minLength = 0,
  } = options;

  // Single self-contained eval — polling runs in the browser, no CDP chatter.
  // The promise resolves when stability is reached or timeout expires.
  const code = String.raw`
    new Promise((resolve, reject) => {
      const _deadline = Date.now() + ${timeout};
      const _baseInterval = ${interval};
      const _stableRounds = ${stableRounds};
      const _minLength = ${minLength};
      let _lastLen = -1;
      let _stableCount = 0;

      function _jitter(ms) {
        return Math.max(50, ms + (Math.random() * ms * 0.4 - ms * 0.2));
      }

      function _poll() {
        try {
          // Re-query DOM each tick; the element may not exist at eval start
          const el = ${selector};
          const cur = el?.innerText?.length ?? 0;
          if (cur >= _minLength) {
            if (cur === _lastLen) {
              _stableCount++;
              if (_stableCount >= _stableRounds) { resolve(cur); return; }
            } else {
              _lastLen = cur;
              _stableCount = 0;
            }
          }
          if (Date.now() < _deadline) {
            setTimeout(_poll, _jitter(_baseInterval));
          } else {
            if (_lastLen >= _minLength) { resolve(_lastLen); }
            else { reject(new Error('Generation did not stabilise within ${timeout}ms')); }
          }
        } catch(e) { reject(e); }
      }

      _poll();
    })
  `;

  // The CLI timeout gets 10s of headroom over the in-page deadline so the
  // page-side promise settles first.
  const lenStr = await cdp(["eval", tab, code], timeout + 10000);
  const currentLen = Number.parseInt(lenStr, 10) || 0;

  if (currentLen >= minLength) return currentLen;
  throw new Error(`Generation did not stabilise within ${timeout}ms`);
}
386
-
387
- // ============================================================================
388
- // DOM selector waiting (single eval, no polling)
389
- // ============================================================================
390
-
/**
 * Wait for a CSS selector to appear in the DOM using a single self-contained
 * eval. The polling loop runs in the browser — no CDP traffic until done.
 *
 * @param {string} tab - Tab identifier
 * @param {string} selector - CSS selector to wait for
 * @param {number} [timeoutMs=15000] - Maximum wait time in ms
 * @param {number} [interval=500] - Base polling interval in ms (jittered ±20%)
 * @returns {Promise<boolean>} true if selector was found, false on timeout or
 *   when the in-page query throws (e.g. invalid selector)
 */
export async function waitForSelector(
  tab,
  selector,
  timeoutMs = 15000,
  interval = 500,
) {
  // JSON.stringify produces a valid JS string literal, so selectors containing
  // single quotes (e.g. [data-testid='copy']) cannot break out of the string.
  const selectorLiteral = JSON.stringify(selector);
  const code = String.raw`
    new Promise((resolve) => {
      const _deadline = Date.now() + ${timeoutMs};
      const _baseInterval = ${interval};

      function _jitter(ms) {
        return Math.max(50, ms + (Math.random() * ms * 0.4 - ms * 0.2));
      }

      function _poll() {
        try {
          if (document.querySelector(${selectorLiteral})) { resolve(true); return; }
          if (Date.now() < _deadline) { setTimeout(_poll, _jitter(_baseInterval)); }
          else { resolve(false); }
        } catch(_) { resolve(false); }
      }

      _poll();
    })
  `;

  // 5s of headroom so the in-page deadline fires before the CLI timeout
  const result = await cdp(["eval", tab, code], timeoutMs + 5000);
  return result === "true";
}
431
-
432
- // ============================================================================
433
- // CLI argument parsing
434
- // ============================================================================
435
-
/**
 * Resolve a `--stdin` placeholder before argument parsing.
 *
 * When `--stdin` is present, all of stdin is read as UTF-8, trimmed, and put
 * in the flag's position, so queries and prompts never show up in the process
 * table. Without the flag the input array is returned untouched.
 * Call this before parseArgs().
 *
 * @param {string[]} args - process.argv.slice(2)
 * @returns {Promise<string[]>} modified args with query in place of --stdin
 */
export async function prepareArgs(args) {
  const flagAt = args.indexOf("--stdin");
  if (flagAt === -1) return args;

  const piped = await new Promise((resolve) => {
    const { stdin } = process;
    let buffered = "";
    stdin.setEncoding("utf8");
    stdin.on("data", (chunk) => {
      buffered += chunk;
    });
    stdin.on("end", () => {
      resolve(buffered.trim());
    });
  });

  // Swap the flag for the text that was read (parseArgs will pick it up as the query)
  return args.map((arg, i) => (i === flagAt ? piped : arg));
}
460
-
/**
 * Parse standard extractor CLI arguments.
 *
 * Recognises `--short`, `--tab <prefix>` and `--locale <locale>`; everything
 * else is joined with single spaces to form the query. A value flag given as
 * the last argument (no value after it) yields `null`, matching the documented
 * return type.
 *
 * @param {string[]} args - process.argv.slice(2)
 * @returns {{query: string, tabPrefix: string|null, short: boolean, locale: string|null}}
 */
export function parseArgs(args) {
  const short = args.includes("--short");
  let rest = args.filter((a) => a !== "--short");

  // Removes the first occurrence of `flag` plus the token after it and
  // returns that token, or null when the flag or its value is absent.
  const takeValue = (flag) => {
    const at = rest.indexOf(flag);
    if (at === -1) return null;
    const value = rest[at + 1] ?? null;
    rest = rest.filter((_, i) => i !== at && i !== at + 1);
    return value;
  };

  const tabPrefix = takeValue("--tab");
  const locale = takeValue("--locale");

  return { query: rest.join(" "), tabPrefix, short, locale };
}
485
-
/**
 * Guard for extractor entry points: when no arguments were passed, or the
 * first argument is `--help`, print the usage text to stderr and exit with
 * status 1. Otherwise return normally.
 *
 * @param {string[]} args - process.argv.slice(2)
 * @param {string} usage - Usage string for error message
 */
export function validateQuery(args, usage) {
  const wantsHelp = args[0] === "--help";
  if (args.length && !wantsHelp) return;

  process.stderr.write(usage);
  process.exit(1);
}
497
-
498
- // ============================================================================
499
- // Output formatting
500
- // ============================================================================
501
-
/**
 * Shorten an answer for short mode.
 *
 * In short mode, answers longer than `maxLen` are cut at `maxLen` characters,
 * the trailing partial word (if any whitespace precedes it) is dropped, and
 * an ellipsis is appended. Otherwise the answer is returned unchanged.
 *
 * @param {string} answer - Full answer text
 * @param {boolean} short - Whether to truncate
 * @param {number} [maxLen=300] - Maximum length in short mode
 * @returns {string} Formatted answer
 */
export function formatAnswer(answer, short, maxLen = 300) {
  if (short && !(answer.length <= maxLen)) {
    const head = answer.slice(0, maxLen);
    const atWordBoundary = head.replace(/\s+\S*$/, "");
    return `${atWordBoundary}…`;
  }
  return answer;
}
513
-
/**
 * Write a value to stdout as pretty-printed (2-space) JSON followed by a newline.
 *
 * @param {object} data - Data to output
 */
export function outputJson(data) {
  const serialized = JSON.stringify(data, null, 2);
  process.stdout.write(`${serialized}\n`);
}
521
-
/**
 * Report an error on stderr as `Error: <message>` and exit with status 1.
 *
 * Accepts any thrown value: Error instances and objects with a `message` use
 * that message; anything else (strings, undefined, ...) is stringified rather
 * than printing "undefined" or crashing with a TypeError.
 *
 * @param {Error|unknown} error - Error (or other thrown value) to handle
 */
export function handleError(error) {
  const message = String(error?.message ?? error);
  process.stderr.write(`Error: ${message}\n`);
  process.exit(1);
}
1
+ // extractors/common.mjs — shared utilities for CDP-based extractors
2
+ // Extracts common patterns: cdp wrapper, tab management, clipboard interception, source parsing
3
+
4
+ import { randomInt } from "node:crypto";
5
+ import { spawn } from "node:child_process";
6
+ import { dirname, join } from "node:path";
7
+ import { fileURLToPath } from "node:url";
8
+
9
+ const __dir = dirname(fileURLToPath(import.meta.url));
10
+ const CDP = join(__dir, "..", "bin", "cdp.mjs");
11
+
12
+ // ============================================================================
13
+ // CDP wrapper
14
+ // ============================================================================
15
+
/**
 * Execute a CDP command through the cdp.mjs CLI.
 *
 * Spawns `cdp.mjs` with the current Node binary, buffers stdout/stderr, and
 * settles on whichever happens first: process close, spawn failure, or timeout.
 *
 * @param {string[]} args - Command arguments
 * @param {number} [timeoutMs=30000] - Timeout in milliseconds
 * @returns {Promise<string>} Trimmed stdout of the command
 * @throws {Error} On timeout, spawn failure, or non-zero exit (the message is
 *   the trimmed stderr when there is any)
 */
export function cdp(args, timeoutMs = 30000) {
  return new Promise((resolve, reject) => {
    const proc = spawn(process.execPath, [CDP, ...args], {
      stdio: ["ignore", "pipe", "pipe"],
    });
    let out = "";
    let err = "";
    proc.stdout.on("data", (d) => (out += d));
    proc.stderr.on("data", (d) => (err += d));

    const timer = setTimeout(() => {
      proc.kill();
      reject(new Error(`cdp timeout: ${args[0]}`));
    }, timeoutMs);

    // A failed spawn emits "error"; without a listener it would be thrown as
    // an unhandled event instead of rejecting this promise.
    proc.on("error", (spawnErr) => {
      clearTimeout(timer);
      reject(spawnErr);
    });

    proc.on("close", (code) => {
      clearTimeout(timer);
      if (code === 0) resolve(out.trim());
      else reject(new Error(err.trim() || `cdp exit ${code}`));
    });
  });
}
42
+
43
+ // ============================================================================
44
+ // Tab management
45
+ // ============================================================================
46
+
/**
 * Get an existing tab by prefix or open a new one.
 *
 * When no prefix is given, a fresh `about:blank` target is created through
 * the first listed tab, the tab list is refreshed, and stealth patches are
 * registered on the new tab without waiting for them to finish.
 *
 * @param {string|null} tabPrefix - Existing tab prefix, or null to create new
 * @returns {Promise<string>} Tab identifier (first 8 chars of the target id)
 * @throws {Error} If no tab is listed or Target.createTarget returns no targetId
 */
export async function getOrOpenTab(tabPrefix) {
  if (tabPrefix) return tabPrefix;

  // Always open a fresh tab to avoid SPA navigation issues
  const list = await cdp(["list"]);
  const anchor = list.split("\n")[0]?.slice(0, 8);
  if (!anchor) {
    throw new Error(
      "No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?",
    );
  }

  const raw = await cdp([
    "evalraw",
    anchor,
    "Target.createTarget",
    '{"url":"about:blank"}',
  ]);

  // Fail with a descriptive message rather than an opaque TypeError/SyntaxError
  let targetId;
  try {
    targetId = JSON.parse(raw)?.targetId;
  } catch {
    throw new Error(
      `Target.createTarget returned unparseable output: ${raw.slice(0, 200)}`,
    );
  }
  if (typeof targetId !== "string" || targetId === "") {
    throw new Error("Target.createTarget did not return a targetId");
  }

  await cdp(["list"]); // refresh cache
  const tid = targetId.slice(0, 8);
  // Inject stealth patches for anti-detection coverage (both headless + visible).
  // Deliberately best-effort and not awaited: a failure must not block the tab.
  injectHeadlessStealth(tid).catch(() => {});
  return tid;
}
74
+
75
+ // ============================================================================
76
+ // Clipboard interception (for extractors that use copy-to-clipboard)
77
+ // ============================================================================
78
+
/**
 * Inject clipboard interceptor to capture text when copy buttons are clicked.
 * Each engine uses a unique global variable to avoid conflicts.
 *
 * The page script wraps `navigator.clipboard.writeText` and
 * `navigator.clipboard.write`, stores the last text/plain payload on
 * `window[globalVar]`, and then forwards to the original method.
 *
 * @param {string} tab - Tab identifier
 * @param {string} globalVar - Global variable name (e.g., '__pplxClipboard', '__geminiClipboard')
 * @returns {Promise<void>}
 * @throws {Error} If `globalVar` is not a plain JavaScript identifier
 */
export async function injectClipboardInterceptor(tab, globalVar) {
  // globalVar is spliced into page code, so only accept a bare identifier
  if (typeof globalVar !== "string" || !/^[A-Za-z_$][\w$]*$/.test(globalVar)) {
    throw new Error(`Invalid clipboard global variable name: ${globalVar}`);
  }

  // Wrapped in an IIFE so the helper bindings stay local: top-level const
  // declarations could collide if the script is evaluated twice in one page.
  const code = `
    (function() {
      window.${globalVar} = null;
      const origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
      navigator.clipboard.writeText = function(text) {
        window.${globalVar} = text;
        return origWriteText(text);
      };
      const origWrite = navigator.clipboard.write.bind(navigator.clipboard);
      navigator.clipboard.write = async function(items) {
        try {
          for (const item of items) {
            if (item.types && item.types.includes('text/plain')) {
              const blob = await item.getType('text/plain');
              window.${globalVar} = await blob.text();
              break;
            }
          }
        } catch(e) {}
        return origWrite(items);
      };
    })();
  `;
  await cdp(["eval", tab, code]);
}
109
+
110
+ // ============================================================================
111
+ // Headless stealth injection
112
+ // ============================================================================
113
+
/**
 * Register anti-detection patches for a tab.
 * Based on production patterns from screenshotrun.com.
 *
 * The page script is installed with `Page.addScriptToEvaluateOnNewDocument`.
 * It masks automation globals, spoofs several navigator properties and WebGL
 * vendor strings, adds canvas noise, and wraps console methods.
 *
 * @param {string} tab - Tab identifier
 * @returns {Promise<void>}
 */
export async function injectHeadlessStealth(tab) {
  const code = `
    (function() {
      // ── Runtime.enable / CDP detection masking ──────────────
      try { delete window.__REBROWSER_RUNTIME_ENABLE; } catch(_) {}
      try { delete window.__REBROWSER_DEVTOOLS; } catch(_) {}
      try { delete window.__nightmare; } catch(_) {}
      try { delete window.__phantom; } catch(_) {}
      try { delete window.callPhantom; } catch(_) {}
      try { delete window._phantom; } catch(_) {}
      try { delete window.Buffer; } catch(_) {}

      Object.defineProperty(navigator, 'webdriver', { get: () => false });
      Object.defineProperty(navigator, 'plugins', {
        get: () => {
          var p = [
            { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
            { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
            { name: 'Native Client', filename: 'internal-nacl-plugin', description: '' },
          ];
          p.length = 3;
          return p;
        },
      });
      Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
      if (!window.chrome) {
        window.chrome = {
          runtime: { connect: () => {}, sendMessage: () => {}, onMessage: { addListener: () => {} } },
          loadTimes: () => ({}),
          csi: () => ({}),
        };
      }
      var origQuery = navigator.permissions?.query;
      if (origQuery) {
        navigator.permissions.query = function(params) {
          if (params.name === 'notifications') return Promise.resolve({ state: Notification.permission });
          // Must be invoked on the Permissions object; an unbound call throws "Illegal invocation"
          return origQuery.call(navigator.permissions, params);
        };
      }
      try {
        var getParam = WebGLRenderingContext.prototype.getParameter;
        WebGLRenderingContext.prototype.getParameter = function(p) {
          if (p === 37445) return 'Intel Inc.';
          if (p === 37446) return 'Intel Iris OpenGL Engine';
          return getParam.call(this, p);
        };
      } catch(_) {}
      Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 });
      Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });

      // ── Canvas fingerprint noise ─────────────────────────
      // Headless rendering engines produce slightly different canvas output
      // than headed Chrome. Subtle noise breaks hash-based fingerprinting.
      try {
        var origFill = CanvasRenderingContext2D.prototype.fillText;
        CanvasRenderingContext2D.prototype.fillText = function() {
          this.globalAlpha = 1 - (Math.random() * 0.001);
          return origFill.apply(this, arguments);
        };
      } catch(_) {}
      try {
        var origStroke = CanvasRenderingContext2D.prototype.strokeText;
        CanvasRenderingContext2D.prototype.strokeText = function() {
          this.globalAlpha = 1 - (Math.random() * 0.001);
          return origStroke.apply(this, arguments);
        };
      } catch(_) {}
      try {
        var origToDataURL = HTMLCanvasElement.prototype.toDataURL;
        HTMLCanvasElement.prototype.toDataURL = function() {
          // Noise is best-effort: a failure here must not break the page's own toDataURL call
          try {
            var ctx = this.getContext('2d');
            if (ctx) {
              // Add 1px noise pixel in corner (invisible but changes hash)
              var imgData = ctx.getImageData(0, 0, 1, 1);
              if (imgData) {
                imgData.data[0] ^= (Math.random() < 0.5 ? 1 : 0);
                ctx.putImageData(imgData, 0, 0);
              }
            }
          } catch(_) {}
          return origToDataURL.apply(this, arguments);
        };
      } catch(_) {}

      // ── CDP Runtime serialization guard ──────────────────
      // Sites detect CDP by putting a getter on Error.prototype.stack
      // and checking if console.log triggers it (only happens when
      // Runtime domain is enabled). We monkey-patch console methods to
      // replace Error arguments with fresh Errors before they reach CDP.
      try {
        var _origLog = console.log, _origError = console.error,
            _origWarn = console.warn, _origDebug = console.debug,
            _origInfo = console.info;
        var _safeArg = function(a) {
          if (a instanceof Error) {
            try { return new Error(a.message); } catch(_) { return a; }
          }
          return a;
        };
        console.log = function() { return _origLog.apply(console, Array.prototype.map.call(arguments, _safeArg)); };
        console.error = function() { return _origError.apply(console, Array.prototype.map.call(arguments, _safeArg)); };
        console.warn = function() { return _origWarn.apply(console, Array.prototype.map.call(arguments, _safeArg)); };
        console.debug = function() { return _origDebug.apply(console, Array.prototype.map.call(arguments, _safeArg)); };
        console.info = function() { return _origInfo.apply(console, Array.prototype.map.call(arguments, _safeArg)); };
      } catch(_) {}
    })();
  `;
  await cdp([
    "evalraw",
    tab,
    "Page.addScriptToEvaluateOnNewDocument",
    JSON.stringify({ source: code }),
  ]);
}
228
+
229
+ // ============================================================================
230
+ // Source extraction from markdown
231
+ // ============================================================================
232
+
/**
 * Parse Markdown links from text to extract sources.
 *
 * Hand-rolled scanner (no regex backtracking) for `[title](http(s)://url)`
 * links. A title is the text between a "[" and the FIRST following "]", which
 * must be directly followed by "(". The previous scanner searched for the
 * first "](" instead, so a bracketed aside before a link leaked into the
 * title ("[a] foo [b](url)" produced the title "a] foo [b").
 *
 * Results are deduplicated by URL (first occurrence wins) and capped at 10.
 *
 * @param {string} text - Text containing Markdown links like [title](url)
 * @returns {Array<{title: string, url: string}>} Extracted sources
 */
export function parseSourcesFromMarkdown(text) {
  if (!text) return [];

  const sources = [];
  const seenUrls = new Set();
  let idx = 0;

  while (idx < text.length && sources.length < 10) {
    const openBracket = text.indexOf("[", idx);
    if (openBracket === -1) break;
    const closeBracket = text.indexOf("]", openBracket + 1);
    if (closeBracket === -1) break; // no "]" left, so no further link can form

    // Default resume point when this "]" does not complete a link
    idx = closeBracket + 1;

    if (closeBracket === openBracket + 1) continue; // empty title: "[]"
    if (text[closeBracket + 1] !== "(") continue; // "]" not followed by "("

    // Find the closing paren; whitespace inside the URL invalidates the link
    const openParen = closeBracket + 2;
    let closeParen = -1;
    for (let p = openParen; p < text.length; p++) {
      const ch = text[p];
      if (ch === ")") {
        closeParen = p;
        break;
      }
      if (/\s/.test(ch)) break;
    }
    if (closeParen === -1) continue;

    const url = text.slice(openParen, closeParen);
    // Require an http(s) scheme followed by at least one character
    if (!/^https?:\/\/./i.test(url)) continue;

    idx = closeParen + 1;
    if (!seenUrls.has(url)) {
      seenUrls.add(url);
      sources.push({ title: text.slice(openBracket + 1, closeBracket), url });
    }
  }

  return sources;
}
274
+
275
+ // ============================================================================
276
+ // Timing constants
277
+ // ============================================================================
278
+
279
/**
 * Base delays (milliseconds) used by the extractors when pausing or polling.
 */
export const TIMING = {
  /** Settle time after a navigation. */
  postNav: 800,
  /** Settle time after slower navigations (Bing, Gemini). */
  postNavSlow: 1200,
  /** Settle time after a UI click. */
  postClick: 300,
  /** Settle time after typing. */
  postType: 300,
  /** Polling interval while waiting for an input to appear. */
  inputPoll: 400,
  /** Polling interval while waiting for a copy button. */
  copyPoll: 600,
  /** Settle time after a verification challenge completes. */
  afterVerify: 1500,
};
288
+
289
+ // ============================================================================
290
+ // Copy button polling
291
+ // ============================================================================
292
+
293
/**
 * Wait for a copy button to appear in the DOM.
 *
 * Each round sleeps for a jittered `TIMING.copyPoll` interval, runs the
 * optional `onPoll` callback, then asks the page (via `cdp eval`) whether
 * `document.querySelector(selector)` matches anything.
 *
 * @param {string} tab - Tab identifier
 * @param {string} selector - CSS selector for the copy button
 * @param {object} [options]
 * @param {number} [options.timeout=60000] - Max wait in ms
 * @param {Function} [options.onPoll] - Optional callback (sync or async) run on
 *   each poll tick with the 1-based tick number (e.g. to scroll); its errors
 *   are ignored
 * @returns {Promise<void>} Resolves once the selector matches
 * @throws {Error} If the selector does not match before the timeout elapses
 */
export async function waitForCopyButton(tab, selector, options = {}) {
  const { timeout = 60000, onPoll } = options;
  const deadline = Date.now() + timeout;

  // JSON.stringify produces a correctly escaped JS string literal, so
  // selectors containing quotes (e.g. button[aria-label='Copy']) no longer
  // break the evaluated expression.
  const probe = `!!document.querySelector(${JSON.stringify(String(selector))})`;

  let tick = 0;
  while (Date.now() < deadline) {
    await new Promise((r) => setTimeout(r, jitter(TIMING.copyPoll)));

    if (onPoll) {
      try {
        // await handles both sync and async callbacks
        await onPoll(++tick);
      } catch (_) {
        // onPoll is a best-effort hook; a failure must not abort the wait
      }
    }

    // A failed eval is treated as "not found yet" and retried next round.
    const found = await cdp(["eval", tab, probe]).catch(() => "false");
    if (found === "true") return;
  }

  throw new Error(
    `Copy button ('${selector}') did not appear within ${timeout}ms`,
  );
}
320
+
321
+ // ============================================================================
322
+ // Timing jitter
323
+ // ============================================================================
324
+
325
/**
 * Add ±20% random jitter to a timing value to avoid bot-like regularity.
 * The result is rounded to a whole number of milliseconds and floored at
 * 50ms to prevent micro-polling.
 *
 * @param {number} ms - Base interval in milliseconds
 * @returns {number} Jittered interval (integer, never below 50)
 */
export function jitter(ms) {
  // Half-width of the jitter window: 20% of the base on either side.
  // Math.abs keeps randomInt's (min < max) contract intact for negative input.
  const spread = Math.floor(Math.abs(ms) * 0.2);
  // randomInt's upper bound is exclusive, hence the +1.
  const offset = randomInt(-spread, spread + 1);
  return Math.max(50, Math.round(ms + offset));
}
336
+
337
+ // ============================================================================
338
+ // Stream completion detection
339
+ // ============================================================================
340
+
341
/**
 * Wait for generation/streaming to complete by monitoring text length stability.
 *
 * Uses a SINGLE `cdp eval` call: the stability polling runs entirely inside
 * the browser context as a Promise, so no further CDP traffic is emitted
 * while waiting.
 *
 * The in-page loop reads `innerText.length` of the monitored element on each
 * tick. Once the length is at least `minLength` and has stayed unchanged for
 * `stableRounds` consecutive ticks, the promise resolves with that length.
 * When the deadline passes first, it resolves with the last observed length
 * if that reached `minLength`, and rejects otherwise.
 *
 * @param {string} tab - Tab identifier
 * @param {object} [options] - Options
 * @param {number} [options.timeout=20000] - Maximum wait time in ms
 * @param {number} [options.interval=600] - Polling interval in ms (jittered ±20% in the page)
 * @param {number} [options.stableRounds=3] - Required stable rounds to consider complete
 * @param {string} [options.selector='document.body'] - JavaScript expression,
 *   evaluated in the page on every tick, that yields the element to monitor
 * @param {number} [options.minLength=0] - Minimum text length before stability counts
 * @returns {Promise<number>} Final text length
 * @throws {Error} If the text never reaches `minLength`, or the eval fails
 */
export async function waitForStreamComplete(tab, options = {}) {
  // Numeric options are spliced into page code AND used in Node arithmetic.
  // Coerce them so a string such as "5000" cannot turn `timeout + 10000`
  // into string concatenation; non-numeric values fall back to the default.
  const toNumber = (value, fallback) => {
    if (value == null) return fallback;
    const n = Number(value);
    return Number.isFinite(n) ? n : fallback;
  };

  const timeout = toNumber(options.timeout, 20000);
  const interval = toNumber(options.interval, 600);
  const stableRounds = toNumber(options.stableRounds, 3);
  const minLength = toNumber(options.minLength, 0);
  const { selector = "document.body" } = options;

  // Single self-contained eval — polling runs in the browser, no CDP chatter.
  // The promise resolves when stability is reached or timeout expires.
  const code = String.raw`
    new Promise((resolve, reject) => {
      const _deadline = Date.now() + ${timeout};
      const _baseInterval = ${interval};
      const _stableRounds = ${stableRounds};
      const _minLength = ${minLength};
      let _lastLen = -1;
      let _stableCount = 0;

      function _jitter(ms) {
        return Math.max(50, ms + (Math.random() * ms * 0.4 - ms * 0.2));
      }

      function _poll() {
        try {
          // Re-query DOM each tick — element may not exist at eval start
          const el = ${selector};
          const cur = el?.innerText?.length ?? 0;
          if (cur >= _minLength) {
            if (cur === _lastLen) {
              _stableCount++;
              if (_stableCount >= _stableRounds) { resolve(cur); return; }
            } else {
              _lastLen = cur;
              _stableCount = 0;
            }
          }
          if (Date.now() < _deadline) {
            setTimeout(_poll, _jitter(_baseInterval));
          } else {
            if (_lastLen >= _minLength) { resolve(_lastLen); }
            else { reject(new Error('Generation did not stabilise within ${timeout}ms')); }
          }
        } catch(e) { reject(e); }
      }

      _poll();
    })
  `;

  // One eval call; the CLI timeout is padded by 10s so the in-page deadline
  // fires before the cdp wrapper gives up.
  const lenStr = await cdp(["eval", tab, code], timeout + 10000);
  const currentLen = Number.parseInt(lenStr, 10) || 0;

  if (currentLen >= minLength) return currentLen;
  throw new Error(`Generation did not stabilise within ${timeout}ms`);
}
416
+
417
+ // ============================================================================
418
+ // DOM selector waiting (single eval, no polling)
419
+ // ============================================================================
420
+
421
/**
 * Wait for a CSS selector to appear in the DOM using a single self-contained
 * eval. The polling loop runs in the browser, so no further CDP traffic is
 * emitted until it finishes.
 *
 * @param {string} tab - Tab identifier
 * @param {string} selector - CSS selector to wait for
 * @param {number} [timeoutMs=15000] - Maximum wait time in ms
 * @param {number} [interval=500] - Base polling interval in ms (jittered ±20%)
 * @returns {Promise<boolean>} true if selector was found, false on timeout or
 *   when querying the selector throws in the page
 */
export async function waitForSelector(
  tab,
  selector,
  timeoutMs = 15000,
  interval = 500,
) {
  // Coerce numeric inputs: they are used both in page code and in the Node
  // side `timeout + 5000`, where a string would concatenate instead of add.
  const toNumber = (value, fallback) => {
    const n = Number(value);
    return Number.isFinite(n) ? n : fallback;
  };
  const timeout = toNumber(timeoutMs, 15000);
  const baseInterval = toNumber(interval, 500);

  // Embed the selector as a properly escaped JS string literal so selectors
  // containing quotes or backslashes cannot break the evaluated code.
  const selectorLiteral = JSON.stringify(String(selector));

  const code = String.raw`
    new Promise((resolve) => {
      const _deadline = Date.now() + ${timeout};
      const _baseInterval = ${baseInterval};

      function _jitter(ms) {
        return Math.max(50, ms + (Math.random() * ms * 0.4 - ms * 0.2));
      }

      function _poll() {
        try {
          if (document.querySelector(${selectorLiteral})) { resolve(true); return; }
          if (Date.now() < _deadline) { setTimeout(_poll, _jitter(_baseInterval)); }
          else { resolve(false); }
        } catch(_) { resolve(false); }
      }

      _poll();
    })
  `;

  // CLI timeout is padded by 5s so the in-page deadline fires first.
  const result = await cdp(["eval", tab, code], timeout + 5000);
  return result === "true";
}
461
+
462
+ // ============================================================================
463
+ // CLI argument parsing
464
+ // ============================================================================
465
+
466
/**
 * Resolve a `--stdin` placeholder in the argument list.
 *
 * When `--stdin` is present, the whole of standard input is read, trimmed,
 * and substituted for the flag, so the query/prompt never has to appear in
 * the command line (and therefore in the process table). Without the flag
 * the input array is returned untouched. Call this before parseArgs().
 *
 * @param {string[]} args - process.argv.slice(2)
 * @returns {Promise<string[]>} args with the stdin text in place of --stdin
 */
export async function prepareArgs(args) {
  const flagPos = args.indexOf("--stdin");
  if (flagPos < 0) return args;

  const chunks = [];
  process.stdin.setEncoding("utf8");
  const query = await new Promise((resolve) => {
    process.stdin.on("data", (chunk) => chunks.push(chunk));
    process.stdin.on("end", () => resolve(chunks.join("").trim()));
  });

  // Swap the flag for the text; parseArgs will pick it up as the query.
  return args.map((arg, i) => (i === flagPos ? query : arg));
}
490
+
491
/**
 * Parse standard extractor CLI arguments.
 *
 * Recognised flags: `--short` (boolean), `--tab <prefix>`, `--locale <code>`.
 * Everything left over is joined with single spaces to form the query.
 * A value flag given without a following value yields `null`.
 *
 * @param {string[]} args - process.argv.slice(2)
 * @returns {{query: string, tabPrefix: string|null, short: boolean, locale: string|null}}
 */
export function parseArgs(args) {
  const short = args.includes("--short");
  // Work on a copy so the caller's array is never mutated.
  const rest = args.filter((arg) => arg !== "--short");

  // Remove the first occurrence of `flag` and its value from `rest`,
  // returning the value, or null when the flag or its value is absent.
  const takeOption = (flag) => {
    const flagIdx = rest.indexOf(flag);
    if (flagIdx === -1) return null;
    const [, value = null] = rest.splice(flagIdx, 2);
    return value;
  };

  const tabPrefix = takeOption("--tab");
  const locale = takeOption("--locale");

  const query = rest.join(" ");
  return { query, tabPrefix, short, locale };
}
515
+
516
/**
 * Guard for extractor entry points: when no arguments were supplied, or the
 * first argument is `--help`, print the usage text to stderr and terminate
 * the process with exit code 1. Otherwise do nothing.
 *
 * @param {string[]} args - process.argv.slice(2)
 * @param {string} usage - Usage text written verbatim to stderr
 */
export function validateQuery(args, usage) {
  const hasArgs = args.length > 0;
  const wantsHelp = args[0] === "--help";
  if (hasArgs && !wantsHelp) return;

  process.stderr.write(usage);
  process.exit(1);
}
527
+
528
+ // ============================================================================
529
+ // Output formatting
530
+ // ============================================================================
531
+
532
/**
 * Truncate an answer when short mode is enabled.
 *
 * In short mode, answers longer than `maxLen` are cut at the last space
 * within the first `maxLen` characters and suffixed with "…". If there is no
 * usable space, the text is cut at `maxLen` instead, taking care not to
 * leave half of a surrogate pair (e.g. a split emoji) at the end.
 *
 * @param {string} answer - Full answer text
 * @param {boolean} short - Whether to truncate
 * @param {number} [maxLen=300] - Maximum length in short mode
 * @returns {string} Formatted answer (returned unchanged when not truncated)
 */
export function formatAnswer(answer, short, maxLen = 300) {
  // Nothing to truncate for missing answers; avoids a TypeError on .length.
  if (!short || answer == null || answer.length <= maxLen) return answer;

  const truncated = answer.slice(0, maxLen);
  const lastSpace = truncated.lastIndexOf(" ");
  if (lastSpace > 0) return `${truncated.slice(0, lastSpace)}…`;

  // Hard cut: if the slice ends on a high surrogate, its low half was cut
  // off — drop the dangling half so the output stays well-formed.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  const safe = endsOnHighSurrogate ? truncated.slice(0, -1) : truncated;
  return `${safe}…`;
}
545
+
546
/**
 * Write a value to stdout as pretty-printed JSON (2-space indent) followed
 * by a newline.
 *
 * @param {object} data - Data to output
 */
export function outputJson(data) {
  const serialized = JSON.stringify(data, null, 2);
  process.stdout.write(serialized + "\n");
}
553
+
554
/**
 * Report an error on stderr as `Error: <message>` and terminate the process
 * with exit code 1.
 *
 * Accepts any thrown value: for objects with a `message` that message is
 * printed; anything else (strings, null, …) is stringified, so the output is
 * never `Error: undefined`.
 *
 * @param {Error|unknown} error - Error (or other thrown value) to report
 */
export function handleError(error) {
  const message = error?.message ?? String(error);
  process.stderr.write(`Error: ${message}\n`);
  process.exit(1);
}