@apmantza/greedysearch-pi 1.9.1 → 1.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -13
- package/README.md +11 -1
- package/bin/launch.mjs +2 -0
- package/bin/search.mjs +757 -674
- package/extractors/bing-copilot.mjs +490 -374
- package/extractors/common.mjs +703 -645
- package/extractors/consent.mjs +421 -388
- package/index.ts +2 -1
- package/package.json +8 -4
- package/skills/greedy-search/skill.md +5 -14
- package/src/search/research.mjs +1581 -0
- package/src/search/sources.mjs +26 -4
- package/src/search/synthesis-runner.mjs +52 -46
- package/src/tools/greedy-search-handler.ts +85 -13
- package/test.mjs +971 -534
package/extractors/common.mjs
CHANGED
|
@@ -1,645 +1,703 @@
|
|
|
1
|
-
// extractors/common.mjs — shared utilities for CDP-based extractors
|
|
2
|
-
// Extracts common patterns: cdp wrapper, tab management, clipboard interception, source parsing
|
|
3
|
-
|
|
4
|
-
import { randomInt } from "node:crypto";
|
|
5
|
-
import { spawn } from "node:child_process";
|
|
6
|
-
import { dirname, join } from "node:path";
|
|
7
|
-
import { fileURLToPath } from "node:url";
|
|
8
|
-
|
|
9
|
-
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
10
|
-
const CDP = join(__dir, "..", "bin", "cdp.mjs");
|
|
11
|
-
|
|
12
|
-
// ============================================================================
|
|
13
|
-
// CDP wrapper
|
|
14
|
-
// ============================================================================
|
|
15
|
-
|
|
16
|
-
/**
 * Execute a CDP command through the cdp.mjs CLI.
 *
 * Spawns the current Node binary with the CDP script followed by `args`,
 * collects stdout/stderr, and settles when the child closes.
 *
 * @param {string[]} args - Command arguments
 * @param {number} [timeoutMs=30000] - Timeout in milliseconds
 * @returns {Promise<string>} Trimmed stdout of the command
 * @throws {Error} Rejects with `cdp timeout: <args[0]>` when the timeout fires,
 *   with the trimmed stderr (or `cdp exit <code>`) on a non-zero exit, and with
 *   `cdp spawn failed: ...` when the child process cannot be started.
 */
export function cdp(args, timeoutMs = 30000) {
  return new Promise((resolve, reject) => {
    const proc = spawn(process.execPath, [CDP, ...args], {
      stdio: ["ignore", "pipe", "pipe"],
    });
    let out = "";
    let err = "";
    // Decode as UTF-8 at the stream level so a multi-byte character that is
    // split across two chunks is not turned into replacement characters.
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");
    proc.stdout.on("data", (chunk) => {
      out += chunk;
    });
    proc.stderr.on("data", (chunk) => {
      err += chunk;
    });
    const timer = setTimeout(() => {
      proc.kill();
      reject(new Error(`cdp timeout: ${args[0]}`));
    }, timeoutMs);
    // Without an 'error' listener a failed spawn is an unhandled event that
    // crashes the process instead of rejecting this promise.
    proc.on("error", (spawnErr) => {
      clearTimeout(timer);
      reject(
        new Error(`cdp spawn failed: ${spawnErr.message}`, { cause: spawnErr }),
      );
    });
    proc.on("close", (code) => {
      clearTimeout(timer);
      // After a timeout the promise is already rejected; settling again is a no-op.
      if (code === 0) resolve(out.trim());
      else reject(new Error(err.trim() || `cdp exit ${code}`));
    });
  });
}
|
|
42
|
-
|
|
43
|
-
// ============================================================================
|
|
44
|
-
// Tab management
|
|
45
|
-
// ============================================================================
|
|
46
|
-
|
|
47
|
-
/**
 * Get an existing tab by prefix or open a new one.
 *
 * When `tabPrefix` is truthy it is returned untouched. Otherwise a fresh
 * about:blank target is created through the first tab reported by `cdp list`,
 * the tab list is re-read, and the stealth script is registered on the new tab
 * before its identifier is returned.
 *
 * @param {string|null} tabPrefix - Existing tab prefix, or null to create new
 * @returns {Promise<string>} Tab identifier (first 8 characters of the target id)
 * @throws {Error} When no tab is listed or Target.createTarget returns no targetId
 */
export async function getOrOpenTab(tabPrefix) {
  if (tabPrefix) return tabPrefix;

  // Always open a fresh tab to avoid SPA navigation issues
  const list = await cdp(["list"]);
  const anchor = list.split("\n")[0].slice(0, 8);
  if (!anchor) {
    throw new Error(
      "No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?",
    );
  }

  const raw = await cdp([
    "evalraw",
    anchor,
    "Target.createTarget",
    '{"url":"about:blank"}',
  ]);
  let targetId;
  try {
    targetId = JSON.parse(raw)?.targetId;
  } catch (parseErr) {
    throw new Error(
      `Target.createTarget returned unparseable output: ${String(raw).slice(0, 200)}`,
      { cause: parseErr },
    );
  }
  if (typeof targetId !== "string" || targetId === "") {
    throw new Error(
      `Target.createTarget did not return a targetId: ${String(raw).slice(0, 200)}`,
    );
  }

  await cdp(["list"]); // refresh cache
  const tid = targetId.slice(0, 8);

  // Inject stealth patches for anti-detection coverage (both headless + visible).
  // Awaited so the script is registered before the caller navigates the tab;
  // still best-effort, so a failed injection does not fail tab creation.
  await injectHeadlessStealth(tid).catch(() => {});
  return tid;
}
|
|
74
|
-
|
|
75
|
-
// ============================================================================
|
|
76
|
-
// Clipboard interception (for extractors that use copy-to-clipboard)
|
|
77
|
-
// ============================================================================
|
|
78
|
-
|
|
79
|
-
/**
 * Inject clipboard interceptor to capture text when copy buttons are clicked.
 * Each engine uses a unique global variable to avoid conflicts.
 *
 * The injected script resets `window[globalVar]` to null and wraps
 * `navigator.clipboard.writeText` / `navigator.clipboard.write` so the copied
 * text/plain payload is mirrored into that global before the original method
 * runs. The script is wrapped in an IIFE so its helper bindings stay local and
 * the interceptor can be injected into the same page more than once.
 *
 * @param {string} tab - Tab identifier
 * @param {string} globalVar - Global variable name (e.g., '__pplxClipboard', '__geminiClipboard').
 *   Interpolated verbatim into the injected script, so it must be a trusted identifier.
 * @returns {Promise<void>}
 */
export async function injectClipboardInterceptor(tab, globalVar) {
  const code = `
    (function() {
      window.${globalVar} = null;
      const _origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
      navigator.clipboard.writeText = function(text) {
        window.${globalVar} = text;
        return _origWriteText(text);
      };
      const _origWrite = navigator.clipboard.write.bind(navigator.clipboard);
      navigator.clipboard.write = async function(items) {
        try {
          for (const item of items) {
            if (item.types && item.types.includes('text/plain')) {
              const blob = await item.getType('text/plain');
              window.${globalVar} = await blob.text();
              break;
            }
          }
        } catch(e) {}
        return _origWrite(items);
      };
    })();
  `;
  await cdp(["eval", tab, code]);
}
|
|
109
|
-
|
|
110
|
-
// ============================================================================
|
|
111
|
-
// Headless stealth injection
|
|
112
|
-
// ============================================================================
|
|
113
|
-
|
|
114
|
-
/**
|
|
115
|
-
* Inject anti-detection patches into a page in headless mode.
|
|
116
|
-
* Based on production patterns from screenshotrun.com.
|
|
117
|
-
*/
|
|
118
|
-
export async function injectHeadlessStealth(tab) {
|
|
119
|
-
const code = `
|
|
120
|
-
(function() {
|
|
121
|
-
// ── Runtime.enable / CDP detection masking ──────────────
|
|
122
|
-
try { delete window.__REBROWSER_RUNTIME_ENABLE; } catch(_) {}
|
|
123
|
-
try { delete window.__REBROWSER_DEVTOOLS; } catch(_) {}
|
|
124
|
-
try { delete window.__nightmare; } catch(_) {}
|
|
125
|
-
try { delete window.__phantom; } catch(_) {}
|
|
126
|
-
try { delete window.callPhantom; } catch(_) {}
|
|
127
|
-
try { delete window._phantom; } catch(_) {}
|
|
128
|
-
try { delete window.Buffer; } catch(_) {}
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
//
|
|
213
|
-
//
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
var
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
//
|
|
252
|
-
try {
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
// ============================================================================
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
*
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
"
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
//
|
|
388
|
-
//
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
*
|
|
402
|
-
* @param {
|
|
403
|
-
* @param {string}
|
|
404
|
-
* @
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
)
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
}
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
1
|
+
// extractors/common.mjs — shared utilities for CDP-based extractors
|
|
2
|
+
// Extracts common patterns: cdp wrapper, tab management, clipboard interception, source parsing
|
|
3
|
+
|
|
4
|
+
import { randomInt } from "node:crypto";
|
|
5
|
+
import { spawn } from "node:child_process";
|
|
6
|
+
import { dirname, join } from "node:path";
|
|
7
|
+
import { fileURLToPath } from "node:url";
|
|
8
|
+
|
|
9
|
+
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
10
|
+
const CDP = join(__dir, "..", "bin", "cdp.mjs");
|
|
11
|
+
|
|
12
|
+
// ============================================================================
|
|
13
|
+
// CDP wrapper
|
|
14
|
+
// ============================================================================
|
|
15
|
+
|
|
16
|
+
/**
 * Execute a CDP command through the cdp.mjs CLI.
 *
 * Spawns the current Node binary with the CDP script followed by `args`,
 * collects stdout/stderr, and settles when the child closes.
 *
 * @param {string[]} args - Command arguments
 * @param {number} [timeoutMs=30000] - Timeout in milliseconds
 * @returns {Promise<string>} Trimmed stdout of the command
 * @throws {Error} Rejects with `cdp timeout: <args[0]>` when the timeout fires,
 *   with the trimmed stderr (or `cdp exit <code>`) on a non-zero exit, and with
 *   `cdp spawn failed: ...` when the child process cannot be started.
 */
export function cdp(args, timeoutMs = 30000) {
  return new Promise((resolve, reject) => {
    const proc = spawn(process.execPath, [CDP, ...args], {
      stdio: ["ignore", "pipe", "pipe"],
    });
    let out = "";
    let err = "";
    // Decode as UTF-8 at the stream level so a multi-byte character that is
    // split across two chunks is not turned into replacement characters.
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");
    proc.stdout.on("data", (chunk) => {
      out += chunk;
    });
    proc.stderr.on("data", (chunk) => {
      err += chunk;
    });
    const timer = setTimeout(() => {
      proc.kill();
      reject(new Error(`cdp timeout: ${args[0]}`));
    }, timeoutMs);
    // Without an 'error' listener a failed spawn is an unhandled event that
    // crashes the process instead of rejecting this promise.
    proc.on("error", (spawnErr) => {
      clearTimeout(timer);
      reject(
        new Error(`cdp spawn failed: ${spawnErr.message}`, { cause: spawnErr }),
      );
    });
    proc.on("close", (code) => {
      clearTimeout(timer);
      // After a timeout the promise is already rejected; settling again is a no-op.
      if (code === 0) resolve(out.trim());
      else reject(new Error(err.trim() || `cdp exit ${code}`));
    });
  });
}
|
|
42
|
+
|
|
43
|
+
// ============================================================================
|
|
44
|
+
// Tab management
|
|
45
|
+
// ============================================================================
|
|
46
|
+
|
|
47
|
+
/**
 * Get an existing tab by prefix or open a new one.
 *
 * When `tabPrefix` is truthy it is returned untouched. Otherwise a fresh
 * about:blank target is created through the first tab reported by `cdp list`,
 * the tab list is re-read, and the stealth script is registered on the new tab
 * before its identifier is returned.
 *
 * @param {string|null} tabPrefix - Existing tab prefix, or null to create new
 * @returns {Promise<string>} Tab identifier (first 8 characters of the target id)
 * @throws {Error} When no tab is listed or Target.createTarget returns no targetId
 */
export async function getOrOpenTab(tabPrefix) {
  if (tabPrefix) return tabPrefix;

  // Always open a fresh tab to avoid SPA navigation issues
  const list = await cdp(["list"]);
  const anchor = list.split("\n")[0].slice(0, 8);
  if (!anchor) {
    throw new Error(
      "No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?",
    );
  }

  const raw = await cdp([
    "evalraw",
    anchor,
    "Target.createTarget",
    '{"url":"about:blank"}',
  ]);
  let targetId;
  try {
    targetId = JSON.parse(raw)?.targetId;
  } catch (parseErr) {
    throw new Error(
      `Target.createTarget returned unparseable output: ${String(raw).slice(0, 200)}`,
      { cause: parseErr },
    );
  }
  if (typeof targetId !== "string" || targetId === "") {
    throw new Error(
      `Target.createTarget did not return a targetId: ${String(raw).slice(0, 200)}`,
    );
  }

  await cdp(["list"]); // refresh cache
  const tid = targetId.slice(0, 8);

  // Inject stealth patches for anti-detection coverage (both headless + visible).
  // Awaited so the script is registered before the caller navigates the tab;
  // still best-effort, so a failed injection does not fail tab creation.
  await injectHeadlessStealth(tid).catch(() => {});
  return tid;
}
|
|
74
|
+
|
|
75
|
+
// ============================================================================
|
|
76
|
+
// Clipboard interception (for extractors that use copy-to-clipboard)
|
|
77
|
+
// ============================================================================
|
|
78
|
+
|
|
79
|
+
/**
 * Inject clipboard interceptor to capture text when copy buttons are clicked.
 * Each engine uses a unique global variable to avoid conflicts.
 *
 * The injected script resets `window[globalVar]` to null and wraps
 * `navigator.clipboard.writeText` / `navigator.clipboard.write` so the copied
 * text/plain payload is mirrored into that global before the original method
 * runs. The script is wrapped in an IIFE so its helper bindings stay local and
 * the interceptor can be injected into the same page more than once.
 *
 * @param {string} tab - Tab identifier
 * @param {string} globalVar - Global variable name (e.g., '__pplxClipboard', '__geminiClipboard').
 *   Interpolated verbatim into the injected script, so it must be a trusted identifier.
 * @returns {Promise<void>}
 */
export async function injectClipboardInterceptor(tab, globalVar) {
  const code = `
    (function() {
      window.${globalVar} = null;
      const _origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
      navigator.clipboard.writeText = function(text) {
        window.${globalVar} = text;
        return _origWriteText(text);
      };
      const _origWrite = navigator.clipboard.write.bind(navigator.clipboard);
      navigator.clipboard.write = async function(items) {
        try {
          for (const item of items) {
            if (item.types && item.types.includes('text/plain')) {
              const blob = await item.getType('text/plain');
              window.${globalVar} = await blob.text();
              break;
            }
          }
        } catch(e) {}
        return _origWrite(items);
      };
    })();
  `;
  await cdp(["eval", tab, code]);
}
|
|
109
|
+
|
|
110
|
+
// ============================================================================
|
|
111
|
+
// Headless stealth injection
|
|
112
|
+
// ============================================================================
|
|
113
|
+
|
|
114
|
+
/**
 * Inject anti-detection patches into a page in headless mode.
 * Based on production patterns from screenshotrun.com.
 *
 * Registers a script through Page.addScriptToEvaluateOnNewDocument that
 * overrides navigator/window/screen properties, wraps canvas, WebGL,
 * permissions and console methods, and masks the wrappers behind a
 * native-looking Function.prototype.toString.
 *
 * NOTE(review): the toDataURL wrapper calls getContext('2d') on the canvas;
 * on a canvas that has no context yet this presumably creates one — confirm
 * whether that interferes with pages that later request a WebGL context.
 *
 * @param {string} tab - Tab identifier
 * @returns {Promise<void>}
 */
export async function injectHeadlessStealth(tab) {
  const code = `
    (function() {
      // ── Runtime.enable / CDP detection masking ──────────────
      try { delete window.__REBROWSER_RUNTIME_ENABLE; } catch(_) {}
      try { delete window.__REBROWSER_DEVTOOLS; } catch(_) {}
      try { delete window.__nightmare; } catch(_) {}
      try { delete window.__phantom; } catch(_) {}
      try { delete window.callPhantom; } catch(_) {}
      try { delete window._phantom; } catch(_) {}
      try { delete window.Buffer; } catch(_) {}

      // Real Chrome without automation does not expose a useful webdriver value.
      // A literal false value is itself a common stealth tell; prefer undefined and
      // make the descriptor configurable like native browser properties.
      Object.defineProperty(navigator, 'webdriver', { get: () => undefined, configurable: true });
      Object.defineProperty(navigator, 'vendor', { get: () => 'Google Inc.', configurable: true });
      Object.defineProperty(navigator, 'platform', { get: () => 'Win32', configurable: true });
      Object.defineProperty(navigator, 'maxTouchPoints', { get: () => 0, configurable: true });
      Object.defineProperty(navigator, 'pdfViewerEnabled', { get: () => true, configurable: true });
      Object.defineProperty(navigator, 'plugins', {
        get: () => {
          var p = [
            { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
            { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
            { name: 'Native Client', filename: 'internal-nacl-plugin', description: '' },
          ];
          return p;
        },
        // Configurable like every other override here, so the property can be redefined.
        configurable: true,
      });
      Object.defineProperty(navigator, 'mimeTypes', {
        get: () => {
          var m = [
            { type: 'application/pdf', suffixes: 'pdf', description: 'Portable Document Format', enabledPlugin: null },
            { type: 'text/pdf', suffixes: 'pdf', description: 'Portable Document Format', enabledPlugin: null },
          ];
          m.item = function(i) { return m[i] || null; };
          m.namedItem = function(name) { return m.find(function(x) { return x.type === name; }) || null; };
          return m;
        },
        configurable: true,
      });
      Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'], configurable: true });
      try {
        Object.defineProperty(navigator, 'connection', { get: () => ({ effectiveType: '4g', rtt: 50, downlink: 10, saveData: false }), configurable: true });
      } catch(_) {}
      if (!navigator.mediaDevices) {
        Object.defineProperty(navigator, 'mediaDevices', {
          get: () => ({
            enumerateDevices: () => Promise.resolve([
              { deviceId: 'default', kind: 'audioinput', label: '', groupId: 'default' },
              { deviceId: 'default', kind: 'audiooutput', label: '', groupId: 'default' },
              { deviceId: '', kind: 'videoinput', label: '', groupId: '' },
            ]),
            getUserMedia: () => Promise.reject(new DOMException('NotAllowedError')),
            getDisplayMedia: () => Promise.reject(new DOMException('NotAllowedError')),
          }),
          configurable: true,
        });
      }
      if (!window.chrome) {
        window.chrome = {
          app: { isInstalled: false, InstallState: {}, RunningState: {} },
          runtime: {
            OnInstalledReason: {}, OnRestartRequiredReason: {}, PlatformArch: {}, PlatformNaclArch: {}, PlatformOs: {}, RequestUpdateCheckStatus: {},
            connect: () => ({}), sendMessage: () => {}, onMessage: { addListener: () => {} }
          },
          loadTimes: () => ({}),
          csi: () => ({}),
        };
      }
      var __greedyNativeFns = [];
      function __markNative(fn) { try { __greedyNativeFns.push(fn); } catch(_) {} return fn; }

      var origQuery = navigator.permissions?.query;
      if (origQuery) {
        navigator.permissions.query = __markNative(function query(params) {
          if (params && params.name === 'notifications') {
            // Guard the lookup: Notification is not defined in every context.
            var state = (typeof Notification !== 'undefined' && Notification.permission) || 'default';
            return Promise.resolve({ state: state, onchange: null });
          }
          return origQuery.apply(this, arguments);
        });
      }
      try {
        var getParam = WebGLRenderingContext.prototype.getParameter;
        WebGLRenderingContext.prototype.getParameter = __markNative(function getParameter(p) {
          if (p === 37445) return 'Intel Inc.';
          if (p === 37446) return 'Intel Iris OpenGL Engine';
          return getParam.call(this, p);
        });
      } catch(_) {}
      Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8, configurable: true });
      Object.defineProperty(navigator, 'deviceMemory', { get: () => 8, configurable: true });

      // ── Canvas fingerprint noise ─────────────────────────
      // Headless rendering engines produce slightly different canvas output
      // than headed Chrome. Subtle noise breaks hash-based fingerprinting.
      var __canvasNoise = ((Date.now() % 997) + Math.floor(Math.random() * 997)) & 1;
      try {
        var origFill = CanvasRenderingContext2D.prototype.fillText;
        CanvasRenderingContext2D.prototype.fillText = __markNative(function fillText() {
          // Scale the page's own alpha for this call only and restore it afterwards,
          // so later drawing and reads of globalAlpha see the value the page set.
          var prevAlpha = this.globalAlpha;
          this.globalAlpha = prevAlpha * 0.9995;
          try { return origFill.apply(this, arguments); }
          finally { this.globalAlpha = prevAlpha; }
        });
      } catch(_) {}
      try {
        var origStroke = CanvasRenderingContext2D.prototype.strokeText;
        CanvasRenderingContext2D.prototype.strokeText = __markNative(function strokeText() {
          var prevAlpha = this.globalAlpha;
          this.globalAlpha = prevAlpha * 0.9995;
          try { return origStroke.apply(this, arguments); }
          finally { this.globalAlpha = prevAlpha; }
        });
      } catch(_) {}
      try {
        var origToDataURL = HTMLCanvasElement.prototype.toDataURL;
        HTMLCanvasElement.prototype.toDataURL = __markNative(function toDataURL() {
          var ctx = null;
          var saved = null;
          try {
            ctx = this.getContext('2d');
            if (ctx) {
              // Add 1px noise pixel in corner (invisible but changes hash)
              saved = ctx.getImageData(0, 0, 1, 1);
              var noisy = ctx.getImageData(0, 0, 1, 1);
              noisy.data[0] ^= __canvasNoise;
              ctx.putImageData(noisy, 0, 0);
            }
          } catch(_) { saved = null; } // pixel access failed: serialize untouched
          try { return origToDataURL.apply(this, arguments); }
          finally {
            // Put the original pixel back so repeated calls return the same data
            // and the page's canvas content is left unchanged.
            if (saved) { try { ctx.putImageData(saved, 0, 0); } catch(_) {} }
          }
        });
      } catch(_) {}

      // ── window outer dimensions ──────────────────────────
      // outerWidth/Height = 0 in headless — a well-known bot signal.
      // Mirror innerWidth/Height (set by --window-size flag) so the ratio is sane.
      try {
        if (!window.outerWidth) Object.defineProperty(window, 'outerWidth', { get: () => window.innerWidth || 1920, configurable: true });
        if (!window.outerHeight) Object.defineProperty(window, 'outerHeight', { get: () => window.innerHeight || 1080, configurable: true });
      } catch(_) {}

      // ── screen properties ─────────────────────────────────
      try {
        if (!screen.colorDepth) Object.defineProperty(screen, 'colorDepth', { get: () => 24, configurable: true });
        if (!screen.pixelDepth) Object.defineProperty(screen, 'pixelDepth', { get: () => 24, configurable: true });
      } catch(_) {}

      // ── navigator.userAgentData (UA Client Hints) ─────────
      // Derive version from the UA string already set by --user-agent flag so the
      // two APIs are always consistent. Removes any "HeadlessChrome" brand entry.
      try {
        var _uaMajor = (navigator.userAgent.match(new RegExp('Chrome/([0-9]+)')) || [])[1] || '136';
        var _uaFull = (navigator.userAgent.match(new RegExp('Chrome/([0-9.]+)')) || [])[1] || (_uaMajor + '.0.0.0');
        var _brands = [
          { brand: 'Not)A;Brand', version: '99' },
          { brand: 'Google Chrome', version: _uaMajor },
          { brand: 'Chromium', version: _uaMajor },
        ];
        Object.defineProperty(navigator, 'userAgentData', {
          get: function() {
            return {
              brands: _brands, mobile: false, platform: 'Windows',
              getHighEntropyValues: function() {
                return Promise.resolve({
                  architecture: 'x86', bitness: '64',
                  brands: _brands,
                  fullVersionList: [
                    { brand: 'Not)A;Brand', version: '99.0.0.0' },
                    { brand: 'Google Chrome', version: _uaFull },
                    { brand: 'Chromium', version: _uaFull },
                  ],
                  mobile: false, model: '', platform: 'Windows',
                  platformVersion: '15.0.0', uaFullVersion: _uaFull, wow64: false,
                });
              },
              toJSON: function() { return { brands: _brands, mobile: false, platform: 'Windows' }; },
            };
          },
          configurable: true,
        });
      } catch(_) {}

      // ── CDP Runtime serialization guard ──────────────────
      // Sites detect CDP by putting a getter on Error.prototype.stack
      // and checking if console.log triggers it (only happens when
      // Runtime domain is enabled). We monkey-patch console methods to
      // strip custom getters from arguments before they reach CDP.
      try {
        var _origLog = console.log, _origError = console.error,
            _origWarn = console.warn, _origDebug = console.debug,
            _origInfo = console.info;
        var _safeArg = function(a) {
          if (a instanceof Error) {
            try { return new Error(a.message); } catch(_) { return a; }
          }
          return a;
        };
        console.log = __markNative(function log() { return _origLog.apply(console, Array.prototype.map.call(arguments, _safeArg)); });
        console.error = __markNative(function error() { return _origError.apply(console, Array.prototype.map.call(arguments, _safeArg)); });
        console.warn = __markNative(function warn() { return _origWarn.apply(console, Array.prototype.map.call(arguments, _safeArg)); });
        console.debug = __markNative(function debug() { return _origDebug.apply(console, Array.prototype.map.call(arguments, _safeArg)); });
        console.info = __markNative(function info() { return _origInfo.apply(console, Array.prototype.map.call(arguments, _safeArg)); });
      } catch(_) {}

      // ── Native function masking ──────────────────────────
      // Patched APIs should not stringify as user-defined stealth code.
      // The replacement toString is registered too, otherwise stringifying
      // Function.prototype.toString itself would expose this wrapper's source.
      try {
        var __nativeToString = Function.prototype.toString;
        Function.prototype.toString = __markNative(function toString() {
          if (__greedyNativeFns.indexOf(this) !== -1) {
            var name = this.name || '';
            return 'function ' + name + '() { [native code] }';
          }
          return __nativeToString.call(this);
        });
      } catch(_) {}
    })();
  `;
  await cdp([
    "evalraw",
    tab,
    "Page.addScriptToEvaluateOnNewDocument",
    JSON.stringify({ source: code }),
  ]);
}
|
|
335
|
+
|
|
336
|
+
// ============================================================================
|
|
337
|
+
// Source extraction from markdown
|
|
338
|
+
// ============================================================================
|
|
339
|
+
|
|
340
|
+
/**
 * Locate the `[` that opens the link text ending at `closeBracket`.
 * Scans backwards, skipping balanced inner `[...]` pairs, and never looks
 * before `lowerBound`.
 * @returns {number} Index of the opening bracket, or -1 when there is none
 */
function findLinkTextStart(text, closeBracket, lowerBound) {
  let depth = 0;
  for (let i = closeBracket - 1; i >= lowerBound; i--) {
    const ch = text[i];
    if (ch === "]") {
      depth++;
    } else if (ch === "[") {
      if (depth === 0) return i;
      depth--;
    }
  }
  return -1;
}

/**
 * Locate the `)` that closes a link destination starting at `start`.
 * Balanced parentheses inside the URL are kept; if the URL runs into
 * whitespace or the end of text while a `(` is still open, the first `)`
 * seen is used instead.
 * @returns {number} Index of the closing paren, or -1 when the URL is invalid
 */
function findUrlEnd(text, start) {
  let depth = 0;
  let firstClose = -1;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (ch === ")") {
      if (depth === 0) return i;
      depth--;
      if (firstClose === -1) firstClose = i;
    } else if (ch === "(") {
      depth++;
    } else if (/\s/.test(ch)) {
      break; // whitespace in URL = invalid markdown link
    }
  }
  return firstClose;
}

/**
 * Parse Markdown links from text to extract sources.
 *
 * Only links with a non-empty title and an http(s) URL are kept, duplicates
 * (by URL) are dropped, and at most 10 sources are returned. The title is the
 * bracket group immediately before `](`, so an earlier unrelated bracket such
 * as a `[1]` citation marker is not absorbed into it, and URLs containing
 * balanced parentheses are kept whole.
 *
 * @param {string} text - Text containing Markdown links like [title](url)
 * @returns {Array<{title: string, url: string}>} Extracted sources
 */
export function parseSourcesFromMarkdown(text) {
  if (!text) return [];
  const maxSources = 10;
  const sources = [];
  const seenUrls = new Set();
  let cursor = 0;
  while (cursor < text.length && sources.length < maxSources) {
    const closeBracket = text.indexOf("](", cursor);
    if (closeBracket === -1) break;
    const urlStart = closeBracket + 2;
    const openBracket = findLinkTextStart(text, closeBracket, cursor);
    const urlEnd = findUrlEnd(text, urlStart);
    if (openBracket === -1 || urlEnd === -1) {
      // Not a usable link; resume scanning right after this "](".
      cursor = urlStart;
      continue;
    }
    const title = text.slice(openBracket + 1, closeBracket);
    const url = text.slice(urlStart, urlEnd);
    // Deduplicate by URL
    if (title && /^https?:\/\//i.test(url) && !seenUrls.has(url)) {
      seenUrls.add(url);
      sources.push({ title, url });
    }
    cursor = urlEnd + 1;
  }
  return sources;
}
|
|
381
|
+
|
|
382
|
+
// ============================================================================
|
|
383
|
+
// Timing constants
|
|
384
|
+
// ============================================================================
|
|
385
|
+
|
|
386
|
+
export const TIMING = {
  // Settle time after a navigation.
  postNav: 800,
  // Settle time after slower navigations (Bing, Gemini).
  postNavSlow: 1200,
  // Settle time after a UI click.
  postClick: 300,
  // Settle time after typing.
  postType: 300,
  // Polling interval while waiting for an input to appear.
  inputPoll: 400,
  // Polling interval while waiting for the copy button.
  copyPoll: 600,
  // Settle time after a verification challenge completes.
  afterVerify: 1500,
};
|
|
395
|
+
|
|
396
|
+
// ============================================================================
|
|
397
|
+
// Copy button polling
|
|
398
|
+
// ============================================================================
|
|
399
|
+
|
|
400
|
+
/**
 * Wait for a copy button to appear in the DOM.
 *
 * Polls from Node: each tick sleeps for a jittered TIMING.copyPoll interval,
 * runs the optional onPoll hook, then issues one CDP eval that checks
 * document.querySelector(selector). A failed eval counts as "not found".
 *
 * @param {string} tab - Tab identifier
 * @param {string} selector - CSS selector for the copy button
 * @param {object} [options]
 * @param {number} [options.timeout=60000] - Max wait in ms
 * @param {Function} [options.onPoll] - Optional callback invoked with the
 *   1-based tick number on each poll (e.g. to scroll). May be sync or async;
 *   any error it raises is ignored (best effort).
 * @returns {Promise<void>} Resolves once the selector matches
 * @throws {Error} If the selector does not match within the timeout
 */
export async function waitForCopyButton(tab, selector, options = {}) {
  const { timeout = 60000, onPoll } = options;

  // JSON.stringify produces a valid JS string literal for ANY selector, so
  // selectors containing quotes (e.g. [aria-label='Copy']) no longer turn the
  // eval into a syntax error that is silently swallowed until the timeout.
  const probe = `!!document.querySelector(${JSON.stringify(selector)})`;

  const deadline = Date.now() + timeout;
  let tick = 0;
  while (Date.now() < deadline) {
    await new Promise((resolve) =>
      setTimeout(resolve, jitter(TIMING.copyPoll)),
    );

    if (onPoll) {
      try {
        // await handles both promise-returning and plain synchronous hooks.
        await onPoll(++tick);
      } catch {
        // The hook is best-effort; a failing hook must not abort the wait.
      }
    }

    const found = await cdp(["eval", tab, probe]).catch(() => "false");
    if (found === "true") return;
  }

  throw new Error(
    `Copy button ('${selector}') did not appear within ${timeout}ms`,
  );
}
|
|
427
|
+
|
|
428
|
+
// ============================================================================
|
|
429
|
+
// Timing jitter
|
|
430
|
+
// ============================================================================
|
|
431
|
+
|
|
432
|
+
/**
 * Add ±20% random jitter to a timing value to avoid bot-like regularity.
 * Also floors at 50ms minimum to prevent micro-polling.
 *
 * The offset is an integer drawn uniformly from [-20%, +20%] of `ms`, which
 * matches the in-browser `_jitter` helpers used by the single-eval waiters.
 *
 * @param {number} ms - Base interval in milliseconds
 * @returns {number} Jittered interval (integer, never below 50)
 */
export function jitter(ms) {
  // Half-width of the jitter window: 20% of the base value on each side.
  const variance = Math.floor(ms * 0.2);
  // randomInt's upper bound is exclusive, hence the + 1.
  const offset = randomInt(-variance, variance + 1);
  return Math.max(50, Math.round(ms + offset));
}
|
|
443
|
+
|
|
444
|
+
// ============================================================================
|
|
445
|
+
// Stream completion detection
|
|
446
|
+
// ============================================================================
|
|
447
|
+
|
|
448
|
+
/**
 * Wait for generation/streaming to complete by monitoring text length stability.
 *
 * Uses a SINGLE Runtime.evaluate call with awaitPromise: true — the stability
 * polling runs entirely inside the browser context, emitting no CDP traffic
 * during the wait. This avoids the CDP Runtime serialization detection vector
 * that would otherwise fire on every poll tick (~50 evals → 1 eval).
 *
 * The in-page loop reads `innerText.length` of the monitored element on each
 * tick. Once the length is at least `minLength` and has been unchanged for
 * `stableRounds` consecutive ticks, the promise resolves with that length.
 * When the deadline passes first, it resolves with the last recorded length
 * if one was recorded (lengths below `minLength` are never recorded), and
 * rejects otherwise.
 *
 * @param {string} tab - Tab identifier
 * @param {object} options - Options
 * @param {number} [options.timeout=20000] - Maximum wait time in ms
 * @param {number} [options.interval=600] - Polling interval in ms (jittered ±20%)
 * @param {number} [options.stableRounds=3] - Required stable rounds to consider complete
 * @param {string} [options.selector='document.body'] - JavaScript expression
 *   (not a CSS selector) that is spliced verbatim into the page script and
 *   must evaluate to the element to monitor
 * @param {number} [options.minLength=0] - Minimum text length before
 *   stability is tracked
 * @returns {Promise<number>} Final text length
 * @throws {Error} If the text never reached `minLength` before the timeout
 */
export async function waitForStreamComplete(tab, options = {}) {
  const {
    timeout = 20000,
    interval = 600,
    stableRounds = 3,
    selector = "document.body",
    minLength = 0,
  } = options;

  // Single self-contained eval — polling runs in the browser, no CDP chatter.
  // The promise resolves when stability is reached or timeout expires.
  const code = String.raw`
    new Promise((resolve, reject) => {
      const _deadline = Date.now() + ${timeout};
      const _baseInterval = ${interval};
      const _stableRounds = ${stableRounds};
      const _minLength = ${minLength};
      let _lastLen = -1;
      let _stableCount = 0;

      function _jitter(ms) {
        return Math.max(50, ms + (Math.random() * ms * 0.4 - ms * 0.2));
      }

      function _poll() {
        try {
          // Re-query DOM each tick — element may not exist at eval start
          const el = ${selector};
          const cur = el?.innerText?.length ?? 0;
          if (cur >= _minLength) {
            if (cur === _lastLen) {
              _stableCount++;
              if (_stableCount >= _stableRounds) { resolve(cur); return; }
            } else {
              _lastLen = cur;
              _stableCount = 0;
            }
          }
          if (Date.now() < _deadline) {
            setTimeout(_poll, _jitter(_baseInterval));
          } else {
            if (_lastLen >= _minLength) { resolve(_lastLen); }
            else { reject(new Error('Generation did not stabilise within ${timeout}ms')); }
          }
        } catch(e) { reject(e); }
      }

      _poll();
    })
  `;

  // Use eval (which has awaitPromise:true in cdp.mjs) with generous timeout.
  // This is ONE Runtime.evaluate call — the polling loop runs in the browser.
  // The CDP-side timeout is padded by 10s so the in-page deadline fires first.
  const lenStr = await cdp(["eval", tab, code], timeout + 10000);
  // An unparsable result is treated as length 0.
  const currentLen = parseInt(lenStr, 10) || 0;

  if (currentLen >= minLength) return currentLen;
  throw new Error(`Generation did not stabilise within ${timeout}ms`);
}
|
|
523
|
+
|
|
524
|
+
// ============================================================================
|
|
525
|
+
// DOM selector waiting (single eval, no polling)
|
|
526
|
+
// ============================================================================
|
|
527
|
+
|
|
528
|
+
/**
 * Wait for a CSS selector to appear in the DOM using a single self-contained
 * eval. The polling loop runs in the browser — zero CDP traffic until done.
 *
 * The selector is embedded as a JSON-encoded string literal, so selectors
 * containing quotes or backslashes are passed to querySelector unchanged
 * instead of breaking the generated script.
 *
 * @param {string} tab - Tab identifier
 * @param {string} selector - CSS selector to wait for
 * @param {number} [timeoutMs=15000] - Maximum wait time in ms
 * @param {number} [interval=500] - Base polling interval in ms (jittered ±20%)
 * @returns {Promise<boolean>} true if selector was found, false on timeout
 *   (or if querySelector throws, e.g. for an invalid selector)
 */
export async function waitForSelector(
  tab,
  selector,
  timeoutMs = 15000,
  interval = 500,
) {
  // Substitutions in a String.raw template are inserted verbatim, so the JSON
  // escapes survive and form a valid JS string literal inside the page script.
  const selectorLiteral = JSON.stringify(selector);

  const code = String.raw`
    new Promise((resolve) => {
      const _deadline = Date.now() + ${timeoutMs};
      const _baseInterval = ${interval};

      function _jitter(ms) {
        return Math.max(50, ms + (Math.random() * ms * 0.4 - ms * 0.2));
      }

      function _poll() {
        try {
          if (document.querySelector(${selectorLiteral})) { resolve(true); return; }
          if (Date.now() < _deadline) { setTimeout(_poll, _jitter(_baseInterval)); }
          else { resolve(false); }
        } catch(_) { resolve(false); }
      }

      _poll();
    })
  `;

  // CDP-side timeout is padded by 5s so the in-page deadline settles first.
  const result = await cdp(["eval", tab, code], timeoutMs + 5000);
  return result === "true";
}
|
|
568
|
+
|
|
569
|
+
// ============================================================================
|
|
570
|
+
// CLI argument parsing
|
|
571
|
+
// ============================================================================
|
|
572
|
+
|
|
573
|
+
/**
 * Resolve the --stdin placeholder in a CLI argument list.
 *
 * When --stdin is present, the whole of stdin is read as UTF-8, trimmed, and
 * substituted for the flag in a copy of the arguments. Passing the query this
 * way keeps it out of the process table. Without the flag the original array
 * is returned untouched. Call this before parseArgs().
 *
 * @param {string[]} args - process.argv.slice(2)
 * @returns {Promise<string[]>} args with the stdin text in place of --stdin
 */
export async function prepareArgs(args) {
  const flagPos = args.indexOf("--stdin");
  if (flagPos === -1) return args;

  const pieces = [];
  const stdinText = await new Promise((done) => {
    process.stdin.setEncoding("utf8");
    process.stdin.on("data", (piece) => {
      pieces.push(piece);
    });
    process.stdin.on("end", () => {
      done(pieces.join("").trim());
    });
  });

  // Swap the flag for the text; parseArgs will fold it into the query.
  return args.map((arg, i) => (i === flagPos ? stdinText : arg));
}
|
|
597
|
+
|
|
598
|
+
/**
 * Parse standard extractor CLI arguments.
 *
 * Processing order: every "--short" is removed first; then the first "--tab"
 * and the argument following it; then the first "--locale" and the argument
 * following it. Whatever remains is joined with spaces to form the query.
 *
 * A flag given without a value (i.e. as the last argument) yields null for
 * that option, matching the documented return type.
 *
 * @param {string[]} args - process.argv.slice(2)
 * @returns {{query: string, tabPrefix: string|null, short: boolean, locale: string|null}}
 */
export function parseArgs(args) {
  const short = args.includes("--short");
  let rest = args.filter((arg) => arg !== "--short");

  // Remove the first occurrence of `flag` plus its value from `rest` and
  // return the value, or null when the flag is absent or has no value.
  const takeOption = (flag) => {
    const at = rest.indexOf(flag);
    if (at === -1) return null;
    const value = rest[at + 1] ?? null;
    rest = rest.filter((_, i) => i !== at && i !== at + 1);
    return value;
  };

  const tabPrefix = takeOption("--tab");
  const locale = takeOption("--locale");

  const query = rest.join(" ");
  return { query, tabPrefix, short, locale };
}
|
|
622
|
+
|
|
623
|
+
/**
 * Guard for extractor CLIs: when no arguments were given, or the first one is
 * "--help", print the usage text to stderr and exit with status 1. Otherwise
 * return without doing anything.
 *
 * @param {string[]} args - process.argv.slice(2)
 * @param {string} usage - Usage string written to stderr
 */
export function validateQuery(args, usage) {
  const askedForHelp = args[0] === "--help";
  if (args.length > 0 && !askedForHelp) return;

  process.stderr.write(usage);
  process.exit(1);
}
|
|
634
|
+
|
|
635
|
+
// ============================================================================
|
|
636
|
+
// Output formatting
|
|
637
|
+
// ============================================================================
|
|
638
|
+
|
|
639
|
+
/**
 * Shorten an answer for short mode.
 *
 * Outside short mode, or when the answer already fits, it is returned as-is.
 * Otherwise the first maxLen characters are kept, cut back to the last space
 * when there is one past index 0, and an ellipsis is appended.
 *
 * @param {string} answer - Full answer text
 * @param {boolean} short - Whether to truncate
 * @param {number} [maxLen=300] - Maximum length in short mode
 * @returns {string} Formatted answer
 */
export function formatAnswer(answer, short, maxLen = 300) {
  if (!short) return answer;
  if (answer.length <= maxLen) return answer;

  const head = answer.slice(0, maxLen);
  const cut = head.lastIndexOf(" ");
  // Prefer ending on a word boundary; fall back to the hard cut.
  const kept = cut > 0 ? head.slice(0, cut) : head;
  return `${kept}…`;
}
|
|
652
|
+
|
|
653
|
+
/**
 * Write a value to stdout as pretty-printed JSON (2-space indent) followed by
 * a newline.
 *
 * @param {object} data - Data to output
 */
export function outputJson(data) {
  const pretty = JSON.stringify(data, null, 2);
  process.stdout.write(`${pretty}\n`);
}
|
|
660
|
+
|
|
661
|
+
/**
 * Assemble the diagnostic envelope attached to an extractor result.
 *
 * Purely a data-shaping helper: it copies the known fields into a fresh
 * object, filling in "headless" for a missing mode and null for every other
 * missing field except engine, which is passed through as given.
 *
 * @param {object} fields
 * @returns {object}
 */
export function buildEnvelope(fields = {}) {
  const {
    engine,
    mode = "headless",
    clipboardEmpty = null,
    fallbackUsed = null,
    blockedBy = null,
    verificationResult = null,
    inputReady = null,
    durationMs = null,
  } = fields;

  const envelope = {
    engine,
    mode,
    clipboardEmpty,
    fallbackUsed,
    blockedBy,
    verificationResult,
    inputReady,
    durationMs,
  };
  return envelope;
}
|
|
688
|
+
|
|
689
|
+
/**
 * Report a fatal error and terminate the process with status 1.
 *
 * When an envelope is supplied, a single-line JSON object containing the
 * envelope and the error message is written to stdout first, so the runner
 * can still parse structured diagnostics. The message always goes to stderr.
 *
 * @param {Error} error - Error to handle
 * @param {object} [envelope] - Optional envelope object
 */
export function handleError(error, envelope = null) {
  const message = error.message;

  if (envelope) {
    const payload = { _envelope: envelope, error: message };
    process.stdout.write(`${JSON.stringify(payload)}\n`);
  }

  process.stderr.write(`Error: ${message}\n`);
  process.exit(1);
}
|