@apmantza/greedysearch-pi 1.9.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +110 -14
- package/README.md +86 -41
- package/bin/cdp.mjs +1153 -1108
- package/bin/launch.mjs +11 -0
- package/bin/search.mjs +886 -674
- package/extractors/bing-copilot.mjs +528 -374
- package/extractors/chatgpt.mjs +436 -0
- package/extractors/common.mjs +837 -645
- package/extractors/consensus.mjs +655 -0
- package/extractors/consent.mjs +421 -388
- package/extractors/gemini.mjs +335 -217
- package/extractors/logically.mjs +567 -0
- package/extractors/selectors.mjs +3 -2
- package/extractors/semantic-scholar.mjs +219 -0
- package/index.ts +2 -1
- package/package.json +14 -6
- package/skills/greedy-search/skill.md +9 -12
- package/src/fetcher.mjs +8 -1
- package/src/formatters/results.ts +163 -128
- package/src/search/browser-lifecycle.mjs +27 -5
- package/src/search/chrome.mjs +653 -590
- package/src/search/constants.mjs +150 -39
- package/src/search/engines.mjs +114 -76
- package/src/search/fetch-source.mjs +566 -451
- package/src/search/pdf.mjs +68 -0
- package/src/search/recovery.mjs +51 -45
- package/src/search/research.mjs +2579 -0
- package/src/search/sources.mjs +77 -25
- package/src/search/synthesis-runner.mjs +142 -57
- package/src/search/synthesis.mjs +286 -246
- package/src/tools/greedy-search-handler.ts +189 -45
- package/src/tools/shared.ts +187 -186
- package/src/types.ts +110 -104
- package/test.mjs +1342 -534
package/bin/search.mjs
CHANGED
|
@@ -1,674 +1,886 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
// search.mjs - unified CLI for GreedySearch extractors
|
|
4
|
-
//
|
|
5
|
-
// Usage:
|
|
6
|
-
// node search.mjs <engine> "<query>"
|
|
7
|
-
// node search.mjs all "<query>"
|
|
8
|
-
//
|
|
9
|
-
// Engines:
|
|
10
|
-
// perplexity | pplx | p
|
|
11
|
-
// bing | copilot | b
|
|
12
|
-
// google | g
|
|
13
|
-
// gemini | gem
|
|
14
|
-
// all - fan-out to all engines in parallel
|
|
15
|
-
//
|
|
16
|
-
// Output: JSON to stdout, errors to stderr
|
|
17
|
-
//
|
|
18
|
-
// Examples:
|
|
19
|
-
// node search.mjs p "what is memoization"
|
|
20
|
-
// node search.mjs gem "latest React features"
|
|
21
|
-
// node search.mjs all "how does TCP congestion control work"
|
|
22
|
-
|
|
23
|
-
import { existsSync, readFileSync } from "node:fs";
|
|
24
|
-
// Config file for user defaults
|
|
25
|
-
import { homedir } from "node:os";
|
|
26
|
-
import { join } from "node:path";
|
|
27
|
-
import {
|
|
28
|
-
cdp,
|
|
29
|
-
closeTab,
|
|
30
|
-
closeTabs,
|
|
31
|
-
ensureChrome,
|
|
32
|
-
killHeadlessChrome,
|
|
33
|
-
openNewTab,
|
|
34
|
-
touchActivity,
|
|
35
|
-
} from "../src/search/chrome.mjs";
|
|
36
|
-
import {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
} from "../src/search/
|
|
42
|
-
import {
|
|
43
|
-
import {
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
} from "../src/search/
|
|
49
|
-
import {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
import {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
'
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
const
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
(
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
const
|
|
209
|
-
|
|
210
|
-
const
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
}
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
{
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
}
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// search.mjs - unified CLI for GreedySearch extractors
|
|
4
|
+
//
|
|
5
|
+
// Usage:
|
|
6
|
+
// node search.mjs <engine> "<query>"
|
|
7
|
+
// node search.mjs all "<query>"
|
|
8
|
+
//
|
|
9
|
+
// Engines:
|
|
10
|
+
// perplexity | pplx | p
|
|
11
|
+
// bing | copilot | b
|
|
12
|
+
// google | g
|
|
13
|
+
// gemini | gem
// chatgpt | gpt
// semantic-scholar | s2
// logically | log
|
|
14
|
+
// all - fan-out to all engines in parallel
|
|
15
|
+
//
|
|
16
|
+
// Output: JSON to stdout, errors to stderr
|
|
17
|
+
//
|
|
18
|
+
// Examples:
|
|
19
|
+
// node search.mjs p "what is memoization"
|
|
20
|
+
// node search.mjs gem "latest React features"
|
|
21
|
+
// node search.mjs all "how does TCP congestion control work"
|
|
22
|
+
|
|
23
|
+
import { appendFileSync, existsSync, readFileSync } from "node:fs";
|
|
24
|
+
// Config file for user defaults
|
|
25
|
+
import { homedir } from "node:os";
|
|
26
|
+
import { join } from "node:path";
|
|
27
|
+
import {
|
|
28
|
+
cdp,
|
|
29
|
+
closeTab,
|
|
30
|
+
closeTabs,
|
|
31
|
+
ensureChrome,
|
|
32
|
+
killHeadlessChrome,
|
|
33
|
+
openNewTab,
|
|
34
|
+
touchActivity,
|
|
35
|
+
} from "../src/search/chrome.mjs";
|
|
36
|
+
import {
|
|
37
|
+
ALL_ENGINES,
|
|
38
|
+
ENGINES,
|
|
39
|
+
SYNTHESIZER,
|
|
40
|
+
VISIBLE_RECOVERY_LOG,
|
|
41
|
+
} from "../src/search/constants.mjs";
|
|
42
|
+
import { runExtractor } from "../src/search/engines.mjs";
|
|
43
|
+
import {
|
|
44
|
+
fetchMultipleSources,
|
|
45
|
+
fetchTopSource,
|
|
46
|
+
} from "../src/search/fetch-source.mjs";
|
|
47
|
+
import { writeSourcesToFiles } from "../src/search/file-sources.mjs";
|
|
48
|
+
import { writeOutput } from "../src/search/output.mjs";
|
|
49
|
+
import {
|
|
50
|
+
findHeadlessBlockedEngines,
|
|
51
|
+
isHeadlessBlockedResult,
|
|
52
|
+
isManualVerificationError,
|
|
53
|
+
} from "../src/search/recovery.mjs";
|
|
54
|
+
import {
|
|
55
|
+
buildSourceRegistry,
|
|
56
|
+
mergeFetchDataIntoSources,
|
|
57
|
+
} from "../src/search/sources.mjs";
|
|
58
|
+
import { buildConfidence } from "../src/search/synthesis.mjs";
|
|
59
|
+
import {
|
|
60
|
+
getSynthesisStartUrl,
|
|
61
|
+
normalizeSynthesizer,
|
|
62
|
+
synthesizeResults,
|
|
63
|
+
} from "../src/search/synthesis-runner.mjs";
|
|
64
|
+
import { normalizeQuery } from "../src/search/query.mjs";
|
|
65
|
+
import { runResearchMode } from "../src/search/research.mjs";
|
|
66
|
+
|
|
67
|
+
const CONFIG_DIR = join(homedir(), ".config", "greedysearch");
|
|
68
|
+
const CONFIG_FILE = join(CONFIG_DIR, "config.json");
|
|
69
|
+
|
|
70
|
+
/**
 * Load the user's default settings from CONFIG_FILE.
 *
 * Best-effort: a missing, unreadable, or malformed config file yields an
 * empty object rather than an error, so a broken config never aborts a search.
 *
 * @returns {object} The parsed config object, or `{}` when no usable config
 *   exists (including when the file holds valid JSON that is not a plain
 *   object, such as `null`, an array, or a string).
 */
function loadUserConfig() {
  try {
    if (!existsSync(CONFIG_FILE)) {
      return {};
    }
    const parsed = JSON.parse(readFileSync(CONFIG_FILE, "utf8"));
    // JSON.parse also accepts `null`, arrays and primitives. The caller reads
    // properties off the result (`userConfig.locale`), which throws on `null`.
    if (
      parsed !== null &&
      typeof parsed === "object" &&
      !Array.isArray(parsed)
    ) {
      return parsed;
    }
  } catch {
    // Deliberately ignored: fall through to empty defaults.
  }
  return {};
}
|
|
80
|
+
|
|
81
|
+
/**
 * Append one JSON line describing a visible-recovery event to
 * VISIBLE_RECOVERY_LOG.
 *
 * The record starts with an `at` ISO timestamp followed by the fields of
 * `event`; a field named `at` in `event` overrides the timestamp.
 * Any failure (serialization or file write) is swallowed on purpose.
 *
 * @param {object} event - Fields to record for this recovery step.
 * @returns {void}
 */
function logVisibleRecovery(event) {
  try {
    const record = { at: new Date().toISOString(), ...event };
    const line = `${JSON.stringify(record)}\n`;
    appendFileSync(VISIBLE_RECOVERY_LOG, line, "utf8");
  } catch {
    // Diagnostics are best-effort; a logging failure must never fail a search.
  }
}
|
|
92
|
+
|
|
93
|
+
/**
 * Read the query/prompt from stdin (used with --stdin so the query does not
 * appear on the command line).
 *
 * When stdin is an interactive terminal nothing is read and an empty string
 * is returned immediately. The TTY check runs before any listener is attached
 * so stdin is never switched into flowing mode in that case.
 *
 * @returns {Promise<string>} All of stdin decoded as UTF-8 and trimmed, or
 *   `""` when stdin is a TTY. Rejects if the stdin stream emits an error.
 */
async function readStdin() {
  const input = process.stdin;
  if (input.isTTY) {
    return "";
  }
  input.setEncoding("utf8");
  let data = "";
  // Async iteration ends on 'end' and rejects on 'error'.
  for await (const chunk of input) {
    data += chunk;
  }
  return data.trim();
}
|
|
103
|
+
|
|
104
|
+
// ─── Main ──────────────────────────────────────────────────────────────────
|
|
105
|
+
|
|
106
|
+
async function main() {
|
|
107
|
+
const args = process.argv.slice(2);
|
|
108
|
+
if (args.length < 2 || args[0] === "--help") {
|
|
109
|
+
process.stderr.write(
|
|
110
|
+
`${[
|
|
111
|
+
'Usage: node search.mjs <engine> "<query>"',
|
|
112
|
+
"",
|
|
113
|
+
"Engines: all, perplexity (p), google (g), chatgpt (gpt), gemini (gem), semantic-scholar (s2), logically (log), bing (b)",
|
|
114
|
+
"",
|
|
115
|
+
"Flags:",
|
|
116
|
+
" --synthesize For engine=all: synthesize fetched sources",
|
|
117
|
+
" --synthesizer <engine> Synthesis engine (default from ~/.pi/greedyconfig)",
|
|
118
|
+
" --fast Legacy quick mode: no source fetching or synthesis",
|
|
119
|
+
" --depth <mode> Legacy: fast|standard|deep aliases, or research",
|
|
120
|
+
" --deep-research Deprecated alias for --research",
|
|
121
|
+
" --research Iterative query/learnings loop (alias: --depth research)",
|
|
122
|
+
" --breadth <n> Research mode query breadth, 1-5 (default: 3)",
|
|
123
|
+
" --iterations <n> Research mode rounds, 1-3 (default: 2)",
|
|
124
|
+
" --max-sources <n> Research mode fetched source cap, 3-12",
|
|
125
|
+
" --research-out-dir <dir> Write research bundle to a specific directory",
|
|
126
|
+
" --no-research-bundle Disable the default .pi/greedysearch-research bundle",
|
|
127
|
+
" --fetch-top-source Fetch content from top source",
|
|
128
|
+
" --inline Output JSON to stdout (for piping)",
|
|
129
|
+
" --locale <lang> Force results language (en, de, fr, etc.)",
|
|
130
|
+
" --visible Always use visible Chrome for this search",
|
|
131
|
+
" --always-visible Alias for --visible",
|
|
132
|
+
" --stdin Read query from stdin (avoids command-line leakage)",
|
|
133
|
+
"",
|
|
134
|
+
"Environment:",
|
|
135
|
+
" GREEDY_SEARCH_VISIBLE Set to 1 to show Chrome window (disables headless)",
|
|
136
|
+
" GREEDY_SEARCH_ALWAYS_VISIBLE Set to 1 to force visible mode for all runs",
|
|
137
|
+
" GREEDY_SEARCH_LOCALE Default locale (default: en)",
|
|
138
|
+
"",
|
|
139
|
+
"Examples:",
|
|
140
|
+
' node search.mjs all "Node.js streams" # Grounded: engines + fetched sources',
|
|
141
|
+
' node search.mjs all "Node.js streams" --synthesize # Add Gemini synthesis',
|
|
142
|
+
' node search.mjs all "quick check" --fast # Legacy fast: no sources/synthesis',
|
|
143
|
+
' node search.mjs all "browser automation" --research --breadth 3 --iterations 2',
|
|
144
|
+
' node search.mjs p "what is memoization" # Single engine search',
|
|
145
|
+
].join("\n")}\n`,
|
|
146
|
+
);
|
|
147
|
+
process.exit(1);
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
const alwaysVisible =
|
|
151
|
+
args.includes("--visible") ||
|
|
152
|
+
args.includes("--always-visible") ||
|
|
153
|
+
process.env.GREEDY_SEARCH_ALWAYS_VISIBLE === "1";
|
|
154
|
+
if (alwaysVisible) {
|
|
155
|
+
process.env.GREEDY_SEARCH_VISIBLE = "1";
|
|
156
|
+
process.env.GREEDY_SEARCH_ALWAYS_VISIBLE = "1";
|
|
157
|
+
delete process.env.GREEDY_SEARCH_HEADLESS;
|
|
158
|
+
} else if (process.env.GREEDY_SEARCH_VISIBLE !== "1") {
|
|
159
|
+
// Establish the desired mode BEFORE ensureChrome() so a stale visible
|
|
160
|
+
// recovery browser is switched back to headless before research planning
|
|
161
|
+
// and Gemini synthesis tabs are opened.
|
|
162
|
+
process.env.GREEDY_SEARCH_HEADLESS = "1";
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
await ensureChrome();
|
|
166
|
+
|
|
167
|
+
// Track activity for headless idle timeout
|
|
168
|
+
touchActivity();
|
|
169
|
+
|
|
170
|
+
const depthIdx = args.indexOf("--depth");
|
|
171
|
+
const legacyDepth =
|
|
172
|
+
depthIdx !== -1 && args[depthIdx + 1]
|
|
173
|
+
? args[depthIdx + 1].toLowerCase()
|
|
174
|
+
: null;
|
|
175
|
+
const engineArg = args.find((a) => !a.startsWith("--"))?.toLowerCase();
|
|
176
|
+
const researchMode =
|
|
177
|
+
args.includes("--research") ||
|
|
178
|
+
args.includes("--deep-research") ||
|
|
179
|
+
legacyDepth === "research";
|
|
180
|
+
const legacyFast = args.includes("--fast") || legacyDepth === "fast";
|
|
181
|
+
const legacySynthesisDepth =
|
|
182
|
+
legacyDepth === "standard" ||
|
|
183
|
+
legacyDepth === "deep" ||
|
|
184
|
+
args.includes("--deep");
|
|
185
|
+
const shouldFetchSources = engineArg === "all" && !legacyFast;
|
|
186
|
+
const shouldSynthesize =
|
|
187
|
+
engineArg === "all" &&
|
|
188
|
+
!legacyFast &&
|
|
189
|
+
(args.includes("--synthesize") || legacySynthesisDepth);
|
|
190
|
+
const groundedSynthesis = legacyDepth === "deep" || args.includes("--deep");
|
|
191
|
+
|
|
192
|
+
if (args.includes("--deep-research")) {
|
|
193
|
+
process.stderr.write(
|
|
194
|
+
"[greedysearch] --deep-research is deprecated; use --research or --depth research\n",
|
|
195
|
+
);
|
|
196
|
+
}
|
|
197
|
+
if (legacySynthesisDepth) {
|
|
198
|
+
process.stderr.write(
|
|
199
|
+
"[greedysearch] depth fast|standard|deep is deprecated; use default grounded search plus --synthesize when needed\n",
|
|
200
|
+
);
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
const synthesizerIdx = args.indexOf("--synthesizer");
|
|
204
|
+
const synthesizer = normalizeSynthesizer(
|
|
205
|
+
synthesizerIdx === -1 ? SYNTHESIZER : args[synthesizerIdx + 1],
|
|
206
|
+
);
|
|
207
|
+
|
|
208
|
+
const full = args.includes("--full");
|
|
209
|
+
const short = !full;
|
|
210
|
+
const fetchSource = args.includes("--fetch-top-source");
|
|
211
|
+
const inline = args.includes("--inline");
|
|
212
|
+
const breadthIdx = args.indexOf("--breadth");
|
|
213
|
+
const iterationsIdx = args.indexOf("--iterations");
|
|
214
|
+
const maxSourcesIdx = args.indexOf("--max-sources");
|
|
215
|
+
const researchBreadth = breadthIdx === -1 ? undefined : args[breadthIdx + 1];
|
|
216
|
+
const researchIterations =
|
|
217
|
+
iterationsIdx === -1 ? undefined : args[iterationsIdx + 1];
|
|
218
|
+
const researchMaxSources =
|
|
219
|
+
maxSourcesIdx === -1 ? undefined : args[maxSourcesIdx + 1];
|
|
220
|
+
const researchOutDirIdx = args.indexOf("--research-out-dir");
|
|
221
|
+
const researchOutDir =
|
|
222
|
+
researchOutDirIdx === -1 ? undefined : args[researchOutDirIdx + 1];
|
|
223
|
+
const writeResearchBundle = !args.includes("--no-research-bundle");
|
|
224
|
+
const outIdx = args.indexOf("--out");
|
|
225
|
+
const outFile = outIdx === -1 ? null : args[outIdx + 1];
|
|
226
|
+
|
|
227
|
+
// Locale handling: CLI flag > env var > config file > default (en)
|
|
228
|
+
const localeIdx = args.indexOf("--locale");
|
|
229
|
+
const envLocale = process.env.GREEDY_SEARCH_LOCALE;
|
|
230
|
+
const userConfig = loadUserConfig();
|
|
231
|
+
let locale = "en"; // Default to English
|
|
232
|
+
|
|
233
|
+
if (localeIdx !== -1 && args[localeIdx + 1]) {
|
|
234
|
+
locale = args[localeIdx + 1];
|
|
235
|
+
} else if (envLocale) {
|
|
236
|
+
locale = envLocale;
|
|
237
|
+
} else if (userConfig.locale) {
|
|
238
|
+
locale = userConfig.locale;
|
|
239
|
+
}
|
|
240
|
+
const rest = args.filter(
|
|
241
|
+
(a, i) =>
|
|
242
|
+
a !== "--full" &&
|
|
243
|
+
a !== "--short" &&
|
|
244
|
+
a !== "--fast" &&
|
|
245
|
+
a !== "--fetch-top-source" &&
|
|
246
|
+
a !== "--synthesize" &&
|
|
247
|
+
a !== "--deep-research" &&
|
|
248
|
+
a !== "--deep" &&
|
|
249
|
+
a !== "--research" &&
|
|
250
|
+
a !== "--inline" &&
|
|
251
|
+
a !== "--stdin" &&
|
|
252
|
+
a !== "--headless" &&
|
|
253
|
+
a !== "--visible" &&
|
|
254
|
+
a !== "--always-visible" &&
|
|
255
|
+
a !== "--depth" &&
|
|
256
|
+
a !== "--synthesizer" &&
|
|
257
|
+
a !== "--out" &&
|
|
258
|
+
a !== "--locale" &&
|
|
259
|
+
a !== "--breadth" &&
|
|
260
|
+
a !== "--iterations" &&
|
|
261
|
+
a !== "--max-sources" &&
|
|
262
|
+
a !== "--research-out-dir" &&
|
|
263
|
+
a !== "--no-research-bundle" &&
|
|
264
|
+
a !== "--help" &&
|
|
265
|
+
(depthIdx === -1 || i !== depthIdx + 1) &&
|
|
266
|
+
(synthesizerIdx === -1 || i !== synthesizerIdx + 1) &&
|
|
267
|
+
(outIdx === -1 || i !== outIdx + 1) &&
|
|
268
|
+
(localeIdx === -1 || i !== localeIdx + 1) &&
|
|
269
|
+
(breadthIdx === -1 || i !== breadthIdx + 1) &&
|
|
270
|
+
(iterationsIdx === -1 || i !== iterationsIdx + 1) &&
|
|
271
|
+
(maxSourcesIdx === -1 || i !== maxSourcesIdx + 1) &&
|
|
272
|
+
(researchOutDirIdx === -1 || i !== researchOutDirIdx + 1),
|
|
273
|
+
);
|
|
274
|
+
const engine = rest[0]?.toLowerCase();
|
|
275
|
+
// Read query from stdin when --stdin flag is set (avoids leaking query in process table)
|
|
276
|
+
const useStdin = args.includes("--stdin");
|
|
277
|
+
let query;
|
|
278
|
+
if (useStdin) {
|
|
279
|
+
query = await readStdin();
|
|
280
|
+
} else {
|
|
281
|
+
query = rest.slice(1).join(" ");
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
if (researchMode) {
|
|
285
|
+
if (engine !== "all") {
|
|
286
|
+
process.stderr.write(
|
|
287
|
+
`[greedysearch] Research mode uses all engines; ignoring engine "${engine}".\n`,
|
|
288
|
+
);
|
|
289
|
+
}
|
|
290
|
+
const out = await runResearchMode({
|
|
291
|
+
query: normalizeQuery(query),
|
|
292
|
+
breadth: researchBreadth,
|
|
293
|
+
iterations: researchIterations,
|
|
294
|
+
maxSources: researchMaxSources,
|
|
295
|
+
locale,
|
|
296
|
+
short,
|
|
297
|
+
writeBundle: writeResearchBundle,
|
|
298
|
+
researchOutDir,
|
|
299
|
+
});
|
|
300
|
+
writeOutput(out, outFile, {
|
|
301
|
+
inline,
|
|
302
|
+
synthesize: true,
|
|
303
|
+
query,
|
|
304
|
+
});
|
|
305
|
+
return;
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
if (engine === "all") {
|
|
309
|
+
await cdp(["list"]); // refresh pages cache
|
|
310
|
+
|
|
311
|
+
// Create fresh tabs for each engine in parallel, seeded directly to the
|
|
312
|
+
// engine homepage so extractors can skip the initial navigation.
|
|
313
|
+
const ENGINE_START_URLS = {
|
|
314
|
+
perplexity: "https://www.perplexity.ai/",
|
|
315
|
+
google: "https://www.google.com/",
|
|
316
|
+
"semantic-scholar": "https://www.semanticscholar.org/",
|
|
317
|
+
semanticscholar: "https://www.semanticscholar.org/",
|
|
318
|
+
s2: "https://www.semanticscholar.org/",
|
|
319
|
+
logically: "https://logically.app/research-assistant/",
|
|
320
|
+
};
|
|
321
|
+
const engineTabs = await Promise.all(
|
|
322
|
+
ALL_ENGINES.map((e) => openNewTab(ENGINE_START_URLS[e])),
|
|
323
|
+
);
|
|
324
|
+
// Refresh cache so the new tabs are discoverable by cdp.mjs
|
|
325
|
+
await cdp(["list"]);
|
|
326
|
+
|
|
327
|
+
// Time-bounded per-engine extraction so slow engines don't stall the batch.
|
|
328
|
+
const engineTimeoutFor = (engineName) => {
|
|
329
|
+
if (!legacyFast) return 70000;
|
|
330
|
+
// ChatGPT needs ~25-30s solo; under CDP contention needs more headroom
|
|
331
|
+
return engineName === "chatgpt" ? 60000 : 35000;
|
|
332
|
+
};
|
|
333
|
+
|
|
334
|
+
try {
|
|
335
|
+
const results = await Promise.allSettled(
|
|
336
|
+
ALL_ENGINES.map((e, i) =>
|
|
337
|
+
runExtractor(
|
|
338
|
+
ENGINES[e],
|
|
339
|
+
normalizeQuery(query),
|
|
340
|
+
engineTabs[i],
|
|
341
|
+
short,
|
|
342
|
+
engineTimeoutFor(e),
|
|
343
|
+
locale,
|
|
344
|
+
)
|
|
345
|
+
.then((r) => {
|
|
346
|
+
process.stderr.write(`PROGRESS:${e}:done\n`);
|
|
347
|
+
return { engine: e, ...r };
|
|
348
|
+
})
|
|
349
|
+
.catch((err) => {
|
|
350
|
+
// Do not emit PROGRESS:error yet: Bing/Perplexity may recover in
|
|
351
|
+
// visible mode. Emit the final status after recovery has run.
|
|
352
|
+
throw err;
|
|
353
|
+
}),
|
|
354
|
+
),
|
|
355
|
+
);
|
|
356
|
+
|
|
357
|
+
const out = {};
|
|
358
|
+
for (let i = 0; i < results.length; i++) {
|
|
359
|
+
const r = results[i];
|
|
360
|
+
if (r.status === "fulfilled") {
|
|
361
|
+
out[r.value.engine] = r.value;
|
|
362
|
+
} else {
|
|
363
|
+
const err = r.reason;
|
|
364
|
+
const msg = err?.message || "unknown error";
|
|
365
|
+
out[ALL_ENGINES[i]] = { error: msg };
|
|
366
|
+
if (err?.lastStage) {
|
|
367
|
+
process.stderr.write(
|
|
368
|
+
`[greedysearch] ${ALL_ENGINES[i]} failed at stage '${err.lastStage}': ${msg}\n`,
|
|
369
|
+
);
|
|
370
|
+
}
|
|
371
|
+
if (err?.partialErr) {
|
|
372
|
+
process.stderr.write(
|
|
373
|
+
`[greedysearch] ${ALL_ENGINES[i]} tail stderr:\n${err.partialErr}\n`,
|
|
374
|
+
);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
// Cloudflare/verification recovery: if Perplexity or Bing were blocked
|
|
380
|
+
// in headless mode, retry in visible Chrome to establish cookies,
|
|
381
|
+
// then continue headless with the profile now carrying valid session state.
|
|
382
|
+
// Recovery is allowed even in fast mode because verification failure would
|
|
383
|
+
// otherwise produce no usable result.
|
|
384
|
+
const recoveryCandidates = findHeadlessBlockedEngines(out);
|
|
385
|
+
|
|
386
|
+
if (
|
|
387
|
+
recoveryCandidates.length > 0 &&
|
|
388
|
+
process.env.GREEDY_SEARCH_VISIBLE !== "1"
|
|
389
|
+
) {
|
|
390
|
+
logVisibleRecovery({
|
|
391
|
+
scope: "all",
|
|
392
|
+
phase: "start",
|
|
393
|
+
engines: recoveryCandidates,
|
|
394
|
+
reasons: Object.fromEntries(
|
|
395
|
+
recoveryCandidates.map((engineName) => [
|
|
396
|
+
engineName,
|
|
397
|
+
{
|
|
398
|
+
error: out[engineName]?.error || null,
|
|
399
|
+
envelope: out[engineName]?._envelope || null,
|
|
400
|
+
},
|
|
401
|
+
]),
|
|
402
|
+
),
|
|
403
|
+
});
|
|
404
|
+
process.stderr.write(
|
|
405
|
+
`[greedysearch] 🔓 Headless ${recoveryCandidates.join(", ")} search hit timeout/verification/antibot signals — retrying visible to establish cookies...\n`,
|
|
406
|
+
);
|
|
407
|
+
for (const blockedEngine of recoveryCandidates) {
|
|
408
|
+
process.stderr.write(
|
|
409
|
+
`[greedysearch] ${blockedEngine} recovery starting in visible mode...\n`,
|
|
410
|
+
);
|
|
411
|
+
}
|
|
412
|
+
// Close headless tabs, kill headless Chrome
|
|
413
|
+
await closeTabs(engineTabs);
|
|
414
|
+
await killHeadlessChrome();
|
|
415
|
+
process.env.GREEDY_SEARCH_VISIBLE = "1";
|
|
416
|
+
delete process.env.GREEDY_SEARCH_HEADLESS;
|
|
417
|
+
await ensureChrome();
|
|
418
|
+
await cdp(["list"]);
|
|
419
|
+
|
|
420
|
+
// Retry blocked engines in visible Chrome
|
|
421
|
+
const retryTabs = [];
|
|
422
|
+
let keepVisibleForHuman = false;
|
|
423
|
+
let recovered = 0;
|
|
424
|
+
for (let i = 0; i < recoveryCandidates.length; i++) {
|
|
425
|
+
const tab = await openNewTab();
|
|
426
|
+
retryTabs.push(tab);
|
|
427
|
+
}
|
|
428
|
+
try {
|
|
429
|
+
// First visible retry: navigate to the engine page.
|
|
430
|
+
// Cloudflare/Turnstile may resolve and redirect, disrupting the CDP session
|
|
431
|
+
// ("Inspected target navigated or closed"). If so, the cookies are now cached
|
|
432
|
+
// and a second retry on the same tab should succeed.
|
|
433
|
+
const retries = await Promise.allSettled(
|
|
434
|
+
recoveryCandidates.map((e, i) =>
|
|
435
|
+
runExtractor(ENGINES[e], query, retryTabs[i], short, null, locale)
|
|
436
|
+
.then((r) => ({ engine: e, ...r }))
|
|
437
|
+
.catch((err) => ({ engine: e, error: err.message })),
|
|
438
|
+
),
|
|
439
|
+
);
|
|
440
|
+
const stillBlocked = [];
|
|
441
|
+
const manualVerification = [];
|
|
442
|
+
for (const r of retries) {
|
|
443
|
+
if (r.status === "fulfilled" && !r.value.error) {
|
|
444
|
+
out[r.value.engine] = r.value;
|
|
445
|
+
recovered++;
|
|
446
|
+
process.stderr.write(`PROGRESS:${r.value.engine}:done\n`);
|
|
447
|
+
} else if (r.status === "fulfilled") {
|
|
448
|
+
out[r.value.engine] = r.value;
|
|
449
|
+
stillBlocked.push(r.value.engine);
|
|
450
|
+
if (isManualVerificationError(r.value.error)) {
|
|
451
|
+
manualVerification.push(r.value.engine);
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
if (recovered > 0) {
|
|
456
|
+
process.stderr.write(
|
|
457
|
+
`[greedysearch] ✅ ${recovered}/${recoveryCandidates.length} engine(s) recovered — cookies cached for future headless runs.\n`,
|
|
458
|
+
);
|
|
459
|
+
} else {
|
|
460
|
+
process.stderr.write(
|
|
461
|
+
`[greedysearch] ⚠️ Recovery attempt did not extract an answer — ${recoveryCandidates.join(", ")} may still need manual verification or a DOM fallback.\n`,
|
|
462
|
+
);
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
// Second retry for still-blocked engines: the first retry may have resolved
|
|
466
|
+
// Cloudflare/Turnstile (navigating through the challenge), so cookies are now
|
|
467
|
+
// cached and the page should load without the blocking challenge.
|
|
468
|
+
if (stillBlocked.length > 0) {
|
|
469
|
+
process.stderr.write(
|
|
470
|
+
`[greedysearch] Second visible retry for ${stillBlocked.join(", ")} — Turnstile may have resolved on first attempt...\n`,
|
|
471
|
+
);
|
|
472
|
+
const secondRetries = await Promise.allSettled(
|
|
473
|
+
stillBlocked.map((e) => {
|
|
474
|
+
const idx = recoveryCandidates.indexOf(e);
|
|
475
|
+
return runExtractor(
|
|
476
|
+
ENGINES[e],
|
|
477
|
+
query,
|
|
478
|
+
retryTabs[idx],
|
|
479
|
+
short,
|
|
480
|
+
null,
|
|
481
|
+
locale,
|
|
482
|
+
)
|
|
483
|
+
.then((r) => ({ engine: e, ...r }))
|
|
484
|
+
.catch((err) => ({ engine: e, error: err.message }));
|
|
485
|
+
}),
|
|
486
|
+
);
|
|
487
|
+
const secondStillBlocked = [];
|
|
488
|
+
for (const r of secondRetries) {
|
|
489
|
+
if (r.status === "fulfilled" && !r.value.error) {
|
|
490
|
+
out[r.value.engine] = r.value;
|
|
491
|
+
recovered++;
|
|
492
|
+
process.stderr.write(`PROGRESS:${r.value.engine}:done\n`);
|
|
493
|
+
process.stderr.write(
|
|
494
|
+
`[greedysearch] ✅ ${r.value.engine} recovered on second visible retry.\n`,
|
|
495
|
+
);
|
|
496
|
+
} else {
|
|
497
|
+
secondStillBlocked.push(r.value?.engine || "unknown");
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
stillBlocked.length = 0;
|
|
501
|
+
stillBlocked.push(...secondStillBlocked);
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
logVisibleRecovery({
|
|
505
|
+
scope: "all",
|
|
506
|
+
phase: stillBlocked.length > 0 ? "needs-human" : "success",
|
|
507
|
+
engines: recoveryCandidates,
|
|
508
|
+
results: Object.fromEntries(
|
|
509
|
+
recoveryCandidates.map((engineName) => [
|
|
510
|
+
engineName,
|
|
511
|
+
{
|
|
512
|
+
mode: out[engineName]?._envelope?.mode || null,
|
|
513
|
+
durationMs: out[engineName]?._envelope?.durationMs || null,
|
|
514
|
+
lastStage: out[engineName]?._envelope?.lastStage || null,
|
|
515
|
+
error: out[engineName]?.error || null,
|
|
516
|
+
},
|
|
517
|
+
]),
|
|
518
|
+
),
|
|
519
|
+
});
|
|
520
|
+
|
|
521
|
+
if (stillBlocked.length > 0) {
|
|
522
|
+
for (const blockedEngine of stillBlocked) {
|
|
523
|
+
process.stderr.write(`PROGRESS:${blockedEngine}:needs-human\n`);
|
|
524
|
+
}
|
|
525
|
+
keepVisibleForHuman = true;
|
|
526
|
+
out._needsHumanVerification = {
|
|
527
|
+
engines: stillBlocked,
|
|
528
|
+
message:
|
|
529
|
+
"Visible Chrome is open with the engine page loaded. Solve the Turnstile checkbox or other challenge in the visible window to store cookies. Cookies persist for future runs.",
|
|
530
|
+
};
|
|
531
|
+
process.stderr.write(
|
|
532
|
+
`[greedysearch] 🔓 ${stillBlocked.join(", ")} still blocked — keeping visible Chrome open. Solve the challenge in the window to store cookies, then rerun.\n`,
|
|
533
|
+
);
|
|
534
|
+
// Visible Chrome stays open so the user can interact with any
|
|
535
|
+
// Turnstile/Cloudflare challenge. Once solved, cookies are stored
|
|
536
|
+
// in the shared profile and future headless runs will reuse them.
|
|
537
|
+
}
|
|
538
|
+
} finally {
|
|
539
|
+
if (keepVisibleForHuman) {
|
|
540
|
+
// User must interact — keep visible Chrome open but out of the way
|
|
541
|
+
minimizeChrome().catch(() => {});
|
|
542
|
+
} else {
|
|
543
|
+
// Switch back to headless for synthesis + source fetch.
|
|
544
|
+
// killHeadlessChrome() sends Browser.close first so Chrome flushes
|
|
545
|
+
// its cookie database before the force-kill — cookies are preserved.
|
|
546
|
+
await closeTabs(retryTabs);
|
|
547
|
+
process.stderr.write(
|
|
548
|
+
"[greedysearch] Switching back to headless Chrome...\n",
|
|
549
|
+
);
|
|
550
|
+
await killHeadlessChrome();
|
|
551
|
+
delete process.env.GREEDY_SEARCH_VISIBLE;
|
|
552
|
+
process.env.GREEDY_SEARCH_HEADLESS = "1";
|
|
553
|
+
await ensureChrome();
|
|
554
|
+
await cdp(["list"]);
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
// Clear engineTabs — finally{} closeTabs handles empty arrays gracefully
|
|
559
|
+
engineTabs.length = 0;
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
for (const engineName of ALL_ENGINES) {
|
|
563
|
+
if (!out[engineName]?.error) continue;
|
|
564
|
+
if (recoveryCandidates.includes(engineName)) {
|
|
565
|
+
if (process.env.GREEDY_SEARCH_VISIBLE === "1") {
|
|
566
|
+
process.stderr.write(
|
|
567
|
+
`PROGRESS:${engineName}:${isManualVerificationError(out[engineName].error) ? "needs-human" : "error"}\n`,
|
|
568
|
+
);
|
|
569
|
+
}
|
|
570
|
+
continue;
|
|
571
|
+
}
|
|
572
|
+
process.stderr.write(`PROGRESS:${engineName}:error\n`);
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
// Build a canonical source registry across all engines
|
|
576
|
+
out._sources = buildSourceRegistry(out, query);
|
|
577
|
+
|
|
578
|
+
// Source fetching: default for all "all" searches
|
|
579
|
+
// Fetch all sources in a single batch (concurrency = source count).
|
|
580
|
+
if (shouldFetchSources && out._sources.length > 0) {
|
|
581
|
+
process.stderr.write("PROGRESS:source-fetch:start\n");
|
|
582
|
+
const fetchedSources = await fetchMultipleSources(
|
|
583
|
+
out._sources,
|
|
584
|
+
5,
|
|
585
|
+
8000,
|
|
586
|
+
);
|
|
587
|
+
|
|
588
|
+
out._sources = mergeFetchDataIntoSources(out._sources, fetchedSources);
|
|
589
|
+
out._fetchedSources = writeSourcesToFiles(fetchedSources);
|
|
590
|
+
process.stderr.write("PROGRESS:source-fetch:done\n");
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
// Optional engine-agnostic synthesis for multi-engine searches.
|
|
594
|
+
// Open the synthesizer tab HERE (after source fetch) instead of
|
|
595
|
+
// pre-opening before source fetch. Pre-opening was fragile: Chrome could
|
|
596
|
+
// be killed during visible recovery or idle-timeout between source fetch
|
|
597
|
+
// and synthesis, leaving a stale tab ID that causes "No target matching prefix".
|
|
598
|
+
if (shouldSynthesize) {
|
|
599
|
+
process.stderr.write("PROGRESS:synthesis:start\n");
|
|
600
|
+
process.stderr.write(
|
|
601
|
+
`[greedysearch] Synthesizing results with ${synthesizer}...\n`,
|
|
602
|
+
);
|
|
603
|
+
let synthesisTab = null;
|
|
604
|
+
try {
|
|
605
|
+
synthesisTab = await openNewTab(getSynthesisStartUrl(synthesizer));
|
|
606
|
+
const synthesis = await synthesizeResults(query, out, {
|
|
607
|
+
grounded: groundedSynthesis,
|
|
608
|
+
tabPrefix: synthesisTab,
|
|
609
|
+
visible: process.env.GREEDY_SEARCH_VISIBLE === "1",
|
|
610
|
+
synthesizer,
|
|
611
|
+
});
|
|
612
|
+
out._synthesis = {
|
|
613
|
+
...synthesis,
|
|
614
|
+
synthesized: true,
|
|
615
|
+
};
|
|
616
|
+
process.stderr.write("PROGRESS:synthesis:done\n");
|
|
617
|
+
} catch (e) {
|
|
618
|
+
process.stderr.write(
|
|
619
|
+
`[greedysearch] Synthesis failed: ${e.message}\n`,
|
|
620
|
+
);
|
|
621
|
+
out._synthesis = {
|
|
622
|
+
error: e.message,
|
|
623
|
+
synthesized: false,
|
|
624
|
+
synthesizedBy: synthesizer,
|
|
625
|
+
};
|
|
626
|
+
} finally {
|
|
627
|
+
if (synthesisTab) await closeTab(synthesisTab);
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
if (fetchSource) {
|
|
632
|
+
const top = pickTopSource(out);
|
|
633
|
+
if (top)
|
|
634
|
+
out._topSource = await fetchTopSource(top.canonicalUrl || top.url);
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
// Include confidence metrics for grounded multi-engine searches.
|
|
638
|
+
if (!legacyFast) out._confidence = buildConfidence(out);
|
|
639
|
+
|
|
640
|
+
writeOutput(out, outFile, {
|
|
641
|
+
inline,
|
|
642
|
+
synthesize: shouldSynthesize,
|
|
643
|
+
query,
|
|
644
|
+
});
|
|
645
|
+
return;
|
|
646
|
+
} finally {
|
|
647
|
+
await closeTabs(engineTabs);
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
// Single engine
|
|
652
|
+
const script = ENGINES[engine];
|
|
653
|
+
if (!script) {
|
|
654
|
+
process.stderr.write(
|
|
655
|
+
`Unknown engine: "${engine}"\nAvailable: ${Object.keys(ENGINES).join(", ")}\n`,
|
|
656
|
+
);
|
|
657
|
+
process.exit(1);
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
try {
|
|
661
|
+
const result = await runExtractor(
|
|
662
|
+
script,
|
|
663
|
+
normalizeQuery(query),
|
|
664
|
+
null,
|
|
665
|
+
short,
|
|
666
|
+
null,
|
|
667
|
+
locale,
|
|
668
|
+
);
|
|
669
|
+
if (fetchSource && result.sources?.length > 0) {
|
|
670
|
+
result.topSource = await fetchTopSource(result.sources[0].url);
|
|
671
|
+
}
|
|
672
|
+
writeOutput(result, outFile, { inline, synthesize: false, query });
|
|
673
|
+
} catch (e) {
|
|
674
|
+
const recoveryEngine = script.includes("bing")
|
|
675
|
+
? "bing"
|
|
676
|
+
: script.includes("perplexity")
|
|
677
|
+
? "perplexity"
|
|
678
|
+
: script.includes("chatgpt")
|
|
679
|
+
? "chatgpt"
|
|
680
|
+
: script.includes("semantic-scholar")
|
|
681
|
+
? "semantic-scholar"
|
|
682
|
+
: script.includes("logically")
|
|
683
|
+
? "logically"
|
|
684
|
+
: null;
|
|
685
|
+
const canRetryVisible =
|
|
686
|
+
recoveryEngine &&
|
|
687
|
+
process.env.GREEDY_SEARCH_VISIBLE !== "1" &&
|
|
688
|
+
isHeadlessBlockedResult(e);
|
|
689
|
+
|
|
690
|
+
if (canRetryVisible) {
|
|
691
|
+
logVisibleRecovery({
|
|
692
|
+
scope: "single",
|
|
693
|
+
phase: "start",
|
|
694
|
+
engines: [recoveryEngine],
|
|
695
|
+
reasons: {
|
|
696
|
+
[recoveryEngine]: {
|
|
697
|
+
error: e.message || null,
|
|
698
|
+
envelope: e.envelope || null,
|
|
699
|
+
lastStage: e.lastStage || null,
|
|
700
|
+
},
|
|
701
|
+
},
|
|
702
|
+
});
|
|
703
|
+
process.stderr.write(
|
|
704
|
+
`[greedysearch] 🔓 ${recoveryEngine} blocked in headless — retrying visible to establish cookies...\n`,
|
|
705
|
+
);
|
|
706
|
+
await killHeadlessChrome();
|
|
707
|
+
process.env.GREEDY_SEARCH_VISIBLE = "1";
|
|
708
|
+
delete process.env.GREEDY_SEARCH_HEADLESS;
|
|
709
|
+
await ensureChrome();
|
|
710
|
+
await cdp(["list"]);
|
|
711
|
+
|
|
712
|
+
const retryTab = await openNewTab();
|
|
713
|
+
let keepVisibleForHuman = false;
|
|
714
|
+
try {
|
|
715
|
+
const result = await runExtractor(
|
|
716
|
+
script,
|
|
717
|
+
query,
|
|
718
|
+
retryTab,
|
|
719
|
+
short,
|
|
720
|
+
null,
|
|
721
|
+
locale,
|
|
722
|
+
);
|
|
723
|
+
logVisibleRecovery({
|
|
724
|
+
scope: "single",
|
|
725
|
+
phase: "success",
|
|
726
|
+
engines: [recoveryEngine],
|
|
727
|
+
result: {
|
|
728
|
+
engine: recoveryEngine,
|
|
729
|
+
mode: result._envelope?.mode || null,
|
|
730
|
+
durationMs: result._envelope?.durationMs || null,
|
|
731
|
+
lastStage: result._envelope?.lastStage || null,
|
|
732
|
+
},
|
|
733
|
+
});
|
|
734
|
+
if (fetchSource && result.sources?.length > 0) {
|
|
735
|
+
result.topSource = await fetchTopSource(result.sources[0].url);
|
|
736
|
+
}
|
|
737
|
+
writeOutput(result, outFile, { inline, synthesize: false, query });
|
|
738
|
+
return;
|
|
739
|
+
} catch (retryErr) {
|
|
740
|
+
logVisibleRecovery({
|
|
741
|
+
scope: "single",
|
|
742
|
+
phase: "needs-human",
|
|
743
|
+
engines: [recoveryEngine],
|
|
744
|
+
result: {
|
|
745
|
+
engine: recoveryEngine,
|
|
746
|
+
error: retryErr.message || String(retryErr),
|
|
747
|
+
envelope: retryErr.envelope || null,
|
|
748
|
+
},
|
|
749
|
+
});
|
|
750
|
+
// Any visible retry failure: keep Chrome open so user can solve Turnstile.
|
|
751
|
+
// Once solved, cookies are stored in the shared profile for future headless runs.
|
|
752
|
+
keepVisibleForHuman = true;
|
|
753
|
+
writeOutput(
|
|
754
|
+
{
|
|
755
|
+
query,
|
|
756
|
+
error: retryErr.message,
|
|
757
|
+
_needsHumanVerification: {
|
|
758
|
+
engines: [recoveryEngine],
|
|
759
|
+
message:
|
|
760
|
+
"Visible Chrome is open with the engine page loaded. Solve the Turnstile checkbox or other challenge to store cookies. Cookies persist for future runs.",
|
|
761
|
+
},
|
|
762
|
+
},
|
|
763
|
+
outFile,
|
|
764
|
+
{ inline, synthesize: false, query },
|
|
765
|
+
);
|
|
766
|
+
return;
|
|
767
|
+
} finally {
|
|
768
|
+
if (!keepVisibleForHuman) {
|
|
769
|
+
await closeTab(retryTab);
|
|
770
|
+
await killHeadlessChrome();
|
|
771
|
+
delete process.env.GREEDY_SEARCH_VISIBLE;
|
|
772
|
+
process.env.GREEDY_SEARCH_HEADLESS = "1";
|
|
773
|
+
} else {
|
|
774
|
+
// Minimize the visible window so it's out of the way
|
|
775
|
+
minimizeChrome().catch(() => {});
|
|
776
|
+
}
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
process.stderr.write(`Error: ${e.message}\n`);
|
|
781
|
+
process.exit(1);
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
|
|
785
|
+
/**
 * Choose the single source that should be fetched as the "top" source.
 *
 * The merged `_sources` registry wins when it is a non-empty array; otherwise
 * the first source reported by perplexity, google, or bing (checked in that
 * order) is used.
 *
 * @param {object} out - Combined result object keyed by engine name, optionally
 *   carrying a `_sources` array.
 * @returns {object|null} The selected source entry, or null when no candidate
 *   has any sources.
 */
function pickTopSource(out) {
  const registry = out._sources;
  if (Array.isArray(registry) && registry.length > 0) {
    return registry[0];
  }

  // Fallback order is fixed: the first engine with at least one source wins.
  const preferredEngines = ["perplexity", "google", "bing"];
  const winner = preferredEngines.find(
    (name) => out[name]?.sources?.length > 0,
  );
  return winner ? out[winner].sources[0] : null;
}
|
|
794
|
+
|
|
795
|
+
/**
 * Minimize the visible Chrome window over the DevTools protocol (best-effort).
 *
 * Reads the browser-level WebSocket URL from `http://localhost:9222/json/version`,
 * then sends `Target.getTargets` -> `Browser.getWindowForTarget` ->
 * `Browser.setWindowBounds` to minimize the window hosting the first page
 * target. Every failure (Chrome not running, malformed response, protocol
 * error, no global WebSocket) is deliberately swallowed: minimizing is
 * cosmetic and must never fail or stall a search.
 *
 * Does nothing when GREEDY_SEARCH_HEADLESS is "1" (no window to minimize).
 *
 * @returns {Promise<void>} Resolves once the protocol exchange has been
 *   started; it does not wait for Chrome to acknowledge the minimize.
 */
async function minimizeChrome() {
  // In headless mode (default), there's no window to minimize
  if (process.env.GREEDY_SEARCH_HEADLESS === "1") return;

  try {
    const http = await import("node:http");
    const version = await new Promise((resolve, reject) => {
      const req = http.get("http://localhost:9222/json/version", (res) => {
        let body = "";
        res.setEncoding("utf8");
        res.on("data", (chunk) => {
          body += chunk;
        });
        res.on("error", reject);
        res.on("end", () => {
          // Parse inside the callback's own try/catch: a throw here would
          // otherwise escape the promise and surface as an uncaught exception.
          try {
            resolve(JSON.parse(body));
          } catch (err) {
            reject(err);
          }
        });
      });
      req.on("error", reject);
      // Give up on a debugging port that accepts the connection but never answers.
      req.setTimeout(3000, () => {
        req.destroy(new Error("Timed out reading /json/version"));
      });
    });

    const wsUrl = version?.webSocketDebuggerUrl;
    const WebSocket = globalThis.WebSocket;
    if (!wsUrl || !WebSocket) return;

    const ws = new WebSocket(wsUrl);
    let requestId = 0;
    // Maps an in-flight request id to the handler for its successful result.
    const pending = new Map();

    const closeSocket = () => ws.close();

    // Hard cap on the whole exchange; cleared on close so the timer does not
    // keep the process alive after the socket is already gone.
    const guard = setTimeout(closeSocket, 3000);
    ws.onclose = () => clearTimeout(guard);

    const send = (method, params, onResult) => {
      const id = ++requestId;
      pending.set(id, onResult);
      ws.send(JSON.stringify({ id, method, params }));
    };

    ws.onopen = () => {
      send("Target.getTargets", {}, ({ targetInfos = [] }) => {
        const pageTarget = targetInfos.find((t) => t.type === "page");
        if (!pageTarget) {
          closeSocket();
          return;
        }

        send(
          "Browser.getWindowForTarget",
          { targetId: pageTarget.targetId },
          ({ windowId }) => {
            // Close as soon as Chrome acknowledges; the guard timer covers
            // the case where no acknowledgement ever arrives.
            send(
              "Browser.setWindowBounds",
              { windowId, bounds: { windowState: "minimized" } },
              closeSocket,
            );
          },
        );
      });
    };

    ws.onmessage = (event) => {
      let msg;
      try {
        msg = JSON.parse(event.data);
      } catch {
        return; // Ignore frames that are not JSON.
      }
      const onResult = pending.get(msg?.id);
      if (!onResult) return; // Protocol event or unknown id.
      pending.delete(msg.id);
      if (msg.error) closeSocket();
      else onResult(msg.result ?? {});
    };
  } catch {
    // Best-effort
  }
}
|
|
880
|
+
|
|
881
|
+
// Script entry point: run the CLI, then do housekeeping whether main()
// resolved or rejected.
main().finally(async function afterSearch() {
  // Refresh the activity timestamp used by the headless idle timeout.
  touchActivity();
  // Fire-and-forget: minimizing is best-effort and is intentionally not awaited.
  void minimizeChrome().catch(() => {});
});
|