@apmantza/greedysearch-pi 1.9.2 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/CHANGELOG.md +132 -2
  2. package/README.md +82 -47
  3. package/bin/cdp.mjs +1153 -1108
  4. package/bin/launch.mjs +9 -0
  5. package/bin/search.mjs +318 -81
  6. package/extractors/bing-copilot.mjs +48 -18
  7. package/extractors/chatgpt.mjs +553 -0
  8. package/extractors/common.mjs +213 -22
  9. package/extractors/consensus.mjs +655 -0
  10. package/extractors/consent.mjs +182 -18
  11. package/extractors/gemini.mjs +350 -217
  12. package/extractors/google-ai.mjs +129 -128
  13. package/extractors/logically.mjs +629 -0
  14. package/extractors/perplexity.mjs +547 -217
  15. package/extractors/selectors.mjs +3 -2
  16. package/extractors/semantic-scholar.mjs +219 -0
  17. package/package.json +8 -4
  18. package/skills/greedy-search/skill.md +20 -12
  19. package/src/fetcher.mjs +23 -1
  20. package/src/formatters/results.ts +185 -128
  21. package/src/search/browser-lifecycle.mjs +27 -5
  22. package/src/search/challenge-detect.mjs +205 -0
  23. package/src/search/chrome.mjs +653 -590
  24. package/src/search/constants.mjs +155 -39
  25. package/src/search/engines.mjs +114 -76
  26. package/src/search/fetch-source.mjs +566 -451
  27. package/src/search/pdf.mjs +68 -0
  28. package/src/search/progress.mjs +145 -0
  29. package/src/search/recovery.mjs +73 -45
  30. package/src/search/research.mjs +1419 -62
  31. package/src/search/scale-aware.mjs +93 -0
  32. package/src/search/simple-research.mjs +520 -0
  33. package/src/search/sources.mjs +52 -22
  34. package/src/search/synthesis-runner.mjs +105 -26
  35. package/src/search/synthesis.mjs +286 -246
  36. package/src/tools/greedy-search-handler.ts +129 -59
  37. package/src/tools/shared.ts +312 -186
  38. package/src/types.ts +110 -104
  39. package/test.mjs +537 -18
@@ -1,128 +1,129 @@
1
- #!/usr/bin/env node
2
-
3
- // extractors/google-ai.mjs
4
- // Navigate Google AI Mode (udm=50), wait for answer, return clean answer + sources.
5
- //
6
- // Usage:
7
- // node extractors/google-ai.mjs "<query>" [--tab <prefix>]
8
- //
9
- // Output (stdout): JSON { answer, sources, query, url }
10
- // Errors go to stderr only — stdout is always clean JSON for piping.
11
-
12
- import {
13
- cdp,
14
- formatAnswer,
15
- getOrOpenTab,
16
- handleError,
17
- jitter,
18
- outputJson,
19
- parseArgs,
20
- prepareArgs,
21
- TIMING,
22
- validateQuery,
23
- waitForStreamComplete,
24
- } from "./common.mjs";
25
- import { dismissConsent, handleVerification } from "./consent.mjs";
26
- import { SELECTORS } from "./selectors.mjs";
27
-
28
- const S = SELECTORS.google;
29
-
30
- const MIN_ANSWER_LENGTH = 50;
31
-
32
- async function extractAnswer(tab) {
33
- const excludeFilter = S.sourceExclude
34
- .map((e) => `!a.href.includes('${e}')`)
35
- .join(" && ");
36
- const raw = await cdp([
37
- "eval",
38
- tab,
39
- String.raw`
40
- (function() {
41
- var el = document.querySelector('${S.answerContainer}');
42
- if (!el) return JSON.stringify({ answer: '', sources: [] });
43
- var answer = el.innerText.trim();
44
- var sources = Array.from(document.querySelectorAll('${S.sourceLink}'))
45
- .filter(a => ${excludeFilter})
46
- .map(a => ({ url: a.href.split('#')[0], title: (a.closest('${S.sourceHeadingParent}')?.querySelector('h3, [role=heading]')?.innerText || a.innerText?.trim().split('\n')[0] || '').slice(0, 100) }))
47
- .filter(s => s.url && s.url.length > 10)
48
- .filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
49
- .slice(0, 10);
50
- return JSON.stringify({ answer, sources });
51
- })()
52
- `,
53
- ]);
54
- return JSON.parse(raw);
55
- }
56
-
57
- // ============================================================================
58
- // Main
59
- // ============================================================================
60
-
61
- const USAGE =
62
- 'Usage: node extractors/google-ai.mjs "<query>" [--tab <prefix>]\n';
63
-
64
- async function main() {
65
- const args = await prepareArgs(process.argv.slice(2));
66
- validateQuery(args, USAGE);
67
-
68
- const { query, tabPrefix, short, locale } = parseArgs(args);
69
-
70
- try {
71
- // Only refresh page list when creating a fresh tab (no prefix provided)
72
- if (!tabPrefix) await cdp(["list"]);
73
- const tab = await getOrOpenTab(tabPrefix);
74
-
75
- // Build URL with language parameter (default to English)
76
- const langParam = locale ? `&hl=${encodeURIComponent(locale)}` : "&hl=en";
77
- const url = `https://www.google.com/search?q=${encodeURIComponent(query)}&udm=50${langParam}`;
78
- await new Promise((r) => setTimeout(r, jitter(TIMING.postNav)));
79
- await dismissConsent(tab, cdp);
80
-
81
- // If consent redirected us away, navigate back
82
- const currentUrl = await cdp(["eval", tab, "document.location.href"]).catch(
83
- () => "",
84
- );
85
- if (!currentUrl.includes("google.com/search")) {
86
- await cdp(["nav", tab, url], 20000);
87
- await new Promise((r) => setTimeout(r, jitter(TIMING.postNav)));
88
- }
89
-
90
- // Handle "verify you're human" — auto-click simple buttons, wait for user on hard CAPTCHA
91
- const verifyResult = await handleVerification(tab, cdp, 10000);
92
- if (verifyResult === "needs-human")
93
- throw new Error(
94
- "Google verification required — could not be completed automatically",
95
- );
96
- if (verifyResult === "clicked" || verifyResult === "cleared-by-user") {
97
- // Re-navigate to the search URL after verification
98
- await cdp(["nav", tab, url], 20000);
99
- await new Promise((r) => setTimeout(r, jitter(TIMING.postNav)));
100
- }
101
-
102
- await waitForStreamComplete(tab, {
103
- timeout: 30000,
104
- selector: `document.querySelector('${S.answerContainer}')`,
105
- minLength: MIN_ANSWER_LENGTH,
106
- });
107
-
108
- const { answer, sources } = await extractAnswer(tab);
109
- if (!answer)
110
- throw new Error(
111
- "No answer extracted — Google AI Mode may not have responded",
112
- );
113
-
114
- const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
115
- () => url,
116
- );
117
- outputJson({
118
- query,
119
- url: finalUrl,
120
- answer: formatAnswer(answer, short),
121
- sources,
122
- });
123
- } catch (e) {
124
- handleError(e);
125
- }
126
- }
127
-
128
- main();
1
+ #!/usr/bin/env node
2
+
3
+ // extractors/google-ai.mjs
4
+ // Navigate Google AI Mode (udm=50), wait for answer, return clean answer + sources.
5
+ //
6
+ // Usage:
7
+ // node extractors/google-ai.mjs "<query>" [--tab <prefix>]
8
+ //
9
+ // Output (stdout): JSON { answer, sources, query, url }
10
+ // Errors go to stderr only — stdout is always clean JSON for piping.
11
+
12
+ import {
13
+ cdp,
14
+ formatAnswer,
15
+ getOrOpenTab,
16
+ handleError,
17
+ jitter,
18
+ outputJson,
19
+ parseArgs,
20
+ prepareArgs,
21
+ TIMING,
22
+ validateQuery,
23
+ waitForStreamComplete,
24
+ } from "./common.mjs";
25
+ import { dismissConsent, handleVerification } from "./consent.mjs";
26
+ import { SELECTORS } from "./selectors.mjs";
27
+
28
+ const S = SELECTORS.google;
29
+
30
+ const MIN_ANSWER_LENGTH = 50;
31
+
32
+ async function extractAnswer(tab) {
33
+ const excludeFilter = S.sourceExclude
34
+ .map((e) => `!a.href.includes('${e}')`)
35
+ .join(" && ");
36
+ const raw = await cdp([
37
+ "eval",
38
+ tab,
39
+ String.raw`
40
+ (function() {
41
+ var el = document.querySelector('${S.answerContainer}');
42
+ if (!el) return JSON.stringify({ answer: '', sources: [] });
43
+ var answer = el.innerText.trim();
44
+ var sources = Array.from(document.querySelectorAll('${S.sourceLink}'))
45
+ .filter(a => ${excludeFilter})
46
+ .map(a => ({ url: a.href.split('#')[0], title: (a.closest('${S.sourceHeadingParent}')?.querySelector('h3, [role=heading]')?.innerText || a.innerText?.trim().split('\n')[0] || '').slice(0, 100) }))
47
+ .filter(s => s.url && s.url.length > 10)
48
+ .filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
49
+ .slice(0, 10);
50
+ return JSON.stringify({ answer, sources });
51
+ })()
52
+ `,
53
+ ]);
54
+ return JSON.parse(raw);
55
+ }
56
+
57
+ // ============================================================================
58
+ // Main
59
+ // ============================================================================
60
+
61
+ const USAGE =
62
+ 'Usage: node extractors/google-ai.mjs "<query>" [--tab <prefix>]\n';
63
+
64
+ async function main() {
65
+ const args = await prepareArgs(process.argv.slice(2));
66
+ validateQuery(args, USAGE);
67
+
68
+ const { query, tabPrefix, short, locale } = parseArgs(args);
69
+
70
+ try {
71
+ // Only refresh page list when creating a fresh tab (no prefix provided)
72
+ if (!tabPrefix) await cdp(["list"]);
73
+ const tab = await getOrOpenTab(tabPrefix);
74
+
75
+ // Build URL with language parameter (default to English)
76
+ const langParam = locale ? `&hl=${encodeURIComponent(locale)}` : "&hl=en";
77
+ const url = `https://www.google.com/search?q=${encodeURIComponent(query)}&udm=50${langParam}`;
78
+ await new Promise((r) => setTimeout(r, jitter(TIMING.postNav)));
79
+ await dismissConsent(tab, cdp);
80
+
81
+ // If consent redirected us away, navigate back
82
+ const currentUrl = await cdp(["eval", tab, "document.location.href"]).catch(
83
+ () => "",
84
+ );
85
+ if (!currentUrl.includes("google.com/search")) {
86
+ await cdp(["nav", tab, url], 20000);
87
+ await new Promise((r) => setTimeout(r, jitter(TIMING.postNav)));
88
+ }
89
+
90
+ // Handle "verify you're human" — auto-click simple buttons, wait for user on hard CAPTCHA
91
+ const verifyResult = await handleVerification(tab, cdp, 10000);
92
+ if (verifyResult === "needs-human")
93
+ throw new Error(
94
+ "Google verification required — could not be completed automatically",
95
+ );
96
+ if (verifyResult === "clicked" || verifyResult === "cleared-by-user") {
97
+ // Re-navigate to the search URL after verification
98
+ await cdp(["nav", tab, url], 20000);
99
+ await new Promise((r) => setTimeout(r, jitter(TIMING.postNav)));
100
+ }
101
+
102
+ await waitForStreamComplete(tab, {
103
+ timeout: 30000,
104
+ stableRounds: 5,
105
+ selector: `document.querySelector('${S.answerContainer}')`,
106
+ minLength: MIN_ANSWER_LENGTH,
107
+ });
108
+
109
+ const { answer, sources } = await extractAnswer(tab);
110
+ if (!answer)
111
+ throw new Error(
112
+ "No answer extracted — Google AI Mode may not have responded",
113
+ );
114
+
115
+ const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
116
+ () => url,
117
+ );
118
+ outputJson({
119
+ query,
120
+ url: finalUrl,
121
+ answer: formatAnswer(answer, short),
122
+ sources,
123
+ });
124
+ } catch (e) {
125
+ handleError(e);
126
+ }
127
+ }
128
+
129
+ main();