@apmantza/greedysearch-pi 1.9.2 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/CHANGELOG.md +132 -2
  2. package/README.md +82 -47
  3. package/bin/cdp.mjs +1153 -1108
  4. package/bin/launch.mjs +9 -0
  5. package/bin/search.mjs +318 -81
  6. package/extractors/bing-copilot.mjs +48 -18
  7. package/extractors/chatgpt.mjs +553 -0
  8. package/extractors/common.mjs +213 -22
  9. package/extractors/consensus.mjs +655 -0
  10. package/extractors/consent.mjs +182 -18
  11. package/extractors/gemini.mjs +350 -217
  12. package/extractors/google-ai.mjs +129 -128
  13. package/extractors/logically.mjs +629 -0
  14. package/extractors/perplexity.mjs +547 -217
  15. package/extractors/selectors.mjs +3 -2
  16. package/extractors/semantic-scholar.mjs +219 -0
  17. package/package.json +8 -4
  18. package/skills/greedy-search/skill.md +20 -12
  19. package/src/fetcher.mjs +23 -1
  20. package/src/formatters/results.ts +185 -128
  21. package/src/search/browser-lifecycle.mjs +27 -5
  22. package/src/search/challenge-detect.mjs +205 -0
  23. package/src/search/chrome.mjs +653 -590
  24. package/src/search/constants.mjs +155 -39
  25. package/src/search/engines.mjs +114 -76
  26. package/src/search/fetch-source.mjs +566 -451
  27. package/src/search/pdf.mjs +68 -0
  28. package/src/search/progress.mjs +145 -0
  29. package/src/search/recovery.mjs +73 -45
  30. package/src/search/research.mjs +1419 -62
  31. package/src/search/scale-aware.mjs +93 -0
  32. package/src/search/simple-research.mjs +520 -0
  33. package/src/search/sources.mjs +52 -22
  34. package/src/search/synthesis-runner.mjs +105 -26
  35. package/src/search/synthesis.mjs +286 -246
  36. package/src/tools/greedy-search-handler.ts +129 -59
  37. package/src/tools/shared.ts +312 -186
  38. package/src/types.ts +110 -104
  39. package/test.mjs +537 -18
@@ -1,217 +1,350 @@
1
- #!/usr/bin/env node
2
-
3
- // extractors/gemini.mjs
4
- // Navigate gemini.google.com/app, submit query, wait for answer, return clean answer + sources.
5
- //
6
- // Usage:
7
- // node extractors/gemini.mjs "<query>" [--tab <prefix>]
8
- //
9
- // Output (stdout): JSON { answer, sources, query, url }
10
- // Errors go to stderr only — stdout is always clean JSON for piping.
11
-
12
- import {
13
- cdp,
14
- formatAnswer,
15
- getOrOpenTab,
16
- handleError,
17
- injectClipboardInterceptor,
18
- jitter,
19
- outputJson,
20
- parseArgs,
21
- parseSourcesFromMarkdown,
22
- prepareArgs,
23
- TIMING,
24
- validateQuery,
25
- waitForSelector,
26
- waitForStreamComplete,
27
- } from "./common.mjs";
28
- import { dismissConsent, handleVerification } from "./consent.mjs";
29
- import { SELECTORS } from "./selectors.mjs";
30
-
31
- const S = SELECTORS.gemini;
32
- const GLOBAL_VAR = "__geminiClipboard";
33
-
34
- // ============================================================================
35
- // Gemini-specific helpers
36
- // ============================================================================
37
-
38
- async function typeIntoGemini(tab, text) {
39
- // 1. Focus the input area via click (more reliable than eval focus for shadow-DOM editors)
40
- await cdp(["click", tab, S.input]);
41
- await new Promise((r) => setTimeout(r, jitter(200)));
42
-
43
- // 2. Type using CDP Input.insertText (more reliable than document.execCommand)
44
- await cdp(["type", tab, text]);
45
- await new Promise((r) => setTimeout(r, jitter(300)));
46
-
47
- // 3. Verify the text was actually inserted
48
- const inserted = await cdp([
49
- "eval",
50
- tab,
51
- `(function() {
52
- var el = document.querySelector('${S.input}');
53
- if (!el) return false;
54
- var content = el.innerText || el.textContent || '';
55
- return content.trim().length >= ${Math.floor(text.length * 0.8)};
56
- })()`,
57
- ]);
58
- if (inserted !== "true") {
59
- throw new Error(
60
- "Gemini input field did not accept text — input verification failed",
61
- );
62
- }
63
- }
64
-
65
- async function scrollToBottom(tab) {
66
- await cdp([
67
- "eval",
68
- tab,
69
- `(function() {
70
- const chat = document.querySelector('chat-window, [role="main"], main') || document.body;
71
- chat.scrollTo ? chat.scrollTo({ top: chat.scrollHeight, behavior: 'smooth' }) : window.scrollTo(0, document.body.scrollHeight);
72
- })()`,
73
- ]);
74
- }
75
-
76
- async function extractAnswer(tab, query = "") {
77
- const queryNorm = query.toLowerCase().trim();
78
-
79
- // Wait for the assistant response copy button to appear.
80
- // A fresh conversation has 1 copy button (user message); after the
81
- // assistant responds there are 2+. This prevents clicking the user's
82
- // copy button before React hydrates the assistant's.
83
- let copyReady = false;
84
- const copyDeadline = Date.now() + 12000;
85
- while (Date.now() < copyDeadline) {
86
- const count = await cdp([
87
- "eval",
88
- tab,
89
- `document.querySelectorAll('${S.copyButton}').length`,
90
- ]);
91
- if (parseInt(count, 10) >= 2) {
92
- copyReady = true;
93
- break;
94
- }
95
- await new Promise((r) => setTimeout(r, 800));
96
- }
97
- if (!copyReady) {
98
- console.error("[gemini] Warning: assistant copy button did not appear");
99
- }
100
-
101
- // Click the LAST copy button (assistant's response at the bottom)
102
- await cdp([
103
- "eval",
104
- tab,
105
- `(() => {
106
- const buttons = document.querySelectorAll('${S.copyButton}');
107
- buttons[buttons.length - 1]?.click();
108
- })()`,
109
- ]);
110
- await new Promise((r) => setTimeout(r, 600));
111
-
112
- let answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
113
-
114
- // Retry once if clipboard contains the user's query instead of the response.
115
- // This can happen when the assistant response hasn't rendered its copy button yet.
116
- if (
117
- answer &&
118
- queryNorm &&
119
- (answer.toLowerCase().trim() === queryNorm ||
120
- answer.trim().length < queryNorm.length)
121
- ) {
122
- console.error("[gemini] Clipboard echoed query, retrying in 2s...");
123
- await new Promise((r) => setTimeout(r, 2000));
124
- await cdp([
125
- "eval",
126
- tab,
127
- `(() => {
128
- const buttons = document.querySelectorAll('${S.copyButton}');
129
- buttons[buttons.length - 1]?.click();
130
- })()`,
131
- ]);
132
- await new Promise((r) => setTimeout(r, 600));
133
- answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
134
- }
135
-
136
- if (!answer) throw new Error("Clipboard interceptor returned empty text");
137
-
138
- const sources = parseSourcesFromMarkdown(answer);
139
- return { answer: answer.trim(), sources };
140
- }
141
-
142
- // ============================================================================
143
- // Main
144
- // ============================================================================
145
-
146
- const USAGE = 'Usage: node extractors/gemini.mjs "<query>" [--tab <prefix>]\n';
147
-
148
- async function main() {
149
- const args = await prepareArgs(process.argv.slice(2));
150
- validateQuery(args, USAGE);
151
-
152
- const { query, tabPrefix, short } = parseArgs(args);
153
-
154
- try {
155
- await cdp(["list"]);
156
- const tab = await getOrOpenTab(tabPrefix);
157
-
158
- // Skip navigation if tab was pre-seeded to Gemini (e.g. by search.mjs
159
- // opening the tab in parallel with source fetch to save ~4s nav time).
160
- const currentUrl = await cdp(["eval", tab, "document.location.href"]).catch(() => "");
161
- let onGemini = false;
162
- try {
163
- const host = new URL(currentUrl).hostname.toLowerCase();
164
- onGemini = host === "gemini.google.com" || host.endsWith(".gemini.google.com");
165
- } catch {}
166
-
167
- if (!onGemini) {
168
- await cdp(["nav", tab, "https://gemini.google.com/app"], 20000);
169
- await new Promise((r) => setTimeout(r, 600));
170
- }
171
- await dismissConsent(tab, cdp);
172
- await handleVerification(tab, cdp, 10000);
173
-
174
- // Wait for input to be ready
175
- await waitForSelector(tab, S.input, 8000, TIMING.inputPoll);
176
- await new Promise((r) => setTimeout(r, jitter(TIMING.postClick)));
177
-
178
- await injectClipboardInterceptor(tab, GLOBAL_VAR);
179
- await typeIntoGemini(tab, query);
180
- await new Promise((r) => setTimeout(r, jitter(TIMING.postType)));
181
-
182
- await cdp([
183
- "eval",
184
- tab,
185
- `document.querySelector('${S.sendButton}')?.click()`,
186
- ]);
187
-
188
- // Wait for Gemini's response to finish streaming before extracting.
189
- // Periodic scrolling keeps lazy-loaded content triggered in the viewport.
190
- let pollTick = 0;
191
- const scrollInterval = setInterval(() => {
192
- if (++pollTick % 10 === 0) scrollToBottom(tab).catch(() => null);
193
- }, 6000);
194
- try {
195
- await waitForStreamComplete(tab, { timeout: 45000, minLength: 50 });
196
- } finally {
197
- clearInterval(scrollInterval);
198
- }
199
-
200
- const { answer, sources } = await extractAnswer(tab, query);
201
- if (!answer) throw new Error("No answer captured from Gemini clipboard");
202
-
203
- const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
204
- () => "https://gemini.google.com/app",
205
- );
206
- outputJson({
207
- query,
208
- url: finalUrl,
209
- answer: formatAnswer(answer, short),
210
- sources,
211
- });
212
- } catch (e) {
213
- handleError(e);
214
- }
215
- }
216
-
217
- main();
1
+ #!/usr/bin/env node
2
+
3
+ // extractors/gemini.mjs
4
+ // Navigate gemini.google.com/app, submit query, wait for answer, return clean answer + sources.
5
+ //
6
+ // Usage:
7
+ // node extractors/gemini.mjs "<query>" [--tab <prefix>]
8
+ //
9
+ // Output (stdout): JSON { answer, sources, query, url }
10
+ // Errors go to stderr only — stdout is always clean JSON for piping.
11
+
12
+ import {
13
+ cdp,
14
+ cdpWithInput,
15
+ formatAnswer,
16
+ getOrOpenTab,
17
+ handleError,
18
+ injectClipboardInterceptor,
19
+ jitter,
20
+ outputJson,
21
+ parseArgs,
22
+ parseSourcesFromMarkdown,
23
+ prepareArgs,
24
+ TIMING,
25
+ validateQuery,
26
+ waitForSelector,
27
+ waitForStreamComplete,
28
+ } from "./common.mjs";
29
+ import { ensureChrome } from "../src/search/chrome.mjs";
30
+ import { dismissConsent, handleVerification } from "./consent.mjs";
31
+ import { SELECTORS } from "./selectors.mjs";
32
+
33
+ const S = SELECTORS.gemini;
34
+ const GLOBAL_VAR = "__geminiClipboard";
35
+
36
+ // ============================================================================
37
+ // Gemini-specific helpers
38
+ // ============================================================================
39
+
40
/**
 * Type `text` into Gemini's prompt editor and confirm that it landed.
 *
 * Clicks the editor to focus it, sends the text through the cdp `type`
 * command on stdin, then reads the editor back and compares lengths.
 *
 * @param {string} tab - Tab identifier forwarded to the cdp helpers.
 * @param {string} text - Prompt text to insert.
 * @returns {Promise<void>}
 * @throws {Error} If the editor ends up holding less than 80% of the
 *   (trimmed) prompt length.
 */
async function typeIntoGemini(tab, text) {
  // 1. Focus the input area via click (more reliable than eval focus for
  //    shadow-DOM editors).
  await cdp(["click", tab, S.input]);
  await new Promise((r) => setTimeout(r, jitter(200)));

  // 2. Type using CDP Input.insertText (more reliable than
  //    document.execCommand). Long research prompts go through stdin so
  //    Windows does not reject the cdp.mjs process spawn with ENAMETOOLONG.
  await cdpWithInput(["type", tab, "--stdin"], text);
  await new Promise((r) => setTimeout(r, jitter(300)));

  // 3. Verify the text was actually inserted. The page-side content is
  //    trimmed before measuring, so the threshold is derived from the trimmed
  //    prompt as well; otherwise heavy leading/trailing whitespace could fail
  //    a correct insertion.
  const minChars = Math.floor(text.trim().length * 0.8);
  // JSON.stringify yields a valid JS string literal even when the selector
  // itself contains quote characters (e.g. [aria-label='…']).
  const selectorLiteral = JSON.stringify(S.input);
  const inserted = await cdp([
    "eval",
    tab,
    `(function() {
      var el = document.querySelector(${selectorLiteral});
      if (!el) return false;
      var content = el.innerText || el.textContent || '';
      return content.trim().length >= ${minChars};
    })()`,
  ]);
  if (inserted !== "true") {
    throw new Error(
      "Gemini input field did not accept text — input verification failed",
    );
  }
}
68
+
69
/**
 * Scroll the Gemini conversation to its end.
 *
 * Runs a page-side script that picks the first match of
 * `chat-window, [role="main"], main` (falling back to `document.body`) and
 * scrolls it to its full scroll height.
 *
 * @param {string} tab - Tab identifier forwarded to the cdp helper.
 * @returns {Promise<void>}
 */
async function scrollToBottom(tab) {
  const pageScript = [
    "(function() {",
    "const chat = document.querySelector('chat-window, [role=\"main\"], main') || document.body;",
    "chat.scrollTo ? chat.scrollTo({ top: chat.scrollHeight, behavior: 'smooth' }) : window.scrollTo(0, document.body.scrollHeight);",
    "})()",
  ].join("\n");

  await cdp(["eval", tab, pageScript]);
}
79
+
80
/**
 * Read the assistant response straight from the DOM.
 *
 * Used as a fallback when the copy-button click captures the user's query
 * text instead of the response (which happens when the response never
 * rendered, or when the clicked copy button was not the assistant's).
 *
 * The page-side script polls every 500 ms for up to 6 s and reads the LAST
 * <model-response> on the page, so a reused tab with earlier turns yields the
 * newest answer rather than a stale one. It returns the response text and up
 * to 10 de-duplicated links found inside it.
 *
 * @param {string} tab - Tab identifier forwarded to the cdp helper.
 * @returns {Promise<{answer: string, sources: Array<{title: string, url: string}>}>}
 *   Always an object; `answer` is "" and `sources` is [] when nothing usable
 *   could be read or the eval output was not the expected JSON.
 */
async function extractAnswerFromDom(tab) {
  const raw = await cdp(
    [
      "eval",
      tab,
      String.raw`
        new Promise((resolve) => {
          const deadline = Date.now() + 6000;
          function tryExtract() {
            const responses = document.querySelectorAll('model-response');
            const resp = responses[responses.length - 1];
            if (resp) {
              const text = (resp.innerText || resp.textContent || '').trim();
              // Drop the first line, which is assumed to be the UI label
              // ("Gemini said" or its localized form).
              const idx = text.indexOf('\n');
              const answer = idx >= 0 ? text.slice(idx + 1).trim() : text;
              if (answer) {
                const seen = new Set();
                const sources = [];
                for (const link of resp.querySelectorAll('a[href]')) {
                  const url = link.href;
                  if (!url || seen.has(url)) continue;
                  seen.add(url);
                  const title = (link.innerText || link.textContent || '').replace(/\s+/g, ' ').trim();
                  sources.push({ title, url });
                  if (sources.length >= 10) break;
                }
                return resolve(JSON.stringify({ answer, sources }));
              }
            }
            if (Date.now() < deadline) {
              setTimeout(tryExtract, 500);
            } else {
              resolve(JSON.stringify({ answer: '', sources: [] }));
            }
          }
          tryExtract();
        })
      `,
    ],
    // Outer cdp timeout sits above the 6 s in-page deadline.
    8000,
  );

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Best-effort fallback: unparseable eval output means "nothing found".
    return { answer: "", sources: [] };
  }
  // JSON.parse can legitimately produce null or a primitive; callers read
  // `.answer` / `.sources`, so always hand back the documented shape.
  if (parsed === null || typeof parsed !== "object") {
    return { answer: "", sources: [] };
  }
  return {
    answer: typeof parsed.answer === "string" ? parsed.answer : "",
    sources: Array.isArray(parsed.sources) ? parsed.sources : [],
  };
}
133
+
134
// Page-side expression resolving to the newest <model-response>, or undefined
// when none exists. Using the last one keeps a reused conversation tab from
// yielding an earlier turn's answer.
const LATEST_MODEL_RESPONSE_JS =
  "Array.from(document.querySelectorAll('model-response')).pop()";

/**
 * True when the clipboard text looks like the user's query rather than the
 * assistant's reply: either an exact (case-insensitive) match, or shorter
 * than the query itself.
 */
function looksLikeQueryEcho(answer, queryNorm) {
  if (!queryNorm) return false;
  const trimmed = answer.trim();
  return (
    trimmed.toLowerCase() === queryNorm || trimmed.length < queryNorm.length
  );
}

/**
 * Click the copy button inside the newest <model-response>, wait 600 ms, and
 * return whatever the clipboard interceptor stored in `window[GLOBAL_VAR]`.
 */
async function copyLatestResponse(tab) {
  const status = await cdp([
    "eval",
    tab,
    `(() => {
      const resp = ${LATEST_MODEL_RESPONSE_JS};
      if (!resp) return 'no-model-response';
      const btn = resp.querySelector(${JSON.stringify(S.copyButton)});
      if (!btn) return 'no-copy-button';
      btn.click();
      return 'clicked';
    })()`,
  ]);
  if (String(status).trim() !== "clicked") {
    // Surface why the clipboard may be empty instead of discarding the status.
    console.error(`[gemini] Warning: copy button not clicked (${status})`);
  }
  await new Promise((r) => setTimeout(r, 600));
  return cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
}

/**
 * Capture Gemini's answer for the current conversation.
 *
 * Flow:
 *  1. Poll (up to 12 s, every 800 ms) until the newest <model-response> holds
 *     more than 60 characters of text. Gemini renders a short header
 *     ("Gemini said" plus UI chrome) before the body arrives, so a small
 *     threshold would resolve at the header stage.
 *  2. Click the copy button scoped to that response, not the last copy button
 *     on the page, since the page carries other copy icons (copy link, copy
 *     code, etc.). Then read the intercepted clipboard text.
 *  3. If the clipboard merely echoes the query, wait 2 s and try once more.
 *  4. If the clipboard is empty or still echoes the query, fall back to
 *     reading the response text from the DOM.
 *
 * @param {string} tab - Tab identifier forwarded to the cdp helper.
 * @param {string} [query=""] - The submitted query, used for echo detection.
 * @returns {Promise<{answer: string, sources: Array<object>}>} Trimmed answer
 *   and up to 10 sources de-duplicated by URL (DOM links first, then links
 *   parsed from the answer markdown).
 * @throws {Error} If neither clipboard nor DOM produced any text.
 */
async function extractAnswer(tab, query = "") {
  const queryNorm = query.toLowerCase().trim();

  let modelReady = false;
  const modelDeadline = Date.now() + 12000;
  while (Date.now() < modelDeadline) {
    const ready = await cdp([
      "eval",
      tab,
      `(() => {
        const r = ${LATEST_MODEL_RESPONSE_JS};
        if (!r) return false;
        // Must have content beyond the locale-specific label
        // ("Gemini said" / "Το Gemini είπε" / etc.).
        return (r.innerText || '').trim().length > 60;
      })()`,
    ]);
    if (ready === "true") {
      modelReady = true;
      break;
    }
    await new Promise((r) => setTimeout(r, 800));
  }
  if (!modelReady) {
    console.error("[gemini] Warning: model-response did not render content");
  }

  let answer = await copyLatestResponse(tab);

  // Retry once if the clipboard contains the user's query instead of the
  // response. This can happen when the assistant response hasn't rendered its
  // copy button yet.
  if (answer && looksLikeQueryEcho(answer, queryNorm)) {
    console.error("[gemini] Clipboard echoed query, retrying in 2s...");
    await new Promise((r) => setTimeout(r, 2000));
    answer = await copyLatestResponse(tab);
  }

  // DOM fallback: covers a copy button that never rendered or a click that
  // did not fire.
  let domFallback = null;
  if (!answer || looksLikeQueryEcho(answer, queryNorm)) {
    domFallback = await extractAnswerFromDom(tab);
    if (domFallback.answer) {
      answer = domFallback.answer;
    }
  }

  if (!answer) {
    throw new Error(
      "Gemini returned no answer — model-response never rendered content",
    );
  }

  const sourcesInline = parseSourcesFromMarkdown(answer);
  const sourceMap = new Map();
  for (const s of [...(domFallback?.sources || []), ...sourcesInline]) {
    if (s?.url && !sourceMap.has(s.url)) sourceMap.set(s.url, s);
  }
  const sources = Array.from(sourceMap.values()).slice(0, 10);

  return { answer: answer.trim(), sources };
}
251
+
252
+ // ============================================================================
253
+ // Main
254
+ // ============================================================================
255
+
256
+ const USAGE = 'Usage: node extractors/gemini.mjs "<query>" [--tab <prefix>]\n';
257
+
258
/**
 * CLI entry point: submit the query to Gemini and print the result as JSON.
 *
 * Parses argv, forces headless mode unless visibility was explicitly
 * requested, makes sure Chrome is running in that mode, opens or reuses a
 * tab, navigates to Gemini when the tab is not already there, types and sends
 * the query, waits for streaming to finish, then extracts the answer and
 * emits `{ query, url, answer, sources }` through `outputJson`.
 *
 * Every failure after argument parsing, including a Chrome launch failure,
 * is routed through `handleError`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = await prepareArgs(process.argv.slice(2));
  validateQuery(args, USAGE);

  const { query, tabPrefix, short } = parseArgs(args);

  // Default to headless unless the caller explicitly set GREEDY_SEARCH_VISIBLE=1.
  // This prevents a stale visible-mode env in the parent process from making
  // Gemini run visible when research synthesis/learning/planning expects headless.
  if (
    process.env.GREEDY_SEARCH_VISIBLE !== "1" &&
    process.env.GREEDY_SEARCH_ALWAYS_VISIBLE !== "1"
  ) {
    process.env.GREEDY_SEARCH_HEADLESS = "1";
  }

  try {
    // Ensure Chrome is in the requested mode (headless by default). If a prior
    // session left a visible Chrome running on port 9222, ensureChrome detects
    // the mismatch, kills it, and relaunches headless before the gemini tab
    // opens. Kept inside the try so a launch failure reaches handleError
    // instead of surfacing as an unhandled rejection.
    await ensureChrome();

    await cdp(["list"]);
    const tab = await getOrOpenTab(tabPrefix);

    // Skip navigation if tab was pre-seeded to Gemini (e.g. by search.mjs
    // opening the tab in parallel with source fetch to save ~4s nav time).
    const currentUrl = await cdp(["eval", tab, "document.location.href"]).catch(
      () => "",
    );
    let onGemini = false;
    try {
      const host = new URL(currentUrl).hostname.toLowerCase();
      onGemini =
        host === "gemini.google.com" || host.endsWith(".gemini.google.com");
    } catch {
      // Unparseable/empty URL: treat as "not on Gemini" and navigate below.
    }

    if (!onGemini) {
      await cdp(["nav", tab, "https://gemini.google.com/app"], 20000);
      await new Promise((r) => setTimeout(r, 600));
    }
    await dismissConsent(tab, cdp);
    await handleVerification(tab, cdp, 10000);

    // Wait for input to be ready
    await waitForSelector(tab, S.input, 8000, TIMING.inputPoll);
    await new Promise((r) => setTimeout(r, jitter(TIMING.postClick)));

    await injectClipboardInterceptor(tab, GLOBAL_VAR);
    await typeIntoGemini(tab, query);
    await new Promise((r) => setTimeout(r, jitter(TIMING.postType)));

    // Click send and report whether the button existed. Without a click
    // nothing is submitted, so fail now rather than waiting out the stream
    // timeout (or picking up an older response on a reused tab).
    const sendStatus = await cdp([
      "eval",
      tab,
      `(() => {
        const btn = document.querySelector(${JSON.stringify(S.sendButton)});
        if (!btn) return 'no-send-button';
        btn.click();
        return 'clicked';
      })()`,
    ]);
    if (String(sendStatus).trim() === "no-send-button") {
      throw new Error("Gemini send button not found — query was not submitted");
    }

    // Wait for Gemini's response to finish streaming before extracting.
    // Periodic scrolling keeps lazy-loaded content triggered in the viewport.
    // Scroll on every 6 s tick: gating on every 10th tick put the first
    // scroll at 60 s, past the 45 s timeout below, so it never ran.
    const scrollInterval = setInterval(() => {
      scrollToBottom(tab).catch(() => null);
    }, 6000);
    try {
      await waitForStreamComplete(tab, {
        timeout: 45000,
        stableRounds: 5,
        minLength: 60,
      });
    } finally {
      clearInterval(scrollInterval);
    }

    const { answer, sources } = await extractAnswer(tab, query);
    if (!answer) throw new Error("No answer captured from Gemini clipboard");

    const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
      () => "https://gemini.google.com/app",
    );
    outputJson({
      query,
      url: finalUrl,
      answer: formatAnswer(answer, short),
      sources,
    });
  } catch (e) {
    handleError(e);
  }
}
349
+
350
+ main();