@apmantza/greedysearch-pi 1.9.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,217 +1,335 @@
1
- #!/usr/bin/env node
2
-
3
- // extractors/gemini.mjs
4
- // Navigate gemini.google.com/app, submit query, wait for answer, return clean answer + sources.
5
- //
6
- // Usage:
7
- // node extractors/gemini.mjs "<query>" [--tab <prefix>]
8
- //
9
- // Output (stdout): JSON { answer, sources, query, url }
10
- // Errors go to stderr only — stdout is always clean JSON for piping.
11
-
12
- import {
13
- cdp,
14
- formatAnswer,
15
- getOrOpenTab,
16
- handleError,
17
- injectClipboardInterceptor,
18
- jitter,
19
- outputJson,
20
- parseArgs,
21
- parseSourcesFromMarkdown,
22
- prepareArgs,
23
- TIMING,
24
- validateQuery,
25
- waitForSelector,
26
- waitForStreamComplete,
27
- } from "./common.mjs";
28
- import { dismissConsent, handleVerification } from "./consent.mjs";
29
- import { SELECTORS } from "./selectors.mjs";
30
-
31
- const S = SELECTORS.gemini;
32
- const GLOBAL_VAR = "__geminiClipboard";
33
-
34
- // ============================================================================
35
- // Gemini-specific helpers
36
- // ============================================================================
37
-
38
- async function typeIntoGemini(tab, text) {
39
- // 1. Focus the input area via click (more reliable than eval focus for shadow-DOM editors)
40
- await cdp(["click", tab, S.input]);
41
- await new Promise((r) => setTimeout(r, jitter(200)));
42
-
43
- // 2. Type using CDP Input.insertText (more reliable than document.execCommand)
44
- await cdp(["type", tab, text]);
45
- await new Promise((r) => setTimeout(r, jitter(300)));
46
-
47
- // 3. Verify the text was actually inserted
48
- const inserted = await cdp([
49
- "eval",
50
- tab,
51
- `(function() {
52
- var el = document.querySelector('${S.input}');
53
- if (!el) return false;
54
- var content = el.innerText || el.textContent || '';
55
- return content.trim().length >= ${Math.floor(text.length * 0.8)};
56
- })()`,
57
- ]);
58
- if (inserted !== "true") {
59
- throw new Error(
60
- "Gemini input field did not accept text — input verification failed",
61
- );
62
- }
63
- }
64
-
65
- async function scrollToBottom(tab) {
66
- await cdp([
67
- "eval",
68
- tab,
69
- `(function() {
70
- const chat = document.querySelector('chat-window, [role="main"], main') || document.body;
71
- chat.scrollTo ? chat.scrollTo({ top: chat.scrollHeight, behavior: 'smooth' }) : window.scrollTo(0, document.body.scrollHeight);
72
- })()`,
73
- ]);
74
- }
75
-
76
- async function extractAnswer(tab, query = "") {
77
- const queryNorm = query.toLowerCase().trim();
78
-
79
- // Wait for the assistant response copy button to appear.
80
- // A fresh conversation has 1 copy button (user message); after the
81
- // assistant responds there are 2+. This prevents clicking the user's
82
- // copy button before React hydrates the assistant's.
83
- let copyReady = false;
84
- const copyDeadline = Date.now() + 12000;
85
- while (Date.now() < copyDeadline) {
86
- const count = await cdp([
87
- "eval",
88
- tab,
89
- `document.querySelectorAll('${S.copyButton}').length`,
90
- ]);
91
- if (parseInt(count, 10) >= 2) {
92
- copyReady = true;
93
- break;
94
- }
95
- await new Promise((r) => setTimeout(r, 800));
96
- }
97
- if (!copyReady) {
98
- console.error("[gemini] Warning: assistant copy button did not appear");
99
- }
100
-
101
- // Click the LAST copy button (assistant's response at the bottom)
102
- await cdp([
103
- "eval",
104
- tab,
105
- `(() => {
106
- const buttons = document.querySelectorAll('${S.copyButton}');
107
- buttons[buttons.length - 1]?.click();
108
- })()`,
109
- ]);
110
- await new Promise((r) => setTimeout(r, 600));
111
-
112
- let answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
113
-
114
- // Retry once if clipboard contains the user's query instead of the response.
115
- // This can happen when the assistant response hasn't rendered its copy button yet.
116
- if (
117
- answer &&
118
- queryNorm &&
119
- (answer.toLowerCase().trim() === queryNorm ||
120
- answer.trim().length < queryNorm.length)
121
- ) {
122
- console.error("[gemini] Clipboard echoed query, retrying in 2s...");
123
- await new Promise((r) => setTimeout(r, 2000));
124
- await cdp([
125
- "eval",
126
- tab,
127
- `(() => {
128
- const buttons = document.querySelectorAll('${S.copyButton}');
129
- buttons[buttons.length - 1]?.click();
130
- })()`,
131
- ]);
132
- await new Promise((r) => setTimeout(r, 600));
133
- answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
134
- }
135
-
136
- if (!answer) throw new Error("Clipboard interceptor returned empty text");
137
-
138
- const sources = parseSourcesFromMarkdown(answer);
139
- return { answer: answer.trim(), sources };
140
- }
141
-
142
- // ============================================================================
143
- // Main
144
- // ============================================================================
145
-
146
- const USAGE = 'Usage: node extractors/gemini.mjs "<query>" [--tab <prefix>]\n';
147
-
148
- async function main() {
149
- const args = await prepareArgs(process.argv.slice(2));
150
- validateQuery(args, USAGE);
151
-
152
- const { query, tabPrefix, short } = parseArgs(args);
153
-
154
- try {
155
- await cdp(["list"]);
156
- const tab = await getOrOpenTab(tabPrefix);
157
-
158
- // Skip navigation if tab was pre-seeded to Gemini (e.g. by search.mjs
159
- // opening the tab in parallel with source fetch to save ~4s nav time).
160
- const currentUrl = await cdp(["eval", tab, "document.location.href"]).catch(() => "");
161
- let onGemini = false;
162
- try {
163
- const host = new URL(currentUrl).hostname.toLowerCase();
164
- onGemini = host === "gemini.google.com" || host.endsWith(".gemini.google.com");
165
- } catch {}
166
-
167
- if (!onGemini) {
168
- await cdp(["nav", tab, "https://gemini.google.com/app"], 20000);
169
- await new Promise((r) => setTimeout(r, 600));
170
- }
171
- await dismissConsent(tab, cdp);
172
- await handleVerification(tab, cdp, 10000);
173
-
174
- // Wait for input to be ready
175
- await waitForSelector(tab, S.input, 8000, TIMING.inputPoll);
176
- await new Promise((r) => setTimeout(r, jitter(TIMING.postClick)));
177
-
178
- await injectClipboardInterceptor(tab, GLOBAL_VAR);
179
- await typeIntoGemini(tab, query);
180
- await new Promise((r) => setTimeout(r, jitter(TIMING.postType)));
181
-
182
- await cdp([
183
- "eval",
184
- tab,
185
- `document.querySelector('${S.sendButton}')?.click()`,
186
- ]);
187
-
188
- // Wait for Gemini's response to finish streaming before extracting.
189
- // Periodic scrolling keeps lazy-loaded content triggered in the viewport.
190
- let pollTick = 0;
191
- const scrollInterval = setInterval(() => {
192
- if (++pollTick % 10 === 0) scrollToBottom(tab).catch(() => null);
193
- }, 6000);
194
- try {
195
- await waitForStreamComplete(tab, { timeout: 45000, minLength: 50 });
196
- } finally {
197
- clearInterval(scrollInterval);
198
- }
199
-
200
- const { answer, sources } = await extractAnswer(tab, query);
201
- if (!answer) throw new Error("No answer captured from Gemini clipboard");
202
-
203
- const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
204
- () => "https://gemini.google.com/app",
205
- );
206
- outputJson({
207
- query,
208
- url: finalUrl,
209
- answer: formatAnswer(answer, short),
210
- sources,
211
- });
212
- } catch (e) {
213
- handleError(e);
214
- }
215
- }
216
-
217
- main();
1
+ #!/usr/bin/env node
2
+
3
+ // extractors/gemini.mjs
4
+ // Navigate gemini.google.com/app, submit query, wait for answer, return clean answer + sources.
5
+ //
6
+ // Usage:
7
+ // node extractors/gemini.mjs "<query>" [--tab <prefix>]
8
+ //
9
+ // Output (stdout): JSON { answer, sources, query, url }
10
+ // Errors go to stderr only — stdout is always clean JSON for piping.
11
+
12
+ import {
13
+ cdp,
14
+ cdpWithInput,
15
+ formatAnswer,
16
+ getOrOpenTab,
17
+ handleError,
18
+ injectClipboardInterceptor,
19
+ jitter,
20
+ outputJson,
21
+ parseArgs,
22
+ parseSourcesFromMarkdown,
23
+ prepareArgs,
24
+ TIMING,
25
+ validateQuery,
26
+ waitForSelector,
27
+ waitForStreamComplete,
28
+ } from "./common.mjs";
29
+ import { ensureChrome } from "../src/search/chrome.mjs";
30
+ import { dismissConsent, handleVerification } from "./consent.mjs";
31
+ import { SELECTORS } from "./selectors.mjs";
32
+
33
+ const S = SELECTORS.gemini;
34
+ const GLOBAL_VAR = "__geminiClipboard";
35
+
36
+ // ============================================================================
37
+ // Gemini-specific helpers
38
+ // ============================================================================
39
+
40
+ async function typeIntoGemini(tab, text) { // focus the input, insert `text`, then verify it landed (throws otherwise)
41
+ // 1. Focus the input area via click (more reliable than eval focus for shadow-DOM editors)
42
+ await cdp(["click", tab, S.input]);
43
+ await new Promise((r) => setTimeout(r, jitter(200)));
44
+
45
+ // 2. Type using CDP Input.insertText (more reliable than document.execCommand).
46
+ // Pass long research prompts through stdin so Windows does not reject the
47
+ // cdp.mjs process spawn with ENAMETOOLONG.
48
+ await cdpWithInput(["type", tab, "--stdin"], text);
49
+ await new Promise((r) => setTimeout(r, jitter(300)));
50
+
51
+ // 3. Verify the text was actually inserted: the input's trimmed text must reach 80% of text.length
52
+ const inserted = await cdp([
53
+ "eval",
54
+ tab,
55
+ `(function() {
56
+ var el = document.querySelector('${S.input}');
57
+ if (!el) return false;
58
+ var content = el.innerText || el.textContent || '';
59
+ return content.trim().length >= ${Math.floor(text.length * 0.8)};
60
+ })()`,
61
+ ]);
62
+ if (inserted !== "true") { // the eval result is checked as the text "true", not a boolean
63
+ throw new Error(
64
+ "Gemini input field did not accept text — input verification failed",
65
+ );
66
+ }
67
+ }
68
+
69
+ async function scrollToBottom(tab) { // in-page eval: scroll the chat container (else the window) to its bottom
70
+ await cdp([
71
+ "eval",
72
+ tab,
73
+ `(function() {
74
+ const chat = document.querySelector('chat-window, [role="main"], main') || document.body;
75
+ chat.scrollTo ? chat.scrollTo({ top: chat.scrollHeight, behavior: 'smooth' }) : window.scrollTo(0, document.body.scrollHeight);
76
+ })()`,
77
+ ]);
78
+ }
79
+
80
+ /**
81
+ * Read the assistant response straight from the first <model-response> element.
82
+ * Fallback for when the copy-button click yields nothing or only the user's
83
+ * query. Drops the text up to the first newline (the locale-specific label)
84
+ * and collects up to 10 unique links. Resolves to { answer, sources: [{ title, url }] };
85
+ * both are empty when the element is missing, has no text, or the result fails to parse.
86
+ */
87
+ async function extractAnswerFromDom(tab) {
88
+ const raw = await cdp([
89
+ "eval",
90
+ tab,
91
+ String.raw`
92
+ (() => {
93
+ // The model-response element is a custom element <model-response>.
94
+ // Its innerText starts with the "Gemini said" label in the
95
+ // current locale; strip that prefix and return the rest.
96
+ const resp = document.querySelector('model-response');
97
+ if (!resp) return JSON.stringify({ answer: '', sources: [] });
98
+ const text = (resp.innerText || resp.textContent || '').trim();
99
+ // Strip the locale-specific "Gemini said" label prefix.
100
+ // It varies ("Το Gemini είπε" in Greek, "Gemini said" in
101
+ // English, etc.) so we just look for the first newline and
102
+ // take what follows.
103
+ const idx = text.indexOf('\n');
104
+ const answer = idx >= 0 ? text.slice(idx + 1).trim() : text;
105
+ if (!answer) return JSON.stringify({ answer: '', sources: [] });
106
+ // Extract source links from the response.
107
+ const seen = new Set();
108
+ const sources = [];
109
+ for (const link of resp.querySelectorAll('a[href]')) {
110
+ const url = link.href;
111
+ if (!url || seen.has(url)) continue;
112
+ seen.add(url);
113
+ const title = (link.innerText || link.textContent || '').replace(/\s+/g, ' ').trim();
114
+ sources.push({ title, url });
115
+ if (sources.length >= 10) break;
116
+ }
117
+ return JSON.stringify({ answer, sources });
118
+ })()
119
+ `,
120
+ ]);
121
+ try {
122
+ return JSON.parse(raw); // the page returns a JSON string; decode it on the Node side
123
+ } catch {
124
+ return { answer: "", sources: [] };
125
+ }
126
+ }
127
+
128
+ async function extractAnswer(tab, query = "") {
129
+ const queryNorm = query.toLowerCase().trim();
130
+
131
+ // Wait for the model-response element to have content (not just the
132
+ // "Gemini said" label). The old approach waited for copy button
133
+ // count >= 2, which is unreliable: the Gemini UI has many copy
134
+ // icons (copy link, copy code, etc.), and the last one on the page
135
+ // is not always the assistant response copy button.
136
+ let modelReady = false;
137
+ const modelDeadline = Date.now() + 12000;
138
+ while (Date.now() < modelDeadline) {
139
+ const ready = await cdp([
140
+ "eval",
141
+ tab,
142
+ String.raw`(() => {
143
+ const r = document.querySelector('model-response');
144
+ if (!r) return false;
145
+ const t = (r.innerText || '').trim();
146
+ // Must have content beyond the locale-specific label
147
+ // ("Gemini said" / "Το Gemini είπε" / etc.) and ideally
148
+ // a copy button rendered on the response.
149
+ return t.length > 20;
150
+ })()`,
151
+ ]);
152
+ if (ready === true) {
153
+ modelReady = true;
154
+ break;
155
+ }
156
+ await new Promise((r) => setTimeout(r, 800));
157
+ }
158
+ if (!modelReady) {
159
+ console.error("[gemini] Warning: model-response did not render content");
160
+ }
161
+
162
+ // Click the copy button on the model-response element specifically,
163
+ // not the absolute last copy button on the page. The page has many
164
+ // copy icons (copy link, copy code, etc.) and the last one is not
165
+ // always the assistant's response copy button.
166
+ await cdp([
167
+ "eval",
168
+ tab,
169
+ `(() => {
170
+ const resp = document.querySelector('model-response');
171
+ if (!resp) return 'no-model-response';
172
+ const btn = resp.querySelector('${S.copyButton}');
173
+ if (!btn) return 'no-copy-button';
174
+ btn.click();
175
+ return 'clicked';
176
+ })()`,
177
+ ]);
178
+ await new Promise((r) => setTimeout(r, 600));
179
+
180
+ let answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
181
+
182
+ // Retry once if clipboard contains the user's query instead of the response.
183
+ // This can happen when the assistant response hasn't rendered its copy button yet.
184
+ if (
185
+ answer &&
186
+ queryNorm &&
187
+ (answer.toLowerCase().trim() === queryNorm ||
188
+ answer.trim().length < queryNorm.length)
189
+ ) {
190
+ console.error("[gemini] Clipboard echoed query, retrying in 2s...");
191
+ await new Promise((r) => setTimeout(r, 2000));
192
+ await cdp([
193
+ "eval",
194
+ tab,
195
+ `(() => {
196
+ const resp = document.querySelector('model-response');
197
+ if (!resp) return 'no-model-response';
198
+ const btn = resp.querySelector('${S.copyButton}');
199
+ if (!btn) return 'no-copy-button';
200
+ btn.click();
201
+ return 'clicked';
202
+ })()`,
203
+ ]);
204
+ await new Promise((r) => setTimeout(r, 600));
205
+ answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
206
+ }
207
+
208
+ // DOM fallback: if the clipboard is empty or still echoes the query,
209
+ // read the model-response innerText directly. This handles the case
210
+ // where the copy button never rendered (response never appeared) or
211
+ // the click didn't fire.
212
+ let domFallback = null;
213
+ if (
214
+ !answer ||
215
+ (queryNorm &&
216
+ (answer.toLowerCase().trim() === queryNorm ||
217
+ answer.trim().length < queryNorm.length))
218
+ ) {
219
+ domFallback = await extractAnswerFromDom(tab);
220
+ if (domFallback.answer) {
221
+ answer = domFallback.answer;
222
+ }
223
+ }
224
+
225
+ if (!answer) {
226
+ throw new Error(
227
+ "Gemini returned no answer — model-response never rendered content",
228
+ );
229
+ }
230
+
231
+ const sourcesInline = parseSourcesFromMarkdown(answer);
232
+ const sourceMap = new Map();
233
+ for (const s of [...(domFallback?.sources || []), ...sourcesInline]) {
234
+ if (s?.url && !sourceMap.has(s.url)) sourceMap.set(s.url, s);
235
+ }
236
+ const sources = Array.from(sourceMap.values()).slice(0, 10);
237
+
238
+ return { answer: answer.trim(), sources };
239
+ }
240
+
241
+ // ============================================================================
242
+ // Main
243
+ // ============================================================================
244
+
245
+ const USAGE = 'Usage: node extractors/gemini.mjs "<query>" [--tab <prefix>]\n';
246
+
247
+ async function main() {
248
+ const args = await prepareArgs(process.argv.slice(2));
249
+ validateQuery(args, USAGE);
250
+
251
+ const { query, tabPrefix, short } = parseArgs(args);
252
+
253
+ // Default to headless unless the caller explicitly set GREEDY_SEARCH_VISIBLE=1.
254
+ // This prevents a stale visible-mode env in the parent process from making
255
+ // Gemini run visible when research synthesis/learning/planning expects headless.
256
+ if (
257
+ process.env.GREEDY_SEARCH_VISIBLE !== "1" &&
258
+ process.env.GREEDY_SEARCH_ALWAYS_VISIBLE !== "1"
259
+ ) {
260
+ process.env.GREEDY_SEARCH_HEADLESS = "1";
261
+ }
262
+
263
+ // Ensure Chrome is in the requested mode (headless by default). If a prior
264
+ // session left a visible Chrome running on port 9222, ensureChrome detects
265
+ // the mismatch, kills it, and relaunches headless before the gemini tab
266
+ // opens.
267
+ await ensureChrome();
268
+
269
+ try {
270
+ await cdp(["list"]);
271
+ const tab = await getOrOpenTab(tabPrefix);
272
+
273
+ // Skip navigation if tab was pre-seeded to Gemini (e.g. by search.mjs
274
+ // opening the tab in parallel with source fetch to save ~4s nav time).
275
+ const currentUrl = await cdp(["eval", tab, "document.location.href"]).catch(
276
+ () => "",
277
+ );
278
+ let onGemini = false;
279
+ try {
280
+ const host = new URL(currentUrl).hostname.toLowerCase();
281
+ onGemini =
282
+ host === "gemini.google.com" || host.endsWith(".gemini.google.com");
283
+ } catch {}
284
+
285
+ if (!onGemini) {
286
+ await cdp(["nav", tab, "https://gemini.google.com/app"], 20000);
287
+ await new Promise((r) => setTimeout(r, 600));
288
+ }
289
+ await dismissConsent(tab, cdp);
290
+ await handleVerification(tab, cdp, 10000);
291
+
292
+ // Wait for input to be ready
293
+ await waitForSelector(tab, S.input, 8000, TIMING.inputPoll);
294
+ await new Promise((r) => setTimeout(r, jitter(TIMING.postClick)));
295
+
296
+ await injectClipboardInterceptor(tab, GLOBAL_VAR);
297
+ await typeIntoGemini(tab, query);
298
+ await new Promise((r) => setTimeout(r, jitter(TIMING.postType)));
299
+
300
+ await cdp([
301
+ "eval",
302
+ tab,
303
+ `document.querySelector('${S.sendButton}')?.click()`,
304
+ ]);
305
+
306
+ // Wait for Gemini's response to finish streaming before extracting.
307
+ // Periodic scrolling keeps lazy-loaded content triggered in the viewport.
308
+ let pollTick = 0;
309
+ const scrollInterval = setInterval(() => {
310
+ if (++pollTick % 10 === 0) scrollToBottom(tab).catch(() => null);
311
+ }, 6000);
312
+ try {
313
+ await waitForStreamComplete(tab, { timeout: 45000, minLength: 50 });
314
+ } finally {
315
+ clearInterval(scrollInterval);
316
+ }
317
+
318
+ const { answer, sources } = await extractAnswer(tab, query);
319
+ if (!answer) throw new Error("No answer captured from Gemini clipboard");
320
+
321
+ const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
322
+ () => "https://gemini.google.com/app",
323
+ );
324
+ outputJson({
325
+ query,
326
+ url: finalUrl,
327
+ answer: formatAnswer(answer, short),
328
+ sources,
329
+ });
330
+ } catch (e) {
331
+ handleError(e);
332
+ }
333
+ }
334
+
335
+ main();