@apmantza/greedysearch-pi 1.4.2 → 1.5.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,180 +1,142 @@
1
- #!/usr/bin/env node
2
- // extractors/gemini.mjs
3
- // Navigate gemini.google.com/app, submit query, wait for answer, return clean answer + sources.
4
- //
5
- // Usage:
6
- // node extractors/gemini.mjs "<query>" [--tab <prefix>]
7
- //
8
- // Output (stdout): JSON { answer, sources, query, url }
9
- // Errors go to stderr only stdout is always clean JSON for piping.
10
-
11
- import { readFileSync, existsSync } from 'fs';
12
- import { spawn } from 'child_process';
13
- import { tmpdir } from 'os';
14
- import { join, dirname } from 'path';
15
- import { fileURLToPath } from 'url';
16
- import { dismissConsent, handleVerification } from './consent.mjs';
17
- import { SELECTORS } from './selectors.mjs';
18
-
19
- const __dir = dirname(fileURLToPath(import.meta.url));
20
- const CDP = join(__dir, '..', 'cdp.mjs');
21
- const PAGES_CACHE = `${tmpdir().replace(/\\/g, '/')}/cdp-pages.json`;
22
-
23
- const COPY_POLL_INTERVAL = 600;
24
- const COPY_TIMEOUT = 120000;
25
-
26
- const S = SELECTORS.gemini;
27
-
28
- // ---------------------------------------------------------------------------
29
-
30
- function cdp(args, timeoutMs = 30000) {
31
- return new Promise((resolve, reject) => {
32
- const proc = spawn('node', [CDP, ...args], { stdio: ['ignore', 'pipe', 'pipe'] });
33
- let out = '', err = '';
34
- proc.stdout.on('data', d => out += d);
35
- proc.stderr.on('data', d => err += d);
36
- const timer = setTimeout(() => { proc.kill(); reject(new Error(`cdp timeout: ${args[0]}`)); }, timeoutMs);
37
- proc.on('close', code => {
38
- clearTimeout(timer);
39
- if (code !== 0) reject(new Error(err.trim() || `cdp exit ${code}`));
40
- else resolve(out.trim());
41
- });
42
- });
43
- }
44
-
45
- async function getOrOpenTab(tabPrefix) {
46
- if (tabPrefix) return tabPrefix;
47
- // Always open a fresh tab to avoid SPA navigation issues
48
- const list = await cdp(['list']);
49
- const anchor = list.split('\n')[0]?.slice(0, 8);
50
- if (!anchor) throw new Error('No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?');
51
- const raw = await cdp(['evalraw', anchor, 'Target.createTarget', '{"url":"about:blank"}']);
52
- const { targetId } = JSON.parse(raw);
53
- await cdp(['list']); // refresh cache
54
- return targetId.slice(0, 8);
55
- }
56
-
57
- async function typeIntoGemini(tab, text) {
58
- await cdp(['eval', tab, `
59
- (function(t) {
60
- var el = document.querySelector('${S.input}');
61
- if (!el) return false;
62
- el.focus();
63
- document.execCommand('insertText', false, t);
64
- return true;
65
- })(${JSON.stringify(text)})
66
- `]);
67
- }
68
-
69
- async function injectClipboardInterceptor(tab) {
70
- // Override both clipboard APIs — Gemini uses clipboard.write(ClipboardItem) for rich copy.
71
- await cdp(['eval', tab, `
72
- window.__geminiClipboard = null;
73
- const _origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
74
- navigator.clipboard.writeText = function(text) {
75
- window.__geminiClipboard = text;
76
- return _origWriteText(text);
77
- };
78
- const _origWrite = navigator.clipboard.write.bind(navigator.clipboard);
79
- navigator.clipboard.write = async function(items) {
80
- try {
81
- for (const item of items) {
82
- if (item.types && item.types.includes('text/plain')) {
83
- const blob = await item.getType('text/plain');
84
- window.__geminiClipboard = await blob.text();
85
- break;
86
- }
87
- }
88
- } catch(e) {}
89
- return _origWrite(items);
90
- };
91
- `]);
92
- }
93
-
94
- async function waitForCopyButton(tab) {
95
- // The "Copy response" button appears only after streaming is complete.
96
- const deadline = Date.now() + COPY_TIMEOUT;
97
- while (Date.now() < deadline) {
98
- await new Promise(r => setTimeout(r, COPY_POLL_INTERVAL));
99
- const found = await cdp(['eval', tab,
100
- `!!document.querySelector('${S.copyButton}')`
101
- ]).catch(() => 'false');
102
- if (found === 'true') return;
103
- }
104
- throw new Error(`Gemini copy button did not appear within ${COPY_TIMEOUT}ms`);
105
- }
106
-
107
- async function extractAnswer(tab) {
108
- // Click copy button → our interceptor captures the text.
109
- await cdp(['eval', tab, `document.querySelector('${S.copyButton}')?.click()`]);
110
- await new Promise(r => setTimeout(r, 400));
111
-
112
- const answer = await cdp(['eval', tab, `window.__geminiClipboard || ''`]);
113
- if (!answer) throw new Error('Clipboard interceptor returned empty text');
114
-
115
- // Regex parse Markdown links from clipboard — robust against DOM changes
116
- const sources = Array.from(answer.matchAll(/\[([^\]]+)\]\((https?:\/\/[^\s\)]+)\)/g))
117
- .map(m => ({ title: m[1], url: m[2] }))
118
- .filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
119
- .slice(0, 10);
120
-
121
- return { answer: answer.trim(), sources };
122
- }
123
-
124
- // ---------------------------------------------------------------------------
125
-
126
- async function main() {
127
- const args = process.argv.slice(2);
128
- if (!args.length || args[0] === '--help') {
129
- process.stderr.write('Usage: node extractors/gemini.mjs "<query>" [--tab <prefix>]\n');
130
- process.exit(1);
131
- }
132
-
133
- const short = args.includes('--short');
134
- const rest = args.filter(a => a !== '--short');
135
- const tabFlagIdx = rest.indexOf('--tab');
136
- const tabPrefix = tabFlagIdx !== -1 ? rest[tabFlagIdx + 1] : null;
137
- const query = tabFlagIdx !== -1
138
- ? rest.filter((_, i) => i !== tabFlagIdx && i !== tabFlagIdx + 1).join(' ')
139
- : rest.join(' ');
140
-
141
- try {
142
- await cdp(['list']);
143
- const tab = await getOrOpenTab(tabPrefix);
144
-
145
- // Each search = fresh conversation
146
- await cdp(['nav', tab, 'https://gemini.google.com/app'], 35000);
147
- await new Promise(r => setTimeout(r, 2000));
148
- await dismissConsent(tab, cdp);
149
- await handleVerification(tab, cdp, 60000);
150
-
151
- // Wait for input to be ready
152
- const deadline = Date.now() + 10000;
153
- while (Date.now() < deadline) {
154
- const ready = await cdp(['eval', tab, `!!document.querySelector('${S.input}')`]).catch(() => 'false');
155
- if (ready === 'true') break;
156
- await new Promise(r => setTimeout(r, 400));
157
- }
158
- await new Promise(r => setTimeout(r, 300));
159
-
160
- await injectClipboardInterceptor(tab);
161
- await typeIntoGemini(tab, query);
162
- await new Promise(r => setTimeout(r, 400));
163
-
164
- await cdp(['eval', tab, `document.querySelector('${S.sendButton}')?.click()`]);
165
-
166
- await waitForCopyButton(tab);
167
-
168
- const { answer, sources } = await extractAnswer(tab);
169
- if (!answer) throw new Error('No answer captured from Gemini clipboard');
170
- const out = short ? answer.slice(0, 300).replace(/\s+\S*$/, '') + '…' : answer;
171
-
172
- const finalUrl = await cdp(['eval', tab, 'document.location.href']).catch(() => 'https://gemini.google.com/app');
173
- process.stdout.write(JSON.stringify({ query, url: finalUrl, answer: out, sources }, null, 2) + '\n');
174
- } catch (e) {
175
- process.stderr.write(`Error: ${e.message}\n`);
176
- process.exit(1);
177
- }
178
- }
179
-
180
- main();
1
+ #!/usr/bin/env node
2
+
3
+ // extractors/gemini.mjs
4
+ // Navigate gemini.google.com/app, submit query, wait for answer, return clean answer + sources.
5
+ //
6
+ // Usage:
7
+ // node extractors/gemini.mjs "<query>" [--tab <prefix>]
8
+ //
9
+ // Output (stdout): JSON { answer, sources, query, url }
10
+ // Errors go to stderr only — stdout is always clean JSON for piping.
11
+
12
+ import {
13
+ cdp,
14
+ formatAnswer,
15
+ getOrOpenTab,
16
+ handleError,
17
+ injectClipboardInterceptor,
18
+ outputJson,
19
+ parseArgs,
20
+ parseSourcesFromMarkdown,
21
+ validateQuery,
22
+ } from "./common.mjs";
23
+ import { dismissConsent, handleVerification } from "./consent.mjs";
24
+ import { SELECTORS } from "./selectors.mjs";
25
+
26
+ const S = SELECTORS.gemini;
27
+ const GLOBAL_VAR = "__geminiClipboard";
28
+
29
+ // ============================================================================
30
+ // Gemini-specific helpers
31
+ // ============================================================================
32
+
33
+ async function typeIntoGemini(tab, text) {
34
+ await cdp([
35
+ "eval",
36
+ tab,
37
+ `
38
+ (function(t) {
39
+ var el = document.querySelector('${S.input}');
40
+ if (!el) return false;
41
+ el.focus();
42
+ document.execCommand('insertText', false, t);
43
+ return true;
44
+ })(${JSON.stringify(text)})
45
+ `,
46
+ ]);
47
+ }
48
+
49
+ async function waitForCopyButton(tab, timeout = 120000) {
50
+ const deadline = Date.now() + timeout;
51
+ while (Date.now() < deadline) {
52
+ await new Promise((r) => setTimeout(r, 600));
53
+ const found = await cdp([
54
+ "eval",
55
+ tab,
56
+ `!!document.querySelector('${S.copyButton}')`,
57
+ ]).catch(() => "false");
58
+ if (found === "true") return;
59
+ }
60
+ throw new Error(`Gemini copy button did not appear within ${timeout}ms`);
61
+ }
62
+
63
+ async function extractAnswer(tab) {
64
+ await cdp([
65
+ "eval",
66
+ tab,
67
+ `document.querySelector('${S.copyButton}')?.click()`,
68
+ ]);
69
+ await new Promise((r) => setTimeout(r, 400));
70
+
71
+ const answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
72
+ if (!answer) throw new Error("Clipboard interceptor returned empty text");
73
+
74
+ const sources = parseSourcesFromMarkdown(answer);
75
+ return { answer: answer.trim(), sources };
76
+ }
77
+
78
+ // ============================================================================
79
+ // Main
80
+ // ============================================================================
81
+
82
+ const USAGE = 'Usage: node extractors/gemini.mjs "<query>" [--tab <prefix>]\n';
83
+
84
+ async function main() {
85
+ const args = process.argv.slice(2);
86
+ validateQuery(args, USAGE);
87
+
88
+ const { query, tabPrefix, short } = parseArgs(args);
89
+
90
+ try {
91
+ await cdp(["list"]);
92
+ const tab = await getOrOpenTab(tabPrefix);
93
+
94
+ // Each search = fresh conversation
95
+ await cdp(["nav", tab, "https://gemini.google.com/app"], 35000);
96
+ await new Promise((r) => setTimeout(r, 2000));
97
+ await dismissConsent(tab, cdp);
98
+ await handleVerification(tab, cdp, 60000);
99
+
100
+ // Wait for input to be ready
101
+ const deadline = Date.now() + 10000;
102
+ while (Date.now() < deadline) {
103
+ const ready = await cdp([
104
+ "eval",
105
+ tab,
106
+ `!!document.querySelector('${S.input}')`,
107
+ ]).catch(() => "false");
108
+ if (ready === "true") break;
109
+ await new Promise((r) => setTimeout(r, 400));
110
+ }
111
+ await new Promise((r) => setTimeout(r, 300));
112
+
113
+ await injectClipboardInterceptor(tab, GLOBAL_VAR);
114
+ await typeIntoGemini(tab, query);
115
+ await new Promise((r) => setTimeout(r, 400));
116
+
117
+ await cdp([
118
+ "eval",
119
+ tab,
120
+ `document.querySelector('${S.sendButton}')?.click()`,
121
+ ]);
122
+
123
+ await waitForCopyButton(tab);
124
+
125
+ const { answer, sources } = await extractAnswer(tab);
126
+ if (!answer) throw new Error("No answer captured from Gemini clipboard");
127
+
128
+ const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
129
+ () => "https://gemini.google.com/app",
130
+ );
131
+ outputJson({
132
+ query,
133
+ url: finalUrl,
134
+ answer: formatAnswer(answer, short),
135
+ sources,
136
+ });
137
+ } catch (e) {
138
+ handleError(e);
139
+ }
140
+ }
141
+
142
+ main();