@apmantza/greedysearch-pi 1.6.2 → 1.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,237 +0,0 @@
1
- // extractors/common.mjs — shared utilities for CDP-based extractors
2
- // Extracts common patterns: cdp wrapper, tab management, clipboard interception, source parsing
3
-
4
- import { spawn } from "node:child_process";
5
- import { dirname, join } from "node:path";
6
- import { fileURLToPath } from "node:url";
7
-
8
- const __dir = dirname(fileURLToPath(import.meta.url)); // directory that contains this module file
9
- const CDP = join(__dir, "..", "cdp.mjs"); // path of the cdp.mjs script one directory up; cdp() passes it to `node` as the script to run
10
-
11
- // ============================================================================
12
- // CDP wrapper
13
- // ============================================================================
14
-
15
/**
 * Run the cdp.mjs CLI in a child `node` process and collect its output.
 *
 * The promise settles exactly once: whichever of "child closed", "child
 * failed to spawn" or "timeout elapsed" happens first wins, and the
 * timeout timer is always cleared at that point.
 *
 * @param {string[]} args - Arguments appended after the cdp.mjs path; args[0] is echoed in the timeout message
 * @param {number} [timeoutMs=30000] - Milliseconds to wait before killing the child
 * @returns {Promise<string>} Trimmed stdout of the child when it exits with code 0
 * @throws {Error} Rejects with the trimmed stderr (or `cdp exit <code>`) on a non-zero exit,
 *   with `cdp timeout: <args[0]>` on timeout, or with the spawn error if the child cannot start
 */
export function cdp(args, timeoutMs = 30000) {
  return new Promise((resolve, reject) => {
    const proc = spawn("node", [CDP, ...args], {
      stdio: ["ignore", "pipe", "pipe"],
    });
    let out = "";
    let err = "";
    let settled = false;

    // Funnel every outcome through one place so the timer never outlives the
    // promise and a late "close" after a timeout/spawn error is ignored.
    const settle = (finish, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      finish(value);
    };

    const timer = setTimeout(() => {
      proc.kill();
      settle(reject, new Error(`cdp timeout: ${args[0]}`));
    }, timeoutMs);

    proc.stdout.on("data", (chunk) => {
      out += chunk;
    });
    proc.stderr.on("data", (chunk) => {
      err += chunk;
    });

    // Without this listener a failed spawn is an unhandled "error" event,
    // which crashes the process instead of rejecting the promise.
    proc.on("error", (spawnError) => settle(reject, spawnError));

    proc.on("close", (code) => {
      if (code !== 0) {
        settle(reject, new Error(err.trim() || `cdp exit ${code}`));
      } else {
        settle(resolve, out.trim());
      }
    });
  });
}
41
-
42
- // ============================================================================
43
- // Tab management
44
- // ============================================================================
45
-
46
/**
 * Resolve the tab to work in: reuse the caller's prefix, or create a blank tab.
 *
 * When no prefix is given, the first 8 characters of the first line of
 * `cdp list` are used as the anchor tab for a `Target.createTarget` call,
 * and the first 8 characters of the new target id are returned.
 *
 * @param {string|null} tabPrefix - Existing tab prefix, or a falsy value to create a new tab
 * @returns {Promise<string>} Tab identifier (the given prefix, or the new 8-character prefix)
 * @throws {Error} When the tab listing is empty
 */
export async function getOrOpenTab(tabPrefix) {
  if (tabPrefix) {
    return tabPrefix;
  }

  // A fresh tab avoids SPA navigation issues in a reused one.
  const listing = await cdp(["list"]);
  const [firstLine] = listing.split("\n");
  const anchorTab = firstLine?.slice(0, 8);
  if (!anchorTab) {
    throw new Error(
      "No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?",
    );
  }

  const createParams = '{"url":"about:blank"}';
  const response = await cdp([
    "evalraw",
    anchorTab,
    "Target.createTarget",
    createParams,
  ]);
  const newTargetId = JSON.parse(response).targetId;

  // Listing again refreshes the CLI's tab cache so the new tab is known.
  await cdp(["list"]);
  return newTargetId.slice(0, 8);
}
70
-
71
- // ============================================================================
72
- // Clipboard interception (for extractors that use copy-to-clipboard)
73
- // ============================================================================
74
-
75
/**
 * Inject a clipboard interceptor into a tab so that text passed to
 * `navigator.clipboard.writeText` / `navigator.clipboard.write` is also
 * stored on `window[globalVar]`.
 *
 * The injected snippet is wrapped in an IIFE: its helper bindings stay local,
 * so injecting it again into the same page context does not fail on a
 * `const` redeclaration. The original clipboard methods are still called, so
 * the page's own copy behaviour is unchanged.
 *
 * @param {string} tab - Tab identifier
 * @param {string} globalVar - Name of the window property that receives the copied text
 *   (e.g. '__pplxClipboard', '__geminiClipboard'); it is interpolated into the
 *   injected source verbatim, so it must be a valid identifier
 * @returns {Promise<void>}
 */
export async function injectClipboardInterceptor(tab, globalVar) {
  const code = `
(function() {
window.${globalVar} = null;
const origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
navigator.clipboard.writeText = function(text) {
window.${globalVar} = text;
return origWriteText(text);
};
const origWrite = navigator.clipboard.write.bind(navigator.clipboard);
navigator.clipboard.write = async function(items) {
try {
for (const item of items) {
if (item.types && item.types.includes('text/plain')) {
const blob = await item.getType('text/plain');
window.${globalVar} = await blob.text();
break;
}
}
} catch(e) {}
return origWrite(items);
};
})()
`;
  await cdp(["eval", tab, code]);
}
105
-
106
- // ============================================================================
107
- // Source extraction from markdown
108
- // ============================================================================
109
-
110
/**
 * Collect the distinct http(s) links written as Markdown `[title](url)` in a text.
 *
 * Links are returned in order of first appearance; when a URL occurs more
 * than once only its first title is kept. At most 10 sources are returned.
 *
 * @param {string} text - Text that may contain Markdown links
 * @returns {Array<{title: string, url: string}>} Up to 10 unique sources
 */
export function parseSourcesFromMarkdown(text) {
  const linkPattern = /\[([^\]]+)\]\((https?:\/\/[^\s)]+)\)/g;
  const seenUrls = new Set();
  const sources = [];

  for (const [, title, url] of text.matchAll(linkPattern)) {
    if (seenUrls.has(url)) continue;
    seenUrls.add(url);
    sources.push({ title, url });
  }

  return sources.slice(0, 10);
}
121
-
122
- // ============================================================================
123
- // Stream completion detection
124
- // ============================================================================
125
-
126
/**
 * Poll a tab until the text length of an element stops changing.
 *
 * Every `interval` ms the expression `<selector>?.innerText?.length ?? 0` is
 * evaluated in the tab. A failed evaluation or a length of 0 is skipped
 * without touching the counters. The wait ends once the same non-zero length
 * has been read `stableRounds` times in a row after it was first seen.
 *
 * @param {string} tab - Tab identifier
 * @param {object} options - Options
 * @param {number} [options.timeout=30000] - Maximum wait time in ms
 * @param {number} [options.interval=600] - Polling interval in ms
 * @param {number} [options.stableRounds=3] - Unchanged readings required to consider the text complete
 * @param {string} [options.selector='document.body'] - JavaScript expression for the element to monitor
 * @returns {Promise<number>} The stable text length
 * @throws {Error} When the length has not stabilised before the timeout
 */
export async function waitForStreamComplete(tab, options = {}) {
  const {
    timeout = 30000,
    interval = 600,
    stableRounds = 3,
    selector = "document.body",
  } = options;

  const expiresAt = Date.now() + timeout;
  let previousLength = -1;
  let unchangedPolls = 0;

  while (Date.now() < expiresAt) {
    await new Promise((wake) => setTimeout(wake, interval));

    const reported = await cdp([
      "eval",
      tab,
      `${selector}?.innerText?.length ?? 0`,
    ]).catch(() => "0");
    const length = parseInt(reported, 10) || 0;

    // Nothing rendered yet (or the eval failed): keep waiting.
    if (length <= 0) continue;

    if (length !== previousLength) {
      previousLength = length;
      unchangedPolls = 0;
      continue;
    }

    unchangedPolls += 1;
    if (unchangedPolls >= stableRounds) return length;
  }

  throw new Error(`Generation did not stabilise within ${timeout}ms`);
}
170
-
171
- // ============================================================================
172
- // CLI argument parsing
173
- // ============================================================================
174
-
175
/**
 * Parse the standard extractor CLI arguments.
 *
 * `--short` may appear anywhere and is removed before further parsing. The
 * first `--tab` and the argument that follows it are taken as the tab prefix;
 * every remaining argument is joined with single spaces to form the query.
 *
 * @param {string[]} args - process.argv.slice(2)
 * @returns {{query: string, tabPrefix: string|null, short: boolean}}
 *   `tabPrefix` is null when `--tab` is absent or is the last argument with no value
 */
export function parseArgs(args) {
  const short = args.includes("--short");
  const rest = args.filter((arg) => arg !== "--short");
  const tabFlagIdx = rest.indexOf("--tab");

  if (tabFlagIdx === -1) {
    return { query: rest.join(" "), tabPrefix: null, short };
  }

  // A trailing "--tab" has no value; report null (as documented) rather than undefined.
  const tabPrefix = rest[tabFlagIdx + 1] ?? null;
  const query = [
    ...rest.slice(0, tabFlagIdx),
    ...rest.slice(tabFlagIdx + 2),
  ].join(" ");
  return { query, tabPrefix, short };
}
193
-
194
/**
 * Ensure the CLI received something to search for.
 *
 * When the argument list is empty, or its first entry is `--help`, the usage
 * text is written to stderr and the process exits with status 1. Otherwise
 * the function returns without doing anything.
 *
 * @param {string[]} args - process.argv.slice(2)
 * @param {string} usage - Usage text written to stderr before exiting
 */
export function validateQuery(args, usage) {
  const needsUsage = !args.length || args[0] === "--help";
  if (!needsUsage) {
    return;
  }
  process.stderr.write(usage);
  process.exit(1);
}
205
-
206
- // ============================================================================
207
- // Output formatting
208
- // ============================================================================
209
-
210
/**
 * Shorten an answer for short mode.
 *
 * Outside short mode, or when the answer already fits, it is returned
 * unchanged. Otherwise the first `maxLen` UTF-16 code units are kept, the
 * trailing partial word (everything from the last run of whitespace) is
 * dropped, and an ellipsis is appended.
 *
 * @param {string} answer - Full answer text
 * @param {boolean} short - Whether to truncate
 * @param {number} [maxLen=300] - Maximum length (in UTF-16 code units) kept in short mode
 * @returns {string} The original answer, or the truncated answer followed by "…"
 */
export function formatAnswer(answer, short, maxLen = 300) {
  if (!short || answer.length <= maxLen) return answer;

  let head = answer.slice(0, maxLen);
  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair; drop a dangling high surrogate so the output
  // never ends in half a character.
  if (/[\uD800-\uDBFF]$/.test(head)) {
    head = head.slice(0, -1);
  }
  return `${head.replace(/\s+\S*$/, "")}…`;
}
221
-
222
/**
 * Write a value to stdout as pretty-printed JSON (2-space indent) followed by a newline.
 *
 * @param {object} data - Data to serialise
 */
export function outputJson(data) {
  const serialized = JSON.stringify(data, null, 2);
  process.stdout.write(`${serialized}\n`);
}
229
-
230
/**
 * Report a failure on stderr as `Error: <message>` and exit with status 1.
 *
 * @param {Error} error - Error whose message is reported
 */
export function handleError(error) {
  const { message } = error;
  process.stderr.write(`Error: ${message}\n`);
  process.exit(1);
}
@@ -1,273 +0,0 @@
1
- // consent.mjs — auto-dismiss common cookie/consent banners and human-verification pages
2
- // Call dismissConsent(tab, cdpFn) after navigating to any page.
3
-
4
- const CONSENT_JS = `
5
- (function() {
6
- // Google consent page (consent.google.com)
7
- var g = document.querySelector('#L2AGLb, button[jsname="b3VHJd"], .tHlp8d');
8
- if (g) { g.click(); return 'google'; }
9
-
10
- // OneTrust (used by many sites including Stack Overflow)
11
- var ot = document.querySelector('#onetrust-accept-btn-handler, .onetrust-accept-btn-handler');
12
- if (ot) { ot.click(); return 'onetrust'; }
13
-
14
- // Generic "accept all" / "agree" buttons
15
- var btns = Array.from(document.querySelectorAll('button, a[role=button]'));
16
- var accept = btns.find(b => /^(accept all|accept cookies|agree|i agree|got it|allow all|allow cookies)$/i.test(b.innerText?.trim()));
17
- if (accept) { accept.click(); return 'generic:' + accept.innerText.trim(); }
18
-
19
- return null;
20
- })()
21
- `; // In-page snippet evaluated by dismissConsent(): clicks the first match among the Google, OneTrust and generic accept buttons and returns 'google', 'onetrust' or 'generic:<button text>'; returns null when nothing matched.
22
-
23
- // Detect and auto-click human verification challenges (Google, Microsoft, Cloudflare). In-page snippet evaluated first by handleVerification(); checks run top to bottom and the first one that matches returns.
24
- const VERIFY_DETECT_JS = `
25
- (function() {
26
- var url = document.location.href;
27
-
28
- // --- Google "sorry" page (hard CAPTCHA, can't auto-solve) ---
29
- if (url.includes('/sorry/') || url.includes('sorry.google')) return 'sorry-page';
30
-
31
- // --- Microsoft account verification page ---
32
- if (url.includes('login.microsoftonline.com') || url.includes('login.live.com') || url.includes('account.microsoft.com')) {
33
- // Look for "Verify" or "Continue" buttons on Microsoft auth pages
34
- var msBtns = Array.from(document.querySelectorAll('button, input[type=submit], a'));
35
- var msVerify = msBtns.find(b => /verify|continue|next/i.test(b.innerText?.trim() || b.value || ''));
36
- if (msVerify) { msVerify.click(); return 'clicked-ms-verify:' + (msVerify.innerText?.trim() || msVerify.value); }
37
- }
38
-
39
- // --- Bing Copilot / Microsoft "Verify you're human" interstitial ---
40
- // Copilot sometimes shows a modal with "Continue" or "Verify" before allowing queries
41
- if (url.includes('copilot.microsoft.com') || url.includes('bing.com/chat')) {
42
- // Look for verification modal/dialog
43
- var modal = document.querySelector('[role="dialog"], .b_modal, .bnp_hfly, [class*="verify"], [class*="challenge"]');
44
- if (modal) {
45
- // Find any actionable button in the modal
46
- var modalBtns = Array.from(modal.querySelectorAll('button, a[role="button"], input[type="submit"]'));
47
- var actionBtn = modalBtns.find(b => /^(continue|verify|submit|next|i agree|accept|got it)$/i.test(b.innerText?.trim() || b.value || ''));
48
- if (actionBtn) { actionBtn.click(); return 'clicked-copilot-modal:' + actionBtn.innerText.trim(); }
49
- }
50
-
51
- // Check for Turnstile iframe (Copilot uses Cloudflare Turnstile)
52
- var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"], iframe[title*="challenge"], iframe[title*="Widget"]');
53
- if (turnstileIframe) {
54
- // Try clicking the iframe container or nearby checkbox
55
- var container = turnstileIframe.closest('[class*="turnstile"], [class*="challenge"], [id*="turnstile"]') || turnstileIframe.parentElement;
56
- if (container) {
57
- var checkbox = container.querySelector('input[type="checkbox"]');
58
- if (checkbox && !checkbox.checked) {
59
- checkbox.click();
60
- return 'clicked-turnstile-in-iframe';
61
- }
62
- // Click the container itself (Turnstile often captures clicks on parent)
63
- container.click();
64
- return 'clicked-turnstile-container-near-iframe';
65
- }
66
- }
67
- }
68
-
69
- // --- Cloudflare Turnstile (used by Copilot and many sites) ---
70
- // Turnstile widget in iframe
71
- var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"]');
72
- if (turnstileIframe) {
73
- // Try to find and click the checkbox inside the iframe's container
74
- var turnstileCheckbox = document.querySelector('#cf-turnstile-response, [data-turnstile-callback] input, .cf-turnstile input[type="checkbox"]');
75
- if (turnstileCheckbox && !turnstileCheckbox.checked) {
76
- turnstileCheckbox.click();
77
- return 'clicked-turnstile-checkbox';
78
- }
79
- // Try clicking the turnstile container itself (some implementations)
80
- var turnstileContainer = document.querySelector('.cf-turnstile, [data-sitekey]');
81
- if (turnstileContainer) {
82
- turnstileContainer.click();
83
- return 'clicked-turnstile-container';
84
- }
85
- }
86
-
87
- // --- Cloudflare "Verify you are human" challenge page ---
88
- if (url.includes('challenges.cloudflare.com') || document.querySelector('#challenge-running, #challenge-stage')) {
89
- var cfCheckbox = document.querySelector('#cf-stage input[type="checkbox"], .ctp-checkbox-container input');
90
- if (cfCheckbox) { cfCheckbox.click(); return 'clicked-cloudflare-checkbox'; }
91
- var cfBtn = document.querySelector('#challenge-form button, .cf-challenge button');
92
- if (cfBtn) { cfBtn.click(); return 'clicked-cloudflare-button'; }
93
- }
94
-
95
- // --- Microsoft "I am human" / "Verify" challenge ---
96
- // Microsoft uses various verification UIs
97
- var msHumanBtn = document.querySelector('button[id*="i0"], button[id*="id__"]');
98
- if (msHumanBtn && /verify|human|robot|continue/i.test(msHumanBtn.innerText?.trim())) {
99
- msHumanBtn.click();
100
- return 'clicked-ms-human:' + msHumanBtn.innerText.trim();
101
- }
102
-
103
- // --- Generic verification buttons (catch-all) ---
104
- var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
105
- var verify = btns.find(b => {
106
- var t = (b.innerText?.trim() || b.value || '').toLowerCase();
107
- return (t.includes('verify') || t.includes('human') || t.includes('robot') || t.includes('continue') || t.includes('proceed')) &&
108
- !t.includes('verified') && !document.querySelector('iframe[src*="recaptcha"]');
109
- });
110
- if (verify) {
111
- verify.click();
112
- return 'clicked-verify:' + (verify.innerText?.trim() || verify.value);
113
- }
114
-
115
- // --- Google reCAPTCHA (no image challenge, just checkbox) ---
116
- var recaptchaCheckbox = document.querySelector('.recaptcha-checkbox-unchecked, input[type=checkbox][id*="recaptcha"]');
117
- if (recaptchaCheckbox) { recaptchaCheckbox.click(); return 'clicked-recaptcha'; }
118
-
119
- return null;
120
- })()
121
- `; // Result contract relied on by handleVerification(): 'sorry-page' for the Google CAPTCHA page, a string starting with 'clicked-' when a control was clicked, or null when nothing was found.
122
-
123
- // One retry pass for verification, evaluated repeatedly by handleVerification() until the page changes or the wait times out. Returns 'cleared' when no verification marker remains, a 'clicked…' string after clicking a candidate control, otherwise 'still-verifying'.
124
- const VERIFY_RETRY_JS = `
125
- (function() {
126
- var url = document.location.href;
127
-
128
- // Check if we're still on a verification page
129
- var isVerifyPage = url.includes('/sorry/') ||
130
- url.includes('challenges.cloudflare.com') ||
131
- url.includes('login.microsoftonline.com') ||
132
- document.querySelector('#challenge-running, #challenge-stage, .cf-turnstile, [role="dialog"]');
133
-
134
- if (!isVerifyPage) return 'cleared';
135
-
136
- // Try clicking any verify/continue button again
137
- var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
138
- var btn = btns.find(b => {
139
- var t = (b.innerText?.trim() || b.value || '').toLowerCase();
140
- return t.includes('verify') || t.includes('human') || t.includes('robot') || t.includes('continue') || t.includes('next') || t.includes('submit');
141
- });
142
- if (btn) { btn.click(); return 'clicked:' + (btn.innerText?.trim() || btn.value); }
143
-
144
- // Try Turnstile checkbox
145
- var cf = document.querySelector('#cf-stage input[type="checkbox"], .cf-turnstile input');
146
- if (cf && !cf.checked) { cf.click(); return 'clicked-turnstile'; }
147
-
148
- // Check for modal dialog with continue button (Copilot interstitial)
149
- var modal = document.querySelector('[role="dialog"], .b_modal, [class*="verify"]');
150
- if (modal) {
151
- var modalBtn = modal.querySelector('button, a[role="button"]');
152
- if (modalBtn) { modalBtn.click(); return 'clicked-modal-btn:' + modalBtn.innerText.trim(); }
153
- }
154
-
155
- return 'still-verifying';
156
- })()
157
- `; // Note: only results starting with 'clicked:' (the generic button branch) trigger the extra 2s pause in handleVerification(); 'clicked-turnstile' and 'clicked-modal-btn:' do not.
158
-
159
/**
 * Try to dismiss a cookie/consent banner in the given tab.
 *
 * Evaluates CONSENT_JS in the tab; evaluation failures are ignored. When the
 * snippet reports that it clicked something, waits 1.5s before returning.
 *
 * @param {string} tab - Tab identifier
 * @param {(args: string[]) => Promise<string>} cdp - CDP runner, called as cdp(["eval", tab, js])
 * @returns {Promise<void>}
 */
export async function dismissConsent(tab, cdp) {
  const clicked = await cdp(["eval", tab, CONSENT_JS]).catch(() => null);
  const nothingDismissed = !clicked || clicked === "null";
  if (nothingDismissed) {
    return;
  }
  await new Promise((done) => setTimeout(done, 1500));
}
165
-
166
- // Get a click point inside the challenge iframe for coordinate-based clicking (for cross-origin Turnstile). In-page snippet: returns a JSON string {x, y} located 30px in from the iframe's left edge and vertically centred, or null when no matching iframe exists.
167
- const GET_IFRAME_CENTER_JS = `
168
- (function() {
169
- var iframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"], iframe[title*="challenge"], iframe[title*="Widget"]');
170
- if (!iframe) return null;
171
- var rect = iframe.getBoundingClientRect();
172
- // Click near the center-left where the checkbox usually is
173
- return JSON.stringify({ x: rect.left + 30, y: rect.top + rect.height / 2 });
174
- })()
175
- `; // handleVerification() parses this result with JSON.parse and passes x and y to the "clickxy" command.
176
-
177
/**
 * Detect a human-verification challenge in a tab and try to get past it.
 *
 * Flow, driven by the result of VERIFY_DETECT_JS:
 *  - nothing detected: probe for a challenge iframe (GET_IFRAME_CENTER_JS)
 *    and, if one exists, click it by coordinates — a cross-origin iframe
 *    offers no DOM element to click;
 *  - "sorry-page": the Google CAPTCHA cannot be automated, so poll the tab URL
 *    until the user has solved it or `waitMs` elapses;
 *  - "clicked-…": keep evaluating VERIFY_RETRY_JS (plus coordinate clicks on
 *    the challenge iframe) until the page reports cleared or `waitMs` elapses.
 *
 * @param {string} tab - Tab identifier
 * @param {(args: string[]) => Promise<string>} cdp - CDP runner; called with
 *   ["eval", tab, js] and ["clickxy", tab, x, y]
 * @param {number} [waitMs=60000] - Maximum time to wait for the challenge to clear
 * @returns {Promise<string>} "clear" (nothing to do), "clicked" (cleared after automated clicks),
 *   "cleared-by-user" (CAPTCHA solved manually) or "needs-human" (still blocked after waitMs)
 */
export async function handleVerification(tab, cdp, waitMs = 60000) {
  const sleep = (ms) => new Promise((r) => setTimeout(r, ms));

  const result = await cdp(["eval", tab, VERIFY_DETECT_JS]).catch(() => null);

  // Detection found nothing clickable. This fallback must run here: it used
  // to sit after an early `return "clear"` for the same condition and was
  // therefore never reached.
  if (!result || result === "null") {
    const iframeCenter = await cdp(["eval", tab, GET_IFRAME_CENTER_JS]).catch(
      () => null,
    );
    if (iframeCenter && iframeCenter !== "null") {
      process.stderr.write(
        `[greedysearch] Found Turnstile iframe, attempting coordinate click...\n`,
      );
      try {
        const { x, y } = JSON.parse(iframeCenter);
        await cdp(["clickxy", tab, String(x), String(y)]);
        await sleep(3000);

        // Check if it worked
        const cleared = await cdp(["eval", tab, VERIFY_RETRY_JS]).catch(
          () => null,
        );
        if (cleared === "cleared" || cleared === "null") {
          return "clicked";
        }
      } catch {
        // Best-effort: a malformed result or a failed click falls through to "clear".
      }
    }
    return "clear";
  }

  // Hard CAPTCHA page — wait for user to solve it manually
  if (result === "sorry-page") {
    process.stderr.write(
      `[greedysearch] Google CAPTCHA detected — please solve it in the browser window (waiting up to ${Math.floor(waitMs / 1000)}s)...\n`,
    );
    const deadline = Date.now() + waitMs;
    while (Date.now() < deadline) {
      await sleep(2000);
      const url = await cdp(["eval", tab, "document.location.href"]).catch(
        () => "",
      );
      // An empty URL means the read failed (e.g. mid-navigation); that is not
      // evidence the CAPTCHA is gone, so poll again.
      if (!url) continue;
      // Same two markers that detection uses for the CAPTCHA page.
      if (!url.includes("/sorry/") && !url.includes("sorry.google")) {
        return "cleared-by-user";
      }
    }
    return "needs-human";
  }

  // We clicked something — wait for page to update, then keep retrying
  if (result.startsWith("clicked-")) {
    process.stderr.write(`[greedysearch] Clicked verification: ${result}\n`);
    await sleep(2000);

    // Keep checking if verification cleared, retry clicking for up to waitMs
    const deadline = Date.now() + waitMs;
    while (Date.now() < deadline) {
      const retryResult = await cdp(["eval", tab, VERIFY_RETRY_JS]).catch(
        () => null,
      );

      if (retryResult === "cleared" || !retryResult || retryResult === "null") {
        process.stderr.write(`[greedysearch] Verification cleared.\n`);
        await sleep(1000);
        return "clicked";
      }

      if (retryResult.startsWith("clicked:")) {
        process.stderr.write(`[greedysearch] Retrying verification click...\n`);
        await sleep(2000);
      }

      // If verification is stuck, try clicking the Turnstile iframe by coordinates
      const iframeCenter = await cdp(["eval", tab, GET_IFRAME_CENTER_JS]).catch(
        () => null,
      );
      if (iframeCenter && iframeCenter !== "null") {
        try {
          const { x, y } = JSON.parse(iframeCenter);
          process.stderr.write(
            `[greedysearch] Trying coordinate click on Turnstile iframe at (${x}, ${y})...\n`,
          );
          await cdp(["clickxy", tab, String(x), String(y)]);
          await sleep(3000);
        } catch {
          // Best-effort: the next loop iteration re-checks the page anyway.
        }
      }

      await sleep(1500);
    }

    // Still stuck — might need user intervention
    process.stderr.write(
      `[greedysearch] Verification may require manual intervention.\n`,
    );
    return "needs-human";
  }

  // Unrecognised detection result: nothing we know how to handle.
  return "clear";
}
@@ -1,163 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- // extractors/gemini.mjs
4
- // Navigate gemini.google.com/app, submit query, wait for answer, return clean answer + sources.
5
- //
6
- // Usage:
7
- // node extractors/gemini.mjs "<query>" [--tab <prefix>]
8
- //
9
- // Output (stdout): JSON { answer, sources, query, url }
10
- // Errors go to stderr only — stdout is always clean JSON for piping.
11
-
12
- import {
13
- cdp,
14
- formatAnswer,
15
- getOrOpenTab,
16
- handleError,
17
- injectClipboardInterceptor,
18
- outputJson,
19
- parseArgs,
20
- parseSourcesFromMarkdown,
21
- validateQuery,
22
- } from "./common.mjs";
23
- import { dismissConsent, handleVerification } from "./consent.mjs";
24
- import { SELECTORS } from "./selectors.mjs";
25
-
26
- const S = SELECTORS.gemini; // Gemini selector set; this file reads S.input, S.sendButton and S.copyButton
27
- const GLOBAL_VAR = "__geminiClipboard"; // window property the clipboard interceptor stores copied text in; read back by extractAnswer()
28
-
29
- // ============================================================================
30
- // Gemini-specific helpers
31
- // ============================================================================
32
-
33
/**
 * Type text into Gemini's prompt input.
 *
 * The injected snippet focuses the element matched by `S.input` and inserts
 * the text with `document.execCommand('insertText')`. It returns false when
 * the element is missing; that is surfaced as an error here so the caller
 * does not go on to submit an empty prompt and wait for an answer.
 *
 * @param {string} tab - Tab identifier
 * @param {string} text - Text to insert (JSON-encoded before being embedded in the snippet)
 * @returns {Promise<void>}
 * @throws {Error} When the input element is not found in the page
 */
async function typeIntoGemini(tab, text) {
  const typed = await cdp([
    "eval",
    tab,
    `
(function(t) {
var el = document.querySelector('${S.input}');
if (!el) return false;
el.focus();
document.execCommand('insertText', false, t);
return true;
})(${JSON.stringify(text)})
`,
  ]);

  // Boolean results come back as text (this file compares other evals to "true").
  if (typed === "false") {
    throw new Error("Gemini input element not found; query was not typed");
  }
}
48
-
49
/**
 * Poll a tab until the element matched by `S.copyButton` exists.
 *
 * Polls every 600ms. On every 10th poll a small random scroll is performed
 * first (failures ignored) to keep the page "active" (anti-bot evasion).
 *
 * @param {string} tab - Tab identifier
 * @param {number} [timeout=120000] - Maximum wait time in ms
 * @returns {Promise<void>} Resolves once the copy button is present
 * @throws {Error} When the button has not appeared within the timeout
 */
async function waitForCopyButton(tab, timeout = 120000) {
  const stopAt = Date.now() + timeout;

  for (let poll = 1; Date.now() < stopAt; poll += 1) {
    await new Promise((wake) => setTimeout(wake, 600));

    // Every 10th poll (~6 seconds): nudge the scroll position slightly.
    if (poll % 10 === 0) {
      const scrollJs = `
(function() {
const chat = document.querySelector('chat-window, [role="main"], main') || document.body;
const currentScroll = chat.scrollTop || window.scrollY || 0;
const scrollHeight = chat.scrollHeight || document.body.scrollHeight || 0;
// Small random scroll movement to mimic human reading
const jitter = Math.floor(Math.random() * 50) - 25;
const targetScroll = Math.min(scrollHeight, Math.max(0, currentScroll + jitter));
chat.scrollTo ? chat.scrollTo({ top: targetScroll, behavior: 'smooth' }) : window.scrollTo(0, targetScroll);
})()
`;
      await cdp(["eval", tab, scrollJs]).catch(() => null);
    }

    const probe = `!!document.querySelector('${S.copyButton}')`;
    const present = await cdp(["eval", tab, probe]).catch(() => "false");
    if (present === "true") {
      return;
    }
  }

  throw new Error(`Gemini copy button did not appear within ${timeout}ms`);
}
83
-
84
/**
 * Click Gemini's copy button and read the answer captured by the clipboard interceptor.
 *
 * Waits 400ms after the click, then reads `window[GLOBAL_VAR]` from the tab.
 *
 * @param {string} tab - Tab identifier
 * @returns {Promise<{answer: string, sources: Array<{title: string, url: string}>}>}
 *   The trimmed answer and the sources parsed from its Markdown links
 * @throws {Error} When nothing was captured
 */
async function extractAnswer(tab) {
  const clickCopyJs = `document.querySelector('${S.copyButton}')?.click()`;
  await cdp(["eval", tab, clickCopyJs]);

  // Give the page's copy handler time to call the intercepted clipboard API.
  await new Promise((wake) => setTimeout(wake, 400));

  const captured = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
  if (!captured) {
    throw new Error("Clipboard interceptor returned empty text");
  }

  return {
    answer: captured.trim(),
    sources: parseSourcesFromMarkdown(captured),
  };
}
98
-
99
- // ============================================================================
100
- // Main
101
- // ============================================================================
102
-
103
// Usage text printed by validateQuery() when no query (or --help) is given.
const USAGE = 'Usage: node extractors/gemini.mjs "<query>" [--tab <prefix>]\n';

/**
 * CLI entry point: submit the query to Gemini in a browser tab and print the result.
 *
 * Steps: validate and parse the arguments, open (or reuse) a tab, navigate to
 * gemini.google.com/app, dismiss consent/verification prompts, wait up to 10s
 * for the prompt input, install the clipboard interceptor, type and send the
 * query, wait for the copy button, then read the answer through the
 * interceptor and write JSON { query, url, answer, sources } to stdout.
 * Any failure inside the try block is passed to handleError().
 */
async function main() {
  const pause = (ms) => new Promise((wake) => setTimeout(wake, ms));

  const argv = process.argv.slice(2);
  validateQuery(argv, USAGE);

  const { query, tabPrefix, short } = parseArgs(argv);

  try {
    await cdp(["list"]);
    const tab = await getOrOpenTab(tabPrefix);

    // Navigating to /app starts a fresh conversation for every search.
    await cdp(["nav", tab, "https://gemini.google.com/app"], 35000);
    await pause(2000);
    await dismissConsent(tab, cdp);
    await handleVerification(tab, cdp, 60000);

    // Poll for the prompt input for up to 10s; continue afterwards either way.
    const readyBy = Date.now() + 10000;
    while (Date.now() < readyBy) {
      const inputPresent = await cdp([
        "eval",
        tab,
        `!!document.querySelector('${S.input}')`,
      ]).catch(() => "false");
      if (inputPresent === "true") break;
      await pause(400);
    }
    await pause(300);

    await injectClipboardInterceptor(tab, GLOBAL_VAR);
    await typeIntoGemini(tab, query);
    await pause(400);

    const clickSendJs = `document.querySelector('${S.sendButton}')?.click()`;
    await cdp(["eval", tab, clickSendJs]);

    await waitForCopyButton(tab);

    const { answer, sources } = await extractAnswer(tab);
    if (!answer) throw new Error("No answer captured from Gemini clipboard");

    // Fall back to the app URL when the tab's location cannot be read.
    const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
      () => "https://gemini.google.com/app",
    );
    outputJson({
      query,
      url: finalUrl,
      answer: formatAnswer(answer, short),
      sources,
    });
  } catch (e) {
    handleError(e);
  }
}

main();