@apmantza/greedysearch-pi 1.6.2 → 1.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -1
- package/cdp.mjs +0 -1004
- package/coding-task.mjs +0 -392
- package/extractors/bing-copilot.mjs +0 -167
- package/extractors/common.mjs +0 -237
- package/extractors/consent.mjs +0 -273
- package/extractors/gemini.mjs +0 -163
- package/extractors/google-ai.mjs +0 -156
- package/extractors/perplexity.mjs +0 -128
- package/extractors/selectors.mjs +0 -52
- package/launch.mjs +0 -288
- package/search.mjs +0 -1242
package/extractors/common.mjs
DELETED
|
@@ -1,237 +0,0 @@
|
|
|
1
|
-
// extractors/common.mjs — shared utilities for CDP-based extractors
|
|
2
|
-
// Extracts common patterns: cdp wrapper, tab management, clipboard interception, source parsing
|
|
3
|
-
|
|
4
|
-
import { spawn } from "node:child_process";
|
|
5
|
-
import { dirname, join } from "node:path";
|
|
6
|
-
import { fileURLToPath } from "node:url";
|
|
7
|
-
|
|
8
|
-
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
9
|
-
const CDP = join(__dir, "..", "cdp.mjs");
|
|
10
|
-
|
|
11
|
-
// ============================================================================
|
|
12
|
-
// CDP wrapper
|
|
13
|
-
// ============================================================================
|
|
14
|
-
|
|
15
|
-
/**
 * Execute a CDP command through the cdp.mjs CLI.
 *
 * Spawns `node <CDP> ...args`, buffers the child's stdout and stderr, and
 * settles once the child closes, fails to spawn, or the timeout elapses.
 *
 * @param {string[]} args - Command arguments; args[0] is echoed in the timeout message
 * @param {number} [timeoutMs=30000] - Timeout in milliseconds
 * @returns {Promise<string>} Trimmed stdout of the command
 * @throws {Error} Rejects on spawn failure, on timeout (`cdp timeout: <cmd>`), or on a
 *   non-zero exit (trimmed stderr, or `cdp exit <code>` when stderr is empty)
 */
export function cdp(args, timeoutMs = 30000) {
  return new Promise((resolve, reject) => {
    const proc = spawn("node", [CDP, ...args], {
      stdio: ["ignore", "pipe", "pipe"],
    });
    let out = "";
    let err = "";
    proc.stdout.on("data", (chunk) => {
      out += chunk;
    });
    proc.stderr.on("data", (chunk) => {
      err += chunk;
    });

    const timer = setTimeout(() => {
      proc.kill();
      reject(new Error(`cdp timeout: ${args[0]}`));
    }, timeoutMs);

    // Without this listener a failed spawn emits an unhandled 'error' event
    // instead of rejecting the promise, and the timer would stay armed.
    proc.on("error", (spawnError) => {
      clearTimeout(timer);
      reject(spawnError);
    });

    proc.on("close", (code) => {
      clearTimeout(timer);
      // After a timeout the promise is already rejected; settling again is a no-op.
      if (code !== 0) reject(new Error(err.trim() || `cdp exit ${code}`));
      else resolve(out.trim());
    });
  });
}
|
|
41
|
-
|
|
42
|
-
// ============================================================================
|
|
43
|
-
// Tab management
|
|
44
|
-
// ============================================================================
|
|
45
|
-
|
|
46
|
-
/**
 * Get an existing tab by prefix or open a new one.
 *
 * When a prefix is supplied it is returned untouched. Otherwise the first
 * listed tab is used as the anchor for a `Target.createTarget` call that
 * opens a fresh about:blank tab (avoids SPA navigation issues in reused tabs).
 *
 * @param {string|null} tabPrefix - Existing tab prefix, or null to create new
 * @returns {Promise<string>} Tab identifier (first 8 characters of the target id)
 * @throws {Error} When no tab is listed, or the createTarget reply has no usable targetId
 */
export async function getOrOpenTab(tabPrefix) {
  if (tabPrefix) return tabPrefix;

  const list = await cdp(["list"]);
  const anchor = list.split("\n")[0]?.slice(0, 8);
  if (!anchor) {
    throw new Error(
      "No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?",
    );
  }

  const raw = await cdp([
    "evalraw",
    anchor,
    "Target.createTarget",
    '{"url":"about:blank"}',
  ]);

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (cause) {
    throw new Error(`Target.createTarget returned invalid JSON (no targetId): ${raw}`, {
      cause,
    });
  }
  const targetId = parsed?.targetId;
  if (typeof targetId !== "string" || targetId === "") {
    throw new Error(`Target.createTarget response has no targetId: ${raw}`);
  }

  // Re-list tabs after creating one; the original code notes this refreshes the tab cache.
  await cdp(["list"]);
  return targetId.slice(0, 8);
}
|
|
70
|
-
|
|
71
|
-
// ============================================================================
|
|
72
|
-
// Clipboard interception (for extractors that use copy-to-clipboard)
|
|
73
|
-
// ============================================================================
|
|
74
|
-
|
|
75
|
-
/**
 * Inject clipboard interceptor to capture text when copy buttons are clicked.
 * Each engine uses a unique global variable to avoid conflicts.
 *
 * The injected script resets `window[globalVar]` to null, then wraps
 * `navigator.clipboard.writeText` and `navigator.clipboard.write` so the text
 * being copied is stored in that variable before the original method runs.
 *
 * @param {string} tab - Tab identifier
 * @param {string} globalVar - Global variable name (e.g., '__pplxClipboard', '__geminiClipboard');
 *   spliced into the script verbatim, so it must be a valid identifier
 * @returns {Promise<void>}
 */
export async function injectClipboardInterceptor(tab, globalVar) {
  // The script is wrapped in an IIFE so its helper bindings stay function-scoped.
  // As bare top-level `const`s, a second injection into the same document would
  // fail with a redeclaration SyntaxError when evaluated as a top-level script.
  const code = `
    (function() {
      var clip = navigator.clipboard;
      window.${globalVar} = null;
      var origWriteText = clip.writeText.bind(clip);
      clip.writeText = function(text) {
        window.${globalVar} = text;
        return origWriteText(text);
      };
      var origWrite = clip.write.bind(clip);
      clip.write = async function(items) {
        try {
          for (const item of items) {
            if (item.types && item.types.includes('text/plain')) {
              const blob = await item.getType('text/plain');
              window.${globalVar} = await blob.text();
              break;
            }
          }
        } catch(e) {}
        return origWrite(items);
      };
    })()
  `;
  await cdp(["eval", tab, code]);
}
|
|
105
|
-
|
|
106
|
-
// ============================================================================
|
|
107
|
-
// Source extraction from markdown
|
|
108
|
-
// ============================================================================
|
|
109
|
-
|
|
110
|
-
/**
 * Parse Markdown links from text to extract sources.
 *
 * Only http(s) links of the form [title](url) are recognised. Duplicate URLs
 * keep their first title, and at most 10 sources are returned.
 *
 * @param {string} text - Text containing Markdown links like [title](url)
 * @returns {Array<{title: string, url: string}>} Extracted sources, in order of first appearance
 */
export function parseSourcesFromMarkdown(text) {
  const MAX_SOURCES = 10;
  const linkPattern = /\[([^\]]+)\]\((https?:\/\/[^\s)]+)\)/g;
  const seenUrls = new Set();
  const sources = [];

  // A Set gives constant-time de-duplication, and scanning stops once the cap is reached.
  for (const [, title, url] of String(text ?? "").matchAll(linkPattern)) {
    if (seenUrls.has(url)) continue;
    seenUrls.add(url);
    sources.push({ title, url });
    if (sources.length === MAX_SOURCES) break;
  }
  return sources;
}
|
|
121
|
-
|
|
122
|
-
// ============================================================================
|
|
123
|
-
// Stream completion detection
|
|
124
|
-
// ============================================================================
|
|
125
|
-
|
|
126
|
-
/**
 * Poll a tab until the monitored element's text length stops changing.
 *
 * Each round sleeps for `interval`, evaluates `<selector>?.innerText?.length ?? 0`
 * in the tab, and counts consecutive rounds with the same non-zero length.
 * A failed evaluation or a zero length is ignored: it neither advances nor resets the count.
 *
 * @param {string} tab - Tab identifier
 * @param {object} options - Options
 * @param {number} [options.timeout=30000] - Maximum wait time in ms
 * @param {number} [options.interval=600] - Polling interval in ms
 * @param {number} [options.stableRounds=3] - Required stable rounds to consider complete
 * @param {string} [options.selector='document.body'] - JavaScript expression for the element to monitor
 * @returns {Promise<number>} Final text length
 * @throws {Error} When the length has not stabilised before the timeout
 */
export async function waitForStreamComplete(tab, options = {}) {
  const {
    timeout = 30000,
    interval = 600,
    stableRounds = 3,
    selector = "document.body",
  } = options;

  const lengthExpression = `${selector}?.innerText?.length ?? 0`;
  const stopAt = Date.now() + timeout;
  let previousLength = -1;
  let unchangedRounds = 0;

  for (;;) {
    if (!(Date.now() < stopAt)) break;

    await new Promise((wake) => setTimeout(wake, interval));
    const reported = await cdp(["eval", tab, lengthExpression]).catch(() => "0");
    const length = parseInt(reported, 10) || 0;

    // Nothing rendered yet (or the eval failed): leave the counters untouched.
    if (!(length > 0)) continue;

    if (length !== previousLength) {
      previousLength = length;
      unchangedRounds = 0;
      continue;
    }

    unchangedRounds += 1;
    if (unchangedRounds >= stableRounds) return length;
  }

  throw new Error(`Generation did not stabilise within ${timeout}ms`);
}
|
|
170
|
-
|
|
171
|
-
// ============================================================================
|
|
172
|
-
// CLI argument parsing
|
|
173
|
-
// ============================================================================
|
|
174
|
-
|
|
175
|
-
/**
 * Parse standard extractor CLI arguments.
 *
 * `--short` may appear anywhere and is removed. The first `--tab` flag and the
 * value following it are removed as well; everything left is joined with
 * spaces to form the query.
 *
 * @param {string[]} args - process.argv.slice(2)
 * @returns {{query: string, tabPrefix: string|null, short: boolean}}
 *   `tabPrefix` is null when `--tab` is absent or has no value after it
 */
export function parseArgs(args) {
  const short = args.includes("--short");
  const rest = args.filter((arg) => arg !== "--short");
  const tabFlagIdx = rest.indexOf("--tab");

  if (tabFlagIdx === -1) {
    return { query: rest.join(" "), tabPrefix: null, short };
  }

  // A trailing `--tab` has no value; report null (as documented) rather than undefined.
  const tabPrefix = rest[tabFlagIdx + 1] ?? null;
  const query = [
    ...rest.slice(0, tabFlagIdx),
    ...rest.slice(tabFlagIdx + 2),
  ].join(" ");
  return { query, tabPrefix, short };
}
|
|
193
|
-
|
|
194
|
-
/**
 * Ensure a query was supplied on the command line.
 *
 * When the argument list is empty or starts with `--help`, the usage text is
 * written to stderr and the process exits with status 1.
 *
 * @param {string[]} args - process.argv.slice(2)
 * @param {string} usage - Usage string for error message
 */
export function validateQuery(args, usage) {
  const hasQuery = args.length && args[0] !== "--help";
  if (hasQuery) return;

  process.stderr.write(usage);
  process.exit(1);
}
|
|
205
|
-
|
|
206
|
-
// ============================================================================
|
|
207
|
-
// Output formatting
|
|
208
|
-
// ============================================================================
|
|
209
|
-
|
|
210
|
-
/**
 * Shorten an answer for short mode.
 *
 * Outside short mode, or when the answer already fits, the text is returned
 * unchanged. Otherwise it is cut to `maxLen` characters, the trailing
 * whitespace-plus-partial-word is dropped, and an ellipsis is appended.
 *
 * @param {string} answer - Full answer text
 * @param {boolean} short - Whether to truncate
 * @param {number} [maxLen=300] - Maximum length in short mode
 * @returns {string} Formatted answer
 */
export function formatAnswer(answer, short, maxLen = 300) {
  if (!short) return answer;
  if (answer.length <= maxLen) return answer;

  const head = answer.slice(0, maxLen);
  // Drop the last (possibly cut-off) word together with the whitespace before it.
  const wholeWords = head.replace(/\s+\S*$/, "");
  return wholeWords + "…";
}
|
|
221
|
-
|
|
222
|
-
/**
 * Write a value to stdout as 2-space-indented JSON followed by a newline.
 *
 * @param {object} data - Data to output
 */
export function outputJson(data) {
  const pretty = JSON.stringify(data, null, 2);
  process.stdout.write(pretty + "\n");
}
|
|
229
|
-
|
|
230
|
-
/**
 * Report an error on stderr as `Error: <message>` and exit with status 1.
 *
 * @param {Error} error - Error to handle; a thrown non-Error value (string,
 *   null, ...) is printed via String() instead of showing "undefined"
 */
export function handleError(error) {
  const message = error?.message ?? String(error);
  process.stderr.write(`Error: ${message}\n`);
  process.exit(1);
}
|
package/extractors/consent.mjs
DELETED
|
@@ -1,273 +0,0 @@
|
|
|
1
|
-
// consent.mjs — auto-dismiss common cookie/consent banners and human-verification pages
|
|
2
|
-
// Call dismissConsent(tab, cdpFn) after navigating to any page.
|
|
3
|
-
|
|
4
|
-
// In-page script evaluated by dismissConsent(). It clicks the first consent
// control it finds and returns a label for it ('google', 'onetrust',
// 'generic:<button text>'), or null when nothing matched.
const CONSENT_JS = `
  (function() {
    // Google consent page (consent.google.com)
    var g = document.querySelector('#L2AGLb, button[jsname="b3VHJd"], .tHlp8d');
    if (g) { g.click(); return 'google'; }

    // OneTrust (used by many sites including Stack Overflow)
    var ot = document.querySelector('#onetrust-accept-btn-handler, .onetrust-accept-btn-handler');
    if (ot) { ot.click(); return 'onetrust'; }

    // Generic "accept all" / "agree" buttons
    var btns = Array.from(document.querySelectorAll('button, a[role=button]'));
    var accept = btns.find(b => /^(accept all|accept cookies|agree|i agree|got it|allow all|allow cookies)$/i.test(b.innerText?.trim()));
    if (accept) { accept.click(); return 'generic:' + accept.innerText.trim(); }

    return null;
  })()
`;
|
|
22
|
-
|
|
23
|
-
// In-page script that detects human-verification challenges (Google, Microsoft,
// Cloudflare) and clicks the first matching control. It returns 'sorry-page' for
// Google's hard CAPTCHA page, a 'clicked-...' label for whatever it clicked, or
// null when nothing was recognised. Checks run top to bottom; the first hit wins.
const VERIFY_DETECT_JS = `
  (function() {
    var url = document.location.href;

    // --- Google "sorry" page (hard CAPTCHA, can't auto-solve) ---
    if (url.includes('/sorry/') || url.includes('sorry.google')) return 'sorry-page';

    // --- Microsoft account verification page ---
    if (url.includes('login.microsoftonline.com') || url.includes('login.live.com') || url.includes('account.microsoft.com')) {
      // Look for "Verify" or "Continue" buttons on Microsoft auth pages
      var msBtns = Array.from(document.querySelectorAll('button, input[type=submit], a'));
      var msVerify = msBtns.find(b => /verify|continue|next/i.test(b.innerText?.trim() || b.value || ''));
      if (msVerify) { msVerify.click(); return 'clicked-ms-verify:' + (msVerify.innerText?.trim() || msVerify.value); }
    }

    // --- Bing Copilot / Microsoft "Verify you're human" interstitial ---
    // Copilot sometimes shows a modal with "Continue" or "Verify" before allowing queries
    if (url.includes('copilot.microsoft.com') || url.includes('bing.com/chat')) {
      // Look for verification modal/dialog
      var modal = document.querySelector('[role="dialog"], .b_modal, .bnp_hfly, [class*="verify"], [class*="challenge"]');
      if (modal) {
        // Find any actionable button in the modal
        var modalBtns = Array.from(modal.querySelectorAll('button, a[role="button"], input[type="submit"]'));
        var actionBtn = modalBtns.find(b => /^(continue|verify|submit|next|i agree|accept|got it)$/i.test(b.innerText?.trim() || b.value || ''));
        if (actionBtn) { actionBtn.click(); return 'clicked-copilot-modal:' + actionBtn.innerText.trim(); }
      }

      // Check for Turnstile iframe (Copilot uses Cloudflare Turnstile)
      var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"], iframe[title*="challenge"], iframe[title*="Widget"]');
      if (turnstileIframe) {
        // Try clicking the iframe container or nearby checkbox
        var container = turnstileIframe.closest('[class*="turnstile"], [class*="challenge"], [id*="turnstile"]') || turnstileIframe.parentElement;
        if (container) {
          var checkbox = container.querySelector('input[type="checkbox"]');
          if (checkbox && !checkbox.checked) {
            checkbox.click();
            return 'clicked-turnstile-in-iframe';
          }
          // Click the container itself (Turnstile often captures clicks on parent)
          container.click();
          return 'clicked-turnstile-container-near-iframe';
        }
      }
    }

    // --- Cloudflare Turnstile (used by Copilot and many sites) ---
    // Turnstile widget in iframe
    var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"]');
    if (turnstileIframe) {
      // Try to find and click the checkbox inside the iframe's container
      var turnstileCheckbox = document.querySelector('#cf-turnstile-response, [data-turnstile-callback] input, .cf-turnstile input[type="checkbox"]');
      if (turnstileCheckbox && !turnstileCheckbox.checked) {
        turnstileCheckbox.click();
        return 'clicked-turnstile-checkbox';
      }
      // Try clicking the turnstile container itself (some implementations)
      var turnstileContainer = document.querySelector('.cf-turnstile, [data-sitekey]');
      if (turnstileContainer) {
        turnstileContainer.click();
        return 'clicked-turnstile-container';
      }
    }

    // --- Cloudflare "Verify you are human" challenge page ---
    if (url.includes('challenges.cloudflare.com') || document.querySelector('#challenge-running, #challenge-stage')) {
      var cfCheckbox = document.querySelector('#cf-stage input[type="checkbox"], .ctp-checkbox-container input');
      if (cfCheckbox) { cfCheckbox.click(); return 'clicked-cloudflare-checkbox'; }
      var cfBtn = document.querySelector('#challenge-form button, .cf-challenge button');
      if (cfBtn) { cfBtn.click(); return 'clicked-cloudflare-button'; }
    }

    // --- Microsoft "I am human" / "Verify" challenge ---
    // Microsoft uses various verification UIs
    var msHumanBtn = document.querySelector('button[id*="i0"], button[id*="id__"]');
    if (msHumanBtn && /verify|human|robot|continue/i.test(msHumanBtn.innerText?.trim())) {
      msHumanBtn.click();
      return 'clicked-ms-human:' + msHumanBtn.innerText.trim();
    }

    // --- Generic verification buttons (catch-all) ---
    var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
    var verify = btns.find(b => {
      var t = (b.innerText?.trim() || b.value || '').toLowerCase();
      return (t.includes('verify') || t.includes('human') || t.includes('robot') || t.includes('continue') || t.includes('proceed')) &&
        !t.includes('verified') && !document.querySelector('iframe[src*="recaptcha"]');
    });
    if (verify) {
      verify.click();
      return 'clicked-verify:' + (verify.innerText?.trim() || verify.value);
    }

    // --- Google reCAPTCHA (no image challenge, just checkbox) ---
    var recaptchaCheckbox = document.querySelector('.recaptcha-checkbox-unchecked, input[type=checkbox][id*="recaptcha"]');
    if (recaptchaCheckbox) { recaptchaCheckbox.click(); return 'clicked-recaptcha'; }

    return null;
  })()
`;
|
|
122
|
-
|
|
123
|
-
// In-page script used while retrying a verification. It returns 'cleared' when
// the page no longer looks like a verification page; otherwise it clicks the
// first verify/continue-style control it finds ('clicked:<text>',
// 'clicked-turnstile', 'clicked-modal-btn:<text>') or reports 'still-verifying'.
const VERIFY_RETRY_JS = `
  (function() {
    var url = document.location.href;

    // Check if we're still on a verification page
    var isVerifyPage = url.includes('/sorry/') ||
      url.includes('challenges.cloudflare.com') ||
      url.includes('login.microsoftonline.com') ||
      document.querySelector('#challenge-running, #challenge-stage, .cf-turnstile, [role="dialog"]');

    if (!isVerifyPage) return 'cleared';

    // Try clicking any verify/continue button again
    var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
    var btn = btns.find(b => {
      var t = (b.innerText?.trim() || b.value || '').toLowerCase();
      return t.includes('verify') || t.includes('human') || t.includes('robot') || t.includes('continue') || t.includes('next') || t.includes('submit');
    });
    if (btn) { btn.click(); return 'clicked:' + (btn.innerText?.trim() || btn.value); }

    // Try Turnstile checkbox
    var cf = document.querySelector('#cf-stage input[type="checkbox"], .cf-turnstile input');
    if (cf && !cf.checked) { cf.click(); return 'clicked-turnstile'; }

    // Check for modal dialog with continue button (Copilot interstitial)
    var modal = document.querySelector('[role="dialog"], .b_modal, [class*="verify"]');
    if (modal) {
      var modalBtn = modal.querySelector('button, a[role="button"]');
      if (modalBtn) { modalBtn.click(); return 'clicked-modal-btn:' + modalBtn.innerText.trim(); }
    }

    return 'still-verifying';
  })()
`;
|
|
158
|
-
|
|
159
|
-
/**
 * Try to dismiss a cookie/consent banner in the given tab.
 *
 * Evaluates CONSENT_JS in the tab; when it reports that something was clicked,
 * waits 1.5 s so the page can react. A failed evaluation is ignored.
 *
 * @param {string} tab - Tab identifier
 * @param {(args: string[]) => Promise<string>} cdp - CDP runner, called as cdp(["eval", tab, js])
 * @returns {Promise<void>}
 */
export async function dismissConsent(tab, cdp) {
  const outcome = await cdp(["eval", tab, CONSENT_JS]).catch(() => null);

  const nothingDismissed = !outcome || outcome === "null";
  if (nothingDismissed) return;

  await new Promise((done) => setTimeout(done, 1500));
}
|
|
165
|
-
|
|
166
|
-
// In-page script for coordinate-based clicking on a cross-origin Turnstile
// iframe. It returns null when no matching iframe exists; otherwise a JSON
// string {x, y} for a point 30px in from the iframe's left edge at half height.
const GET_IFRAME_CENTER_JS = `
  (function() {
    var iframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"], iframe[title*="challenge"], iframe[title*="Widget"]');
    if (!iframe) return null;
    var rect = iframe.getBoundingClientRect();
    // Click near the center-left where the checkbox usually is
    return JSON.stringify({ x: rect.left + 30, y: rect.top + rect.height / 2 });
  })()
`;
|
|
176
|
-
|
|
177
|
-
/**
 * Detect a human-verification challenge in `tab` and try to get past it.
 *
 * Flow:
 *  1. Run VERIFY_DETECT_JS in the tab.
 *  2. Nothing detected: probe for a Turnstile iframe and, if one exists, click
 *     it by coordinates as a last resort.
 *  3. Google "sorry" page: wait up to `waitMs` for the user to solve it.
 *  4. Something was clicked: poll VERIFY_RETRY_JS for up to `waitMs`,
 *     re-clicking and trying a coordinate click on a Turnstile iframe.
 *
 * @param {string} tab - Tab identifier
 * @param {(args: string[]) => Promise<string>} cdp - CDP runner, called as cdp([command, ...args])
 * @param {number} [waitMs=60000] - Maximum time to wait for a challenge to clear
 * @returns {Promise<string>} 'clear' (nothing to do), 'clicked' (auto-cleared),
 *   'cleared-by-user' (sorry page solved manually) or 'needs-human'
 */
export async function handleVerification(tab, cdp, waitMs = 60000) {
  const pause = (ms) => new Promise((r) => setTimeout(r, ms));
  // Every in-page evaluation is best-effort: a failed eval yields null.
  const evalInTab = (js) => cdp(["eval", tab, js]).catch(() => null);

  const result = await evalInTab(VERIFY_DETECT_JS);

  if (!result || result === "null") {
    // Detection found nothing, but a Turnstile iframe may still be present.
    // This fallback used to sit after the early return and could never run.
    const iframeCenter = await evalInTab(GET_IFRAME_CENTER_JS);
    if (iframeCenter && iframeCenter !== "null") {
      process.stderr.write(
        `[greedysearch] Found Turnstile iframe, attempting coordinate click...\n`,
      );
      try {
        const { x, y } = JSON.parse(iframeCenter);
        await cdp(["clickxy", tab, String(x), String(y)]);
        await pause(3000);

        // Check if it worked
        const cleared = await evalInTab(VERIFY_RETRY_JS);
        if (cleared === "cleared" || cleared === "null") {
          return "clicked";
        }
      } catch {
        // Best-effort click: malformed coordinates or a failed click fall through to "clear".
      }
    }
    return "clear";
  }

  // Hard CAPTCHA page — wait for user to solve it manually
  if (result === "sorry-page") {
    process.stderr.write(
      `[greedysearch] Google CAPTCHA detected — please solve it in the browser window (waiting up to ${Math.floor(waitMs / 1000)}s)...\n`,
    );
    const deadline = Date.now() + waitMs;
    while (Date.now() < deadline) {
      await pause(2000);
      const url = await evalInTab("document.location.href");
      // A failed eval (null) means "unknown", not "solved"; keep waiting.
      // The same two URL markers as the detection script are checked.
      if (url && !url.includes("/sorry/") && !url.includes("sorry.google")) {
        return "cleared-by-user";
      }
    }
    return "needs-human";
  }

  // We clicked something — wait for page to update, then keep retrying
  if (result.startsWith("clicked-")) {
    process.stderr.write(`[greedysearch] Clicked verification: ${result}\n`);
    await pause(2000);

    // Keep checking if verification cleared, retry clicking for up to waitMs
    const deadline = Date.now() + waitMs;
    while (Date.now() < deadline) {
      const retryResult = await evalInTab(VERIFY_RETRY_JS);

      // NOTE(review): a failed eval is treated as cleared here, as in the
      // original code — presumably because the page navigated away; confirm.
      if (retryResult === "cleared" || !retryResult || retryResult === "null") {
        process.stderr.write(`[greedysearch] Verification cleared.\n`);
        await pause(1000);
        return "clicked";
      }

      if (retryResult.startsWith("clicked:")) {
        process.stderr.write(`[greedysearch] Retrying verification click...\n`);
        await pause(2000);
      }

      // If verification is stuck, try clicking the Turnstile iframe by coordinates
      const iframeCenter = await evalInTab(GET_IFRAME_CENTER_JS);
      if (iframeCenter && iframeCenter !== "null") {
        try {
          const { x, y } = JSON.parse(iframeCenter);
          process.stderr.write(
            `[greedysearch] Trying coordinate click on Turnstile iframe at (${x}, ${y})...\n`,
          );
          await cdp(["clickxy", tab, String(x), String(y)]);
          await pause(3000);
        } catch {
          // Best-effort click: the next loop round re-checks the page state.
        }
      }

      await pause(1500);
    }

    // Still stuck — might need user intervention
    process.stderr.write(
      `[greedysearch] Verification may require manual intervention.\n`,
    );
    return "needs-human";
  }

  // Unrecognised detection result: nothing we know how to handle.
  return "clear";
}
|
package/extractors/gemini.mjs
DELETED
|
@@ -1,163 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
// extractors/gemini.mjs
|
|
4
|
-
// Navigate gemini.google.com/app, submit query, wait for answer, return clean answer + sources.
|
|
5
|
-
//
|
|
6
|
-
// Usage:
|
|
7
|
-
// node extractors/gemini.mjs "<query>" [--tab <prefix>]
|
|
8
|
-
//
|
|
9
|
-
// Output (stdout): JSON { answer, sources, query, url }
|
|
10
|
-
// Errors go to stderr only — stdout is always clean JSON for piping.
|
|
11
|
-
|
|
12
|
-
import {
|
|
13
|
-
cdp,
|
|
14
|
-
formatAnswer,
|
|
15
|
-
getOrOpenTab,
|
|
16
|
-
handleError,
|
|
17
|
-
injectClipboardInterceptor,
|
|
18
|
-
outputJson,
|
|
19
|
-
parseArgs,
|
|
20
|
-
parseSourcesFromMarkdown,
|
|
21
|
-
validateQuery,
|
|
22
|
-
} from "./common.mjs";
|
|
23
|
-
import { dismissConsent, handleVerification } from "./consent.mjs";
|
|
24
|
-
import { SELECTORS } from "./selectors.mjs";
|
|
25
|
-
|
|
26
|
-
const S = SELECTORS.gemini;
|
|
27
|
-
const GLOBAL_VAR = "__geminiClipboard";
|
|
28
|
-
|
|
29
|
-
// ============================================================================
|
|
30
|
-
// Gemini-specific helpers
|
|
31
|
-
// ============================================================================
|
|
32
|
-
|
|
33
|
-
/**
 * Type the query into Gemini's prompt input.
 *
 * Focuses the element matching `S.input` in the tab and inserts the text with
 * `document.execCommand('insertText')`.
 *
 * @param {string} tab - Tab identifier
 * @param {string} text - Query text to insert
 * @returns {Promise<void>}
 * @throws {Error} When the page reports that the input element was not found
 */
async function typeIntoGemini(tab, text) {
  // Selector and text are passed as JSON literals so quotes in either
  // cannot break out of the generated script.
  const script = `
    (function(sel, t) {
      var el = document.querySelector(sel);
      if (!el) return false;
      el.focus();
      document.execCommand('insertText', false, t);
      return true;
    })(${JSON.stringify(S.input)}, ${JSON.stringify(text)})
  `;
  const typed = await cdp(["eval", tab, script]);

  // The in-page function returns false when the input is missing. Previously
  // that result was discarded and the run went on to wait for an answer that
  // could never arrive.
  if (typed === "false") {
    throw new Error("Gemini input not found — could not type the query");
  }
}
|
|
48
|
-
|
|
49
|
-
/**
 * Wait until Gemini's copy button (`S.copyButton`) exists in the tab.
 *
 * Polls every 600 ms. On every 10th poll (about every 6 seconds) it also
 * nudges the chat scroll position by a small random amount; failures of that
 * nudge are ignored.
 *
 * @param {string} tab - Tab identifier
 * @param {number} [timeout=120000] - Maximum wait time in ms
 * @returns {Promise<void>}
 * @throws {Error} When the copy button has not appeared before the timeout
 */
async function waitForCopyButton(tab, timeout = 120000) {
  // Gentle scroll to keep page "active" (anti-bot evasion)
  const keepAliveScript = `
            (function() {
              const chat = document.querySelector('chat-window, [role="main"], main') || document.body;
              const currentScroll = chat.scrollTop || window.scrollY || 0;
              const scrollHeight = chat.scrollHeight || document.body.scrollHeight || 0;
              // Small random scroll movement to mimic human reading
              const jitter = Math.floor(Math.random() * 50) - 25;
              const targetScroll = Math.min(scrollHeight, Math.max(0, currentScroll + jitter));
              chat.scrollTo ? chat.scrollTo({ top: targetScroll, behavior: 'smooth' }) : window.scrollTo(0, targetScroll);
            })()
          `;
  const stopAt = Date.now() + timeout;

  for (let poll = 1; Date.now() < stopAt; poll += 1) {
    await new Promise((wake) => setTimeout(wake, 600));

    const isTenthPoll = poll % 10 === 0;
    if (isTenthPoll) {
      await cdp(["eval", tab, keepAliveScript]).catch(() => null);
    }

    const probe = `!!document.querySelector('${S.copyButton}')`;
    const found = await cdp(["eval", tab, probe]).catch(() => "false");
    if (found === "true") return;
  }

  throw new Error(`Gemini copy button did not appear within ${timeout}ms`);
}
|
|
83
|
-
|
|
84
|
-
/**
 * Click Gemini's copy button and read the text captured by the clipboard interceptor.
 *
 * Waits 400 ms after the click, then reads `window[GLOBAL_VAR]` from the tab.
 *
 * @param {string} tab - Tab identifier
 * @returns {Promise<{answer: string, sources: Array<{title: string, url: string}>}>}
 *   Trimmed answer plus the sources parsed from its Markdown links
 * @throws {Error} When the interceptor captured nothing
 */
async function extractAnswer(tab) {
  const clickCopy = `document.querySelector('${S.copyButton}')?.click()`;
  await cdp(["eval", tab, clickCopy]);

  // Give the page a moment to run its copy handler.
  await new Promise((wake) => setTimeout(wake, 400));

  const captured = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
  if (!captured) {
    throw new Error("Clipboard interceptor returned empty text");
  }

  return {
    answer: captured.trim(),
    sources: parseSourcesFromMarkdown(captured),
  };
}
|
|
98
|
-
|
|
99
|
-
// ============================================================================
|
|
100
|
-
// Main
|
|
101
|
-
// ============================================================================
|
|
102
|
-
|
|
103
|
-
const USAGE = 'Usage: node extractors/gemini.mjs "<query>" [--tab <prefix>]\n';
|
|
104
|
-
|
|
105
|
-
/**
 * CLI entry point: run one Gemini query and print the result as JSON.
 *
 * Steps: validate/parse arguments, open (or reuse) a tab, navigate to Gemini,
 * dismiss consent/verification prompts, wait for the prompt input, install the
 * clipboard interceptor, type and send the query, wait for the copy button,
 * then capture the answer and print { query, url, answer, sources }.
 * Any failure inside the try block is reported through handleError().
 */
async function main() {
  const args = process.argv.slice(2);
  validateQuery(args, USAGE);

  const { query, tabPrefix, short } = parseArgs(args);
  const sleep = (ms) => new Promise((r) => setTimeout(r, ms));

  try {
    await cdp(["list"]);
    const tab = await getOrOpenTab(tabPrefix);

    // Each search = fresh conversation
    await cdp(["nav", tab, "https://gemini.google.com/app"], 35000);
    await sleep(2000);
    await dismissConsent(tab, cdp);
    await handleVerification(tab, cdp, 60000);

    // Wait for input to be ready
    const inputTimeoutMs = 10000;
    const deadline = Date.now() + inputTimeoutMs;
    let inputReady = false;
    while (Date.now() < deadline) {
      const ready = await cdp([
        "eval",
        tab,
        `!!document.querySelector('${S.input}')`,
      ]).catch(() => "false");
      if (ready === "true") {
        inputReady = true;
        break;
      }
      await sleep(400);
    }
    // Without the input nothing can be typed; fail now instead of waiting
    // for a copy button that would never appear.
    if (!inputReady) {
      throw new Error(`Gemini input did not appear within ${inputTimeoutMs}ms`);
    }
    await sleep(300);

    await injectClipboardInterceptor(tab, GLOBAL_VAR);
    await typeIntoGemini(tab, query);
    await sleep(400);

    await cdp([
      "eval",
      tab,
      `document.querySelector('${S.sendButton}')?.click()`,
    ]);

    await waitForCopyButton(tab);

    const { answer, sources } = await extractAnswer(tab);
    if (!answer) throw new Error("No answer captured from Gemini clipboard");

    // Fall back to the app URL when the tab's location cannot be read.
    const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
      () => "https://gemini.google.com/app",
    );
    outputJson({
      query,
      url: finalUrl,
      answer: formatAnswer(answer, short),
      sources,
    });
  } catch (e) {
    handleError(e);
  }
}
|
|
162
|
-
|
|
163
|
-
main();
|