@apmantza/greedysearch-pi 1.0.6 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extractors/consent.mjs +76 -29
- package/extractors/google-ai.mjs +11 -2
- package/package.json +1 -1
- package/search.mjs +15 -22
package/extractors/consent.mjs
CHANGED
|
@@ -1,29 +1,76 @@
|
|
|
1
|
-
// consent.mjs — auto-dismiss common cookie/consent banners
|
|
2
|
-
// Call dismissConsent(tab, cdpFn) after navigating to any page.
|
|
3
|
-
|
|
4
|
-
const CONSENT_JS = `
|
|
5
|
-
(function() {
|
|
6
|
-
// Google consent page (consent.google.com)
|
|
7
|
-
var g = document.querySelector('#L2AGLb, button[jsname="b3VHJd"], .tHlp8d');
|
|
8
|
-
if (g) { g.click(); return 'google'; }
|
|
9
|
-
|
|
10
|
-
// OneTrust (used by many sites including Stack Overflow)
|
|
11
|
-
var ot = document.querySelector('#onetrust-accept-btn-handler, .onetrust-accept-btn-handler');
|
|
12
|
-
if (ot) { ot.click(); return 'onetrust'; }
|
|
13
|
-
|
|
14
|
-
// Generic "accept all" / "agree" buttons
|
|
15
|
-
var btns = Array.from(document.querySelectorAll('button, a[role=button]'));
|
|
16
|
-
var accept = btns.find(b => /^(accept all|accept cookies|agree|i agree|got it|allow all|allow cookies)$/i.test(b.innerText?.trim()));
|
|
17
|
-
if (accept) { accept.click(); return 'generic:' + accept.innerText.trim(); }
|
|
18
|
-
|
|
19
|
-
return null;
|
|
20
|
-
})()
|
|
21
|
-
`;
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
1
|
+
// consent.mjs — auto-dismiss common cookie/consent banners and human-verification pages
|
|
2
|
+
// Call dismissConsent(tab, cdpFn) after navigating to any page.
|
|
3
|
+
|
|
4
|
+
// Browser-side snippet evaluated in the page through the CDP 'eval' command (see dismissConsent).
// It clicks at most one consent control and yields a tag naming what was clicked:
//   'google'          - an element matched by the Google consent selectors
//   'onetrust'        - the OneTrust accept button
//   'generic:<label>' - first button / a[role=button] whose trimmed text is exactly one of the
//                       accept phrases in the regex (case-insensitive)
//   null              - nothing matched, so nothing was clicked
// Checks run in that order and the snippet returns right after the first click.
const CONSENT_JS = `
|
|
5
|
+
(function() {
|
|
6
|
+
// Google consent page (consent.google.com)
|
|
7
|
+
var g = document.querySelector('#L2AGLb, button[jsname="b3VHJd"], .tHlp8d');
|
|
8
|
+
if (g) { g.click(); return 'google'; }
|
|
9
|
+
|
|
10
|
+
// OneTrust (used by many sites including Stack Overflow)
|
|
11
|
+
var ot = document.querySelector('#onetrust-accept-btn-handler, .onetrust-accept-btn-handler');
|
|
12
|
+
if (ot) { ot.click(); return 'onetrust'; }
|
|
13
|
+
|
|
14
|
+
// Generic "accept all" / "agree" buttons
|
|
15
|
+
var btns = Array.from(document.querySelectorAll('button, a[role=button]'));
|
|
16
|
+
var accept = btns.find(b => /^(accept all|accept cookies|agree|i agree|got it|allow all|allow cookies)$/i.test(b.innerText?.trim()));
|
|
17
|
+
if (accept) { accept.click(); return 'generic:' + accept.innerText.trim(); }
|
|
18
|
+
|
|
19
|
+
return null;
|
|
20
|
+
})()
|
|
21
|
+
`;
|
|
22
|
+
|
|
23
|
+
// Detect Google's "verify you're human" / unusual traffic page.
// Browser-side snippet evaluated in the page through the CDP 'eval' command (see handleVerification).
// Note that detection is not read-only: two of the three branches click an element.
// Result tags, checked in this order:
//   'sorry-page'             - the page URL contains '/sorry/' or 'sorry.google'; nothing is clicked
//   'clicked-verify:<label>' - a button / submit input / a[role=button] whose text or value matches
//                              verify|human|not a robot|continue was clicked; this branch is skipped
//                              when an iframe whose src contains "recaptcha" is present
//   'clicked-checkbox'       - an element matching the unchecked-checkbox selectors was clicked
//   null                     - none of the above matched
|
|
24
|
+
const VERIFY_DETECT_JS = `
|
|
25
|
+
(function() {
|
|
26
|
+
var url = document.location.href;
|
|
27
|
+
if (url.includes('/sorry/') || url.includes('sorry.google')) return 'sorry-page';
|
|
28
|
+
|
|
29
|
+
// Simple click-through verify button (not image CAPTCHA)
|
|
30
|
+
var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
|
|
31
|
+
var verify = btns.find(b => /verify|human|not a robot|continue/i.test(b.innerText?.trim() || b.value || ''));
|
|
32
|
+
if (verify && !document.querySelector('iframe[src*="recaptcha"]')) {
|
|
33
|
+
verify.click();
|
|
34
|
+
return 'clicked-verify:' + (verify.innerText?.trim() || verify.value);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Unchecked reCAPTCHA / Turnstile checkbox (no image challenge)
|
|
38
|
+
var checkbox = document.querySelector('.recaptcha-checkbox-unchecked, input[type=checkbox][id*="recaptcha"], #cf-stage input[type=checkbox]');
|
|
39
|
+
if (checkbox) { checkbox.click(); return 'clicked-checkbox'; }
|
|
40
|
+
|
|
41
|
+
return null;
|
|
42
|
+
})()
|
|
43
|
+
`;
|
|
44
|
+
|
|
45
|
+
/**
 * Evaluate the CONSENT_JS snippet in a tab and, if it reports a click, pause
 * so the page can react before the caller continues.
 *
 * Best-effort by design: a rejected `cdp` call is treated the same as
 * "nothing to dismiss" and never propagates to the caller.
 *
 * @param {*} tab - Tab identifier, forwarded to `cdp` unchanged.
 * @param {Function} cdp - Called with `['eval', tab, <script>]`; must return a
 *   promise. A falsy value or the string 'null' means no banner was clicked.
 * @returns {Promise<void>}
 */
export async function dismissConsent(tab, cdp) {
  const evalRequest = cdp(['eval', tab, CONSENT_JS]).catch(() => null);
  const outcome = await evalRequest;

  const nothingClicked = !outcome || outcome === 'null';
  if (nothingClicked) {
    return;
  }

  // A banner was clicked: wait for it to close before returning.
  await new Promise((done) => {
    setTimeout(done, 1500);
  });
}
|
|
51
|
+
|
|
52
|
+
/**
 * Detect a "verify you're human" interstitial in a tab and try to get past it.
 *
 * Evaluates VERIFY_DETECT_JS in the page and acts on the tag it reports:
 *  - nothing detected (or the eval failed): returns 'clear';
 *  - the snippet clicked a verify button/checkbox: waits 2s, returns 'clicked';
 *  - Google "sorry" page: asks the user on stderr to solve it in the browser
 *    and polls the tab URL until it leaves the sorry page ('cleared-by-user')
 *    or `waitMs` elapses ('needs-human').
 *
 * A poll that fails or yields no URL is inconclusive: polling continues
 * instead of reporting the page as cleared.
 *
 * @param {*} tab - Tab identifier, forwarded to `cdp` unchanged.
 * @param {Function} cdp - Called with an argument array such as
 *   `['eval', tab, <script>]`; must return a promise for the result string.
 * @param {number} [waitMs=60000] - Maximum time to wait for a manual solve.
 * @param {number} [pollMs=2000] - Delay between URL polls while waiting.
 * @returns {Promise<'clear'|'clicked'|'cleared-by-user'|'needs-human'>}
 */
export async function handleVerification(tab, cdp, waitMs = 60000, pollMs = 2000) {
  const raw = await cdp(['eval', tab, VERIFY_DETECT_JS]).catch(() => null);
  // Normalise so stray whitespace or a non-string result cannot break the tag checks below.
  const result = typeof raw === 'string' ? raw.trim() : '';

  if (result === '' || result === 'null') return 'clear';

  if (result === 'sorry-page') {
    // Hard CAPTCHA page — wait for user to solve it manually
    process.stderr.write(`[greedysearch] Google verification required — please solve it in the browser window (waiting up to ${waitMs / 1000}s)...\n`);

    // Same URL test the detection snippet uses, so both sides agree on what "sorry page" means.
    const isSorryUrl = (href) => href.includes('/sorry/') || href.includes('sorry.google');

    const deadline = Date.now() + waitMs;
    while (Date.now() < deadline) {
      await new Promise((r) => setTimeout(r, pollMs));
      const href = await cdp(['eval', tab, 'document.location.href']).catch(() => null);
      // A failed or empty poll says nothing about the page; keep waiting.
      if (typeof href !== 'string' || href.trim() === '') continue;
      if (!isSorryUrl(href)) return 'cleared-by-user';
    }
    return 'needs-human';
  }

  if (result.startsWith('clicked-')) {
    // Give the page time to react to the click before the caller proceeds.
    await new Promise((r) => setTimeout(r, 2000));
    return 'clicked';
  }

  return 'clear';
}
|
package/extractors/google-ai.mjs
CHANGED
|
@@ -13,10 +13,10 @@ import { spawn } from 'child_process';
|
|
|
13
13
|
import { tmpdir, homedir } from 'os';
|
|
14
14
|
import { join, dirname } from 'path';
|
|
15
15
|
import { fileURLToPath } from 'url';
|
|
16
|
-
import { dismissConsent } from './consent.mjs';
|
|
16
|
+
import { dismissConsent, handleVerification } from './consent.mjs';
|
|
17
17
|
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
18
18
|
|
|
19
|
-
const CDP = join(
|
|
19
|
+
const CDP = join(homedir(), '.claude', 'skills', 'chrome-cdp', 'scripts', 'cdp.mjs');
|
|
20
20
|
const PAGES_CACHE = `${tmpdir().replace(/\\/g, '/')}/cdp-pages.json`;
|
|
21
21
|
|
|
22
22
|
const STREAM_POLL_INTERVAL = 600;
|
|
@@ -135,6 +135,15 @@ async function main() {
|
|
|
135
135
|
await new Promise(r => setTimeout(r, 1500));
|
|
136
136
|
}
|
|
137
137
|
|
|
138
|
+
// Handle "verify you're human" — auto-click simple buttons, wait for user on hard CAPTCHA
|
|
139
|
+
const verifyResult = await handleVerification(tab, cdp, 60000);
|
|
140
|
+
if (verifyResult === 'needs-human') throw new Error('Google verification required — could not be completed automatically');
|
|
141
|
+
if (verifyResult === 'clicked' || verifyResult === 'cleared-by-user') {
|
|
142
|
+
// Re-navigate to the search URL after verification
|
|
143
|
+
await cdp(['nav', tab, url], 35000);
|
|
144
|
+
await new Promise(r => setTimeout(r, 1500));
|
|
145
|
+
}
|
|
146
|
+
|
|
138
147
|
await waitForStreamComplete(tab);
|
|
139
148
|
|
|
140
149
|
const { answer, sources } = await extractAnswer(tab);
|
package/package.json
CHANGED
package/search.mjs
CHANGED
|
@@ -27,8 +27,6 @@ import { tmpdir } from 'os';
|
|
|
27
27
|
import http from 'http';
|
|
28
28
|
|
|
29
29
|
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
30
|
-
// Pi installs chrome-cdp-skill to ~/.pi/agent/git/...; fall back to Claude Code path
|
|
31
|
-
// Always use the bundled Windows-compatible cdp.mjs
|
|
32
30
|
const CDP = join(__dir, 'cdp.mjs');
|
|
33
31
|
const PAGES_CACHE = `${tmpdir().replace(/\\/g, '/')}/cdp-pages.json`;
|
|
34
32
|
|
|
@@ -184,26 +182,26 @@ function writeOutput(data, outFile) {
|
|
|
184
182
|
}
|
|
185
183
|
}
|
|
186
184
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
function isGreedySearchChromeUp() {
|
|
185
|
+
/**
 * Check whether a DevTools HTTP endpoint answers on localhost:9223.
 *
 * Issues a GET for /json/version and resolves to true only for a 200
 * response. Connection errors and timeouts resolve to false; the returned
 * promise never rejects.
 *
 * @param {number} [timeoutMs=3000] - Socket timeout before giving up.
 * @returns {Promise<boolean>}
 */
function probePort9223(timeoutMs = 3000) {
  return new Promise((settle) => {
    const onResponse = (res) => {
      // Drain the body so the socket is released.
      res.resume();
      settle(res.statusCode === 200);
    };

    const request = http.get('http://localhost:9223/json/version', onResponse);
    request.on('error', () => settle(false));
    request.setTimeout(timeoutMs, () => {
      request.destroy();
      settle(false);
    });
  });
}
|
|
199
195
|
|
|
200
196
|
/**
 * Make sure the GreedySearch Chrome instance is reachable, launching it if not.
 *
 * Probes port 9223 first. When the probe fails, runs `launch.mjs` (located
 * next to this file) as a child process, with the child's stdout and stderr
 * both sent to this process's stderr, and waits for it to finish.
 *
 * @returns {Promise<void>} Resolves when the probe succeeds or launch.mjs exits with code 0.
 * @throws {Error} 'launch.mjs failed' when the child cannot be spawned (the
 *   spawn error is attached as `cause`) or exits with a non-zero code.
 */
async function ensureChrome() {
  if (await probePort9223()) return;

  process.stderr.write('GreedySearch Chrome not running on port 9223 — auto-launching...\n');
  await new Promise((resolve, reject) => {
    const proc = spawn('node', [join(__dir, 'launch.mjs')], { stdio: ['ignore', process.stderr, process.stderr] });
    // Spawn failures (e.g. `node` not on PATH) arrive as an 'error' event. Without a
    // listener that event is thrown as an uncaught exception instead of rejecting here.
    proc.on('error', (err) => reject(new Error('launch.mjs failed', { cause: err })));
    proc.on('close', (code) => (code === 0 ? resolve() : reject(new Error('launch.mjs failed'))));
  });
}
|
|
208
206
|
|
|
209
207
|
async function main() {
|
|
@@ -240,10 +238,10 @@ async function main() {
|
|
|
240
238
|
if (engine === 'all') {
|
|
241
239
|
await cdp(['list']); // refresh pages cache
|
|
242
240
|
|
|
243
|
-
// Assign tabs: reuse existing engine tabs from cache, open new ones
|
|
244
|
-
//
|
|
241
|
+
// Assign tabs: reuse existing engine tabs from cache, open new ones where needed.
|
|
242
|
+
// Engine tabs are never closed — keeping them alive preserves session cookies and
|
|
243
|
+
// reduces the chance of verification challenges on subsequent searches.
|
|
245
244
|
const tabs = [];
|
|
246
|
-
const openedTabs = [];
|
|
247
245
|
let blankReused = false;
|
|
248
246
|
|
|
249
247
|
for (const e of ALL_ENGINES) {
|
|
@@ -253,13 +251,11 @@ async function main() {
|
|
|
253
251
|
} else if (!blankReused) {
|
|
254
252
|
const tab = await getOrReuseBlankTab();
|
|
255
253
|
tabs.push(tab);
|
|
256
|
-
openedTabs.push(tab);
|
|
257
254
|
blankReused = true;
|
|
258
255
|
} else {
|
|
259
256
|
await new Promise(r => setTimeout(r, 500));
|
|
260
257
|
const tab = await openNewTab();
|
|
261
258
|
tabs.push(tab);
|
|
262
|
-
openedTabs.push(tab);
|
|
263
259
|
}
|
|
264
260
|
}
|
|
265
261
|
|
|
@@ -270,9 +266,6 @@ async function main() {
|
|
|
270
266
|
)
|
|
271
267
|
);
|
|
272
268
|
|
|
273
|
-
// Close only tabs we opened (not pre-existing ones)
|
|
274
|
-
await Promise.allSettled(openedTabs.map(closeTab));
|
|
275
|
-
|
|
276
269
|
const out = {};
|
|
277
270
|
for (let i = 0; i < results.length; i++) {
|
|
278
271
|
const r = results[i];
|