@apmantza/greedysearch-pi 1.9.0 → 1.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/README.md +11 -1
- package/bin/launch-visible.mjs +65 -0
- package/bin/launch.mjs +442 -417
- package/bin/search.mjs +757 -679
- package/extractors/bing-copilot.mjs +490 -374
- package/extractors/common.mjs +703 -596
- package/extractors/consent.mjs +421 -388
- package/extractors/selectors.mjs +55 -54
- package/index.ts +176 -177
- package/package.json +8 -3
- package/skills/greedy-search/skill.md +5 -19
- package/src/fetcher.mjs +666 -652
- package/src/formatters/synthesis.ts +1 -5
- package/src/search/output.mjs +23 -1
- package/src/search/research.mjs +1581 -0
- package/src/search/sources.mjs +488 -466
- package/src/search/synthesis-runner.mjs +52 -46
- package/src/tools/greedy-search-handler.ts +298 -124
- package/test.mjs +971 -534
package/extractors/consent.mjs
CHANGED
|
@@ -1,388 +1,421 @@
|
|
|
1
|
-
import { randomInt } from "node:crypto";
|
|
2
|
-
import { existsSync, readFileSync } from "node:fs";
|
|
3
|
-
import http from "node:http";
|
|
4
|
-
|
|
5
|
-
// consent.mjs — auto-dismiss common cookie/consent banners and human-verification pages
|
|
6
|
-
// Call dismissConsent(tab, cdpFn) after navigating to any page.
|
|
7
|
-
|
|
8
|
-
const CONSENT_JS = `
|
|
9
|
-
(function() {
|
|
10
|
-
// Google consent page (consent.google.com)
|
|
11
|
-
var g = document.querySelector('#L2AGLb, button[jsname="b3VHJd"], .tHlp8d');
|
|
12
|
-
if (g) { g.click(); return 'google'; }
|
|
13
|
-
|
|
14
|
-
// OneTrust (used by many sites including Stack Overflow)
|
|
15
|
-
var ot = document.querySelector('#onetrust-accept-btn-handler, .onetrust-accept-btn-handler');
|
|
16
|
-
if (ot) { ot.click(); return 'onetrust'; }
|
|
17
|
-
|
|
18
|
-
// Generic "accept all" / "agree" buttons
|
|
19
|
-
var btns = Array.from(document.querySelectorAll('button, a[role=button]'));
|
|
20
|
-
var accept = btns.find(b => /^(accept all|accept cookies|agree|i agree|got it|allow all|allow cookies)$/i.test(b.innerText?.trim()));
|
|
21
|
-
if (accept) { accept.click(); return 'generic:' + accept.innerText.trim(); }
|
|
22
|
-
|
|
23
|
-
return null;
|
|
24
|
-
})()
|
|
25
|
-
`;
|
|
26
|
-
|
|
27
|
-
// Detect verification challenges — returns element info (NOT clicking).
|
|
28
|
-
// The CDP-side handleVerification performs human-like clicks on found elements.
|
|
29
|
-
const VERIFY_DETECT_JS = `
|
|
30
|
-
(function() {
|
|
31
|
-
var url = document.location.href;
|
|
32
|
-
|
|
33
|
-
// --- Google "sorry" page (hard CAPTCHA, can't auto-solve) ---
|
|
34
|
-
if (url.includes('/sorry/') || url.includes('sorry.google')) return 'sorry-page';
|
|
35
|
-
|
|
36
|
-
// --- Microsoft account verification page ---
|
|
37
|
-
if (url.includes('login.microsoftonline.com') || url.includes('login.live.com') || url.includes('account.microsoft.com')) {
|
|
38
|
-
var msBtns = Array.from(document.querySelectorAll('button, input[type=submit], a'));
|
|
39
|
-
var msVerify = msBtns.find(b => /verify|continue|next/i.test(b.innerText?.trim() || b.value || ''));
|
|
40
|
-
if (msVerify) { msVerify.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:msVerify.innerText?.trim()||msVerify.value}); }
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
// --- Copilot / modal verification ---
|
|
44
|
-
var modal = document.querySelector('[role="dialog"], .b_modal, [class*="verify"], [class*="challenge"]');
|
|
45
|
-
if (modal) {
|
|
46
|
-
var modalBtns = Array.from(modal.querySelectorAll('button, a[role="button"], input[type="submit"]'));
|
|
47
|
-
var actionBtn = modalBtns.find(b => /^(continue|verify|submit|next|i agree|accept|got it)$/i.test(b.innerText?.trim() || b.value || ''));
|
|
48
|
-
if (actionBtn) { actionBtn.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:actionBtn.innerText?.trim()}); }
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
// --- Turnstile / Cloudflare challenge iframe (return coordinates for humanClickXY) ---
|
|
52
|
-
var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"], iframe[title*="challenge"]');
|
|
53
|
-
if (turnstileIframe) {
|
|
54
|
-
var r = turnstileIframe.getBoundingClientRect();
|
|
55
|
-
return JSON.stringify({t:'xy',x:r.left+30,y:r.top+r.height/2});
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
// --- Cloudflare Turnstile widget inside closed shadow DOM (Copilot, etc.) ---
|
|
59
|
-
// The iframe is not queryable from main document, but the host container
|
|
60
|
-
// (#cf-turnstile) and the hidden response input are.
|
|
61
|
-
var cfTurnstileHost = document.querySelector('#cf-turnstile, [id^="cf-chl-widget-"]');
|
|
62
|
-
if (cfTurnstileHost) {
|
|
63
|
-
var r2 = cfTurnstileHost.getBoundingClientRect();
|
|
64
|
-
return JSON.stringify({t:'xy',x:r2.left+r2.width/2,y:r2.top+r2.height/2});
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
// --- Cloudflare challenge page ---
|
|
68
|
-
var cfCheckbox = document.querySelector('#cf-stage input[type="checkbox"], .ctp-checkbox-container input');
|
|
69
|
-
if (cfCheckbox) { cfCheckbox.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:'cloudflare-checkbox'}); }
|
|
70
|
-
var cfBtn = document.querySelector('#challenge-form button, .cf-challenge button');
|
|
71
|
-
if (cfBtn) { cfBtn.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:cfBtn.innerText?.trim()}); }
|
|
72
|
-
|
|
73
|
-
// --- Microsoft "I am human" button ---
|
|
74
|
-
var msHumanBtn = document.querySelector('button[id*="i0"], button[id*="id__"]');
|
|
75
|
-
if (msHumanBtn && /verify|human|robot|continue/i.test(msHumanBtn.innerText?.trim())) {
|
|
76
|
-
msHumanBtn.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:msHumanBtn.innerText.trim()});
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
// --- Generic verify/continue/proceed buttons (catch-all) ---
|
|
80
|
-
// IMPORTANT: exclude sign-in / OAuth buttons (e.g. "Continue with Google")
|
|
81
|
-
var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
|
|
82
|
-
var verify = btns.find(b => {
|
|
83
|
-
var t = (b.innerText?.trim() || b.value || '').toLowerCase();
|
|
84
|
-
var isVerifyLike = (t.includes('verify') || t.includes('human') || t.includes('robot') || t.includes('continue') || t.includes('proceed')) &&
|
|
85
|
-
!t.includes('verified') && !document.querySelector('iframe[src*="recaptcha"]');
|
|
86
|
-
if (!isVerifyLike) return false;
|
|
87
|
-
// Exclude OAuth / sign-in buttons to prevent accidental login flows
|
|
88
|
-
var isSignIn = /sign.in|log.in|google|microsoft|apple|facebook|github|auth/i.test(t);
|
|
89
|
-
return !isSignIn;
|
|
90
|
-
});
|
|
91
|
-
if (verify) { verify.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:verify.innerText?.trim()||verify.value}); }
|
|
92
|
-
|
|
93
|
-
// --- Google reCAPTCHA checkbox ---
|
|
94
|
-
var recaptchaCheckbox = document.querySelector('.recaptcha-checkbox-unchecked, input[type=checkbox][id*="recaptcha"]');
|
|
95
|
-
if (recaptchaCheckbox) { recaptchaCheckbox.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:'recaptcha'}); }
|
|
96
|
-
|
|
97
|
-
return null;
|
|
98
|
-
})()
|
|
99
|
-
`;
|
|
100
|
-
|
|
101
|
-
// Retry detection — returns 'cleared' if no verification page, or selector info
|
|
102
|
-
const VERIFY_RETRY_JS = `
|
|
103
|
-
(function() {
|
|
104
|
-
var url = document.location.href;
|
|
105
|
-
var isVerifyPage = url.includes('/sorry/') ||
|
|
106
|
-
url.includes('challenges.cloudflare.com') ||
|
|
107
|
-
url.includes('login.microsoftonline.com') ||
|
|
108
|
-
document.querySelector('#challenge-running, #challenge-stage, .cf-turnstile, [role="dialog"]');
|
|
109
|
-
if (!isVerifyPage) return 'cleared';
|
|
110
|
-
|
|
111
|
-
var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
|
|
112
|
-
var btn = btns.find(b => {
|
|
113
|
-
var t = (b.innerText?.trim() || b.value || '').toLowerCase();
|
|
114
|
-
var isVerifyLike = t.includes('verify') || t.includes('human') || t.includes('robot') || t.includes('continue') || t.includes('next') || t.includes('submit');
|
|
115
|
-
if (!isVerifyLike) return false;
|
|
116
|
-
var isSignIn = /sign.in|log.in|google|microsoft|apple|facebook|github|auth/i.test(t);
|
|
117
|
-
return !isSignIn;
|
|
118
|
-
});
|
|
119
|
-
if (btn) { btn.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:btn.innerText?.trim()||btn.value}); }
|
|
120
|
-
|
|
121
|
-
var cf = document.querySelector('#cf-stage input[type="checkbox"], .cf-turnstile input');
|
|
122
|
-
if (cf) { cf.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:'turnstile'}); }
|
|
123
|
-
|
|
124
|
-
// Cloudflare Turnstile widget inside closed shadow DOM (detected via host container)
|
|
125
|
-
var cfTurnstileHost = document.querySelector('#cf-turnstile, [id^="cf-chl-widget-"]');
|
|
126
|
-
if (cfTurnstileHost) { return 'still-verifying'; }
|
|
127
|
-
|
|
128
|
-
var modal = document.querySelector('[role="dialog"], .b_modal, [class*="verify"]');
|
|
129
|
-
if (modal) {
|
|
130
|
-
var modalBtn = modal.querySelector('button, a[role="button"]');
|
|
131
|
-
if (modalBtn) { modalBtn.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:modalBtn.innerText?.trim()}); }
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
return 'still-verifying';
|
|
135
|
-
})()
|
|
136
|
-
`;
|
|
137
|
-
|
|
138
|
-
export async function dismissConsent(tab, cdp) {
|
|
139
|
-
const result = await cdp(["eval", tab, CONSENT_JS]).catch(() => null);
|
|
140
|
-
if (result && result !== "null") {
|
|
141
|
-
await new Promise((r) => setTimeout(r, 1500));
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
// ─── Human-like click simulation (multi-event with jitter) ────────────
|
|
146
|
-
|
|
147
|
-
function rng(min, max) {
|
|
148
|
-
// crypto.randomInt is used instead of Math.random() to comply with SonarCloud security hotspot S2245.
|
|
149
|
-
// This is NOT security-sensitive — the random values are only used for mouse-jitter and timing delays.
|
|
150
|
-
return randomInt(min * 1000, max * 1000) / 1000;
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
/**
|
|
154
|
-
* Fire a browser-level Input.dispatchMouseEvent via Chrome's top-level CDP
|
|
155
|
-
* WebSocket. Unlike page-session dispatch, this routes through the compositor
|
|
156
|
-
* and reaches OOPIFs (e.g. Cloudflare Turnstile in a cross-origin iframe).
|
|
157
|
-
* Best-effort — errors are silently swallowed.
|
|
158
|
-
*/
|
|
159
|
-
async function browserLevelClick(x, y) {
|
|
160
|
-
if (!globalThis.WebSocket) return;
|
|
161
|
-
const profileDir = process.env.CDP_PROFILE_DIR;
|
|
162
|
-
if (!profileDir) return;
|
|
163
|
-
const portFile = `${profileDir.replaceAll("\\", "/")}/DevToolsActivePort`;
|
|
164
|
-
if (!existsSync(portFile)) return;
|
|
165
|
-
const port = readFileSync(portFile, "utf8").trim().split("\n")[0];
|
|
166
|
-
|
|
167
|
-
const version = await new Promise((resolve, reject) => {
|
|
168
|
-
const req = http.get(`http://localhost:${port}/json/version`, (res) => {
|
|
169
|
-
let body = "";
|
|
170
|
-
res.on("data", (d) => (body += d));
|
|
171
|
-
res.on("end", () => {
|
|
172
|
-
try {
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
const
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
//
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
//
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
);
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
}
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
}
|
|
1
|
+
import { randomInt } from "node:crypto";
|
|
2
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
3
|
+
import http from "node:http";
|
|
4
|
+
|
|
5
|
+
// consent.mjs — auto-dismiss common cookie/consent banners and human-verification pages
|
|
6
|
+
// Call dismissConsent(tab, cdpFn) after navigating to any page.
|
|
7
|
+
|
|
8
|
+
const CONSENT_JS = `
|
|
9
|
+
(function() {
|
|
10
|
+
// Google consent page (consent.google.com)
|
|
11
|
+
var g = document.querySelector('#L2AGLb, button[jsname="b3VHJd"], .tHlp8d');
|
|
12
|
+
if (g) { g.click(); return 'google'; }
|
|
13
|
+
|
|
14
|
+
// OneTrust (used by many sites including Stack Overflow)
|
|
15
|
+
var ot = document.querySelector('#onetrust-accept-btn-handler, .onetrust-accept-btn-handler');
|
|
16
|
+
if (ot) { ot.click(); return 'onetrust'; }
|
|
17
|
+
|
|
18
|
+
// Generic "accept all" / "agree" buttons
|
|
19
|
+
var btns = Array.from(document.querySelectorAll('button, a[role=button]'));
|
|
20
|
+
var accept = btns.find(b => /^(accept all|accept cookies|agree|i agree|got it|allow all|allow cookies)$/i.test(b.innerText?.trim()));
|
|
21
|
+
if (accept) { accept.click(); return 'generic:' + accept.innerText.trim(); }
|
|
22
|
+
|
|
23
|
+
return null;
|
|
24
|
+
})()
|
|
25
|
+
`;
|
|
26
|
+
|
|
27
|
+
// Detect verification challenges — returns element info (NOT clicking).
// The CDP-side handleVerification performs human-like clicks on found elements.
//
// The script evaluates to one of:
//   'sorry-page'                                   Google "sorry" CAPTCHA URL
//   '{"t":"sel","s":"[data-gs-verify=\"1\"]",...}'  an element was tagged with data-gs-verify="1"
//   '{"t":"xy","x":...,"y":...}'                   viewport coordinates to click
//   null                                           nothing recognised
//
// Markers left by earlier passes are removed first: the returned selector is
// resolved with document.querySelector, which picks the first match in DOM
// order, so a stale marker could otherwise be clicked instead of the new one.
const VERIFY_DETECT_JS = `
(function() {
  var url = document.location.href;

  // Drop markers from earlier detection passes so '[data-gs-verify="1"]' is unambiguous.
  Array.from(document.querySelectorAll('[data-gs-verify]')).forEach(function(el) {
    el.removeAttribute('data-gs-verify');
  });

  // --- Google "sorry" page (hard CAPTCHA, can't auto-solve) ---
  if (url.includes('/sorry/') || url.includes('sorry.google')) return 'sorry-page';

  // --- Microsoft account verification page ---
  if (url.includes('login.microsoftonline.com') || url.includes('login.live.com') || url.includes('account.microsoft.com')) {
    var msBtns = Array.from(document.querySelectorAll('button, input[type=submit], a'));
    var msVerify = msBtns.find(b => /verify|continue|next/i.test(b.innerText?.trim() || b.value || ''));
    if (msVerify) { msVerify.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:msVerify.innerText?.trim()||msVerify.value}); }
  }

  // --- Copilot / modal verification ---
  var modal = document.querySelector('[role="dialog"], .b_modal, [class*="verify"], [class*="challenge"]');
  if (modal) {
    var modalBtns = Array.from(modal.querySelectorAll('button, a[role="button"], input[type="submit"]'));
    var actionBtn = modalBtns.find(b => /^(continue|verify|submit|next|i agree|accept|got it)$/i.test(b.innerText?.trim() || b.value || ''));
    // The match above may come from .value (input[type=submit]); report that text too.
    if (actionBtn) { actionBtn.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:actionBtn.innerText?.trim()||actionBtn.value}); }
  }

  // --- Turnstile / Cloudflare challenge iframe (return coordinates for humanClickXY) ---
  var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"], iframe[title*="challenge"]');
  if (turnstileIframe) {
    var r = turnstileIframe.getBoundingClientRect();
    return JSON.stringify({t:'xy',x:r.left+30,y:r.top+r.height/2});
  }

  // --- Cloudflare Turnstile widget inside closed shadow DOM (Copilot, etc.) ---
  // The iframe is not queryable from main document, but the host container
  // (#cf-turnstile) and the hidden response input are.
  var cfTurnstileHost = document.querySelector('#cf-turnstile, [id^="cf-chl-widget-"]');
  if (cfTurnstileHost) {
    var r2 = cfTurnstileHost.getBoundingClientRect();
    return JSON.stringify({t:'xy',x:r2.left+r2.width/2,y:r2.top+r2.height/2});
  }

  // --- Cloudflare challenge page ---
  var cfCheckbox = document.querySelector('#cf-stage input[type="checkbox"], .ctp-checkbox-container input');
  if (cfCheckbox) { cfCheckbox.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:'cloudflare-checkbox'}); }
  var cfBtn = document.querySelector('#challenge-form button, .cf-challenge button');
  if (cfBtn) { cfBtn.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:cfBtn.innerText?.trim()}); }

  // --- Microsoft "I am human" button ---
  var msHumanBtn = document.querySelector('button[id*="i0"], button[id*="id__"]');
  if (msHumanBtn && /verify|human|robot|continue/i.test(msHumanBtn.innerText?.trim())) {
    msHumanBtn.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:msHumanBtn.innerText.trim()});
  }

  // --- Generic verify/continue/proceed buttons (catch-all) ---
  // IMPORTANT: exclude sign-in / OAuth buttons (e.g. "Continue with Google")
  // The reCAPTCHA iframe lookup does not depend on the button, so do it once.
  var hasRecaptchaFrame = !!document.querySelector('iframe[src*="recaptcha"]');
  var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
  var verify = btns.find(b => {
    var t = (b.innerText?.trim() || b.value || '').toLowerCase();
    var isVerifyLike = (t.includes('verify') || t.includes('human') || t.includes('robot') || t.includes('continue') || t.includes('proceed')) &&
      !t.includes('verified') && !hasRecaptchaFrame;
    if (!isVerifyLike) return false;
    // Exclude OAuth / sign-in buttons to prevent accidental login flows
    var isSignIn = /sign.in|log.in|google|microsoft|apple|facebook|github|auth/i.test(t);
    return !isSignIn;
  });
  if (verify) { verify.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:verify.innerText?.trim()||verify.value}); }

  // --- Google reCAPTCHA checkbox ---
  var recaptchaCheckbox = document.querySelector('.recaptcha-checkbox-unchecked, input[type=checkbox][id*="recaptcha"]');
  if (recaptchaCheckbox) { recaptchaCheckbox.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:'recaptcha'}); }

  return null;
})()
`;
|
|
100
|
+
|
|
101
|
+
// Retry detection — returns 'cleared' if no verification page, or selector info
//
// The script evaluates to 'cleared', 'still-verifying', or a JSON string
// {t:'sel',s:'[data-gs-verify="1"]',txt:...} naming an element to click.
// Markers left by earlier passes are removed first so the returned selector
// cannot resolve to a previously tagged element that sits earlier in the DOM.
const VERIFY_RETRY_JS = `
(function() {
  var url = document.location.href;

  // Drop markers from earlier passes so '[data-gs-verify="1"]' is unambiguous.
  Array.from(document.querySelectorAll('[data-gs-verify]')).forEach(function(el) {
    el.removeAttribute('data-gs-verify');
  });

  var isVerifyPage = url.includes('/sorry/') ||
    url.includes('challenges.cloudflare.com') ||
    url.includes('login.microsoftonline.com') ||
    document.querySelector('#challenge-running, #challenge-stage, .cf-turnstile, [role="dialog"]');
  if (!isVerifyPage) return 'cleared';

  var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
  var btn = btns.find(b => {
    var t = (b.innerText?.trim() || b.value || '').toLowerCase();
    var isVerifyLike = t.includes('verify') || t.includes('human') || t.includes('robot') || t.includes('continue') || t.includes('next') || t.includes('submit');
    if (!isVerifyLike) return false;
    // Skip sign-in / OAuth buttons so a retry never starts a login flow.
    var isSignIn = /sign.in|log.in|google|microsoft|apple|facebook|github|auth/i.test(t);
    return !isSignIn;
  });
  if (btn) { btn.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:btn.innerText?.trim()||btn.value}); }

  var cf = document.querySelector('#cf-stage input[type="checkbox"], .cf-turnstile input');
  if (cf) { cf.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:'turnstile'}); }

  // Cloudflare Turnstile widget inside closed shadow DOM (detected via host container)
  var cfTurnstileHost = document.querySelector('#cf-turnstile, [id^="cf-chl-widget-"]');
  if (cfTurnstileHost) { return 'still-verifying'; }

  var modal = document.querySelector('[role="dialog"], .b_modal, [class*="verify"]');
  if (modal) {
    var modalBtn = modal.querySelector('button, a[role="button"]');
    if (modalBtn) { modalBtn.setAttribute('data-gs-verify','1'); return JSON.stringify({t:'sel',s:'[data-gs-verify="1"]',txt:modalBtn.innerText?.trim()}); }
  }

  return 'still-verifying';
})()
`;
|
|
137
|
+
|
|
138
|
+
/**
 * Evaluate the consent-dismissal script in a tab and, if it reports that it
 * clicked something, wait 1.5 s before returning.
 *
 * A failed evaluation is treated the same as "nothing clicked".
 *
 * @param {*} tab - Tab identifier forwarded unchanged to `cdp`.
 * @param {(args: Array) => Promise<*>} cdp - Called with `["eval", tab, script]`.
 * @returns {Promise<void>}
 */
export async function dismissConsent(tab, cdp) {
  const outcome = await cdp(["eval", tab, CONSENT_JS]).catch(() => null);

  const nothingClicked = !outcome || outcome === "null";
  if (nothingClicked) {
    return;
  }

  await new Promise((done) => {
    setTimeout(done, 1500);
  });
}
|
|
144
|
+
|
|
145
|
+
// ─── Human-like click simulation (multi-event with jitter) ────────────
|
|
146
|
+
|
|
147
|
+
/**
 * Random number in [min, max) with millisecond/milli-pixel resolution.
 *
 * Bounds are scaled by 1000 and rounded to integers before being handed to
 * crypto.randomInt, which only accepts safe integers with min < max. A
 * degenerate range (both bounds round to the same value) returns that value
 * instead of throwing; a reversed range still throws.
 *
 * @param {number} min - Inclusive lower bound.
 * @param {number} max - Exclusive upper bound.
 * @returns {number}
 */
function rng(min, max) {
  // crypto.randomInt is used instead of Math.random() to comply with SonarCloud security hotspot S2245.
  // This is NOT security-sensitive — the random values are only used for mouse-jitter and timing delays.
  const lo = Math.round(min * 1000);
  const hi = Math.round(max * 1000);
  if (lo === hi) {
    return lo / 1000;
  }
  return randomInt(lo, hi) / 1000;
}
|
|
152
|
+
|
|
153
|
+
/**
 * Fire a browser-level Input.dispatchMouseEvent via Chrome's top-level CDP
 * WebSocket. Unlike page-session dispatch, this routes through the compositor
 * and reaches OOPIFs (e.g. Cloudflare Turnstile in a cross-origin iframe).
 *
 * Silently does nothing when there is no global WebSocket, when
 * CDP_PROFILE_DIR is unset, or when `<profile>/DevToolsActivePort` is missing.
 * The first line of that file is used as the debugging port. Fetching
 * `/json/version` may reject (network error, timeout, bad JSON); callers that
 * want best-effort behaviour should catch.
 *
 * Once the socket is created the function always resolves, and the socket and
 * all timers are released on every path (success, socket error, deadline).
 *
 * @param {number} x - Viewport x coordinate of the click.
 * @param {number} y - Viewport y coordinate of the click.
 * @param {{ timeoutMs?: number }} [options] - `timeoutMs` caps the whole
 *   WebSocket exchange (default 3000 ms).
 * @returns {Promise<void>}
 */
async function browserLevelClick(x, y, { timeoutMs = 3000 } = {}) {
  if (!globalThis.WebSocket) return;
  const profileDir = process.env.CDP_PROFILE_DIR;
  if (!profileDir) return;
  const portFile = `${profileDir.replaceAll("\\", "/")}/DevToolsActivePort`;
  if (!existsSync(portFile)) return;
  const port = readFileSync(portFile, "utf8").trim().split("\n")[0].trim();

  const version = await new Promise((resolve, reject) => {
    const req = http.get(`http://localhost:${port}/json/version`, (res) => {
      let body = "";
      // Decode as UTF-8 so multi-byte characters split across chunks survive.
      res.setEncoding("utf8");
      res.on("data", (chunk) => {
        body += chunk;
      });
      res.on("error", reject);
      res.on("end", () => {
        try {
          resolve(JSON.parse(body));
        } catch {
          reject(new Error("bad JSON"));
        }
      });
    });
    req.on("error", reject);
    req.setTimeout(1000, () => {
      req.destroy();
      reject(new Error("timeout"));
    });
  });

  const ws = new globalThis.WebSocket(version.webSocketDebuggerUrl);
  let msgId = 0;
  let finished = false;
  let deadlineTimer;
  let settleTimer;

  // Send one CDP command and resolve when the reply with the same id arrives.
  const send = (method, params) =>
    new Promise((ack) => {
      const id = ++msgId;
      const onMessage = (evt) => {
        let reply;
        try {
          reply = JSON.parse(evt.data);
        } catch {
          return; // not a JSON frame — cannot be our reply
        }
        if (reply?.id !== id) return;
        ws.removeEventListener("message", onMessage);
        ack();
      };
      ws.addEventListener("message", onMessage);
      ws.send(JSON.stringify({ id, method, params }));
    });

  const pause = (ms) => new Promise((r) => setTimeout(r, ms));

  // move → hover pause → press → hold pause → release, with small jitter.
  const clickSequence = async () => {
    const cx = x + rng(-2, 2);
    const cy = y + rng(-2, 2);
    await send("Input.dispatchMouseEvent", {
      type: "mouseMoved",
      x: cx,
      y: cy,
      button: "none",
    });
    await pause(rng(80, 160));
    // Deadline hit while hovering: the socket is already closed, so don't
    // start a press that could never be followed by its release.
    if (finished) return;
    await send("Input.dispatchMouseEvent", {
      type: "mousePressed",
      x: cx,
      y: cy,
      button: "left",
      clickCount: 1,
    });
    await pause(rng(30, 80));
    await send("Input.dispatchMouseEvent", {
      type: "mouseReleased",
      x: cx + rng(-1, 1),
      y: cy + rng(-1, 1),
      button: "left",
      clickCount: 1,
    });
  };

  try {
    await new Promise((resolve) => {
      deadlineTimer = setTimeout(resolve, timeoutMs);
      ws.onerror = () => resolve();
      ws.onopen = () => {
        clickSequence().then(
          () => {
            if (finished) return;
            // Give Chrome a moment to process the release before closing.
            settleTimer = setTimeout(resolve, 200);
          },
          // A failed send must not become an unhandled rejection.
          () => resolve(),
        );
      };
    });
  } finally {
    finished = true;
    clearTimeout(deadlineTimer);
    clearTimeout(settleTimer);
    try {
      ws.close();
    } catch {
      // Socket already closed or never opened — nothing left to release.
    }
  }
}
|
|
237
|
+
|
|
238
|
+
/**
 * Perform a human-like click at specific coordinates via CDP Input.dispatchMouseEvent.
 * Sends: mouseMoved → randomPause → mousePressed → randomPause → mouseReleased
 * with coordinate jitter and variable timing to mimic human motor variance.
 *
 * The hover move is sent with `button: "none"` (no button held), matching
 * browserLevelClick; only the press and release carry `button: "left"`.
 *
 * @param {*} tab - Tab identifier forwarded unchanged to `cdpFn`.
 * @param {(args: Array) => Promise<*>} cdpFn - Called with
 *   `["evalraw", tab, "Input.dispatchMouseEvent", <JSON params>]`.
 * @param {number|string} x - Viewport x coordinate (parsed with parseFloat).
 * @param {number|string} y - Viewport y coordinate (parsed with parseFloat).
 * @returns {Promise<string>} A `human-clicked at (x, y)` summary string.
 * @throws {Error} When either coordinate is not a finite number.
 */
export async function humanClickXY(tab, cdpFn, x, y) {
  const cx = Number.parseFloat(x);
  const cy = Number.parseFloat(y);
  // isFinite also rejects ±Infinity, which JSON.stringify would turn into null.
  if (!Number.isFinite(cx) || !Number.isFinite(cy)) {
    throw new Error(`humanClickXY: invalid coordinates (${x}, ${y})`);
  }

  const dispatch = (params) =>
    cdpFn(["evalraw", tab, "Input.dispatchMouseEvent", JSON.stringify(params)]);
  const pause = (ms) => new Promise((r) => setTimeout(r, ms));
  const pressed = { button: "left", clickCount: 1, modifiers: 0 };

  // ── mouseMoved with slight jitter (plain hover: no button held) ──
  const jx = cx + rng(-3, 3);
  const jy = cy + rng(-3, 3);
  await dispatch({ type: "mouseMoved", x: jx, y: jy, button: "none", modifiers: 0 });
  // Brief hover delay (80-180ms) — humans don't instant-click
  await pause(rng(80, 180));

  // ── mousePressed at jittered position ──
  const px = cx + rng(-2, 2);
  const py = cy + rng(-2, 2);
  await dispatch({ ...pressed, type: "mousePressed", x: px, y: py });
  // Hold delay (30-90ms) — mimics human click duration
  await pause(rng(30, 90));

  // ── mouseReleased at jittered position ──
  const rx = px + rng(-1, 1);
  const ry = py + rng(-1, 1);
  await dispatch({ ...pressed, type: "mouseReleased", x: rx, y: ry });

  // Also fire via browser-level CDP WebSocket so the click reaches OOPIFs
  // (cross-origin iframes like Cloudflare Turnstile) that page-session
  // dispatch can't route to. Best-effort — never throws.
  await browserLevelClick(cx, cy).catch(() => {});

  // Post-click settle
  await pause(rng(100, 300));

  return `human-clicked at (${cx.toFixed(0)}, ${cy.toFixed(0)})`;
}
|
|
296
|
+
|
|
297
|
+
/**
 * Find an element by CSS selector and perform a human-like click on its center.
 *
 * The selector is embedded in the evaluated script as a JSON string literal,
 * so quotes, backslashes and line breaks in it cannot break the script.
 *
 * @param {*} tab - Tab identifier forwarded unchanged to `cdpFn`.
 * @param {(args: Array) => Promise<*>} cdpFn - Called with `["eval", tab, script]`;
 *   expected to resolve to the script's string result.
 * @param {string} selector - CSS selector of the element to click.
 * @returns {Promise<string|null>} The humanClickXY result, or null when the
 *   element is missing, has zero size, sits exactly at (0,0), or its rect
 *   could not be read.
 */
export async function humanClickElement(tab, cdpFn, selector) {
  const selectorLiteral = JSON.stringify(String(selector));

  // Get element bounding rect
  const rect = await cdpFn([
    "eval",
    tab,
    `(function() {
      var el = document.querySelector(${selectorLiteral});
      if (!el) return 'null';
      var r = el.getBoundingClientRect();
      return JSON.stringify({x: r.left + r.width / 2, y: r.top + r.height / 2, w: r.width, h: r.height});
    })()`,
  ]).catch(() => "null");

  if (!rect || rect === "null") {
    return null; // Element not found
  }

  let parsed;
  try {
    parsed = JSON.parse(rect);
  } catch {
    return null; // Unreadable rect — treat like "not found"
  }
  if (!parsed) {
    return null;
  }

  // Skip elements with zero dimensions or off-screen position — clicking at
  // (0,0) is a false positive (hidden/unmounted element matched the selector).
  if (parsed.w === 0 || parsed.h === 0 || (parsed.x === 0 && parsed.y === 0)) {
    return null;
  }

  return humanClickXY(tab, cdpFn, parsed.x, parsed.y);
}
|
|
327
|
+
|
|
328
|
+
/**
 * Interpret the string produced by a detection script and, if it names a
 * click target, click it.
 *
 * Accepted payloads are JSON strings: `{t:"sel",s:"...",txt:"..."}` (click the
 * element matching selector `s`) or `{t:"xy",x:...,y:...}` (click at those
 * coordinates). Empty results, the sentinels "null" / "cleared" /
 * "still-verifying", non-JSON strings and any error raised while clicking all
 * yield `false`.
 *
 * @param {*} tab - Tab identifier forwarded to the click helpers.
 * @param {Function} cdp - CDP command function forwarded to the click helpers.
 * @param {string|null} detectResult - Raw detection result.
 * @returns {Promise<boolean>} True if a click was performed.
 */
async function tryHumanClick(tab, cdp, detectResult) {
  const nothingToClick = new Set(["null", "cleared", "still-verifying"]);
  if (!detectResult || nothingToClick.has(detectResult)) {
    return false;
  }

  try {
    const target = JSON.parse(detectResult);

    switch (target.t) {
      case "sel": {
        if (!target.s) break;
        process.stderr.write(
          `[greedysearch] Human-clicking "${target.txt}" via CDP...\n`,
        );
        const outcome = await humanClickElement(tab, cdp, target.s);
        return outcome !== null;
      }
      case "xy": {
        // Both coordinates zero/absent means the element is off-screen or not rendered.
        const hasPosition = Boolean(target.x) || Boolean(target.y);
        if (!hasPosition) return false;
        process.stderr.write(
          `[greedysearch] Human-clicking at (${target.x.toFixed(0)}, ${target.y.toFixed(0)})...\n`,
        );
        await humanClickXY(tab, cdp, target.x, target.y);
        return true;
      }
      default:
        break;
    }
  } catch {}

  return false;
}
|
|
364
|
+
|
|
365
|
+
/**
 * Evaluate the verification-detection script in a tab.
 *
 * @param {*} tab - Tab identifier forwarded unchanged to `cdp`.
 * @param {(args: Array) => Promise<*>} cdp - Called with `["eval", tab, script]`.
 * @returns {Promise<*>} The script's result, or null when the evaluation
 *   failed, produced a falsy value, or produced the string "null".
 */
export async function detectVerificationChallenge(tab, cdp) {
  const detection = await cdp(["eval", tab, VERIFY_DETECT_JS]).catch(() => null);
  if (!detection || detection === "null") {
    return null;
  }
  return detection;
}
|
|
369
|
+
|
|
370
|
+
/**
 * Detect a verification challenge in a tab and try to get past it.
 *
 * Returns one of:
 *   "clear"           — nothing detected, or the detected target could not be clicked
 *   "cleared-by-user" — a Google "sorry" page went away while polling
 *   "clicked"         — a target was clicked and the retry script reported it cleared
 *                       (a failed or empty retry evaluation counts as cleared)
 *   "needs-human"     — still blocked when `waitMs` ran out
 *
 * @param {*} tab - Tab identifier forwarded unchanged to `cdp`.
 * @param {(args: Array) => Promise<*>} cdp - CDP command function.
 * @param {number} [waitMs=30000] - Polling budget in milliseconds.
 * @returns {Promise<string>}
 */
export async function handleVerification(tab, cdp, waitMs = 30000) {
  const sleep = (ms) => new Promise((wake) => setTimeout(wake, ms));

  const challenge = await detectVerificationChallenge(tab, cdp);
  if (!challenge) {
    return "clear";
  }

  // Hard CAPTCHA page — nothing to click; poll until the user solves it.
  if (challenge === "sorry-page") {
    process.stderr.write(
      `[greedysearch] Google CAPTCHA detected — please solve it in the browser window (waiting up to ${Math.floor(waitMs / 1000)}s)...\n`,
    );
    const giveUpAt = Date.now() + waitMs;
    while (Date.now() < giveUpAt) {
      await sleep(2000);
      const href = await cdp(["eval", tab, "document.location.href"]).catch(
        () => "",
      );
      if (!href.includes("/sorry/")) {
        return "cleared-by-user";
      }
    }
    return "needs-human";
  }

  // Click whatever the detector pointed at; if that was not possible, stop here.
  const didClick = await tryHumanClick(tab, cdp, challenge);
  if (!didClick) {
    return "clear";
  }

  await sleep(2000);

  // Keep re-checking (and re-clicking when a new target shows up) until the
  // retry script reports the page as cleared or the budget is spent.
  const giveUpAt = Date.now() + waitMs;
  while (Date.now() < giveUpAt) {
    const status = await cdp(["eval", tab, VERIFY_RETRY_JS]).catch(() => null);

    const isCleared = status === "cleared" || !status || status === "null";
    if (isCleared) {
      process.stderr.write("[greedysearch] Verification cleared.\n");
      return "clicked";
    }

    if (status === "still-verifying") {
      await sleep(1500);
      continue;
    }

    await tryHumanClick(tab, cdp, status);
    await sleep(2000);
  }

  process.stderr.write(
    "[greedysearch] Verification may require manual intervention.\n",
  );
  return "needs-human";
}
|