@apmantza/greedysearch-pi 1.4.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -24
- package/extractors/bing-copilot.mjs +195 -204
- package/extractors/consent.mjs +255 -248
- package/extractors/gemini.mjs +12 -53
- package/extractors/google-ai.mjs +162 -165
- package/extractors/perplexity.mjs +181 -184
- package/package.json +2 -2
- package/search.mjs +997 -996
package/extractors/consent.mjs
CHANGED
|
@@ -1,248 +1,255 @@
|
|
|
1
|
-
// consent.mjs — auto-dismiss common cookie/consent banners and human-verification pages
|
|
2
|
-
// Call dismissConsent(tab, cdpFn) after navigating to any page.
|
|
3
|
-
|
|
4
|
-
const CONSENT_JS = `
|
|
5
|
-
(function() {
|
|
6
|
-
// Google consent page (consent.google.com)
|
|
7
|
-
var g = document.querySelector('#L2AGLb, button[jsname="b3VHJd"], .tHlp8d');
|
|
8
|
-
if (g) { g.click(); return 'google'; }
|
|
9
|
-
|
|
10
|
-
// OneTrust (used by many sites including Stack Overflow)
|
|
11
|
-
var ot = document.querySelector('#onetrust-accept-btn-handler, .onetrust-accept-btn-handler');
|
|
12
|
-
if (ot) { ot.click(); return 'onetrust'; }
|
|
13
|
-
|
|
14
|
-
// Generic "accept all" / "agree" buttons
|
|
15
|
-
var btns = Array.from(document.querySelectorAll('button, a[role=button]'));
|
|
16
|
-
var accept = btns.find(b => /^(accept all|accept cookies|agree|i agree|got it|allow all|allow cookies)$/i.test(b.innerText?.trim()));
|
|
17
|
-
if (accept) { accept.click(); return 'generic:' + accept.innerText.trim(); }
|
|
18
|
-
|
|
19
|
-
return null;
|
|
20
|
-
})()
|
|
21
|
-
`;
|
|
22
|
-
|
|
23
|
-
// Detect and auto-click human verification challenges (Google, Microsoft, Cloudflare)
|
|
24
|
-
const VERIFY_DETECT_JS = `
|
|
25
|
-
(function() {
|
|
26
|
-
var url = document.location.href;
|
|
27
|
-
|
|
28
|
-
// --- Google "sorry" page (hard CAPTCHA, can't auto-solve) ---
|
|
29
|
-
if (url.includes('/sorry/') || url.includes('sorry.google')) return 'sorry-page';
|
|
30
|
-
|
|
31
|
-
// --- Microsoft account verification page ---
|
|
32
|
-
if (url.includes('login.microsoftonline.com') || url.includes('login.live.com') || url.includes('account.microsoft.com')) {
|
|
33
|
-
// Look for "Verify" or "Continue" buttons on Microsoft auth pages
|
|
34
|
-
var msBtns = Array.from(document.querySelectorAll('button, input[type=submit], a'));
|
|
35
|
-
var msVerify = msBtns.find(b => /verify|continue|next/i.test(b.innerText?.trim() || b.value || ''));
|
|
36
|
-
if (msVerify) { msVerify.click(); return 'clicked-ms-verify:' + (msVerify.innerText?.trim() || msVerify.value); }
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
// --- Bing Copilot / Microsoft "Verify you're human" interstitial ---
|
|
40
|
-
// Copilot sometimes shows a modal with "Continue" or "Verify" before allowing queries
|
|
41
|
-
if (url.includes('copilot.microsoft.com') || url.includes('bing.com/chat')) {
|
|
42
|
-
// Look for verification modal/dialog
|
|
43
|
-
var modal = document.querySelector('[role="dialog"], .b_modal, .bnp_hfly, [class*="verify"], [class*="challenge"]');
|
|
44
|
-
if (modal) {
|
|
45
|
-
// Find any actionable button in the modal
|
|
46
|
-
var modalBtns = Array.from(modal.querySelectorAll('button, a[role="button"], input[type="submit"]'));
|
|
47
|
-
var actionBtn = modalBtns.find(b => /^(continue|verify|submit|next|i agree|accept|got it)$/i.test(b.innerText?.trim() || b.value || ''));
|
|
48
|
-
if (actionBtn) { actionBtn.click(); return 'clicked-copilot-modal:' + actionBtn.innerText.trim(); }
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
// Check for Turnstile iframe (Copilot uses Cloudflare Turnstile)
|
|
52
|
-
var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"], iframe[title*="challenge"], iframe[title*="Widget"]');
|
|
53
|
-
if (turnstileIframe) {
|
|
54
|
-
// Try clicking the iframe container or nearby checkbox
|
|
55
|
-
var container = turnstileIframe.closest('[class*="turnstile"], [class*="challenge"], [id*="turnstile"]') || turnstileIframe.parentElement;
|
|
56
|
-
if (container) {
|
|
57
|
-
var checkbox = container.querySelector('input[type="checkbox"]');
|
|
58
|
-
if (checkbox && !checkbox.checked) {
|
|
59
|
-
checkbox.click();
|
|
60
|
-
return 'clicked-turnstile-in-iframe';
|
|
61
|
-
}
|
|
62
|
-
// Click the container itself (Turnstile often captures clicks on parent)
|
|
63
|
-
container.click();
|
|
64
|
-
return 'clicked-turnstile-container-near-iframe';
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
// --- Cloudflare Turnstile (used by Copilot and many sites) ---
|
|
70
|
-
// Turnstile widget in iframe
|
|
71
|
-
var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"]');
|
|
72
|
-
if (turnstileIframe) {
|
|
73
|
-
// Try to find and click the checkbox inside the iframe's container
|
|
74
|
-
var turnstileCheckbox = document.querySelector('#cf-turnstile-response, [data-turnstile-callback] input, .cf-turnstile input[type="checkbox"]');
|
|
75
|
-
if (turnstileCheckbox && !turnstileCheckbox.checked) {
|
|
76
|
-
turnstileCheckbox.click();
|
|
77
|
-
return 'clicked-turnstile-checkbox';
|
|
78
|
-
}
|
|
79
|
-
// Try clicking the turnstile container itself (some implementations)
|
|
80
|
-
var turnstileContainer = document.querySelector('.cf-turnstile, [data-sitekey]');
|
|
81
|
-
if (turnstileContainer) {
|
|
82
|
-
turnstileContainer.click();
|
|
83
|
-
return 'clicked-turnstile-container';
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
// --- Cloudflare "Verify you are human" challenge page ---
|
|
88
|
-
if (url.includes('challenges.cloudflare.com') || document.querySelector('#challenge-running, #challenge-stage')) {
|
|
89
|
-
var cfCheckbox = document.querySelector('#cf-stage input[type="checkbox"], .ctp-checkbox-container input');
|
|
90
|
-
if (cfCheckbox) { cfCheckbox.click(); return 'clicked-cloudflare-checkbox'; }
|
|
91
|
-
var cfBtn = document.querySelector('#challenge-form button, .cf-challenge button');
|
|
92
|
-
if (cfBtn) { cfBtn.click(); return 'clicked-cloudflare-button'; }
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
// --- Microsoft "I am human" / "Verify" challenge ---
|
|
96
|
-
// Microsoft uses various verification UIs
|
|
97
|
-
var msHumanBtn = document.querySelector('button[id*="i0"], button[id*="id__"]');
|
|
98
|
-
if (msHumanBtn && /verify|human|robot|continue/i.test(msHumanBtn.innerText?.trim())) {
|
|
99
|
-
msHumanBtn.click();
|
|
100
|
-
return 'clicked-ms-human:' + msHumanBtn.innerText.trim();
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
// --- Generic verification buttons (catch-all) ---
|
|
104
|
-
var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
|
|
105
|
-
var verify = btns.find(b =>
|
|
106
|
-
|
|
107
|
-
verify.
|
|
108
|
-
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
var
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
const
|
|
161
|
-
(
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
if (
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
1
|
+
// consent.mjs — auto-dismiss common cookie/consent banners and human-verification pages
|
|
2
|
+
// Call dismissConsent(tab, cdpFn) after navigating to any page.
|
|
3
|
+
|
|
4
|
+
const CONSENT_JS = `
|
|
5
|
+
(function() {
|
|
6
|
+
// Google consent page (consent.google.com)
|
|
7
|
+
var g = document.querySelector('#L2AGLb, button[jsname="b3VHJd"], .tHlp8d');
|
|
8
|
+
if (g) { g.click(); return 'google'; }
|
|
9
|
+
|
|
10
|
+
// OneTrust (used by many sites including Stack Overflow)
|
|
11
|
+
var ot = document.querySelector('#onetrust-accept-btn-handler, .onetrust-accept-btn-handler');
|
|
12
|
+
if (ot) { ot.click(); return 'onetrust'; }
|
|
13
|
+
|
|
14
|
+
// Generic "accept all" / "agree" buttons
|
|
15
|
+
var btns = Array.from(document.querySelectorAll('button, a[role=button]'));
|
|
16
|
+
var accept = btns.find(b => /^(accept all|accept cookies|agree|i agree|got it|allow all|allow cookies)$/i.test(b.innerText?.trim()));
|
|
17
|
+
if (accept) { accept.click(); return 'generic:' + accept.innerText.trim(); }
|
|
18
|
+
|
|
19
|
+
return null;
|
|
20
|
+
})()
|
|
21
|
+
`;
|
|
22
|
+
|
|
23
|
+
// Detect and auto-click human verification challenges (Google, Microsoft, Cloudflare).
//
// In-page script evaluated in the target tab by handleVerification(). It evaluates to:
//   'sorry-page'  - Google "sorry" CAPTCHA page; never clicked automatically
//   'clicked-...' - a verification control was clicked; the suffix names which one
//                   and, where available, the clicked control's label
//   null          - no known verification UI was found
// Checks run top to bottom and the first match wins.
const VERIFY_DETECT_JS = `
(function() {
  var url = document.location.href;

  // --- Google "sorry" page (hard CAPTCHA, can't auto-solve) ---
  if (url.includes('/sorry/') || url.includes('sorry.google')) return 'sorry-page';

  // --- Microsoft account verification page ---
  if (url.includes('login.microsoftonline.com') || url.includes('login.live.com') || url.includes('account.microsoft.com')) {
    // Look for "Verify" or "Continue" buttons on Microsoft auth pages
    var msBtns = Array.from(document.querySelectorAll('button, input[type=submit], a'));
    var msVerify = msBtns.find(b => /verify|continue|next/i.test(b.innerText?.trim() || b.value || ''));
    if (msVerify) { msVerify.click(); return 'clicked-ms-verify:' + (msVerify.innerText?.trim() || msVerify.value); }
  }

  // --- Bing Copilot / Microsoft "Verify you're human" interstitial ---
  // Copilot sometimes shows a modal with "Continue" or "Verify" before allowing queries
  if (url.includes('copilot.microsoft.com') || url.includes('bing.com/chat')) {
    // Look for verification modal/dialog
    var modal = document.querySelector('[role="dialog"], .b_modal, .bnp_hfly, [class*="verify"], [class*="challenge"]');
    if (modal) {
      // Find any actionable button in the modal
      var modalBtns = Array.from(modal.querySelectorAll('button, a[role="button"], input[type="submit"]'));
      var actionBtn = modalBtns.find(b => /^(continue|verify|submit|next|i agree|accept|got it)$/i.test(b.innerText?.trim() || b.value || ''));
      // An input[type=submit] carries its label in .value, so report that when
      // innerText is empty (same fallback the other branches use).
      if (actionBtn) { actionBtn.click(); return 'clicked-copilot-modal:' + (actionBtn.innerText?.trim() || actionBtn.value); }
    }

    // Check for Turnstile iframe (Copilot uses Cloudflare Turnstile)
    var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"], iframe[title*="challenge"], iframe[title*="Widget"]');
    if (turnstileIframe) {
      // Try clicking the iframe container or nearby checkbox
      var container = turnstileIframe.closest('[class*="turnstile"], [class*="challenge"], [id*="turnstile"]') || turnstileIframe.parentElement;
      if (container) {
        var checkbox = container.querySelector('input[type="checkbox"]');
        if (checkbox && !checkbox.checked) {
          checkbox.click();
          return 'clicked-turnstile-in-iframe';
        }
        // Click the container itself (Turnstile often captures clicks on parent)
        container.click();
        return 'clicked-turnstile-container-near-iframe';
      }
    }
  }

  // --- Cloudflare Turnstile (used by Copilot and many sites) ---
  // Turnstile widget in iframe
  var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"]');
  if (turnstileIframe) {
    // Try to find and click the checkbox inside the iframe's container
    var turnstileCheckbox = document.querySelector('#cf-turnstile-response, [data-turnstile-callback] input, .cf-turnstile input[type="checkbox"]');
    if (turnstileCheckbox && !turnstileCheckbox.checked) {
      turnstileCheckbox.click();
      return 'clicked-turnstile-checkbox';
    }
    // Try clicking the turnstile container itself (some implementations)
    var turnstileContainer = document.querySelector('.cf-turnstile, [data-sitekey]');
    if (turnstileContainer) {
      turnstileContainer.click();
      return 'clicked-turnstile-container';
    }
  }

  // --- Cloudflare "Verify you are human" challenge page ---
  if (url.includes('challenges.cloudflare.com') || document.querySelector('#challenge-running, #challenge-stage')) {
    var cfCheckbox = document.querySelector('#cf-stage input[type="checkbox"], .ctp-checkbox-container input');
    if (cfCheckbox) { cfCheckbox.click(); return 'clicked-cloudflare-checkbox'; }
    var cfBtn = document.querySelector('#challenge-form button, .cf-challenge button');
    if (cfBtn) { cfBtn.click(); return 'clicked-cloudflare-button'; }
  }

  // --- Microsoft "I am human" / "Verify" challenge ---
  // Microsoft uses various verification UIs
  var msHumanBtn = document.querySelector('button[id*="i0"], button[id*="id__"]');
  if (msHumanBtn && /verify|human|robot|continue/i.test(msHumanBtn.innerText?.trim())) {
    msHumanBtn.click();
    return 'clicked-ms-human:' + msHumanBtn.innerText.trim();
  }

  // --- Generic verification buttons (catch-all) ---
  // Skipped entirely when a reCAPTCHA iframe is present; labels containing
  // "verified" are excluded.
  var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
  var verify = btns.find(b => {
    var t = (b.innerText?.trim() || b.value || '').toLowerCase();
    return (t.includes('verify') || t.includes('human') || t.includes('robot') || t.includes('continue') || t.includes('proceed')) &&
           !t.includes('verified') && !document.querySelector('iframe[src*="recaptcha"]');
  });
  if (verify) {
    verify.click();
    return 'clicked-verify:' + (verify.innerText?.trim() || verify.value);
  }

  // --- Google reCAPTCHA (no image challenge, just checkbox) ---
  var recaptchaCheckbox = document.querySelector('.recaptcha-checkbox-unchecked, input[type=checkbox][id*="recaptcha"]');
  if (recaptchaCheckbox) { recaptchaCheckbox.click(); return 'clicked-recaptcha'; }

  return null;
})()
`;
|
|
122
|
+
|
|
123
|
+
// Retry step for verification: handleVerification() evaluates this repeatedly
// until the page changes or its deadline passes.
//
// In-page script. It evaluates to:
//   'cleared'               - the page no longer looks like a verification page
//   'clicked:<label>'       - a verify/continue-style button was clicked again
//   'clicked-turnstile'     - an unchecked Turnstile checkbox input was clicked
//   'clicked-modal-btn:<l>' - the first button inside a dialog/modal was clicked
//   'still-verifying'       - still on a verification page, nothing clickable found
const VERIFY_RETRY_JS = `
(function() {
  var url = document.location.href;

  // Check if we're still on a verification page.
  // The host list mirrors the hosts VERIFY_DETECT_JS treats as verification
  // pages, so a click made there is not reported as 'cleared' while the tab is
  // still on that host.
  var isVerifyPage = url.includes('/sorry/') ||
    url.includes('sorry.google') ||
    url.includes('challenges.cloudflare.com') ||
    url.includes('login.microsoftonline.com') ||
    url.includes('login.live.com') ||
    url.includes('account.microsoft.com') ||
    document.querySelector('#challenge-running, #challenge-stage, .cf-turnstile, [role="dialog"]');

  if (!isVerifyPage) return 'cleared';

  // Try clicking any verify/continue button again
  var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
  var btn = btns.find(b => {
    var t = (b.innerText?.trim() || b.value || '').toLowerCase();
    return t.includes('verify') || t.includes('human') || t.includes('robot') || t.includes('continue') || t.includes('next') || t.includes('submit');
  });
  if (btn) { btn.click(); return 'clicked:' + (btn.innerText?.trim() || btn.value); }

  // Try Turnstile checkbox
  var cf = document.querySelector('#cf-stage input[type="checkbox"], .cf-turnstile input');
  if (cf && !cf.checked) { cf.click(); return 'clicked-turnstile'; }

  // Check for modal dialog with continue button (Copilot interstitial)
  var modal = document.querySelector('[role="dialog"], .b_modal, [class*="verify"]');
  if (modal) {
    var modalBtn = modal.querySelector('button, a[role="button"]');
    if (modalBtn) { modalBtn.click(); return 'clicked-modal-btn:' + modalBtn.innerText.trim(); }
  }

  return 'still-verifying';
})()
`;
|
|
158
|
+
|
|
159
|
+
/**
 * Evaluate CONSENT_JS in `tab` and, if it reports that something was clicked,
 * pause 1.5 s before returning.
 *
 * A rejected `cdp` call is treated the same as "nothing clicked".
 *
 * @param {string} tab - tab identifier passed through to `cdp`
 * @param {(args: string[]) => Promise<string>} cdp - CDP command runner
 * @returns {Promise<void>}
 */
export async function dismissConsent(tab, cdp) {
  const outcome = await cdp(['eval', tab, CONSENT_JS]).catch(() => null);

  // Both a missing result and the literal string 'null' mean no banner was clicked.
  const nothingClicked = !outcome || outcome === 'null';
  if (nothingClicked) {
    return;
  }

  await new Promise((resolve) => {
    setTimeout(resolve, 1500);
  });
}
|
|
165
|
+
|
|
166
|
+
// Get iframe bounding box for coordinate-based clicking (for cross-origin Turnstile).
//
// In-page script. It picks the first iframe whose src mentions
// challenges.cloudflare.com / "turnstile" or whose title mentions "challenge" /
// "Widget". It evaluates to a JSON string {"x": ..., "y": ...} or to null when no
// such iframe exists. The point is taken from getBoundingClientRect(): 30px right
// of the iframe's left edge, at half its height. Callers JSON.parse the result
// and pass x/y to the 'clickxy' cdp command.
const GET_IFRAME_CENTER_JS = `
(function() {
var iframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"], iframe[title*="challenge"], iframe[title*="Widget"]');
if (!iframe) return null;
var rect = iframe.getBoundingClientRect();
// Click near the center-left where the checkbox usually is
return JSON.stringify({ x: rect.left + 30, y: rect.top + rect.height / 2 });
})()
`;
|
|
176
|
+
|
|
177
|
+
/** Resolve after `ms` milliseconds. */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Evaluate GET_IFRAME_CENTER_JS in `tab` and parse its answer.
 *
 * @returns {Promise<{x: number, y: number} | null>} click point, or null when the
 *   page reports no matching iframe, the eval fails, or the answer is unusable
 */
async function locateTurnstileClickPoint(tab, cdp) {
  const raw = await cdp(['eval', tab, GET_IFRAME_CENTER_JS]).catch(() => null);
  if (!raw || raw === 'null') return null;
  try {
    const { x, y } = JSON.parse(raw);
    return Number.isFinite(x) && Number.isFinite(y) ? { x, y } : null;
  } catch {
    // Unparseable answer: treat it as "no iframe found" (best effort).
    return null;
  }
}

/**
 * Send a 'clickxy' command for `point`, then wait 3 s for the page to react.
 * A failing click is logged and skipped; it never throws.
 *
 * @returns {Promise<boolean>} true when the click command succeeded
 */
async function clickAtPoint(tab, cdp, { x, y }) {
  try {
    await cdp(['clickxy', tab, String(x), String(y)]);
  } catch (err) {
    process.stderr.write(`[greedysearch] Coordinate click failed: ${err?.message ?? err}\n`);
    return false;
  }
  await delay(3000);
  return true;
}

/**
 * Detect a human-verification challenge in `tab` and try to get past it.
 *
 * @param {string} tab - tab identifier passed through to `cdp`
 * @param {(args: string[]) => Promise<string>} cdp - CDP command runner
 * @param {number} [waitMs=60000] - how long to keep polling/retrying
 * @returns {Promise<'clear' | 'clicked' | 'cleared-by-user' | 'needs-human'>}
 *   'clear'           - nothing to do (or a fallback click could not be confirmed)
 *   'clicked'         - a control was clicked and the page no longer looks blocked
 *   'cleared-by-user' - the Google "sorry" page went away while we waited
 *   'needs-human'     - still blocked when `waitMs` ran out
 */
export async function handleVerification(tab, cdp, waitMs = 60000) {
  const result = await cdp(['eval', tab, VERIFY_DETECT_JS]).catch(() => null);

  // Detection found nothing (or the eval failed). Before declaring the page
  // clear, probe for a Turnstile-style iframe and try a coordinate click.
  // This fallback used to sit after an early return and never ran.
  if (!result || result === 'null') {
    const point = await locateTurnstileClickPoint(tab, cdp);
    if (!point) return 'clear';

    process.stderr.write(`[greedysearch] Found Turnstile iframe, attempting coordinate click...\n`);
    if (await clickAtPoint(tab, cdp, point)) {
      // Check if it worked
      const after = await cdp(['eval', tab, VERIFY_RETRY_JS]).catch(() => null);
      if (after === 'cleared' || after === 'null') return 'clicked';
    }
    return 'clear';
  }

  // Hard CAPTCHA page — wait for user to solve it manually
  if (result === 'sorry-page') {
    process.stderr.write(`[greedysearch] Google CAPTCHA detected — please solve it in the browser window (waiting up to ${Math.floor(waitMs / 1000)}s)...\n`);
    const deadline = Date.now() + waitMs;
    while (Date.now() < deadline) {
      await delay(2000);
      const url = await cdp(['eval', tab, 'document.location.href']).catch(() => null);
      // A failed or empty read tells us nothing about the page; keep polling
      // rather than reporting the CAPTCHA as solved.
      if (typeof url !== 'string' || url === '') continue;
      // Same two markers the detector uses to recognise the sorry page.
      if (!url.includes('/sorry/') && !url.includes('sorry.google')) return 'cleared-by-user';
    }
    return 'needs-human';
  }

  // We clicked something — wait for page to update, then keep retrying
  if (typeof result === 'string' && result.startsWith('clicked-')) {
    process.stderr.write(`[greedysearch] Clicked verification: ${result}\n`);
    await delay(2000);

    // Keep checking if verification cleared, retry clicking for up to waitMs
    const deadline = Date.now() + waitMs;
    while (Date.now() < deadline) {
      const retryResult = await cdp(['eval', tab, VERIFY_RETRY_JS]).catch(() => null);

      // NOTE(review): a failed eval is also counted as cleared here, presumably
      // because navigating away tears down the page context — confirm.
      if (retryResult === 'cleared' || !retryResult || retryResult === 'null') {
        process.stderr.write(`[greedysearch] Verification cleared.\n`);
        await delay(1000);
        return 'clicked';
      }

      if (retryResult.startsWith('clicked:')) {
        process.stderr.write(`[greedysearch] Retrying verification click...\n`);
        await delay(2000);
      }

      // If verification is stuck, try clicking the Turnstile iframe by coordinates
      const point = await locateTurnstileClickPoint(tab, cdp);
      if (point) {
        process.stderr.write(`[greedysearch] Trying coordinate click on Turnstile iframe at (${point.x}, ${point.y})...\n`);
        await clickAtPoint(tab, cdp, point);
      }

      await delay(1500);
    }

    // Still stuck — might need user intervention
    process.stderr.write(`[greedysearch] Verification may require manual intervention.\n`);
    return 'needs-human';
  }

  // Any other detector answer is treated as nothing to handle.
  return 'clear';
}
|
package/extractors/gemini.mjs
CHANGED
|
@@ -44,13 +44,14 @@ function cdp(args, timeoutMs = 30000) {
|
|
|
44
44
|
|
|
45
45
|
/**
 * Return the tab to drive: the caller-supplied prefix if given, otherwise a
 * newly created about:blank tab.
 *
 * The first 8 characters of the first line of `cdp(['list'])` output are used
 * as the anchor tab for issuing `Target.createTarget`.
 *
 * @param {string} [tabPrefix] - existing tab prefix to reuse as-is
 * @returns {Promise<string>} first 8 characters of the tab's target id
 * @throws {Error} when the tab listing yields no anchor tab
 */
async function getOrOpenTab(tabPrefix) {
  if (tabPrefix) {
    return tabPrefix;
  }

  // Always open a fresh tab to avoid SPA navigation issues
  const listing = await cdp(['list']);
  const [firstLine] = listing.split('\n');
  const anchor = firstLine?.slice(0, 8);
  if (!anchor) {
    throw new Error('No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?');
  }

  const created = await cdp(['evalraw', anchor, 'Target.createTarget', '{"url":"about:blank"}']);
  const newTargetId = JSON.parse(created).targetId;

  await cdp(['list']); // refresh cache
  return newTargetId.slice(0, 8);
}
|
|
55
56
|
|
|
56
57
|
async function typeIntoGemini(tab, text) {
|
|
@@ -111,53 +112,11 @@ async function extractAnswer(tab) {
|
|
|
111
112
|
const answer = await cdp(['eval', tab, `window.__geminiClipboard || ''`]);
|
|
112
113
|
if (!answer) throw new Error('Clipboard interceptor returned empty text');
|
|
113
114
|
|
|
114
|
-
//
|
|
115
|
-
const
|
|
116
|
-
|
|
117
|
-
(
|
|
118
|
-
|
|
119
|
-
if (!btn) btn = Array.from(document.querySelectorAll('button')).find(b => b.innerText?.trim() === 'Sources');
|
|
120
|
-
if (btn) { btn.click(); return 'clicked'; }
|
|
121
|
-
return 'not-found';
|
|
122
|
-
})()
|
|
123
|
-
`]).catch(() => 'not-found');
|
|
124
|
-
|
|
125
|
-
// Wait for the sources sidebar to populate
|
|
126
|
-
await new Promise(r => setTimeout(r, 1500));
|
|
127
|
-
|
|
128
|
-
// Extract sources from the sidebar panel (has proper URLs + titles)
|
|
129
|
-
const raw = await cdp(['eval', tab, `
|
|
130
|
-
(function() {
|
|
131
|
-
// Find the Sources sidebar container by heading
|
|
132
|
-
var headings = Array.from(document.querySelectorAll('h1, h2, h3, [class*="header"]'));
|
|
133
|
-
var sourceHeading = headings.find(h => h.innerText?.trim() === 'Sources');
|
|
134
|
-
if (sourceHeading) {
|
|
135
|
-
var container = sourceHeading.closest('.container') || sourceHeading.parentElement;
|
|
136
|
-
var links = Array.from(container.querySelectorAll('a[href^="http"]'))
|
|
137
|
-
.map(a => ({ url: a.href.split('#')[0], title: a.innerText?.trim().split('\\n')[0] || '' }))
|
|
138
|
-
.filter(s => s.url && ${sourceExcludeFilter})
|
|
139
|
-
.filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
|
|
140
|
-
.slice(0, 8);
|
|
141
|
-
return JSON.stringify(links);
|
|
142
|
-
}
|
|
143
|
-
// Fallback: inline source cards with aria-labels
|
|
144
|
-
var cards = Array.from(document.querySelectorAll('${S.citationButtonPattern}'));
|
|
145
|
-
if (cards.length) {
|
|
146
|
-
return JSON.stringify(cards.map(b => {
|
|
147
|
-
var label = b.getAttribute('aria-label') || '';
|
|
148
|
-
var name = label.match(${S.citationNameRegex})?.[1] || label;
|
|
149
|
-
return { url: '', title: name };
|
|
150
|
-
}));
|
|
151
|
-
}
|
|
152
|
-
// Last resort: page-wide links (may include footer junk)
|
|
153
|
-
return JSON.stringify(Array.from(document.querySelectorAll('a[href^="http"]'))
|
|
154
|
-
.map(a => ({ url: a.href.split('#')[0], title: a.innerText?.trim().split('\\n')[0] || '' }))
|
|
155
|
-
.filter(s => s.url && !s.url.includes('gemini.google') && !s.url.includes('gstatic') && !s.url.includes('google.com/search'))
|
|
156
|
-
.filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
|
|
157
|
-
.slice(0, 8));
|
|
158
|
-
})()
|
|
159
|
-
`]).catch(() => '[]');
|
|
160
|
-
const sources = JSON.parse(raw);
|
|
115
|
+
// Regex parse Markdown links from clipboard — robust against DOM changes
|
|
116
|
+
const sources = Array.from(answer.matchAll(/\[([^\]]+)\]\((https?:\/\/[^\s\)]+)\)/g))
|
|
117
|
+
.map(m => ({ title: m[1], url: m[2] }))
|
|
118
|
+
.filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
|
|
119
|
+
.slice(0, 10);
|
|
161
120
|
|
|
162
121
|
return { answer: answer.trim(), sources };
|
|
163
122
|
}
|