@apmantza/greedysearch-pi 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -136,21 +136,44 @@ async function main() {
136
136
 
137
137
  // Navigate to Copilot homepage and use the chat input
138
138
  await cdp(['nav', tab, 'https://copilot.microsoft.com/'], 35000);
139
- await new Promise(r => setTimeout(r, 1500));
139
+ await new Promise(r => setTimeout(r, 2000));
140
140
  await dismissConsent(tab, cdp);
141
- await handleVerification(tab, cdp, 60000);
141
+
142
+ // Handle verification challenges (Cloudflare Turnstile, Microsoft auth, etc.)
143
+ const verifyResult = await handleVerification(tab, cdp, 90000);
144
+ if (verifyResult === 'needs-human') {
145
+ throw new Error('Copilot verification required — please solve it manually in the browser window');
146
+ }
147
+
148
+ // After verification, page may have redirected or reloaded — wait for it to settle
149
+ if (verifyResult === 'clicked') {
150
+ await new Promise(r => setTimeout(r, 3000));
151
+
152
+ // Re-navigate if we got redirected
153
+ const currentUrl = await cdp(['eval', tab, 'document.location.href']).catch(() => '');
154
+ if (!currentUrl.includes('copilot.microsoft.com')) {
155
+ await cdp(['nav', tab, 'https://copilot.microsoft.com/'], 35000);
156
+ await new Promise(r => setTimeout(r, 2000));
157
+ await dismissConsent(tab, cdp);
158
+ }
159
+ }
142
160
 
143
- // Wait for React app to mount #userInput (up to 8s)
144
- const deadline = Date.now() + 8000;
145
- while (Date.now() < deadline) {
161
+ // Wait for React app to mount #userInput (up to 15s, longer after verification)
162
+ const inputDeadline = Date.now() + 15000;
163
+ while (Date.now() < inputDeadline) {
146
164
  const found = await cdp(['eval', tab, `!!document.querySelector('#userInput')`]).catch(() => 'false');
147
165
  if (found === 'true') break;
148
- await new Promise(r => setTimeout(r, 400));
166
+ await new Promise(r => setTimeout(r, 500));
149
167
  }
150
168
  await new Promise(r => setTimeout(r, 300));
151
169
 
170
+ // Verify input is actually there before proceeding
171
+ const inputReady = await cdp(['eval', tab, `!!document.querySelector('#userInput')`]).catch(() => 'false');
172
+ if (inputReady !== 'true') {
173
+ throw new Error('Copilot input not found — verification may have failed or page is in unexpected state');
174
+ }
175
+
152
176
  await injectClipboardInterceptor(tab);
153
- // Find input and type query
154
177
  await cdp(['click', tab, '#userInput']);
155
178
  await new Promise(r => setTimeout(r, 400));
156
179
  await cdp(['type', tab, query]);
@@ -20,28 +20,98 @@ const CONSENT_JS = `
20
20
  })()
21
21
  `;
22
22
 
23
- // Detect Google's "verify you're human" / unusual traffic page
23
+ // Detect and auto-click human verification challenges (Google, Microsoft, Cloudflare)
24
24
  const VERIFY_DETECT_JS = `
25
25
  (function() {
26
26
  var url = document.location.href;
27
+
28
+ // --- Google "sorry" page (hard CAPTCHA, can't auto-solve) ---
27
29
  if (url.includes('/sorry/') || url.includes('sorry.google')) return 'sorry-page';
28
-
29
- // Simple click-through verify button (not image CAPTCHA)
30
+
31
+ // --- Microsoft account verification page ---
32
+ if (url.includes('login.microsoftonline.com') || url.includes('login.live.com') || url.includes('account.microsoft.com')) {
33
+ // Look for "Verify" or "Continue" buttons on Microsoft auth pages
34
+ var msBtns = Array.from(document.querySelectorAll('button, input[type=submit], a'));
35
+ var msVerify = msBtns.find(b => /verify|continue|next/i.test(b.innerText?.trim() || b.value || ''));
36
+ if (msVerify) { msVerify.click(); return 'clicked-ms-verify:' + (msVerify.innerText?.trim() || msVerify.value); }
37
+ }
38
+
39
+ // --- Cloudflare Turnstile (used by Copilot and many sites) ---
40
+ // Turnstile widget in iframe
41
+ var turnstileIframe = document.querySelector('iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"]');
42
+ if (turnstileIframe) {
43
+ // Try to find and click the checkbox inside the iframe's container
44
+ var turnstileCheckbox = document.querySelector('#cf-turnstile-response, [data-turnstile-callback] input, .cf-turnstile input[type=checkbox]');
45
+ if (turnstileCheckbox && !turnstileCheckbox.checked) {
46
+ turnstileCheckbox.click();
47
+ return 'clicked-turnstile-checkbox';
48
+ }
49
+ // Try clicking the turnstile container itself (some implementations)
50
+ var turnstileContainer = document.querySelector('.cf-turnstile, [data-sitekey]');
51
+ if (turnstileContainer) {
52
+ turnstileContainer.click();
53
+ return 'clicked-turnstile-container';
54
+ }
55
+ }
56
+
57
+ // --- Cloudflare "Verify you are human" challenge page ---
58
+ if (url.includes('challenges.cloudflare.com') || document.querySelector('#challenge-running, #challenge-stage')) {
59
+ var cfCheckbox = document.querySelector('#cf-stage input[type="checkbox"], .ctp-checkbox-container input');
60
+ if (cfCheckbox) { cfCheckbox.click(); return 'clicked-cloudflare-checkbox'; }
61
+ var cfBtn = document.querySelector('#challenge-form button, .cf-challenge button');
62
+ if (cfBtn) { cfBtn.click(); return 'clicked-cloudflare-button'; }
63
+ }
64
+
65
+ // --- Microsoft "I am human" / "Verify" challenge ---
66
+ // Microsoft uses various verification UIs
67
+ var msHumanBtn = document.querySelector('button[id*="i0"], button[id*="id__"]');
68
+ if (msHumanBtn && /verify|human|robot|continue/i.test(msHumanBtn.innerText?.trim())) {
69
+ msHumanBtn.click();
70
+ return 'clicked-ms-human:' + msHumanBtn.innerText.trim();
71
+ }
72
+
73
+ // --- Generic verification buttons (catch-all) ---
30
74
  var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
31
- var verify = btns.find(b => /verify|human|not a robot|continue/i.test(b.innerText?.trim() || b.value || ''));
75
+ var verify = btns.find(b => /^(verify|verification|verify you are human|i am human|not a robot|continue|proceed)$/i.test(b.innerText?.trim() || b.value || ''));
32
76
  if (verify && !document.querySelector('iframe[src*="recaptcha"]')) {
33
77
  verify.click();
34
78
  return 'clicked-verify:' + (verify.innerText?.trim() || verify.value);
35
79
  }
36
80
 
37
- // Unchecked reCAPTCHA / Turnstile checkbox (no image challenge)
38
- var checkbox = document.querySelector('.recaptcha-checkbox-unchecked, input[type=checkbox][id*="recaptcha"], #cf-stage input[type=checkbox]');
39
- if (checkbox) { checkbox.click(); return 'clicked-checkbox'; }
81
+ // --- Google reCAPTCHA (no image challenge, just checkbox) ---
82
+ var recaptchaCheckbox = document.querySelector('.recaptcha-checkbox-unchecked, input[type=checkbox][id*="recaptcha"]');
83
+ if (recaptchaCheckbox) { recaptchaCheckbox.click(); return 'clicked-recaptcha'; }
40
84
 
41
85
  return null;
42
86
  })()
43
87
  `;
44
88
 
89
+ // Retry loop for verification — keeps checking and clicking until page changes or timeout
90
+ const VERIFY_RETRY_JS = `
91
+ (function() {
92
+ var url = document.location.href;
93
+
94
+ // Check if we're still on a verification page
95
+ var isVerifyPage = url.includes('/sorry/') ||
96
+ url.includes('challenges.cloudflare.com') ||
97
+ url.includes('login.microsoftonline.com') ||
98
+ document.querySelector('#challenge-running, #challenge-stage, .cf-turnstile');
99
+
100
+ if (!isVerifyPage) return 'cleared';
101
+
102
+ // Try clicking any verify/continue button again
103
+ var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
104
+ var btn = btns.find(b => /^(verify|continue|next|i am human|not a robot)$/i.test(b.innerText?.trim() || b.value || ''));
105
+ if (btn) { btn.click(); return 'clicked:' + (btn.innerText?.trim() || btn.value); }
106
+
107
+ // Try Turnstile checkbox
108
+ var cf = document.querySelector('#cf-stage input[type="checkbox"], .cf-turnstile input');
109
+ if (cf && !cf.checked) { cf.click(); return 'clicked-turnstile'; }
110
+
111
+ return 'still-verifying';
112
+ })()
113
+ `;
114
+
45
115
  export async function dismissConsent(tab, cdp) {
46
116
  const result = await cdp(['eval', tab, CONSENT_JS]).catch(() => null);
47
117
  if (result && result !== 'null') {
@@ -55,9 +125,9 @@ export async function handleVerification(tab, cdp, waitMs = 60000) {
55
125
 
56
126
  if (!result || result === 'null') return 'clear';
57
127
 
128
+ // Hard CAPTCHA page — wait for user to solve it manually
58
129
  if (result === 'sorry-page') {
59
- // Hard CAPTCHA page — wait for user to solve it manually
60
- process.stderr.write(`[greedysearch] Google verification required — please solve it in the browser window (waiting up to ${waitMs / 1000}s)...\n`);
130
+ process.stderr.write(`[greedysearch] Google CAPTCHA detected — please solve it in the browser window (waiting up to ${Math.floor(waitMs / 1000)}s)...\n`);
61
131
  const deadline = Date.now() + waitMs;
62
132
  while (Date.now() < deadline) {
63
133
  await new Promise(r => setTimeout(r, 2000));
@@ -67,9 +137,33 @@ export async function handleVerification(tab, cdp, waitMs = 60000) {
67
137
  return 'needs-human';
68
138
  }
69
139
 
140
+ // We clicked something — wait for page to update, then keep retrying
70
141
  if (result.startsWith('clicked-')) {
142
+ process.stderr.write(`[greedysearch] Clicked verification: ${result}\n`);
71
143
  await new Promise(r => setTimeout(r, 2000));
72
- return 'clicked';
144
+
145
+ // Keep checking if verification cleared, retry clicking for up to waitMs
146
+ const deadline = Date.now() + waitMs;
147
+ while (Date.now() < deadline) {
148
+ const retryResult = await cdp(['eval', tab, VERIFY_RETRY_JS]).catch(() => null);
149
+
150
+ if (retryResult === 'cleared' || !retryResult || retryResult === 'null') {
151
+ process.stderr.write(`[greedysearch] Verification cleared.\n`);
152
+ await new Promise(r => setTimeout(r, 1000));
153
+ return 'clicked';
154
+ }
155
+
156
+ if (retryResult.startsWith('clicked:')) {
157
+ process.stderr.write(`[greedysearch] Retrying verification click...\n`);
158
+ await new Promise(r => setTimeout(r, 2000));
159
+ }
160
+
161
+ await new Promise(r => setTimeout(r, 1500));
162
+ }
163
+
164
+ // Still stuck — might need user intervention
165
+ process.stderr.write(`[greedysearch] Verification may require manual intervention.\n`);
166
+ return 'needs-human';
73
167
  }
74
168
 
75
169
  return 'clear';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@apmantza/greedysearch-pi",
3
- "version": "1.0.21",
3
+ "version": "1.0.23",
4
4
  "description": "Pi extension: search Perplexity, Bing Copilot, and Google AI in parallel with optional Gemini synthesis — grounded AI answers, not just links",
5
5
  "type": "module",
6
6
  "keywords": [
package/search.mjs CHANGED
@@ -174,6 +174,80 @@ function pickTopSource(out) {
174
174
  return null;
175
175
  }
176
176
 
177
+ function deduplicateSources(out) {
178
+ const seen = new Map(); // url -> { title, engines }
179
+ const engineOrder = ['perplexity', 'bing', 'google'];
180
+
181
+ for (const engine of engineOrder) {
182
+ const r = out[engine];
183
+ if (!r?.sources) continue;
184
+ for (const s of r.sources) {
185
+ const url = s.url?.split('#')[0]?.replace(/\/$/, '');
186
+ if (!url || url.length < 10) continue;
187
+ if (!seen.has(url)) {
188
+ seen.set(url, { url: s.url, title: s.title || '', engines: [engine] });
189
+ } else {
190
+ const existing = seen.get(url);
191
+ if (!existing.engines.includes(engine)) {
192
+ existing.engines.push(engine);
193
+ }
194
+ if (!existing.title && s.title) existing.title = s.title;
195
+ }
196
+ }
197
+ }
198
+
199
+ // Sort by consensus (most engines = highest confidence)
200
+ return Array.from(seen.values())
201
+ .sort((a, b) => b.engines.length - a.engines.length)
202
+ .slice(0, 10);
203
+ }
204
+
205
+ async function synthesizeWithGemini(query, results) {
206
+ // Build a prompt that includes all engine results
207
+ const sources = deduplicateSources(results);
208
+
209
+ let prompt = `Based on the following search results from multiple AI engines, provide a single, synthesized answer to the user's question. Combine the information, resolve any conflicts, and present the most accurate and complete answer.\n\n`;
210
+ prompt += `User's question: "${query}"\n\n`;
211
+
212
+ for (const engine of ['perplexity', 'bing', 'google']) {
213
+ const r = results[engine];
214
+ if (r?.error) {
215
+ prompt += `## ${engine} (failed)\nError: ${r.error}\n\n`;
216
+ } else if (r?.answer) {
217
+ prompt += `## ${engine}\n${r.answer.slice(0, 2000)}\n\n`;
218
+ }
219
+ }
220
+
221
+ prompt += `Provide a synthesized answer that:\n`;
222
+ prompt += `1. Combines the best information from all sources\n`;
223
+ prompt += `2. Notes where sources agree or disagree\n`;
224
+ prompt += `3. Is clear and well-structured\n`;
225
+ prompt += `4. Includes key sources at the end\n`;
226
+
227
+ // Run the query through Gemini extractor
228
+ return new Promise((resolve, reject) => {
229
+ const proc = spawn('node', [join(__dir, 'extractors', 'gemini.mjs'), prompt, '--short'], {
230
+ stdio: ['ignore', 'pipe', 'pipe'],
231
+ });
232
+ let out = '';
233
+ let err = '';
234
+ proc.stdout.on('data', d => out += d);
235
+ proc.stderr.on('data', d => err += d);
236
+ const t = setTimeout(() => {
237
+ proc.kill();
238
+ reject(new Error('Gemini synthesis timed out after 120s'));
239
+ }, 120000);
240
+ proc.on('close', code => {
241
+ clearTimeout(t);
242
+ if (code !== 0) reject(new Error(err.trim() || 'gemini extractor failed'));
243
+ else {
244
+ try { resolve(JSON.parse(out.trim())); }
245
+ catch { reject(new Error(`bad JSON from gemini: ${out.slice(0, 100)}`)); }
246
+ }
247
+ });
248
+ });
249
+ }
250
+
177
251
  function writeOutput(data, outFile) {
178
252
  const json = JSON.stringify(data, null, 2) + '\n';
179
253
  if (outFile) {
@@ -257,12 +331,14 @@ async function main() {
257
331
  const full = args.includes('--full');
258
332
  const short = !full; // brief by default; --full opts into complete answers
259
333
  const fetchSource = args.includes('--fetch-top-source');
334
+ const synthesize = args.includes('--synthesize');
260
335
  const outIdx = args.indexOf('--out');
261
336
  const outFile = outIdx !== -1 ? args[outIdx + 1] : null;
262
337
  const rest = args.filter((a, i) =>
263
338
  a !== '--full' &&
264
339
  a !== '--short' && // keep accepting --short for back-compat
265
340
  a !== '--fetch-top-source' &&
341
+ a !== '--synthesize' &&
266
342
  a !== '--out' &&
267
343
  (outIdx === -1 || i !== outIdx + 1)
268
344
  );
@@ -310,6 +386,25 @@ async function main() {
310
386
  }
311
387
  }
312
388
 
389
+ // Deduplicate sources across all engines
390
+ out._sources = deduplicateSources(out);
391
+
392
+ // Synthesize with Gemini if requested
393
+ if (synthesize) {
394
+ process.stderr.write('[greedysearch] Synthesizing results with Gemini...\n');
395
+ try {
396
+ const synthesis = await synthesizeWithGemini(query, out);
397
+ out._synthesis = {
398
+ answer: synthesis.answer || '',
399
+ sources: synthesis.sources || [],
400
+ synthesized: true,
401
+ };
402
+ } catch (e) {
403
+ process.stderr.write(`[greedysearch] Synthesis failed: ${e.message}\n`);
404
+ out._synthesis = { error: e.message, synthesized: false };
405
+ }
406
+ }
407
+
313
408
  if (fetchSource) {
314
409
  const top = pickTopSource(out);
315
410
  if (top) out._topSource = await fetchTopSource(top.url);