@apmantza/greedysearch-pi 1.0.19 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -5
- package/extractors/bing-copilot.mjs +176 -176
- package/extractors/consent.mjs +76 -76
- package/extractors/google-ai.mjs +161 -161
- package/extractors/mistral.mjs +171 -171
- package/extractors/perplexity.mjs +179 -179
- package/extractors/stackoverflow-ai.mjs +169 -169
- package/index.ts +34 -7
- package/package.json +2 -2
- package/search.mjs +340 -340
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# GreedySearch for Pi
|
|
2
2
|
|
|
3
|
-
Pi extension that adds a `greedy_search` tool — fans out queries to Perplexity, Bing Copilot, and Google AI simultaneously and returns synthesized AI answers.
|
|
3
|
+
Pi extension that adds a `greedy_search` tool — fans out queries to Perplexity, Bing Copilot, and Google AI simultaneously and returns synthesized AI answers. Gemini can act as an optional synthesizer via `synthesize: true`, deduplicating sources across engines and returning a single grounded answer.
|
|
4
4
|
|
|
5
5
|
Forked from [GreedySearch-claude](https://github.com/apmantza/GreedySearch-claude).
|
|
6
6
|
|
|
@@ -26,11 +26,22 @@ You can also invoke it directly:
|
|
|
26
26
|
greedy_search({ query: "best way to handle auth in Next.js 15", engine: "all" })
|
|
27
27
|
```
|
|
28
28
|
|
|
29
|
+
With Gemini synthesis:
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
greedy_search({ query: "best way to handle auth in Next.js 15", engine: "all", synthesize: true })
|
|
33
|
+
```
|
|
34
|
+
|
|
29
35
|
**Engines:**
|
|
30
36
|
- `all` — fan-out to all three in parallel (default, highest confidence)
|
|
31
|
-
- `perplexity` — best for technical Q&A
|
|
32
|
-
- `bing` — best for recent news and Microsoft ecosystem
|
|
33
|
-
- `google` — best for broad coverage
|
|
37
|
+
- `perplexity` / `p` — best for technical Q&A
|
|
38
|
+
- `bing` / `b` — best for recent news and Microsoft ecosystem
|
|
39
|
+
- `google` / `g` — best for broad coverage
|
|
40
|
+
- `gemini` / `gem` — Gemini standalone query
|
|
41
|
+
|
|
42
|
+
**`synthesize: true`**
|
|
43
|
+
|
|
44
|
+
Deduplicates sources across engines by consensus, feeds them to Gemini, and returns a single grounded answer instead of three separate responses. Adds ~30s to the request but reduces downstream token usage when passing results to a model.
|
|
34
45
|
|
|
35
46
|
## Requirements
|
|
36
47
|
|
|
@@ -56,4 +67,4 @@ node ~/.pi/agent/git/GreedySearch-pi/launch.mjs --kill
|
|
|
56
67
|
- `index.ts` — Pi extension, registers `greedy_search` tool
|
|
57
68
|
- `search.mjs` — CLI runner, spawns extractors in parallel
|
|
58
69
|
- `launch.mjs` — launches dedicated Chrome on port 9223
|
|
59
|
-
- `extractors/` — per-engine CDP scrapers (Perplexity, Bing Copilot, Google AI)
|
|
70
|
+
- `extractors/` — per-engine CDP scrapers (Perplexity, Bing Copilot, Google AI, Gemini)
|
|
@@ -1,176 +1,176 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// extractors/bing-copilot.mjs
|
|
3
|
-
// Navigate copilot.microsoft.com, wait for answer to complete, return clean answer + sources.
|
|
4
|
-
//
|
|
5
|
-
// Usage:
|
|
6
|
-
// node extractors/bing-copilot.mjs "<query>" [--tab <prefix>]
|
|
7
|
-
//
|
|
8
|
-
// Output (stdout): JSON { answer, sources, query, url }
|
|
9
|
-
// Errors go to stderr only — stdout is always clean JSON for piping.
|
|
10
|
-
|
|
11
|
-
import { readFileSync, existsSync } from 'fs';
|
|
12
|
-
import { spawn } from 'child_process';
|
|
13
|
-
import { tmpdir, homedir } from 'os';
|
|
14
|
-
import { join } from 'path';
|
|
15
|
-
import { dismissConsent, handleVerification } from './consent.mjs';
|
|
16
|
-
|
|
17
|
-
const CDP = join(homedir(), '.claude', 'skills', 'chrome-cdp', 'scripts', 'cdp.mjs');
|
|
18
|
-
const PAGES_CACHE = `${tmpdir().replace(/\\/g, '/')}/cdp-pages.json`;
|
|
19
|
-
|
|
20
|
-
const COPY_POLL_INTERVAL = 700;
|
|
21
|
-
const COPY_TIMEOUT = 60000;
|
|
22
|
-
|
|
23
|
-
// ---------------------------------------------------------------------------
|
|
24
|
-
|
|
25
|
-
function cdp(args, timeoutMs = 30000) {
|
|
26
|
-
return new Promise((resolve, reject) => {
|
|
27
|
-
const proc = spawn('node', [CDP, ...args], { stdio: ['ignore', 'pipe', 'pipe'] });
|
|
28
|
-
let out = '';
|
|
29
|
-
let err = '';
|
|
30
|
-
proc.stdout.on('data', d => out += d);
|
|
31
|
-
proc.stderr.on('data', d => err += d);
|
|
32
|
-
const timer = setTimeout(() => { proc.kill(); reject(new Error(`cdp timeout: ${args[0]}`)); }, timeoutMs);
|
|
33
|
-
proc.on('close', code => {
|
|
34
|
-
clearTimeout(timer);
|
|
35
|
-
if (code !== 0) reject(new Error(err.trim() || `cdp exit ${code}`));
|
|
36
|
-
else resolve(out.trim());
|
|
37
|
-
});
|
|
38
|
-
});
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
async function getOrOpenTab(tabPrefix) {
|
|
42
|
-
if (tabPrefix) return tabPrefix;
|
|
43
|
-
|
|
44
|
-
if (existsSync(PAGES_CACHE)) {
|
|
45
|
-
const pages = JSON.parse(readFileSync(PAGES_CACHE, 'utf8'));
|
|
46
|
-
const existing = pages.find(p => p.url.includes('copilot.microsoft.com'));
|
|
47
|
-
if (existing) return existing.targetId.slice(0, 8);
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
const list = await cdp(['list']);
|
|
51
|
-
const firstLine = list.split('\n')[0];
|
|
52
|
-
if (!firstLine) throw new Error('No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?');
|
|
53
|
-
return firstLine.slice(0, 8);
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
async function injectClipboardInterceptor(tab) {
|
|
57
|
-
await cdp(['eval', tab, `
|
|
58
|
-
window.__bingClipboard = null;
|
|
59
|
-
const _origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
|
|
60
|
-
navigator.clipboard.writeText = function(text) {
|
|
61
|
-
window.__bingClipboard = text;
|
|
62
|
-
return _origWriteText(text);
|
|
63
|
-
};
|
|
64
|
-
const _origWrite = navigator.clipboard.write.bind(navigator.clipboard);
|
|
65
|
-
navigator.clipboard.write = async function(items) {
|
|
66
|
-
try {
|
|
67
|
-
for (const item of items) {
|
|
68
|
-
if (item.types && item.types.includes('text/plain')) {
|
|
69
|
-
const blob = await item.getType('text/plain');
|
|
70
|
-
window.__bingClipboard = await blob.text();
|
|
71
|
-
break;
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
|
-
} catch(e) {}
|
|
75
|
-
return _origWrite(items);
|
|
76
|
-
};
|
|
77
|
-
`]);
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
async function waitForCopyButton(tab) {
|
|
81
|
-
const deadline = Date.now() + COPY_TIMEOUT;
|
|
82
|
-
while (Date.now() < deadline) {
|
|
83
|
-
await new Promise(r => setTimeout(r, COPY_POLL_INTERVAL));
|
|
84
|
-
const found = await cdp(['eval', tab,
|
|
85
|
-
`!!document.querySelector('button[data-testid="copy-ai-message-button"]')`
|
|
86
|
-
]).catch(() => 'false');
|
|
87
|
-
if (found === 'true') return;
|
|
88
|
-
}
|
|
89
|
-
throw new Error(`Copilot copy button did not appear within ${COPY_TIMEOUT}ms`);
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
async function extractAnswer(tab) {
|
|
93
|
-
await cdp(['eval', tab, `document.querySelector('button[data-testid="copy-ai-message-button"]')?.click()`]);
|
|
94
|
-
await new Promise(r => setTimeout(r, 400));
|
|
95
|
-
|
|
96
|
-
const answer = await cdp(['eval', tab, `window.__bingClipboard || ''`]);
|
|
97
|
-
if (!answer) throw new Error('Clipboard interceptor returned empty text');
|
|
98
|
-
|
|
99
|
-
const raw = await cdp(['eval', tab, `
|
|
100
|
-
(function() {
|
|
101
|
-
var sources = Array.from(document.querySelectorAll('a[href^="http"][target="_blank"]'))
|
|
102
|
-
.map(a => ({ url: a.href, title: a.innerText?.trim().split('\\n')[0] || a.title || '' }))
|
|
103
|
-
.filter(s => s.url && !s.url.includes('copilot.microsoft.com'))
|
|
104
|
-
.filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
|
|
105
|
-
.slice(0, 10);
|
|
106
|
-
return JSON.stringify(sources);
|
|
107
|
-
})()
|
|
108
|
-
`]).catch(() => '[]');
|
|
109
|
-
const sources = JSON.parse(raw);
|
|
110
|
-
|
|
111
|
-
return { answer: answer.trim(), sources };
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
// ---------------------------------------------------------------------------
|
|
115
|
-
|
|
116
|
-
async function main() {
|
|
117
|
-
const args = process.argv.slice(2);
|
|
118
|
-
if (!args.length || args[0] === '--help') {
|
|
119
|
-
process.stderr.write('Usage: node extractors/bing-copilot.mjs "<query>" [--tab <prefix>]\n');
|
|
120
|
-
process.exit(1);
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
const short = args.includes('--short');
|
|
124
|
-
const rest = args.filter(a => a !== '--short');
|
|
125
|
-
const tabFlagIdx = rest.indexOf('--tab');
|
|
126
|
-
const tabPrefix = tabFlagIdx !== -1 ? rest[tabFlagIdx + 1] : null;
|
|
127
|
-
const query = tabFlagIdx !== -1
|
|
128
|
-
? rest.filter((_, i) => i !== tabFlagIdx && i !== tabFlagIdx + 1).join(' ')
|
|
129
|
-
: rest.join(' ');
|
|
130
|
-
|
|
131
|
-
try {
|
|
132
|
-
await cdp(['list']);
|
|
133
|
-
const tab = await getOrOpenTab(tabPrefix);
|
|
134
|
-
|
|
135
|
-
// Navigate to Copilot homepage and use the chat input
|
|
136
|
-
await cdp(['nav', tab, 'https://copilot.microsoft.com/'], 35000);
|
|
137
|
-
await new Promise(r => setTimeout(r, 1500));
|
|
138
|
-
await dismissConsent(tab, cdp);
|
|
139
|
-
await handleVerification(tab, cdp, 60000);
|
|
140
|
-
|
|
141
|
-
// Wait for React app to mount #userInput (up to 8s)
|
|
142
|
-
const deadline = Date.now() + 8000;
|
|
143
|
-
while (Date.now() < deadline) {
|
|
144
|
-
const found = await cdp(['eval', tab, `!!document.querySelector('#userInput')`]).catch(() => 'false');
|
|
145
|
-
if (found === 'true') break;
|
|
146
|
-
await new Promise(r => setTimeout(r, 400));
|
|
147
|
-
}
|
|
148
|
-
await new Promise(r => setTimeout(r, 300));
|
|
149
|
-
|
|
150
|
-
await injectClipboardInterceptor(tab);
|
|
151
|
-
// Find input and type query
|
|
152
|
-
await cdp(['click', tab, '#userInput']);
|
|
153
|
-
await new Promise(r => setTimeout(r, 400));
|
|
154
|
-
await cdp(['type', tab, query]);
|
|
155
|
-
await new Promise(r => setTimeout(r, 400));
|
|
156
|
-
|
|
157
|
-
// Submit with Enter (most reliable across locales and Chrome instances)
|
|
158
|
-
await cdp(['eval', tab,
|
|
159
|
-
`document.querySelector('#userInput')?.dispatchEvent(new KeyboardEvent('keydown',{key:'Enter',bubbles:true,keyCode:13})), 'ok'`
|
|
160
|
-
]);
|
|
161
|
-
|
|
162
|
-
await waitForCopyButton(tab);
|
|
163
|
-
|
|
164
|
-
const { answer, sources } = await extractAnswer(tab);
|
|
165
|
-
if (!answer) throw new Error('No answer extracted — Copilot may not have responded');
|
|
166
|
-
const out = short ? answer.slice(0, 300).replace(/\s+\S*$/, '') + '…' : answer;
|
|
167
|
-
|
|
168
|
-
const finalUrl = await cdp(['eval', tab, 'document.location.href']).catch(() => '');
|
|
169
|
-
process.stdout.write(JSON.stringify({ query, url: finalUrl, answer: out, sources }, null, 2) + '\n');
|
|
170
|
-
} catch (e) {
|
|
171
|
-
process.stderr.write(`Error: ${e.message}\n`);
|
|
172
|
-
process.exit(1);
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
main();
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// extractors/bing-copilot.mjs
|
|
3
|
+
// Navigate copilot.microsoft.com, wait for answer to complete, return clean answer + sources.
|
|
4
|
+
//
|
|
5
|
+
// Usage:
|
|
6
|
+
// node extractors/bing-copilot.mjs "<query>" [--tab <prefix>]
|
|
7
|
+
//
|
|
8
|
+
// Output (stdout): JSON { answer, sources, query, url }
|
|
9
|
+
// Errors go to stderr only — stdout is always clean JSON for piping.
|
|
10
|
+
|
|
11
|
+
import { readFileSync, existsSync } from 'fs';
|
|
12
|
+
import { spawn } from 'child_process';
|
|
13
|
+
import { tmpdir, homedir } from 'os';
|
|
14
|
+
import { join } from 'path';
|
|
15
|
+
import { dismissConsent, handleVerification } from './consent.mjs';
|
|
16
|
+
|
|
17
|
+
const CDP = join(homedir(), '.claude', 'skills', 'chrome-cdp', 'scripts', 'cdp.mjs');
|
|
18
|
+
const PAGES_CACHE = `${tmpdir().replace(/\\/g, '/')}/cdp-pages.json`;
|
|
19
|
+
|
|
20
|
+
const COPY_POLL_INTERVAL = 700;
|
|
21
|
+
const COPY_TIMEOUT = 60000;
|
|
22
|
+
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
function cdp(args, timeoutMs = 30000) {
|
|
26
|
+
return new Promise((resolve, reject) => {
|
|
27
|
+
const proc = spawn('node', [CDP, ...args], { stdio: ['ignore', 'pipe', 'pipe'] });
|
|
28
|
+
let out = '';
|
|
29
|
+
let err = '';
|
|
30
|
+
proc.stdout.on('data', d => out += d);
|
|
31
|
+
proc.stderr.on('data', d => err += d);
|
|
32
|
+
const timer = setTimeout(() => { proc.kill(); reject(new Error(`cdp timeout: ${args[0]}`)); }, timeoutMs);
|
|
33
|
+
proc.on('close', code => {
|
|
34
|
+
clearTimeout(timer);
|
|
35
|
+
if (code !== 0) reject(new Error(err.trim() || `cdp exit ${code}`));
|
|
36
|
+
else resolve(out.trim());
|
|
37
|
+
});
|
|
38
|
+
});
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
async function getOrOpenTab(tabPrefix) {
|
|
42
|
+
if (tabPrefix) return tabPrefix;
|
|
43
|
+
|
|
44
|
+
if (existsSync(PAGES_CACHE)) {
|
|
45
|
+
const pages = JSON.parse(readFileSync(PAGES_CACHE, 'utf8'));
|
|
46
|
+
const existing = pages.find(p => p.url.includes('copilot.microsoft.com'));
|
|
47
|
+
if (existing) return existing.targetId.slice(0, 8);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
const list = await cdp(['list']);
|
|
51
|
+
const firstLine = list.split('\n')[0];
|
|
52
|
+
if (!firstLine) throw new Error('No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?');
|
|
53
|
+
return firstLine.slice(0, 8);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
async function injectClipboardInterceptor(tab) {
|
|
57
|
+
await cdp(['eval', tab, `
|
|
58
|
+
window.__bingClipboard = null;
|
|
59
|
+
const _origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
|
|
60
|
+
navigator.clipboard.writeText = function(text) {
|
|
61
|
+
window.__bingClipboard = text;
|
|
62
|
+
return _origWriteText(text);
|
|
63
|
+
};
|
|
64
|
+
const _origWrite = navigator.clipboard.write.bind(navigator.clipboard);
|
|
65
|
+
navigator.clipboard.write = async function(items) {
|
|
66
|
+
try {
|
|
67
|
+
for (const item of items) {
|
|
68
|
+
if (item.types && item.types.includes('text/plain')) {
|
|
69
|
+
const blob = await item.getType('text/plain');
|
|
70
|
+
window.__bingClipboard = await blob.text();
|
|
71
|
+
break;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
} catch(e) {}
|
|
75
|
+
return _origWrite(items);
|
|
76
|
+
};
|
|
77
|
+
`]);
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
async function waitForCopyButton(tab) {
|
|
81
|
+
const deadline = Date.now() + COPY_TIMEOUT;
|
|
82
|
+
while (Date.now() < deadline) {
|
|
83
|
+
await new Promise(r => setTimeout(r, COPY_POLL_INTERVAL));
|
|
84
|
+
const found = await cdp(['eval', tab,
|
|
85
|
+
`!!document.querySelector('button[data-testid="copy-ai-message-button"]')`
|
|
86
|
+
]).catch(() => 'false');
|
|
87
|
+
if (found === 'true') return;
|
|
88
|
+
}
|
|
89
|
+
throw new Error(`Copilot copy button did not appear within ${COPY_TIMEOUT}ms`);
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
async function extractAnswer(tab) {
|
|
93
|
+
await cdp(['eval', tab, `document.querySelector('button[data-testid="copy-ai-message-button"]')?.click()`]);
|
|
94
|
+
await new Promise(r => setTimeout(r, 400));
|
|
95
|
+
|
|
96
|
+
const answer = await cdp(['eval', tab, `window.__bingClipboard || ''`]);
|
|
97
|
+
if (!answer) throw new Error('Clipboard interceptor returned empty text');
|
|
98
|
+
|
|
99
|
+
const raw = await cdp(['eval', tab, `
|
|
100
|
+
(function() {
|
|
101
|
+
var sources = Array.from(document.querySelectorAll('a[href^="http"][target="_blank"]'))
|
|
102
|
+
.map(a => ({ url: a.href, title: a.innerText?.trim().split('\\n')[0] || a.title || '' }))
|
|
103
|
+
.filter(s => s.url && !s.url.includes('copilot.microsoft.com'))
|
|
104
|
+
.filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
|
|
105
|
+
.slice(0, 10);
|
|
106
|
+
return JSON.stringify(sources);
|
|
107
|
+
})()
|
|
108
|
+
`]).catch(() => '[]');
|
|
109
|
+
const sources = JSON.parse(raw);
|
|
110
|
+
|
|
111
|
+
return { answer: answer.trim(), sources };
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// ---------------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
async function main() {
|
|
117
|
+
const args = process.argv.slice(2);
|
|
118
|
+
if (!args.length || args[0] === '--help') {
|
|
119
|
+
process.stderr.write('Usage: node extractors/bing-copilot.mjs "<query>" [--tab <prefix>]\n');
|
|
120
|
+
process.exit(1);
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
const short = args.includes('--short');
|
|
124
|
+
const rest = args.filter(a => a !== '--short');
|
|
125
|
+
const tabFlagIdx = rest.indexOf('--tab');
|
|
126
|
+
const tabPrefix = tabFlagIdx !== -1 ? rest[tabFlagIdx + 1] : null;
|
|
127
|
+
const query = tabFlagIdx !== -1
|
|
128
|
+
? rest.filter((_, i) => i !== tabFlagIdx && i !== tabFlagIdx + 1).join(' ')
|
|
129
|
+
: rest.join(' ');
|
|
130
|
+
|
|
131
|
+
try {
|
|
132
|
+
await cdp(['list']);
|
|
133
|
+
const tab = await getOrOpenTab(tabPrefix);
|
|
134
|
+
|
|
135
|
+
// Navigate to Copilot homepage and use the chat input
|
|
136
|
+
await cdp(['nav', tab, 'https://copilot.microsoft.com/'], 35000);
|
|
137
|
+
await new Promise(r => setTimeout(r, 1500));
|
|
138
|
+
await dismissConsent(tab, cdp);
|
|
139
|
+
await handleVerification(tab, cdp, 60000);
|
|
140
|
+
|
|
141
|
+
// Wait for React app to mount #userInput (up to 8s)
|
|
142
|
+
const deadline = Date.now() + 8000;
|
|
143
|
+
while (Date.now() < deadline) {
|
|
144
|
+
const found = await cdp(['eval', tab, `!!document.querySelector('#userInput')`]).catch(() => 'false');
|
|
145
|
+
if (found === 'true') break;
|
|
146
|
+
await new Promise(r => setTimeout(r, 400));
|
|
147
|
+
}
|
|
148
|
+
await new Promise(r => setTimeout(r, 300));
|
|
149
|
+
|
|
150
|
+
await injectClipboardInterceptor(tab);
|
|
151
|
+
// Find input and type query
|
|
152
|
+
await cdp(['click', tab, '#userInput']);
|
|
153
|
+
await new Promise(r => setTimeout(r, 400));
|
|
154
|
+
await cdp(['type', tab, query]);
|
|
155
|
+
await new Promise(r => setTimeout(r, 400));
|
|
156
|
+
|
|
157
|
+
// Submit with Enter (most reliable across locales and Chrome instances)
|
|
158
|
+
await cdp(['eval', tab,
|
|
159
|
+
`document.querySelector('#userInput')?.dispatchEvent(new KeyboardEvent('keydown',{key:'Enter',bubbles:true,keyCode:13})), 'ok'`
|
|
160
|
+
]);
|
|
161
|
+
|
|
162
|
+
await waitForCopyButton(tab);
|
|
163
|
+
|
|
164
|
+
const { answer, sources } = await extractAnswer(tab);
|
|
165
|
+
if (!answer) throw new Error('No answer extracted — Copilot may not have responded');
|
|
166
|
+
const out = short ? answer.slice(0, 300).replace(/\s+\S*$/, '') + '…' : answer;
|
|
167
|
+
|
|
168
|
+
const finalUrl = await cdp(['eval', tab, 'document.location.href']).catch(() => '');
|
|
169
|
+
process.stdout.write(JSON.stringify({ query, url: finalUrl, answer: out, sources }, null, 2) + '\n');
|
|
170
|
+
} catch (e) {
|
|
171
|
+
process.stderr.write(`Error: ${e.message}\n`);
|
|
172
|
+
process.exit(1);
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
main();
|
package/extractors/consent.mjs
CHANGED
|
@@ -1,76 +1,76 @@
|
|
|
1
|
-
// consent.mjs — auto-dismiss common cookie/consent banners and human-verification pages
|
|
2
|
-
// Call dismissConsent(tab, cdpFn) after navigating to any page.
|
|
3
|
-
|
|
4
|
-
const CONSENT_JS = `
|
|
5
|
-
(function() {
|
|
6
|
-
// Google consent page (consent.google.com)
|
|
7
|
-
var g = document.querySelector('#L2AGLb, button[jsname="b3VHJd"], .tHlp8d');
|
|
8
|
-
if (g) { g.click(); return 'google'; }
|
|
9
|
-
|
|
10
|
-
// OneTrust (used by many sites including Stack Overflow)
|
|
11
|
-
var ot = document.querySelector('#onetrust-accept-btn-handler, .onetrust-accept-btn-handler');
|
|
12
|
-
if (ot) { ot.click(); return 'onetrust'; }
|
|
13
|
-
|
|
14
|
-
// Generic "accept all" / "agree" buttons
|
|
15
|
-
var btns = Array.from(document.querySelectorAll('button, a[role=button]'));
|
|
16
|
-
var accept = btns.find(b => /^(accept all|accept cookies|agree|i agree|got it|allow all|allow cookies)$/i.test(b.innerText?.trim()));
|
|
17
|
-
if (accept) { accept.click(); return 'generic:' + accept.innerText.trim(); }
|
|
18
|
-
|
|
19
|
-
return null;
|
|
20
|
-
})()
|
|
21
|
-
`;
|
|
22
|
-
|
|
23
|
-
// Detect Google's "verify you're human" / unusual traffic page
|
|
24
|
-
const VERIFY_DETECT_JS = `
|
|
25
|
-
(function() {
|
|
26
|
-
var url = document.location.href;
|
|
27
|
-
if (url.includes('/sorry/') || url.includes('sorry.google')) return 'sorry-page';
|
|
28
|
-
|
|
29
|
-
// Simple click-through verify button (not image CAPTCHA)
|
|
30
|
-
var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
|
|
31
|
-
var verify = btns.find(b => /verify|human|not a robot|continue/i.test(b.innerText?.trim() || b.value || ''));
|
|
32
|
-
if (verify && !document.querySelector('iframe[src*="recaptcha"]')) {
|
|
33
|
-
verify.click();
|
|
34
|
-
return 'clicked-verify:' + (verify.innerText?.trim() || verify.value);
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
// Unchecked reCAPTCHA / Turnstile checkbox (no image challenge)
|
|
38
|
-
var checkbox = document.querySelector('.recaptcha-checkbox-unchecked, input[type=checkbox][id*="recaptcha"], #cf-stage input[type=checkbox]');
|
|
39
|
-
if (checkbox) { checkbox.click(); return 'clicked-checkbox'; }
|
|
40
|
-
|
|
41
|
-
return null;
|
|
42
|
-
})()
|
|
43
|
-
`;
|
|
44
|
-
|
|
45
|
-
export async function dismissConsent(tab, cdp) {
|
|
46
|
-
const result = await cdp(['eval', tab, CONSENT_JS]).catch(() => null);
|
|
47
|
-
if (result && result !== 'null') {
|
|
48
|
-
await new Promise(r => setTimeout(r, 1500));
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
// Returns 'clear' | 'clicked' | 'needs-human'
|
|
53
|
-
export async function handleVerification(tab, cdp, waitMs = 60000) {
|
|
54
|
-
const result = await cdp(['eval', tab, VERIFY_DETECT_JS]).catch(() => null);
|
|
55
|
-
|
|
56
|
-
if (!result || result === 'null') return 'clear';
|
|
57
|
-
|
|
58
|
-
if (result === 'sorry-page') {
|
|
59
|
-
// Hard CAPTCHA page — wait for user to solve it manually
|
|
60
|
-
process.stderr.write(`[greedysearch] Google verification required — please solve it in the browser window (waiting up to ${waitMs / 1000}s)...\n`);
|
|
61
|
-
const deadline = Date.now() + waitMs;
|
|
62
|
-
while (Date.now() < deadline) {
|
|
63
|
-
await new Promise(r => setTimeout(r, 2000));
|
|
64
|
-
const url = await cdp(['eval', tab, 'document.location.href']).catch(() => '');
|
|
65
|
-
if (!url.includes('/sorry/')) return 'cleared-by-user';
|
|
66
|
-
}
|
|
67
|
-
return 'needs-human';
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
if (result.startsWith('clicked-')) {
|
|
71
|
-
await new Promise(r => setTimeout(r, 2000));
|
|
72
|
-
return 'clicked';
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
return 'clear';
|
|
76
|
-
}
|
|
1
|
+
// consent.mjs — auto-dismiss common cookie/consent banners and human-verification pages
|
|
2
|
+
// Call dismissConsent(tab, cdpFn) after navigating to any page.
|
|
3
|
+
|
|
4
|
+
const CONSENT_JS = `
|
|
5
|
+
(function() {
|
|
6
|
+
// Google consent page (consent.google.com)
|
|
7
|
+
var g = document.querySelector('#L2AGLb, button[jsname="b3VHJd"], .tHlp8d');
|
|
8
|
+
if (g) { g.click(); return 'google'; }
|
|
9
|
+
|
|
10
|
+
// OneTrust (used by many sites including Stack Overflow)
|
|
11
|
+
var ot = document.querySelector('#onetrust-accept-btn-handler, .onetrust-accept-btn-handler');
|
|
12
|
+
if (ot) { ot.click(); return 'onetrust'; }
|
|
13
|
+
|
|
14
|
+
// Generic "accept all" / "agree" buttons
|
|
15
|
+
var btns = Array.from(document.querySelectorAll('button, a[role=button]'));
|
|
16
|
+
var accept = btns.find(b => /^(accept all|accept cookies|agree|i agree|got it|allow all|allow cookies)$/i.test(b.innerText?.trim()));
|
|
17
|
+
if (accept) { accept.click(); return 'generic:' + accept.innerText.trim(); }
|
|
18
|
+
|
|
19
|
+
return null;
|
|
20
|
+
})()
|
|
21
|
+
`;
|
|
22
|
+
|
|
23
|
+
// Detect Google's "verify you're human" / unusual traffic page
|
|
24
|
+
const VERIFY_DETECT_JS = `
|
|
25
|
+
(function() {
|
|
26
|
+
var url = document.location.href;
|
|
27
|
+
if (url.includes('/sorry/') || url.includes('sorry.google')) return 'sorry-page';
|
|
28
|
+
|
|
29
|
+
// Simple click-through verify button (not image CAPTCHA)
|
|
30
|
+
var btns = Array.from(document.querySelectorAll('button, input[type=submit], a[role=button]'));
|
|
31
|
+
var verify = btns.find(b => /verify|human|not a robot|continue/i.test(b.innerText?.trim() || b.value || ''));
|
|
32
|
+
if (verify && !document.querySelector('iframe[src*="recaptcha"]')) {
|
|
33
|
+
verify.click();
|
|
34
|
+
return 'clicked-verify:' + (verify.innerText?.trim() || verify.value);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Unchecked reCAPTCHA / Turnstile checkbox (no image challenge)
|
|
38
|
+
var checkbox = document.querySelector('.recaptcha-checkbox-unchecked, input[type=checkbox][id*="recaptcha"], #cf-stage input[type=checkbox]');
|
|
39
|
+
if (checkbox) { checkbox.click(); return 'clicked-checkbox'; }
|
|
40
|
+
|
|
41
|
+
return null;
|
|
42
|
+
})()
|
|
43
|
+
`;
|
|
44
|
+
|
|
45
|
+
export async function dismissConsent(tab, cdp) {
|
|
46
|
+
const result = await cdp(['eval', tab, CONSENT_JS]).catch(() => null);
|
|
47
|
+
if (result && result !== 'null') {
|
|
48
|
+
await new Promise(r => setTimeout(r, 1500));
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// Returns 'clear' | 'clicked' | 'needs-human'
|
|
53
|
+
export async function handleVerification(tab, cdp, waitMs = 60000) {
|
|
54
|
+
const result = await cdp(['eval', tab, VERIFY_DETECT_JS]).catch(() => null);
|
|
55
|
+
|
|
56
|
+
if (!result || result === 'null') return 'clear';
|
|
57
|
+
|
|
58
|
+
if (result === 'sorry-page') {
|
|
59
|
+
// Hard CAPTCHA page — wait for user to solve it manually
|
|
60
|
+
process.stderr.write(`[greedysearch] Google verification required — please solve it in the browser window (waiting up to ${waitMs / 1000}s)...\n`);
|
|
61
|
+
const deadline = Date.now() + waitMs;
|
|
62
|
+
while (Date.now() < deadline) {
|
|
63
|
+
await new Promise(r => setTimeout(r, 2000));
|
|
64
|
+
const url = await cdp(['eval', tab, 'document.location.href']).catch(() => '');
|
|
65
|
+
if (!url.includes('/sorry/')) return 'cleared-by-user';
|
|
66
|
+
}
|
|
67
|
+
return 'needs-human';
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
if (result.startsWith('clicked-')) {
|
|
71
|
+
await new Promise(r => setTimeout(r, 2000));
|
|
72
|
+
return 'clicked';
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
return 'clear';
|
|
76
|
+
}
|