@apmantza/greedysearch-pi 1.4.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -24
- package/extractors/bing-copilot.mjs +195 -204
- package/extractors/consent.mjs +255 -248
- package/extractors/gemini.mjs +12 -53
- package/extractors/google-ai.mjs +162 -165
- package/extractors/perplexity.mjs +181 -184
- package/package.json +2 -2
- package/search.mjs +997 -996
|
@@ -1,184 +1,181 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// extractors/perplexity.mjs
|
|
3
|
-
// Navigate Perplexity, wait for streaming to complete, return clean answer + sources.
|
|
4
|
-
//
|
|
5
|
-
// Usage:
|
|
6
|
-
// node extractors/perplexity.mjs "<query>" [--tab <prefix>]
|
|
7
|
-
//
|
|
8
|
-
// Output (stdout): JSON { answer, sources, query, url }
|
|
9
|
-
// Errors go to stderr only — stdout is always clean JSON for piping.
|
|
10
|
-
|
|
11
|
-
import { readFileSync, existsSync } from 'fs';
|
|
12
|
-
import { spawn } from 'child_process';
|
|
13
|
-
import { tmpdir } from 'os';
|
|
14
|
-
import { join, dirname } from 'path';
|
|
15
|
-
import { fileURLToPath } from 'url';
|
|
16
|
-
import { dismissConsent } from './consent.mjs';
|
|
17
|
-
import { SELECTORS } from './selectors.mjs';
|
|
18
|
-
|
|
19
|
-
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
20
|
-
const CDP = join(__dir, '..', 'cdp.mjs');
|
|
21
|
-
const PAGES_CACHE = `${tmpdir().replace(/\\/g, '/')}/cdp-pages.json`;
|
|
22
|
-
|
|
23
|
-
const COPY_POLL_INTERVAL = 600;
|
|
24
|
-
const COPY_TIMEOUT = 30000;
|
|
25
|
-
|
|
26
|
-
const S = SELECTORS.perplexity;
|
|
27
|
-
|
|
28
|
-
// ---------------------------------------------------------------------------
|
|
29
|
-
|
|
30
|
-
function cdp(args, timeoutMs = 30000) {
|
|
31
|
-
return new Promise((resolve, reject) => {
|
|
32
|
-
const proc = spawn('node', [CDP, ...args], { stdio: ['ignore', 'pipe', 'pipe'] });
|
|
33
|
-
let out = '';
|
|
34
|
-
let err = '';
|
|
35
|
-
proc.stdout.on('data', d => out += d);
|
|
36
|
-
proc.stderr.on('data', d => err += d);
|
|
37
|
-
const timer = setTimeout(() => { proc.kill(); reject(new Error(`cdp timeout: ${args[0]}`)); }, timeoutMs);
|
|
38
|
-
proc.on('close', code => {
|
|
39
|
-
clearTimeout(timer);
|
|
40
|
-
if (code !== 0) reject(new Error(err.trim() || `cdp exit ${code}`));
|
|
41
|
-
else resolve(out.trim());
|
|
42
|
-
});
|
|
43
|
-
});
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
async function getOrOpenTab(tabPrefix) {
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
if (
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
const
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
const
|
|
132
|
-
const
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
await
|
|
158
|
-
|
|
159
|
-
await
|
|
160
|
-
await
|
|
161
|
-
|
|
162
|
-
await cdp(['
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
const
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
process.
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
main();
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// extractors/perplexity.mjs
|
|
3
|
+
// Navigate Perplexity, wait for streaming to complete, return clean answer + sources.
|
|
4
|
+
//
|
|
5
|
+
// Usage:
|
|
6
|
+
// node extractors/perplexity.mjs "<query>" [--tab <prefix>]
|
|
7
|
+
//
|
|
8
|
+
// Output (stdout): JSON { answer, sources, query, url }
|
|
9
|
+
// Errors go to stderr only — stdout is always clean JSON for piping.
|
|
10
|
+
|
|
11
|
+
import { readFileSync, existsSync } from 'fs';
|
|
12
|
+
import { spawn } from 'child_process';
|
|
13
|
+
import { tmpdir } from 'os';
|
|
14
|
+
import { join, dirname } from 'path';
|
|
15
|
+
import { fileURLToPath } from 'url';
|
|
16
|
+
import { dismissConsent } from './consent.mjs';
|
|
17
|
+
import { SELECTORS } from './selectors.mjs';
|
|
18
|
+
|
|
19
|
+
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
20
|
+
const CDP = join(__dir, '..', 'cdp.mjs');
|
|
21
|
+
const PAGES_CACHE = `${tmpdir().replace(/\\/g, '/')}/cdp-pages.json`;
|
|
22
|
+
|
|
23
|
+
const COPY_POLL_INTERVAL = 600;
|
|
24
|
+
const COPY_TIMEOUT = 30000;
|
|
25
|
+
|
|
26
|
+
const S = SELECTORS.perplexity;
|
|
27
|
+
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
/**
 * Run the CDP helper script (`node <CDP> ...args`) as a child process and
 * collect its output.
 *
 * @param {string[]} args - Arguments passed to the helper after the script path;
 *   `args[0]` is used in the timeout error message.
 * @param {number} [timeoutMs=30000] - Milliseconds to wait before killing the child.
 * @returns {Promise<string>} Trimmed stdout when the child exits with code 0.
 *   Rejects with an Error on timeout (`cdp timeout: <args[0]>`), on spawn
 *   failure, or on a non-zero exit (trimmed stderr, or `cdp exit <code>` when
 *   stderr is empty).
 */
function cdp(args, timeoutMs = 30000) {
  return new Promise((resolve, reject) => {
    const proc = spawn('node', [CDP, ...args], { stdio: ['ignore', 'pipe', 'pipe'] });

    // Decode as UTF-8 at the stream level so a multi-byte character that is
    // split across two chunks is not corrupted by per-chunk string conversion.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');

    let out = '';
    let err = '';
    proc.stdout.on('data', (chunk) => { out += chunk; });
    proc.stderr.on('data', (chunk) => { err += chunk; });

    const timer = setTimeout(() => {
      proc.kill();
      reject(new Error(`cdp timeout: ${args[0]}`));
    }, timeoutMs);

    // Without this listener a failed spawn is emitted as an unhandled 'error'
    // event, which crashes the process instead of rejecting the promise.
    proc.on('error', (spawnErr) => {
      clearTimeout(timer);
      reject(new Error(`cdp spawn failed: ${spawnErr.message}`, { cause: spawnErr }));
    });

    proc.on('close', (code) => {
      clearTimeout(timer);
      // After a timeout or spawn error the promise is already settled, so the
      // calls below are no-ops in those cases.
      if (code !== 0) reject(new Error(err.trim() || `cdp exit ${code}`));
      else resolve(out.trim());
    });
  });
}
|
|
45
|
+
|
|
46
|
+
/**
 * Resolve the tab to drive: reuse the caller-supplied prefix, or open a fresh
 * `about:blank` tab through an existing one.
 *
 * @param {string|null|undefined} tabPrefix - Tab id prefix to reuse; when falsy
 *   a new tab is created.
 * @returns {Promise<string>} `tabPrefix` unchanged, or the first 8 characters of
 *   the new target's id.
 * @throws {Error} If `cdp list` yields no first-line prefix, or if the
 *   `Target.createTarget` reply is not JSON carrying a non-empty string `targetId`.
 */
async function getOrOpenTab(tabPrefix) {
  if (tabPrefix) return tabPrefix;

  // Always open a fresh tab to avoid SPA navigation issues
  const list = await cdp(['list']);
  // The first 8 characters of the first listed line are used as the anchor tab.
  const anchor = list.split('\n')[0].slice(0, 8);
  if (!anchor) throw new Error('No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?');

  const raw = await cdp(['evalraw', anchor, 'Target.createTarget', '{"url":"about:blank"}']);

  let targetId;
  try {
    targetId = JSON.parse(raw)?.targetId;
  } catch (parseErr) {
    throw new Error(`Target.createTarget returned invalid JSON: ${parseErr.message}`, { cause: parseErr });
  }
  // Fail with a clear message instead of a TypeError on `undefined.slice`.
  if (typeof targetId !== 'string' || targetId === '') {
    throw new Error('Target.createTarget did not return a targetId');
  }

  await cdp(['list']); // refresh cache so cdp nav can find the new tab
  return targetId.slice(0, 8);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Patch `navigator.clipboard.writeText` and `navigator.clipboard.write` in the
 * page so that copied plain text is also stored in `window.__pplxClipboard`.
 * Both patched methods still delegate to the original implementations.
 *
 * The script is wrapped in an IIFE so its helper bindings stay local: top-level
 * `const` declarations would otherwise land in the page's global scope and make
 * a second injection into the same document fail with a redeclaration error.
 * NOTE(review): this assumes `cdp eval` evaluates the text as-is in the page's
 * global scope — confirm against cdp.mjs.
 *
 * @param {string} tab - Tab id prefix passed to `cdp eval`.
 * @returns {Promise<void>} Resolves once the script has been evaluated.
 */
async function injectClipboardInterceptor(tab) {
  await cdp(['eval', tab, `
    (() => {
      window.__pplxClipboard = null;

      const origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
      navigator.clipboard.writeText = function (text) {
        window.__pplxClipboard = text;
        return origWriteText(text);
      };

      const origWrite = navigator.clipboard.write.bind(navigator.clipboard);
      navigator.clipboard.write = async function (items) {
        try {
          for (const item of items) {
            if (item.types && item.types.includes('text/plain')) {
              const blob = await item.getType('text/plain');
              window.__pplxClipboard = await blob.text();
              break;
            }
          }
        } catch (e) {
          // Best effort: a failed capture must not break the page's own copy.
        }
        return origWrite(items);
      };

      return 'ok';
    })()
  `]);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Wait until the page text stops changing, used as the signal that the answer
 * has finished streaming.
 *
 * Polls `document.body.innerText.length` and returns once the same non-zero
 * length has been observed on three consecutive polls after it was first seen.
 * A failed poll is treated as length 0 and ignored.
 *
 * @param {string} tab - Tab id prefix passed to `cdp eval`.
 * @param {number} [timeoutMs=COPY_TIMEOUT] - Overall deadline in milliseconds.
 * @param {number} [pollIntervalMs=COPY_POLL_INTERVAL] - Delay before each poll.
 * @returns {Promise<void>}
 * @throws {Error} If the length never stabilises before the deadline.
 */
async function waitForGenerationToFinish(tab, timeoutMs = COPY_TIMEOUT, pollIntervalMs = COPY_POLL_INTERVAL) {
  const deadline = Date.now() + timeoutMs;
  let lastLen = -1;
  let stableCount = 0;

  while (Date.now() < deadline) {
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));

    const lenStr = await cdp(['eval', tab, 'document.body.innerText.length']).catch(() => '0');
    // Explicit radix; non-numeric output falls back to 0 and is skipped below.
    const currentLen = Number.parseInt(lenStr, 10) || 0;
    if (currentLen <= 0) continue;

    if (currentLen !== lastLen) {
      lastLen = currentLen;
      stableCount = 0;
      continue;
    }

    stableCount += 1;
    if (stableCount >= 3) return;
  }

  throw new Error(`Perplexity generation did not finish within ${timeoutMs}ms`);
}
|
|
102
|
+
|
|
103
|
+
/**
 * Collect unique Markdown link targets from `markdown`, in order of appearance.
 *
 * @param {string} markdown - Text containing `[title](http(s)://url)` links.
 * @param {number} [limit=10] - Maximum number of sources to return.
 * @returns {{title: string, url: string}[]} First occurrence of each URL.
 */
function parseMarkdownSources(markdown, limit = 10) {
  const seen = new Set();
  const sources = [];
  for (const [, title, url] of markdown.matchAll(/\[([^\]]+)\]\((https?:\/\/[^\s\)]+)\)/g)) {
    if (seen.has(url)) continue;
    seen.add(url);
    sources.push({ title, url });
    if (sources.length === limit) break;
  }
  return sources;
}

/**
 * Click the page's copy button and read the text captured in
 * `window.__pplxClipboard`.
 *
 * The captured value is polled for a short period instead of being read once
 * after a fixed delay, because the `clipboard.write` path fills it
 * asynchronously.
 *
 * @param {string} tab - Tab id prefix passed to `cdp eval`.
 * @returns {Promise<{answer: string, sources: {title: string, url: string}[]}>}
 *   The trimmed clipboard text and up to 10 unique links parsed from it.
 * @throws {Error} If no clipboard text was captured.
 */
async function extractAnswer(tab) {
  const CLIPBOARD_WAIT_MS = 2000;
  const CLIPBOARD_POLL_MS = 100;

  // JSON.stringify yields a valid JS string literal even if the selector
  // itself contains quotes (e.g. an attribute selector).
  await cdp(['eval', tab, `document.querySelector(${JSON.stringify(S.copyButton)})?.click()`]);

  const deadline = Date.now() + CLIPBOARD_WAIT_MS;
  let answer = await cdp(['eval', tab, `window.__pplxClipboard || ''`]);
  while (!answer && Date.now() < deadline) {
    await new Promise((resolve) => setTimeout(resolve, CLIPBOARD_POLL_MS));
    answer = await cdp(['eval', tab, `window.__pplxClipboard || ''`]);
  }
  if (!answer) throw new Error('Clipboard interceptor returned empty text');

  // Regex parse Markdown links from clipboard — robust against DOM changes
  const sources = parseMarkdownSources(answer);

  return { answer: answer.trim(), sources };
}
|
|
118
|
+
|
|
119
|
+
// ---------------------------------------------------------------------------
|
|
120
|
+
|
|
121
|
+
/**
 * Split CLI arguments into flags and the query text.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{short: boolean, tabPrefix: string|null, query: string}}
 *   `short` is true when `--short` is present; `tabPrefix` is the value after
 *   `--tab` (null when absent); `query` is the remaining arguments joined by spaces.
 */
function parseCliArgs(argv) {
  const short = argv.includes('--short');
  const rest = argv.filter((arg) => arg !== '--short');
  const tabFlagIdx = rest.indexOf('--tab');
  if (tabFlagIdx === -1) {
    return { short, tabPrefix: null, query: rest.join(' ') };
  }
  return {
    short,
    tabPrefix: rest[tabFlagIdx + 1] ?? null,
    query: rest.filter((_, i) => i !== tabFlagIdx && i !== tabFlagIdx + 1).join(' '),
  };
}

/**
 * Shorten `answer` to at most `maxChars` characters, cutting at the last
 * whitespace and appending an ellipsis. Answers that already fit are returned
 * unchanged, so short answers do not lose their last word or gain a '…'.
 *
 * @param {string} answer - Full answer text.
 * @param {number} [maxChars=300] - Length above which the answer is shortened.
 * @returns {string}
 */
function shortenAnswer(answer, maxChars = 300) {
  if (answer.length <= maxChars) return answer;
  return `${answer.slice(0, maxChars).replace(/\s+\S*$/, '')}…`;
}

/**
 * CLI entry point: submit the query on the Perplexity homepage, wait for the
 * answer, and print `{ query, url, answer, sources }` as JSON on stdout.
 *
 * Usage errors and runtime failures are written to stderr and end the process
 * with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const usage = 'Usage: node extractors/perplexity.mjs "<query>" [--tab <prefix>]\n';
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const args = process.argv.slice(2);
  if (!args.length || args[0] === '--help') {
    process.stderr.write(usage);
    process.exit(1);
  }

  const { short, tabPrefix, query } = parseCliArgs(args);
  // Only flags were given: there is nothing to search for.
  if (!query.trim()) {
    process.stderr.write(usage);
    process.exit(1);
  }

  // JSON.stringify yields a valid JS string literal even if the selector
  // itself contains quotes.
  const inputSelector = JSON.stringify(S.input);

  try {
    // Refresh page list so cache is current
    await cdp(['list']);

    const tab = await getOrOpenTab(tabPrefix);

    // Navigate to homepage and use the search box (direct ?q= URLs trigger bot redirect)
    await cdp(['nav', tab, 'https://www.perplexity.ai/'], 35000);
    await dismissConsent(tab, cdp);

    // Wait for React app to mount input (up to 8s); continue regardless so the
    // click below reports the failure if the input never appears.
    const deadline = Date.now() + 8000;
    while (Date.now() < deadline) {
      const found = await cdp(['eval', tab, `!!document.querySelector(${inputSelector})`]).catch(() => 'false');
      if (found === 'true') break;
      await sleep(400);
    }
    await sleep(300);

    await injectClipboardInterceptor(tab);
    await cdp(['click', tab, S.input]);
    await sleep(400);
    await cdp(['type', tab, query]);
    await sleep(400);
    // Submit with Enter (most reliable across Chrome instances)
    await cdp(['eval', tab,
      `document.querySelector(${inputSelector})?.dispatchEvent(new KeyboardEvent('keydown',{key:'Enter',bubbles:true,keyCode:13})), 'ok'`
    ]);

    await waitForGenerationToFinish(tab);

    const { answer, sources } = await extractAnswer(tab);

    if (!answer) throw new Error('No answer extracted — Perplexity may not have responded');
    const out = short ? shortenAnswer(answer) : answer;

    const finalUrl = await cdp(['eval', tab, 'document.location.href']).catch(() => '');
    process.stdout.write(JSON.stringify({ query, url: finalUrl, answer: out, sources }, null, 2) + '\n');
  } catch (e) {
    process.stderr.write(`Error: ${e.message}\n`);
    process.exit(1);
  }
}
|
|
180
|
+
|
|
181
|
+
main();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@apmantza/greedysearch-pi",
|
|
3
|
-
"version": "1.4.0",
|
|
3
|
+
"version": "1.4.2",
|
|
4
4
|
"description": "Pi extension: browser-automation tool that searches Perplexity, Bing Copilot, and Google AI in parallel, extracts answers and sources via CDP, with optional Gemini synthesis — grounded AI answers from real browser interactions.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"keywords": [
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
],
|
|
9
9
|
"repository": {
|
|
10
10
|
"type": "git",
|
|
11
|
-
"url": "https://github.com/apmantza/GreedySearch-pi"
|
|
11
|
+
"url": "git+https://github.com/apmantza/GreedySearch-pi.git"
|
|
12
12
|
},
|
|
13
13
|
"license": "MIT",
|
|
14
14
|
"pi": {
|