@apmantza/greedysearch-pi 1.4.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,184 +1,181 @@
1
- #!/usr/bin/env node
2
- // extractors/perplexity.mjs
3
- // Navigate Perplexity, wait for streaming to complete, return clean answer + sources.
4
- //
5
- // Usage:
6
- // node extractors/perplexity.mjs "<query>" [--tab <prefix>]
7
- //
8
- // Output (stdout): JSON { answer, sources, query, url }
9
- // Errors go to stderr only — stdout is always clean JSON for piping.
10
-
11
- import { readFileSync, existsSync } from 'fs';
12
- import { spawn } from 'child_process';
13
- import { tmpdir } from 'os';
14
- import { join, dirname } from 'path';
15
- import { fileURLToPath } from 'url';
16
- import { dismissConsent } from './consent.mjs';
17
- import { SELECTORS } from './selectors.mjs';
18
-
19
- const __dir = dirname(fileURLToPath(import.meta.url));
20
- const CDP = join(__dir, '..', 'cdp.mjs');
21
- const PAGES_CACHE = `${tmpdir().replace(/\\/g, '/')}/cdp-pages.json`;
22
-
23
- const COPY_POLL_INTERVAL = 600;
24
- const COPY_TIMEOUT = 30000;
25
-
26
- const S = SELECTORS.perplexity;
27
-
28
- // ---------------------------------------------------------------------------
29
-
30
- function cdp(args, timeoutMs = 30000) {
31
- return new Promise((resolve, reject) => {
32
- const proc = spawn('node', [CDP, ...args], { stdio: ['ignore', 'pipe', 'pipe'] });
33
- let out = '';
34
- let err = '';
35
- proc.stdout.on('data', d => out += d);
36
- proc.stderr.on('data', d => err += d);
37
- const timer = setTimeout(() => { proc.kill(); reject(new Error(`cdp timeout: ${args[0]}`)); }, timeoutMs);
38
- proc.on('close', code => {
39
- clearTimeout(timer);
40
- if (code !== 0) reject(new Error(err.trim() || `cdp exit ${code}`));
41
- else resolve(out.trim());
42
- });
43
- });
44
- }
45
-
46
- async function getOrOpenTab(tabPrefix) {
47
- // If caller specified a tab, use it
48
- if (tabPrefix) return tabPrefix;
49
-
50
- // Otherwise look for an existing Perplexity tab
51
- if (existsSync(PAGES_CACHE)) {
52
- const pages = JSON.parse(readFileSync(PAGES_CACHE, 'utf8'));
53
- const existing = pages.find(p => p.url.includes('perplexity.ai'));
54
- if (existing) return existing.targetId.slice(0, 8);
55
- }
56
-
57
- // Fall back to first available tab
58
- const list = await cdp(['list']);
59
- const firstLine = list.split('\n')[0];
60
- if (!firstLine) throw new Error('No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?');
61
- return firstLine.slice(0, 8);
62
- }
63
-
64
- async function injectClipboardInterceptor(tab) {
65
- await cdp(['eval', tab, `
66
- window.__pplxClipboard = null;
67
- const _origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
68
- navigator.clipboard.writeText = function(text) {
69
- window.__pplxClipboard = text;
70
- return _origWriteText(text);
71
- };
72
- const _origWrite = navigator.clipboard.write.bind(navigator.clipboard);
73
- navigator.clipboard.write = async function(items) {
74
- try {
75
- for (const item of items) {
76
- if (item.types && item.types.includes('text/plain')) {
77
- const blob = await item.getType('text/plain');
78
- window.__pplxClipboard = await blob.text();
79
- break;
80
- }
81
- }
82
- } catch(e) {}
83
- return _origWrite(items);
84
- };
85
- `]);
86
- }
87
-
88
- async function waitForCopyButton(tab) {
89
- const deadline = Date.now() + COPY_TIMEOUT;
90
- while (Date.now() < deadline) {
91
- await new Promise(r => setTimeout(r, COPY_POLL_INTERVAL));
92
- const found = await cdp(['eval', tab,
93
- `!!document.querySelector('${S.copyButton}')`
94
- ]).catch(() => 'false');
95
- if (found === 'true') return;
96
- }
97
- throw new Error(`Perplexity copy button did not appear within ${COPY_TIMEOUT}ms`);
98
- }
99
-
100
- async function extractAnswer(tab) {
101
- await cdp(['eval', tab, `document.querySelector('${S.copyButton}')?.click()`]);
102
- await new Promise(r => setTimeout(r, 400));
103
-
104
- const answer = await cdp(['eval', tab, `window.__pplxClipboard || ''`]);
105
- if (!answer) throw new Error('Clipboard interceptor returned empty text');
106
-
107
- const raw = await cdp(['eval', tab, `
108
- (function() {
109
- var sources = Array.from(document.querySelectorAll('${S.sourceItem}'))
110
- .map(el => ({ url: el.getAttribute('data-pplx-citation-url'), title: el.querySelector('${S.sourceLink}')?.innerText?.trim() || '' }))
111
- .filter(s => s.url)
112
- .filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
113
- .slice(0, 10);
114
- return JSON.stringify(sources);
115
- })()
116
- `]).catch(() => '[]');
117
- const sources = JSON.parse(raw);
118
-
119
- return { answer: answer.trim(), sources };
120
- }
121
-
122
- // ---------------------------------------------------------------------------
123
-
124
- async function main() {
125
- const args = process.argv.slice(2);
126
- if (!args.length || args[0] === '--help') {
127
- process.stderr.write('Usage: node extractors/perplexity.mjs "<query>" [--tab <prefix>]\n');
128
- process.exit(1);
129
- }
130
-
131
- const short = args.includes('--short');
132
- const rest = args.filter(a => a !== '--short');
133
- const tabFlagIdx = rest.indexOf('--tab');
134
- const tabPrefix = tabFlagIdx !== -1 ? rest[tabFlagIdx + 1] : null;
135
- const query = tabFlagIdx !== -1
136
- ? rest.filter((_, i) => i !== tabFlagIdx && i !== tabFlagIdx + 1).join(' ')
137
- : rest.join(' ');
138
-
139
-
140
- try {
141
- // Refresh page list so cache is current
142
- await cdp(['list']);
143
-
144
- const tab = await getOrOpenTab(tabPrefix);
145
-
146
- // Navigate to homepage and use the search box (direct ?q= URLs trigger bot redirect)
147
- await cdp(['nav', tab, 'https://www.perplexity.ai/'], 35000);
148
- await dismissConsent(tab, cdp);
149
-
150
- // Wait for React app to mount input (up to 8s)
151
- const deadline = Date.now() + 8000;
152
- while (Date.now() < deadline) {
153
- const found = await cdp(['eval', tab, `!!document.querySelector('${S.input}')`]).catch(() => 'false');
154
- if (found === 'true') break;
155
- await new Promise(r => setTimeout(r, 400));
156
- }
157
- await new Promise(r => setTimeout(r, 300));
158
-
159
- await injectClipboardInterceptor(tab);
160
- await cdp(['click', tab, S.input]);
161
- await new Promise(r => setTimeout(r, 400));
162
- await cdp(['type', tab, query]);
163
- await new Promise(r => setTimeout(r, 400));
164
- // Submit with Enter (most reliable across Chrome instances)
165
- await cdp(['eval', tab,
166
- `document.querySelector('${S.input}')?.dispatchEvent(new KeyboardEvent('keydown',{key:'Enter',bubbles:true,keyCode:13})), 'ok'`
167
- ]);
168
-
169
- await waitForCopyButton(tab);
170
-
171
- const { answer, sources } = await extractAnswer(tab);
172
-
173
- if (!answer) throw new Error('No answer extracted Perplexity may not have responded');
174
- const out = short ? answer.slice(0, 300).replace(/\s+\S*$/, '') + '' : answer;
175
-
176
- const finalUrl = await cdp(['eval', tab, 'document.location.href']).catch(() => '');
177
- process.stdout.write(JSON.stringify({ query, url: finalUrl, answer: out, sources }, null, 2) + '\n');
178
- } catch (e) {
179
- process.stderr.write(`Error: ${e.message}\n`);
180
- process.exit(1);
181
- }
182
- }
183
-
184
- main();
1
+ #!/usr/bin/env node
2
+ // extractors/perplexity.mjs
3
+ // Navigate Perplexity, wait for streaming to complete, return clean answer + sources.
4
+ //
5
+ // Usage:
6
+ // node extractors/perplexity.mjs "<query>" [--tab <prefix>] [--short]
7
+ //
8
+ // Output (stdout): JSON { answer, sources, query, url }
9
+ // Errors go to stderr only — stdout is always clean JSON for piping.
10
+
11
+ import { readFileSync, existsSync } from 'fs';
12
+ import { spawn } from 'child_process';
13
+ import { tmpdir } from 'os';
14
+ import { join, dirname } from 'path';
15
+ import { fileURLToPath } from 'url';
16
+ import { dismissConsent } from './consent.mjs';
17
+ import { SELECTORS } from './selectors.mjs';
18
+
19
+ const __dir = dirname(fileURLToPath(import.meta.url));
20
+ const CDP = join(__dir, '..', 'cdp.mjs');
21
+ const PAGES_CACHE = `${tmpdir().replace(/\\/g, '/')}/cdp-pages.json`;
22
+
23
+ const COPY_POLL_INTERVAL = 600;
24
+ const COPY_TIMEOUT = 30000;
25
+
26
+ const S = SELECTORS.perplexity;
27
+
28
+ // ---------------------------------------------------------------------------
29
+
30
+ function cdp(args, timeoutMs = 30000) {
31
+ return new Promise((resolve, reject) => {
32
+ const proc = spawn('node', [CDP, ...args], { stdio: ['ignore', 'pipe', 'pipe'] });
33
+ let out = '';
34
+ let err = '';
35
+ proc.stdout.on('data', d => out += d);
36
+ proc.stderr.on('data', d => err += d);
37
+ const timer = setTimeout(() => { proc.kill(); reject(new Error(`cdp timeout: ${args[0]}`)); }, timeoutMs);
38
+ proc.on('close', code => {
39
+ clearTimeout(timer);
40
+ if (code !== 0) reject(new Error(err.trim() || `cdp exit ${code}`));
41
+ else resolve(out.trim());
42
+ });
43
+ });
44
+ }
45
+
46
+ async function getOrOpenTab(tabPrefix) {
47
+ if (tabPrefix) return tabPrefix;
48
+ // No tab specified: open a fresh tab to avoid SPA navigation issues
49
+ const list = await cdp(['list']);
50
+ const anchor = list.split('\n')[0]?.slice(0, 8);
51
+ if (!anchor) throw new Error('No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?');
52
+ const raw = await cdp(['evalraw', anchor, 'Target.createTarget', '{"url":"about:blank"}']);
53
+ const { targetId } = JSON.parse(raw);
54
+ await cdp(['list']); // refresh cache so cdp nav can find the new tab
55
+ return targetId.slice(0, 8);
56
+ }
57
+
58
+ async function injectClipboardInterceptor(tab) {
59
+ await cdp(['eval', tab, `
60
+ window.__pplxClipboard = null;
61
+ const _origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
62
+ navigator.clipboard.writeText = function(text) {
63
+ window.__pplxClipboard = text;
64
+ return _origWriteText(text);
65
+ };
66
+ const _origWrite = navigator.clipboard.write.bind(navigator.clipboard);
67
+ navigator.clipboard.write = async function(items) {
68
+ try {
69
+ for (const item of items) {
70
+ if (item.types && item.types.includes('text/plain')) {
71
+ const blob = await item.getType('text/plain');
72
+ window.__pplxClipboard = await blob.text();
73
+ break;
74
+ }
75
+ }
76
+ } catch(e) {}
77
+ return _origWrite(items);
78
+ };
79
+ `]);
80
+ }
81
+
82
+ async function waitForGenerationToFinish(tab) {
83
+ const deadline = Date.now() + COPY_TIMEOUT;
84
+ let lastLen = -1;
85
+ let stableCount = 0;
86
+ while (Date.now() < deadline) {
87
+ await new Promise(r => setTimeout(r, COPY_POLL_INTERVAL));
88
+ const lenStr = await cdp(['eval', tab, 'document.body.innerText.length']).catch(() => '0');
89
+ const currentLen = parseInt(lenStr) || 0;
90
+ if (currentLen > 0) {
91
+ if (currentLen === lastLen) {
92
+ stableCount++;
93
+ if (stableCount >= 3) return;
94
+ } else {
95
+ lastLen = currentLen;
96
+ stableCount = 0;
97
+ }
98
+ }
99
+ }
100
+ throw new Error(`Perplexity generation did not finish within ${COPY_TIMEOUT}ms`);
101
+ }
102
+
103
+ async function extractAnswer(tab) {
104
+ await cdp(['eval', tab, `document.querySelector('${S.copyButton}')?.click()`]);
105
+ await new Promise(r => setTimeout(r, 400));
106
+
107
+ const answer = await cdp(['eval', tab, `window.__pplxClipboard || ''`]);
108
+ if (!answer) throw new Error('Clipboard interceptor returned empty text');
109
+
110
+ // Regex parse Markdown links from clipboard — robust against DOM changes
111
+ const sources = Array.from(answer.matchAll(/\[([^\]]+)\]\((https?:\/\/[^\s\)]+)\)/g))
112
+ .map(m => ({ title: m[1], url: m[2] }))
113
+ .filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
114
+ .slice(0, 10);
115
+
116
+ return { answer: answer.trim(), sources };
117
+ }
118
+
119
+ // ---------------------------------------------------------------------------
120
+
121
+ async function main() {
122
+ const args = process.argv.slice(2);
123
+ if (!args.length || args[0] === '--help') {
124
+ process.stderr.write('Usage: node extractors/perplexity.mjs "<query>" [--tab <prefix>]\n');
125
+ process.exit(1);
126
+ }
127
+
128
+ const short = args.includes('--short');
129
+ const rest = args.filter(a => a !== '--short');
130
+ const tabFlagIdx = rest.indexOf('--tab');
131
+ const tabPrefix = tabFlagIdx !== -1 ? rest[tabFlagIdx + 1] : null;
132
+ const query = tabFlagIdx !== -1
133
+ ? rest.filter((_, i) => i !== tabFlagIdx && i !== tabFlagIdx + 1).join(' ')
134
+ : rest.join(' ');
135
+
136
+
137
+ try {
138
+ // Refresh page list so cache is current
139
+ await cdp(['list']);
140
+
141
+ const tab = await getOrOpenTab(tabPrefix);
142
+
143
+ // Navigate to homepage and use the search box (direct ?q= URLs trigger bot redirect)
144
+ await cdp(['nav', tab, 'https://www.perplexity.ai/'], 35000);
145
+ await dismissConsent(tab, cdp);
146
+
147
+ // Wait for React app to mount input (up to 8s)
148
+ const deadline = Date.now() + 8000;
149
+ while (Date.now() < deadline) {
150
+ const found = await cdp(['eval', tab, `!!document.querySelector('${S.input}')`]).catch(() => 'false');
151
+ if (found === 'true') break;
152
+ await new Promise(r => setTimeout(r, 400));
153
+ }
154
+ await new Promise(r => setTimeout(r, 300));
155
+
156
+ await injectClipboardInterceptor(tab);
157
+ await cdp(['click', tab, S.input]);
158
+ await new Promise(r => setTimeout(r, 400));
159
+ await cdp(['type', tab, query]);
160
+ await new Promise(r => setTimeout(r, 400));
161
+ // Submit with Enter (most reliable across Chrome instances)
162
+ await cdp(['eval', tab,
163
+ `document.querySelector('${S.input}')?.dispatchEvent(new KeyboardEvent('keydown',{key:'Enter',bubbles:true,keyCode:13})), 'ok'`
164
+ ]);
165
+
166
+ await waitForGenerationToFinish(tab);
167
+
168
+ const { answer, sources } = await extractAnswer(tab);
169
+
170
+ if (!answer) throw new Error('No answer extracted — Perplexity may not have responded');
171
+ const out = short ? answer.slice(0, 300).replace(/\s+\S*$/, '') + '…' : answer;
172
+
173
+ const finalUrl = await cdp(['eval', tab, 'document.location.href']).catch(() => '');
174
+ process.stdout.write(JSON.stringify({ query, url: finalUrl, answer: out, sources }, null, 2) + '\n');
175
+ } catch (e) {
176
+ process.stderr.write(`Error: ${e.message}\n`);
177
+ process.exit(1);
178
+ }
179
+ }
180
+
181
+ main();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@apmantza/greedysearch-pi",
3
- "version": "1.4.0",
3
+ "version": "1.4.2",
4
4
  "description": "Pi extension: browser-automation tool that searches Perplexity, Bing Copilot, and Google AI in parallel, extracts answers and sources via CDP, with optional Gemini synthesis — grounded AI answers from real browser interactions.",
5
5
  "type": "module",
6
6
  "keywords": [
@@ -8,7 +8,7 @@
8
8
  ],
9
9
  "repository": {
10
10
  "type": "git",
11
- "url": "https://github.com/apmantza/GreedySearch-pi"
11
+ "url": "git+https://github.com/apmantza/GreedySearch-pi.git"
12
12
  },
13
13
  "license": "MIT",
14
14
  "pi": {