@apmantza/greedysearch-pi 1.9.2 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +132 -2
- package/README.md +82 -47
- package/bin/cdp.mjs +1153 -1108
- package/bin/launch.mjs +9 -0
- package/bin/search.mjs +318 -81
- package/extractors/bing-copilot.mjs +48 -18
- package/extractors/chatgpt.mjs +553 -0
- package/extractors/common.mjs +213 -22
- package/extractors/consensus.mjs +655 -0
- package/extractors/consent.mjs +182 -18
- package/extractors/gemini.mjs +350 -217
- package/extractors/google-ai.mjs +129 -128
- package/extractors/logically.mjs +629 -0
- package/extractors/perplexity.mjs +547 -217
- package/extractors/selectors.mjs +3 -2
- package/extractors/semantic-scholar.mjs +219 -0
- package/package.json +8 -4
- package/skills/greedy-search/skill.md +20 -12
- package/src/fetcher.mjs +23 -1
- package/src/formatters/results.ts +185 -128
- package/src/search/browser-lifecycle.mjs +27 -5
- package/src/search/challenge-detect.mjs +205 -0
- package/src/search/chrome.mjs +653 -590
- package/src/search/constants.mjs +155 -39
- package/src/search/engines.mjs +114 -76
- package/src/search/fetch-source.mjs +566 -451
- package/src/search/pdf.mjs +68 -0
- package/src/search/progress.mjs +145 -0
- package/src/search/recovery.mjs +73 -45
- package/src/search/research.mjs +1419 -62
- package/src/search/scale-aware.mjs +93 -0
- package/src/search/simple-research.mjs +520 -0
- package/src/search/sources.mjs +52 -22
- package/src/search/synthesis-runner.mjs +105 -26
- package/src/search/synthesis.mjs +286 -246
- package/src/tools/greedy-search-handler.ts +129 -59
- package/src/tools/shared.ts +312 -186
- package/src/types.ts +110 -104
- package/test.mjs +537 -18
|
@@ -1,217 +1,547 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
// extractors/perplexity.mjs
|
|
4
|
-
// Navigate Perplexity, wait for streaming to complete, return clean answer + sources.
|
|
5
|
-
//
|
|
6
|
-
// Usage:
|
|
7
|
-
// node extractors/perplexity.mjs "<query>" [--tab <prefix>]
|
|
8
|
-
//
|
|
9
|
-
// Output (stdout): JSON { answer, sources, query, url }
|
|
10
|
-
// Errors go to stderr only — stdout is always clean JSON for piping.
|
|
11
|
-
//
|
|
12
|
-
// TODO: Refactor - this file has 42 lines duplicated with google-ai.mjs (line 28)
|
|
13
|
-
|
|
14
|
-
import {
|
|
15
|
-
buildEnvelope,
|
|
16
|
-
cdp,
|
|
17
|
-
formatAnswer,
|
|
18
|
-
getOrOpenTab,
|
|
19
|
-
handleError,
|
|
20
|
-
injectClipboardInterceptor,
|
|
21
|
-
jitter,
|
|
22
|
-
outputJson,
|
|
23
|
-
parseArgs,
|
|
24
|
-
parseSourcesFromMarkdown,
|
|
25
|
-
prepareArgs,
|
|
26
|
-
TIMING,
|
|
27
|
-
validateQuery,
|
|
28
|
-
waitForSelector,
|
|
29
|
-
waitForStreamComplete,
|
|
30
|
-
} from "./common.mjs";
|
|
31
|
-
import { dismissConsent, handleVerification } from "./consent.mjs";
|
|
32
|
-
import { SELECTORS } from "./selectors.mjs";
|
|
33
|
-
|
|
34
|
-
const S = SELECTORS.perplexity;
|
|
35
|
-
const GLOBAL_VAR = "__pplxClipboard";
|
|
36
|
-
|
|
37
|
-
// ============================================================================
|
|
38
|
-
// Language-agnostic copy button finder
|
|
39
|
-
// ============================================================================
|
|
40
|
-
|
|
41
|
-
function findCopyButtonJsExpression() {
|
|
42
|
-
// Perplexity uses SVG icons via <use xlink:href="#pplx-icon-copy">
|
|
43
|
-
// This works across all locales since it doesn't depend on aria-label text
|
|
44
|
-
// Use .pop() to get the last matching button (the answer copy button),
|
|
45
|
-
// not the first one which is the question copy button
|
|
46
|
-
return `Array.from(document.querySelectorAll('button')).filter(b => b.innerHTML.includes('#pplx-icon-copy')).pop()`;
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
// ============================================================================
|
|
50
|
-
//
|
|
51
|
-
// ============================================================================
|
|
52
|
-
|
|
53
|
-
async function
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// extractors/perplexity.mjs
|
|
4
|
+
// Navigate Perplexity, wait for streaming to complete, return clean answer + sources.
|
|
5
|
+
//
|
|
6
|
+
// Usage:
|
|
7
|
+
// node extractors/perplexity.mjs "<query>" [--tab <prefix>]
|
|
8
|
+
//
|
|
9
|
+
// Output (stdout): JSON { answer, sources, query, url }
|
|
10
|
+
// Errors go to stderr only — stdout is always clean JSON for piping.
|
|
11
|
+
//
|
|
12
|
+
// TODO: Refactor - this file has 42 lines duplicated with google-ai.mjs (line 28)
|
|
13
|
+
|
|
14
|
+
import {
|
|
15
|
+
buildEnvelope,
|
|
16
|
+
cdp,
|
|
17
|
+
formatAnswer,
|
|
18
|
+
getOrOpenTab,
|
|
19
|
+
handleError,
|
|
20
|
+
injectClipboardInterceptor,
|
|
21
|
+
jitter,
|
|
22
|
+
outputJson,
|
|
23
|
+
parseArgs,
|
|
24
|
+
parseSourcesFromMarkdown,
|
|
25
|
+
prepareArgs,
|
|
26
|
+
TIMING,
|
|
27
|
+
validateQuery,
|
|
28
|
+
waitForSelector,
|
|
29
|
+
waitForStreamComplete,
|
|
30
|
+
} from "./common.mjs";
|
|
31
|
+
import { dismissConsent, handleVerification } from "./consent.mjs";
|
|
32
|
+
import { SELECTORS } from "./selectors.mjs";
|
|
33
|
+
|
|
34
|
+
// Perplexity-specific entry of the shared SELECTORS table; `S.input` is the
// query input selector interpolated into the browser-side scripts below.
const { perplexity: S } = SELECTORS;

// Name of the `window` property the clipboard interceptor is installed under
// and from which the captured answer text is read back.
const GLOBAL_VAR = "__pplxClipboard";
|
|
36
|
+
|
|
37
|
+
// ============================================================================
|
|
38
|
+
// Language-agnostic copy button finder
|
|
39
|
+
// ============================================================================
|
|
40
|
+
|
|
41
|
+
/**
 * Build the browser-side JavaScript expression that evaluates to the answer's
 * copy button (or `undefined` when no button matches).
 *
 * The lookup keys on the SVG icon reference (`#pplx-icon-copy`) inside the
 * button markup rather than on aria-label text, so it is locale-independent.
 * The last match is taken because the first matching button belongs to the
 * question, not the answer.
 *
 * @returns {string} expression source, meant to be evaluated in the page.
 */
function findCopyButtonJsExpression() {
  const everyButton = "Array.from(document.querySelectorAll('button'))";
  const keepCopyIconButtons =
    ".filter(b => b.innerHTML.includes('#pplx-icon-copy'))";
  const takeLast = ".pop()";
  return [everyButton, keepCopyIconButtons, takeLast].join("");
}
|
|
48
|
+
|
|
49
|
+
// ============================================================================
|
|
50
|
+
// DOM fallback — read answer + sources when clipboard interceptor fails
|
|
51
|
+
// ============================================================================
|
|
52
|
+
|
|
53
|
+
/**
 * DOM fallback: read the answer (and outbound source links) directly from the
 * rendered page when the clipboard path produced nothing usable.
 *
 * Steps, each run as a browser-side script through `cdp(["eval", tab, ...])`:
 *   1. Poll (up to 8s) until the URL looks like a results page.
 *   2. Poll (up to 5s) for answer text using three strategies in order:
 *      last qualifying `.prose` block, an answer/response container, then the
 *      largest visible text block below the input inside the main area.
 *   3. Collect up to 10 external https links as sources (best effort).
 *
 * @param {*} tab - tab handle forwarded unchanged to `cdp`.
 * @param {object} env - diagnostics object; on success `fallbackUsed` is set
 *   to `dom:<method>` and `clipboardEmpty` to `true`.
 * @returns {Promise<{answer: string, sources: Array<{title: string, url: string}>}|null>}
 *   the extracted answer, or `null` when navigation timed out, nothing
 *   answer-like was found, or the browser result could not be parsed.
 */
async function extractAnswerFromDom(tab, env) {
  // Heuristic for what counts as real answer text (not a header stub like
  // "Next.jsReactNext.js"): either substantial (>50 chars) or a short factual
  // answer (>=5 chars containing whitespace or punctuation).
  function _looksLikeAnswerText(text) {
    const t = (text || "").trim();
    if (t.length > 50) return true;
    return t.length >= 5 && /\s|[.,!?;:]/.test(t);
  }

  // Wait for the page to leave the homepage: its sidebar nav items would
  // otherwise be picked up as the answer.
  const navResult = await cdp(
    [
      "eval",
      tab,
      `new Promise((resolve) => {
        const _deadline = Date.now() + 8000;
        function _checkNav() {
          const url = document.location.href;
          if (url.includes('/search/') || url.includes('/thread/') || url.match(/perplexity.ai\\/[^/]+/)) {
            resolve('navigated');
          } else if (Date.now() < _deadline) {
            setTimeout(_checkNav, 300);
          } else {
            resolve('timeout');
          }
        }
        _checkNav();
      })`,
    ],
    10000,
  ).catch(() => "timeout");

  if (navResult === "timeout") {
    // Page never reached a results URL — extraction would be unreliable.
    return null;
  }

  // The answer heuristic is duplicated inside the eval string because the
  // browser-side script cannot reference the Node-side function above.
  const domExtract = await cdp(
    [
      "eval",
      tab,
      `new Promise((resolve) => {
        const _deadline = Date.now() + 5000;
        function _looksLikeAnswerText(text) {
          const t = (text || '').trim();
          if (t.length > 50) return true;
          return t.length >= 5 && /\\s|[.,!?;:]/.test(t);
        }
        function _tryExtract() {
          try {
            // Strategy 1: the last .prose block with answer-like text that is
            // not inside sidebar/navigation chrome.
            const proseBlocks = Array.from(document.querySelectorAll('.prose, [class*="prose"]'));
            const candidates = proseBlocks.filter(el => {
              const text = el.innerText?.trim() || '';
              if (!_looksLikeAnswerText(text)) return false;
              if (el.closest('nav, aside, [role="navigation"], [class*="sidebar"], [class*="nav-"]')) return false;
              return true;
            });
            if (candidates.length > 0) {
              const last = candidates[candidates.length - 1];
              return resolve(JSON.stringify({ answer: last.innerText.trim(), method: 'prose' }));
            }

            // Strategy 2: an answer container identified by data/class attributes.
            const answerContainer = document.querySelector('[data-testid*="answer"], [class*="answer-content"], [class*="response-content"]');
            if (answerContainer && _looksLikeAnswerText(answerContainer.innerText?.trim())) {
              return resolve(JSON.stringify({ answer: answerContainer.innerText.trim(), method: 'answer-container' }));
            }

            // Strategy 3: the largest visible text block in the main content
            // area, positioned below the input. If the input is not in the DOM
            // on this tick, skip the strategy and keep polling instead of
            // giving up before the deadline.
            const input = document.querySelector('${S.input}');
            if (input) {
              const inputRect = input.getBoundingClientRect();
              const main = document.querySelector('main, [role="main"], [class*="main-content"]') || document.body;
              const blocks = Array.from(main.querySelectorAll('div, article, section'))
                .filter(d => {
                  const r = d.getBoundingClientRect();
                  if (r.top <= inputRect.bottom) return false; // not below input
                  if (r.width === 0 || r.height === 0) return false; // not visible
                  if (d.closest('nav, aside, [role="navigation"], [class*="sidebar"]')) return false; // not in nav
                  const text = d.innerText?.trim() || '';
                  return _looksLikeAnswerText(text) && d.children.length < 20;
                })
                .sort((a, b) => (b.innerText?.length || 0) - (a.innerText?.length || 0));
              if (blocks.length > 0) {
                return resolve(JSON.stringify({ answer: blocks[0].innerText.trim(), method: 'main-content' }));
              }
            }

            // Nothing yet: retry until the deadline, then report failure.
            if (Date.now() < _deadline) {
              setTimeout(_tryExtract, 400);
            } else {
              resolve(null);
            }
          } catch(e) { resolve(null); }
        }
        _tryExtract();
      })`,
    ],
    8000,
  ).catch(() => null);

  if (!domExtract || domExtract === "null") return null;

  try {
    const { answer, method } = JSON.parse(domExtract);
    if (answer && _looksLikeAnswerText(answer)) {
      env.fallbackUsed = `dom:${method}`;
      env.clipboardEmpty = true;
      // Best effort: first 10 external https links on the page become sources.
      const sourcesExtract = await cdp(
        [
          "eval",
          tab,
          `(() => {
            const links = Array.from(document.querySelectorAll('a[href^="https://"]'))
              .filter(a => {
                const href = a.href || '';
                return !href.includes('perplexity.ai') && !href.includes('google.com') && !href.includes('gstatic');
              })
              .slice(0, 10)
              .map(a => ({ title: a.innerText?.trim() || a.href, url: a.href }));
            return JSON.stringify(links);
          })()`,
        ],
        3000,
      ).catch(() => "[]");
      let sources = [];
      try {
        sources = JSON.parse(sourcesExtract || "[]");
      } catch {
        // Unparseable link list: keep the answer and return no sources.
      }
      return { answer, sources };
    }
  } catch {
    // Browser result was not the expected JSON: treat as "no answer found".
  }
  return null;
}
|
|
201
|
+
|
|
202
|
+
// ============================================================================
|
|
203
|
+
// Extraction
|
|
204
|
+
// ============================================================================
|
|
205
|
+
|
|
206
|
+
/**
 * Extract the answer text and its sources from the current Perplexity page.
 *
 * Primary path: click the answer's copy button and read the text captured in
 * `window[GLOBAL_VAR]`, retrying once with a longer wait. Text that merely
 * echoes the query, or is implausibly short, is discarded. When no usable
 * clipboard text remains, the DOM fallback is tried.
 *
 * @param {*} tab - tab handle forwarded unchanged to `cdp`.
 * @param {object} env - diagnostics object; `clipboardEmpty` is updated here
 *   and `env.query` (when set) is used for the echo check.
 * @returns {Promise<{answer: string, sources: Array}>} trimmed answer plus
 *   sources parsed from its markdown, or the DOM fallback's result.
 * @throws {Error} when both the clipboard path and the DOM fallback fail.
 */
async function extractAnswer(tab, env) {
  const copyBtnExpr = findCopyButtonJsExpression();

  // Click the copy button, wait `waitMs` for the page to write the clipboard,
  // then read back whatever was captured ('' when nothing was).
  const clickCopyAndRead = async (waitMs) => {
    await cdp(["eval", tab, `${copyBtnExpr}?.click()`]);
    await new Promise((r) => setTimeout(r, waitMs));
    return cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
  };

  let answer = await clickCopyAndRead(400);
  env.clipboardEmpty = !answer;

  // Retry once if clipboard is empty (Perplexity might be slow to write)
  if (!answer) {
    console.error("[perplexity] Clipboard empty, retrying in 2s...");
    answer = await clickCopyAndRead(2000);
    env.clipboardEmpty = !answer;
  }

  // Reject suspicious answers: the query echoed back (the click landed on the
  // question's copy button) or a stub shorter than max(20, half the query).
  // Accepting them would feed a paraphrased query into synthesis as if it
  // were a result.
  if (env.query && answer) {
    const queryNorm = env.query.toLowerCase().trim();
    const answerNorm = answer.toLowerCase().trim();
    const minPlausibleLength = Math.max(20, queryNorm.length * 0.5);
    if (answerNorm === queryNorm || answer.trim().length < minPlausibleLength) {
      // No further clipboard retry happens after this point; the DOM
      // fallback below is the next step, so the log says exactly that.
      console.error(
        `[perplexity] Clipboard contains query echo or stub (${answer.length} chars), discarding and falling back to DOM...`,
      );
      env.clipboardEmpty = true;
      answer = "";
    }
  }

  // DOM fallback: when clipboard interception fails (intermittent in headless),
  // read the answer from the page DOM instead of triggering visible recovery.
  if (!answer) {
    console.error("[perplexity] Clipboard empty — trying DOM fallback...");
    const domResult = await extractAnswerFromDom(tab, env);
    if (domResult) {
      console.error(
        `[perplexity] DOM fallback succeeded (${env.fallbackUsed})`,
      );
      return domResult;
    }
    throw new Error("Clipboard interceptor returned empty text");
  }

  const sources = parseSourcesFromMarkdown(answer);
  return { answer: answer.trim(), sources };
}
|
|
261
|
+
|
|
262
|
+
// ============================================================================
|
|
263
|
+
// Main
|
|
264
|
+
// ============================================================================
|
|
265
|
+
|
|
266
|
+
// Usage banner handed to validateQuery alongside the parsed arguments.
const USAGE = `Usage: node extractors/perplexity.mjs "<query>" [--tab <prefix>]\n`;
|
|
268
|
+
|
|
269
|
+
/**
 * True when `url` parses as a URL whose hostname is `perplexity.ai` or one of
 * its subdomains. Unparseable input (empty string, non-URL text) yields false.
 *
 * @param {string} url
 * @returns {boolean}
 */
function isPerplexityUrl(url) {
  try {
    const host = new URL(url).hostname.toLowerCase();
    return host === "perplexity.ai" || host.endsWith(".perplexity.ai");
  } catch {
    return false;
  }
}

/**
 * CLI entry point: open (or reuse) a tab, get to a usable Perplexity input,
 * type and submit the query, wait for the response to settle, then print
 * `{ query, url, answer, sources, _envelope }` via `outputJson`.
 *
 * Any failure inside the browser flow is routed to `handleError` together
 * with the diagnostics envelope; argument parsing happens before that guard.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = await prepareArgs(process.argv.slice(2));
  validateQuery(args, USAGE);

  const { query, tabPrefix, short } = parseArgs(args);
  const startTime = Date.now();
  const mode =
    process.env.GREEDY_SEARCH_VISIBLE === "1" ? "visible" : "headless";
  const homeUrl = "https://www.perplexity.ai/";
  const sleep = (ms) => new Promise((r) => setTimeout(r, ms));

  const env = {
    engine: "perplexity",
    mode,
    clipboardEmpty: null,
    fallbackUsed: null,
    blockedBy: null,
    verificationResult: null,
    inputReady: null,
    // Carried so extractAnswer can reject answers that merely echo the query
    // without threading `query` through every helper.
    query,
  };

  // Browser-side statements shared by both focus scripts below. Expects a
  // local `input` element: un-hides every ancestor whose computed visibility
  // is 'hidden' (up to, not including, <body>), then focuses the input.
  const revealAndFocusJs = `
    let el = input;
    while (el && el !== document.body) {
      if (window.getComputedStyle(el).visibility === 'hidden') {
        el.style.visibility = 'visible';
      }
      el = el.parentElement;
    }
    input.focus();`;

  try {
    // Only refresh page list when creating a fresh tab (no prefix provided)
    if (!tabPrefix) await cdp(["list"]);

    const tab = await getOrOpenTab(tabPrefix);

    // Skip navigation if already on Perplexity domain (tab was seeded by search.mjs)
    const currentUrl = await cdp(["eval", tab, "document.location.href"]).catch(
      () => "",
    );

    if (!isPerplexityUrl(currentUrl)) {
      await cdp(["nav", tab, homeUrl], 20000);
      // Wait for the app to hydrate. Under CDP contention the input can exist
      // while its wrapper DIVs are still visibility:hidden, in which case
      // focus() silently fails — so force visibility, then poll up to 15s
      // for the input to actually take focus.
      const _inputReady = await cdp(
        [
          "eval",
          tab,
          `new Promise((resolve) => {
            const _deadline = Date.now() + 15000;
            function _check() {
              const input = document.querySelector('${S.input}');
              if (input) {
                ${revealAndFocusJs}
                if (document.activeElement === input) return resolve('ready');
              }
              if (Date.now() < _deadline) setTimeout(_check, 500);
              else resolve('timeout');
            }
            _check();
          })`,
        ],
        18000,
      ).catch(() => "timeout");

      if (_inputReady !== "ready") {
        // Retry navigation up to 2 more times — the first nav may have been
        // preempted by CDP contention in all-mode. Exhausting the retries is
        // not fatal here; the waitForSelector check further down decides.
        for (let retry = 0; retry < 2; retry++) {
          await cdp(["nav", tab, homeUrl], 20000);
          await sleep(2000);
          const _retryReady = await cdp(
            [
              "eval",
              tab,
              `(() => {
                const input = document.querySelector('${S.input}');
                if (!input) return false;
                ${revealAndFocusJs}
                return document.activeElement === input;
              })()`,
            ],
            5000,
          ).catch(() => false);
          // The eval result is compared as the string "true"; the catch
          // fallback `false` therefore never matches and the loop continues.
          if (_retryReady === "true") break;
        }
      } else {
        await sleep(600);
      }
    }

    // Handle verification challenges (Cloudflare Turnstile, etc.)
    const verifyResult = await handleVerification(tab, cdp, 10000);
    env.verificationResult = verifyResult;
    if (verifyResult === "needs-human") {
      throw new Error(
        "Perplexity verification required — please solve it manually in the browser window",
      );
    }
    await dismissConsent(tab, cdp);

    // After a clicked verification the page may have redirected — let it
    // settle, then go back to the homepage if we ended up somewhere else.
    if (verifyResult === "clicked") {
      await sleep(TIMING.afterVerify);
      const postVerifyUrl = await cdp([
        "eval",
        tab,
        "document.location.href",
      ]).catch(() => "");
      if (!isPerplexityUrl(postVerifyUrl)) {
        await cdp(["nav", tab, homeUrl], 20000);
        await sleep(800);
        await dismissConsent(tab, cdp);
      }
    }

    // Wait for the input to mount (up to 15s — allows for a challenge
    // redirect plus hydration). There is deliberately no fast-fail on
    // Cloudflare detection here: handleVerification above either clicks
    // through or reports needs-human.
    const inputReady = await waitForSelector(tab, S.input, 15000, 400);
    env.inputReady = inputReady;

    if (!inputReady) {
      throw new Error(
        "Perplexity input not found — page may not have loaded or is in unexpected state",
      );
    }

    await sleep(jitter(300));

    await injectClipboardInterceptor(tab, GLOBAL_VAR);
    await cdp(["click", tab, S.input]);
    await sleep(jitter(400));

    // Type via focus + execCommand('insertText'). This dispatches the input
    // event that React's controlled-input onChange listens for. Raw text
    // insertion (Input.insertText) does not dispatch those events, which
    // caused the query to not register in all-mode under CDP contention.
    // Retry up to 3 times — execCommand can fail if the input isn't fully
    // focused yet.
    let typeResult;
    for (let attempt = 0; attempt < 3; attempt++) {
      typeResult = await cdp(
        [
          "eval",
          tab,
          `(() => {
            try {
              const input = document.querySelector('${S.input}');
              if (!input) return 'no-input';
              input.focus();
              if (document.activeElement !== input) {
                const activeTag = document.activeElement?.tagName || 'none';
                const activeClass = (document.activeElement?.className || '').slice(0, 80);
                return 'not-focused:active=' + activeTag + '.' + activeClass;
              }
              const ok = document.execCommand('insertText', false, ${JSON.stringify(query)});
              return ok ? 'ok' : 'exec-failed';
            } catch (e) { return 'err:' + e.message; }
          })()`,
        ],
        5000,
      );
      if (typeResult === "ok") break;
      // On not-focused, try clicking the input first to force focus
      if (String(typeResult).startsWith("not-focused")) {
        await cdp(["click", tab, S.input]).catch(() => {});
      }
      await sleep(800);
    }
    if (typeResult !== "ok") {
      throw new Error(`Perplexity type failed: ${typeResult}`);
    }
    await sleep(jitter(400));

    // Submit with Enter — dispatch a KeyboardEvent on the input so the page's
    // keydown handler fires. keyCode:13 is included for compatibility.
    const submitResult = await cdp([
      "eval",
      tab,
      `(() => {
        const input = document.querySelector('${S.input}');
        if (!input) return 'no-input';
        input.focus();
        const ev = new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', keyCode: 13, which: 13, bubbles: true, cancelable: true });
        input.dispatchEvent(ev);
        return 'ok';
      })()`,
    ]);
    if (submitResult === "no-input") {
      // Nothing was submitted; waiting for a stream would only end in a
      // misleading "clipboard empty" failure 20s later.
      throw new Error(
        "Perplexity input disappeared before submit — query was not sent",
      );
    }

    await waitForStreamComplete(tab, {
      timeout: 20000,
      interval: 600,
      stableRounds: 5,
      minLength: 50,
      selector: "document.body",
    });

    // Detect Perplexity's free-search-limit wall. The wall text is localized,
    // so detection is structural: a [dialog] marker in the snapshot, an
    // upgrade-related word, and no answer marker.
    // NOTE(review): this gate reads GREEDY_SEARCH_HEADLESS while `mode` above
    // is derived from GREEDY_SEARCH_VISIBLE — confirm both variables are set
    // consistently by the launcher, otherwise the check may never run.
    if (process.env.GREEDY_SEARCH_HEADLESS === "1") {
      const postSnap = await cdp(["snap", tab]).catch(() => "");
      const hasDialog = /\[dialog\]/i.test(postSnap);
      const mentionsUpgrade = /Pro|αναβάθμιση|upgrade/i.test(postSnap);
      const hasAnswerMarker = /\.prose|\[article\]/i.test(postSnap);
      if (hasDialog && mentionsUpgrade && !hasAnswerMarker) {
        console.error(
          "[perplexity] Rate Limited — skipping (visible retry won't help)",
        );
        env.blockedBy = "rate-limit";
        throw new Error(
          "Rate Limited — Perplexity free search limit reached. Wait a few hours.",
        );
      }
    }

    const { answer, sources } = await extractAnswer(tab, env);

    if (!answer)
      throw new Error(
        "No answer extracted — Perplexity may not have responded",
      );

    const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
      () => "",
    );
    env.durationMs = Date.now() - startTime;
    outputJson({
      query,
      url: finalUrl,
      answer: formatAnswer(answer, short),
      sources,
      _envelope: buildEnvelope(env),
    });
  } catch (e) {
    env.durationMs = Date.now() - startTime;
    handleError(e, buildEnvelope(env));
  }
}
|
|
546
|
+
|
|
547
|
+
main();
|