@apmantza/greedysearch-pi 2.0.0 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,217 +1,547 @@
1
- #!/usr/bin/env node
2
-
3
- // extractors/perplexity.mjs
4
- // Navigate Perplexity, wait for streaming to complete, return clean answer + sources.
5
- //
6
- // Usage:
7
- // node extractors/perplexity.mjs "<query>" [--tab <prefix>]
8
- //
9
- // Output (stdout): JSON { answer, sources, query, url }
10
- // Errors go to stderr only — stdout is always clean JSON for piping.
11
- //
12
- // TODO: Refactor - this file has 42 lines duplicated with google-ai.mjs (line 28)
13
-
14
- import {
15
- buildEnvelope,
16
- cdp,
17
- formatAnswer,
18
- getOrOpenTab,
19
- handleError,
20
- injectClipboardInterceptor,
21
- jitter,
22
- outputJson,
23
- parseArgs,
24
- parseSourcesFromMarkdown,
25
- prepareArgs,
26
- TIMING,
27
- validateQuery,
28
- waitForSelector,
29
- waitForStreamComplete,
30
- } from "./common.mjs";
31
- import { dismissConsent, handleVerification } from "./consent.mjs";
32
- import { SELECTORS } from "./selectors.mjs";
33
-
34
- const S = SELECTORS.perplexity;
35
- const GLOBAL_VAR = "__pplxClipboard";
36
-
37
- // ============================================================================
38
- // Language-agnostic copy button finder
39
- // ============================================================================
40
-
41
- function findCopyButtonJsExpression() {
42
- // Perplexity uses SVG icons via <use xlink:href="#pplx-icon-copy">
43
- // This works across all locales since it doesn't depend on aria-label text
44
- // Use .pop() to get the last matching button (the answer copy button),
45
- // not the first one which is the question copy button
46
- return `Array.from(document.querySelectorAll('button')).filter(b => b.innerHTML.includes('#pplx-icon-copy')).pop()`;
47
- }
48
-
49
- // ============================================================================
50
- // Extraction
51
- // ============================================================================
52
-
53
- async function extractAnswer(tab, env) {
54
- const copyBtnExpr = findCopyButtonJsExpression();
55
-
56
- await cdp(["eval", tab, `${copyBtnExpr}?.click()`]);
57
- await new Promise((r) => setTimeout(r, 400));
58
-
59
- let answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
60
- env.clipboardEmpty = !answer;
61
-
62
- // Retry once if clipboard is empty (Perplexity might be slow to write)
63
- if (!answer) {
64
- console.error("[perplexity] Clipboard empty, retrying in 2s...");
65
- await cdp(["eval", tab, `${copyBtnExpr}?.click()`]);
66
- await new Promise((r) => setTimeout(r, 2000));
67
- answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
68
- env.clipboardEmpty = !answer;
69
- }
70
-
71
- if (!answer) throw new Error("Clipboard interceptor returned empty text");
72
-
73
- const sources = parseSourcesFromMarkdown(answer);
74
- return { answer: answer.trim(), sources };
75
- }
76
-
77
- // ============================================================================
78
- // Main
79
- // ============================================================================
80
-
81
- const USAGE =
82
- 'Usage: node extractors/perplexity.mjs "<query>" [--tab <prefix>]\n';
83
-
84
- async function main() {
85
- const args = await prepareArgs(process.argv.slice(2));
86
- validateQuery(args, USAGE);
87
-
88
- const { query, tabPrefix, short } = parseArgs(args);
89
- const startTime = Date.now();
90
- const mode =
91
- process.env.GREEDY_SEARCH_VISIBLE === "1" ? "visible" : "headless";
92
-
93
- const env = {
94
- engine: "perplexity",
95
- mode,
96
- clipboardEmpty: null,
97
- fallbackUsed: null,
98
- blockedBy: null,
99
- verificationResult: null,
100
- inputReady: null,
101
- };
102
-
103
- try {
104
- // Only refresh page list when creating a fresh tab (no prefix provided)
105
- if (!tabPrefix) await cdp(["list"]);
106
-
107
- const tab = await getOrOpenTab(tabPrefix);
108
-
109
- // Skip navigation if already on Perplexity domain (tab was seeded by search.mjs)
110
- const currentUrl = await cdp(["eval", tab, "document.location.href"]).catch(
111
- () => "",
112
- );
113
- let onPerplexity = false;
114
- try {
115
- const host = new URL(currentUrl).hostname.toLowerCase();
116
- onPerplexity =
117
- host === "perplexity.ai" || host.endsWith(".perplexity.ai");
118
- } catch {}
119
-
120
- if (!onPerplexity) {
121
- await cdp(["nav", tab, "https://www.perplexity.ai/"], 20000);
122
- await new Promise((r) => setTimeout(r, 800));
123
- }
124
- // Handle verification challenges (Cloudflare Turnstile, etc.)
125
- const verifyResult = await handleVerification(tab, cdp, 10000);
126
- env.verificationResult = verifyResult;
127
- if (verifyResult === "needs-human") {
128
- throw new Error(
129
- "Perplexity verification required please solve it manually in the browser window",
130
- );
131
- }
132
- await dismissConsent(tab, cdp);
133
-
134
- // After verification, page may have redirected — wait for it to settle
135
- // then re-navigate to homepage if we ended up somewhere else.
136
- if (verifyResult === "clicked") {
137
- await new Promise((r) => setTimeout(r, TIMING.afterVerify));
138
- const postVerifyUrl = await cdp(["eval", tab, "document.location.href"]).catch(() => "");
139
- let onPerplexityAfter = false;
140
- try {
141
- const host = new URL(postVerifyUrl).hostname.toLowerCase();
142
- onPerplexityAfter = host === "perplexity.ai" || host.endsWith(".perplexity.ai");
143
- } catch {}
144
- if (!onPerplexityAfter) {
145
- await cdp(["nav", tab, "https://www.perplexity.ai/"], 20000);
146
- await new Promise((r) => setTimeout(r, 800));
147
- await dismissConsent(tab, cdp);
148
- }
149
- }
150
-
151
- // In headless mode: snap the accessibility tree to detect Cloudflare
152
- // before burning the selector wait. Perplexity is CF-protected in headless
153
- // just like Bing; fast-failing here triggers the visible retry.
154
- if (process.env.GREEDY_SEARCH_HEADLESS === "1") {
155
- const snap = await cdp(["snap", tab]).catch(() => "");
156
- if (/cloudflare|challenge|security check/i.test(snap)) {
157
- console.error("[perplexity] Cloudflare challenge in snap — fast-failing to visible retry");
158
- env.blockedBy = "cloudflare";
159
- throw new Error("Cloudflare challenge detected — headless blocked");
160
- }
161
- }
162
-
163
- // Wait for React app to mount input (up to 15s — gives CF redirect + hydration time)
164
- const inputReady = await waitForSelector(tab, S.input, 15000, 400);
165
- env.inputReady = inputReady;
166
-
167
- if (!inputReady) {
168
- throw new Error("Perplexity input not found — page may not have loaded or is in unexpected state");
169
- }
170
-
171
- await new Promise((r) => setTimeout(r, jitter(300)));
172
-
173
- await injectClipboardInterceptor(tab, GLOBAL_VAR);
174
- await cdp(["click", tab, S.input]);
175
- await new Promise((r) => setTimeout(r, jitter(400)));
176
- await cdp(["type", tab, query]);
177
- await new Promise((r) => setTimeout(r, jitter(400)));
178
-
179
- // Submit with Enter (most reliable across Chrome instances)
180
- await cdp([
181
- "eval",
182
- tab,
183
- `document.querySelector('${S.input}')?.dispatchEvent(new KeyboardEvent('keydown',{key:'Enter',bubbles:true,keyCode:13})), 'ok'`,
184
- ]);
185
-
186
- await waitForStreamComplete(tab, {
187
- timeout: 20000,
188
- interval: 600,
189
- stableRounds: 3,
190
- selector: "document.body",
191
- });
192
-
193
- const { answer, sources } = await extractAnswer(tab, env);
194
-
195
- if (!answer)
196
- throw new Error(
197
- "No answer extracted — Perplexity may not have responded",
198
- );
199
-
200
- const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
201
- () => "",
202
- );
203
- env.durationMs = Date.now() - startTime;
204
- outputJson({
205
- query,
206
- url: finalUrl,
207
- answer: formatAnswer(answer, short),
208
- sources,
209
- _envelope: buildEnvelope(env),
210
- });
211
- } catch (e) {
212
- env.durationMs = Date.now() - startTime;
213
- handleError(e, buildEnvelope(env));
214
- }
215
- }
216
-
217
- main();
1
+ #!/usr/bin/env node
2
+
3
+ // extractors/perplexity.mjs
4
+ // Navigate Perplexity, wait for streaming to complete, return clean answer + sources.
5
+ //
6
+ // Usage:
7
+ // node extractors/perplexity.mjs "<query>" [--tab <prefix>]
8
+ //
9
+ // Output (stdout): JSON { answer, sources, query, url }
10
+ // Errors go to stderr only — stdout is always clean JSON for piping.
11
+ //
12
+ // TODO: Refactor - this file has 42 lines duplicated with google-ai.mjs (line 28)
13
+
14
+ import {
15
+ buildEnvelope,
16
+ cdp,
17
+ formatAnswer,
18
+ getOrOpenTab,
19
+ handleError,
20
+ injectClipboardInterceptor,
21
+ jitter,
22
+ outputJson,
23
+ parseArgs,
24
+ parseSourcesFromMarkdown,
25
+ prepareArgs,
26
+ TIMING,
27
+ validateQuery,
28
+ waitForSelector,
29
+ waitForStreamComplete,
30
+ } from "./common.mjs";
31
+ import { dismissConsent, handleVerification } from "./consent.mjs";
32
+ import { SELECTORS } from "./selectors.mjs";
33
+
34
+ const S = SELECTORS.perplexity;
35
+ const GLOBAL_VAR = "__pplxClipboard";
36
+
37
+ // ============================================================================
38
+ // Language-agnostic copy button finder
39
+ // ============================================================================
40
+
41
+ function findCopyButtonJsExpression() {
42
+ // Perplexity uses SVG icons via <use xlink:href="#pplx-icon-copy">
43
+ // This works across all locales since it doesn't depend on aria-label text
44
+ // Use .pop() to get the last matching button (the answer copy button),
45
+ // not the first one which is the question copy button
46
+ return `Array.from(document.querySelectorAll('button')).filter(b => b.innerHTML.includes('#pplx-icon-copy')).pop()`;
47
+ }
48
+
49
+ // ============================================================================
50
+ // DOM fallback — read answer + sources when clipboard interceptor fails
51
+ // ============================================================================
52
+
53
+ async function extractAnswerFromDom(tab, env) {
54
+ // Heuristic for what counts as a real answer text (not a header stub
55
+ // like "Next.jsReactNext.js"): either substantial (>50 chars) or a
56
+ // short factual answer (>=5 chars and contains whitespace or
57
+ // punctuation, i.e. it's a phrase, not a concatenated string).
58
+ function _looksLikeAnswerText(text) {
59
+ const t = (text || "").trim();
60
+ if (t.length > 50) return true;
61
+ return t.length >= 5 && /\s|[.,!?;:]/.test(t);
62
+ }
63
+
64
+ // First wait for the page to navigate to a search results URL (perplexity.ai/search/...)
65
+ // The homepage has a sidebar with nav items that would be falsely picked up as the answer.
66
+ const navResult = await cdp(
67
+ [
68
+ "eval",
69
+ tab,
70
+ `new Promise((resolve) => {
71
+ const _deadline = Date.now() + 8000;
72
+ function _checkNav() {
73
+ const url = document.location.href;
74
+ if (url.includes('/search/') || url.includes('/thread/') || url.match(/perplexity.ai\\/[^/]+/)) {
75
+ resolve('navigated');
76
+ } else if (Date.now() < _deadline) {
77
+ setTimeout(_checkNav, 300);
78
+ } else {
79
+ resolve('timeout');
80
+ }
81
+ }
82
+ _checkNav();
83
+ })`,
84
+ ],
85
+ 10000,
86
+ ).catch(() => "timeout");
87
+
88
+ if (navResult === "timeout") {
89
+ // Page never navigated to a search URL — answer extraction will be unreliable
90
+ return null;
91
+ }
92
+
93
+ // Perplexity renders the answer in a prose container after the user message.
94
+ // First wait for the answer to actually appear (up to 5s), then extract it.
95
+ // Note: the _looksLikeAnswerText helper is duplicated inside the browser-side
96
+ // eval string below (page code cannot call the Node-side function).
97
+ const domExtract = await cdp(
98
+ [
99
+ "eval",
100
+ tab,
101
+ `new Promise((resolve) => {
102
+ const _deadline = Date.now() + 5000;
103
+ function _looksLikeAnswerText(text) {
104
+ const t = (text || '').trim();
105
+ if (t.length > 50) return true;
106
+ return t.length >= 5 && /\\s|[.,!?;:]/.test(t);
107
+ }
108
+ function _tryExtract() {
109
+ try {
110
+ // Strategy 1: Find .prose block that's NOT the question
111
+ // and NOT in the sidebar/nav. The answer is the last .prose
112
+ // that contains substantial text and is in the main content area.
113
+ const proseBlocks = Array.from(document.querySelectorAll('.prose, [class*="prose"]'));
114
+ const candidates = proseBlocks.filter(el => {
115
+ const text = el.innerText?.trim() || '';
116
+ if (!_looksLikeAnswerText(text)) return false;
117
+ // Exclude sidebar/nav (they're usually in <nav> or <aside> or have specific classes)
118
+ if (el.closest('nav, aside, [role="navigation"], [class*="sidebar"], [class*="nav-"]')) return false;
119
+ return true;
120
+ });
121
+ if (candidates.length > 0) {
122
+ const last = candidates[candidates.length - 1];
123
+ return resolve(JSON.stringify({ answer: last.innerText.trim(), method: 'prose' }));
124
+ }
125
+
126
+ // Strategy 2: Look for the answer container by data attributes
127
+ // Perplexity uses [data-testid*="answer"] or [class*="answer-content"]
128
+ const answerContainer = document.querySelector('[data-testid*="answer"], [class*="answer-content"], [class*="response-content"]');
129
+ if (answerContainer && _looksLikeAnswerText(answerContainer.innerText?.trim())) {
130
+ return resolve(JSON.stringify({ answer: answerContainer.innerText.trim(), method: 'answer-container' }));
131
+ }
132
+
133
+ // Strategy 3: Find the largest text block in the main content area
134
+ // (not in nav/aside/sidebar), positioned after the input.
135
+ const input = document.querySelector('${S.input}');
136
+ if (!input) return resolve(null);
137
+ const inputRect = input.getBoundingClientRect();
138
+ const main = document.querySelector('main, [role="main"], [class*="main-content"]') || document.body;
139
+ const blocks = Array.from(main.querySelectorAll('div, article, section'))
140
+ .filter(d => {
141
+ const r = d.getBoundingClientRect();
142
+ if (r.top <= inputRect.bottom) return false; // not below input
143
+ if (r.width === 0 || r.height === 0) return false; // not visible
144
+ if (d.closest('nav, aside, [role="navigation"], [class*="sidebar"]')) return false; // not in nav
145
+ const text = d.innerText?.trim() || '';
146
+ return _looksLikeAnswerText(text) && d.children.length < 20;
147
+ })
148
+ .sort((a, b) => (b.innerText?.length || 0) - (a.innerText?.length || 0));
149
+ if (blocks.length > 0) {
150
+ return resolve(JSON.stringify({ answer: blocks[0].innerText.trim(), method: 'main-content' }));
151
+ }
152
+
153
+ // Retry if we haven't found anything yet
154
+ if (Date.now() < _deadline) {
155
+ setTimeout(_tryExtract, 400);
156
+ } else {
157
+ resolve(null);
158
+ }
159
+ } catch(e) { resolve(null); }
160
+ }
161
+ _tryExtract();
162
+ })`,
163
+ ],
164
+ 8000,
165
+ ).catch(() => null);
166
+
167
+ if (!domExtract || domExtract === "null") return null;
168
+
169
+ try {
170
+ const { answer, method } = JSON.parse(domExtract);
171
+ if (answer && _looksLikeAnswerText(answer)) {
172
+ env.fallbackUsed = `dom:${method}`;
173
+ env.clipboardEmpty = true;
174
+ // Try to extract sources from links near the answer
175
+ const sourcesExtract = await cdp(
176
+ [
177
+ "eval",
178
+ tab,
179
+ `(() => {
180
+ const links = Array.from(document.querySelectorAll('a[href^="https://"]'))
181
+ .filter(a => {
182
+ const href = a.href || '';
183
+ return !href.includes('perplexity.ai') && !href.includes('google.com') && !href.includes('gstatic');
184
+ })
185
+ .slice(0, 10)
186
+ .map(a => ({ title: a.innerText?.trim() || a.href, url: a.href }));
187
+ return JSON.stringify(links);
188
+ })()`,
189
+ ],
190
+ 3000,
191
+ ).catch(() => "[]");
192
+ let sources = [];
193
+ try {
194
+ sources = JSON.parse(sourcesExtract || "[]");
195
+ } catch {}
196
+ return { answer, sources };
197
+ }
198
+ } catch {}
199
+ return null;
200
+ }
201
+
202
+ // ============================================================================
203
+ // Extraction
204
+ // ============================================================================
205
+
206
+ async function extractAnswer(tab, env) {
207
+ const copyBtnExpr = findCopyButtonJsExpression();
208
+
209
+ await cdp(["eval", tab, `${copyBtnExpr}?.click()`]);
210
+ await new Promise((r) => setTimeout(r, 400));
211
+
212
+ let answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
213
+ env.clipboardEmpty = !answer;
214
+
215
+ // Retry once if clipboard is empty (Perplexity might be slow to write)
216
+ if (!answer) {
217
+ console.error("[perplexity] Clipboard empty, retrying in 2s...");
218
+ await cdp(["eval", tab, `${copyBtnExpr}?.click()`]);
219
+ await new Promise((r) => setTimeout(r, 2000));
220
+ answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
221
+ env.clipboardEmpty = !answer;
222
+ }
223
+
224
+ // Reject suspicious answers: the user's query echoed back, or a copy
225
+ // button click that landed on the question (not the answer) copy
226
+ // button. Both manifest as a clipboard write that contains the query
227
+ // text — the old path treated it as a valid answer and the synthesis
228
+ // would silently include a paraphrased-query result.
229
+ if (env.query && answer) {
230
+ const queryNorm = env.query.toLowerCase().trim();
231
+ const answerNorm = answer.toLowerCase().trim();
232
+ if (
233
+ answerNorm === queryNorm ||
234
+ answer.trim().length < Math.max(20, queryNorm.length * 0.5)
235
+ ) {
236
+ console.error(
237
+ `[perplexity] Clipboard contains query echo or stub (${answer.length} chars), retrying with longer wait...`,
238
+ );
239
+ env.clipboardEmpty = true;
240
+ answer = "";
241
+ }
242
+ }
243
+
244
+ // DOM fallback: when clipboard interception fails (intermittent in headless),
245
+ // read the answer from the page DOM instead of triggering visible recovery.
246
+ if (!answer) {
247
+ console.error("[perplexity] Clipboard empty — trying DOM fallback...");
248
+ const domResult = await extractAnswerFromDom(tab, env);
249
+ if (domResult) {
250
+ console.error(
251
+ `[perplexity] DOM fallback succeeded (${env.fallbackUsed})`,
252
+ );
253
+ return domResult;
254
+ }
255
+ throw new Error("Clipboard interceptor returned empty text");
256
+ }
257
+
258
+ const sources = parseSourcesFromMarkdown(answer);
259
+ return { answer: answer.trim(), sources };
260
+ }
261
+
262
+ // ============================================================================
263
+ // Main
264
+ // ============================================================================
265
+
266
+ const USAGE =
267
+ 'Usage: node extractors/perplexity.mjs "<query>" [--tab <prefix>]\n';
268
+
269
+ async function main() {
270
+ const args = await prepareArgs(process.argv.slice(2));
271
+ validateQuery(args, USAGE);
272
+
273
+ const { query, tabPrefix, short } = parseArgs(args);
274
+ const startTime = Date.now();
275
+ const mode =
276
+ process.env.GREEDY_SEARCH_VISIBLE === "1" ? "visible" : "headless";
277
+
278
+ const env = {
279
+ engine: "perplexity",
280
+ mode,
281
+ clipboardEmpty: null,
282
+ fallbackUsed: null,
283
+ blockedBy: null,
284
+ verificationResult: null,
285
+ inputReady: null,
286
+ // Carry the original query into extractAnswer so it can reject
287
+ // answers that look like query-echo (a copy button click on the
288
+ // question's icon instead of the answer's) without needing to
289
+ // thread query through every helper.
290
+ query,
291
+ };
292
+
293
+ try {
294
+ // Only refresh page list when creating a fresh tab (no prefix provided)
295
+ if (!tabPrefix) await cdp(["list"]);
296
+
297
+ const tab = await getOrOpenTab(tabPrefix);
298
+
299
+ // Skip navigation if already on Perplexity domain (tab was seeded by search.mjs)
300
+ const currentUrl = await cdp(["eval", tab, "document.location.href"]).catch(
301
+ () => "",
302
+ );
303
+ let onPerplexity = false;
304
+ try {
305
+ const host = new URL(currentUrl).hostname.toLowerCase();
306
+ onPerplexity =
307
+ host === "perplexity.ai" || host.endsWith(".perplexity.ai");
308
+ } catch {}
309
+
310
+ if (!onPerplexity) {
311
+ await cdp(["nav", tab, "https://www.perplexity.ai/"], 20000);
312
+ // Wait for the React app to hydrate and make the input visible.
313
+ // In all-mode under CDP contention, the input element exists but
314
+ // its first 5 parent DIVs have visibility:hidden — focus()
315
+ // silently fails. Force the parents to visibility:visible, then
316
+ // poll up to 15s for the input to be focusable.
317
+ const _inputReady = await cdp(
318
+ [
319
+ "eval",
320
+ tab,
321
+ `new Promise((resolve) => {
322
+ const _deadline = Date.now() + 15000;
323
+ function _check() {
324
+ const input = document.querySelector('${S.input}');
325
+ if (input) {
326
+ // Force visibility on all parents up to body —
327
+ // Perplexity hides the first 5 wrapper DIVs until
328
+ // the user interacts with the page
329
+ let el = input;
330
+ while (el && el !== document.body) {
331
+ if (window.getComputedStyle(el).visibility === 'hidden') {
332
+ el.style.visibility = 'visible';
333
+ }
334
+ el = el.parentElement;
335
+ }
336
+ input.focus();
337
+ if (document.activeElement === input) return resolve('ready');
338
+ }
339
+ if (Date.now() < _deadline) setTimeout(_check, 500);
340
+ else resolve('timeout');
341
+ }
342
+ _check();
343
+ })`,
344
+ ],
345
+ 18000,
346
+ ).catch(() => "timeout");
347
+ if (_inputReady !== "ready") {
348
+ // Retry navigation up to 2 more times — the first nav may have
349
+ // been preempted by CDP contention in all-mode
350
+ for (let retry = 0; retry < 2; retry++) {
351
+ await cdp(["nav", tab, "https://www.perplexity.ai/"], 20000);
352
+ await new Promise((r) => setTimeout(r, 2000));
353
+ const _retryReady = await cdp(
354
+ [
355
+ "eval",
356
+ tab,
357
+ `(() => {
358
+ const input = document.querySelector('${S.input}');
359
+ if (!input) return false;
360
+ let el = input;
361
+ while (el && el !== document.body) {
362
+ if (window.getComputedStyle(el).visibility === 'hidden') {
363
+ el.style.visibility = 'visible';
364
+ }
365
+ el = el.parentElement;
366
+ }
367
+ input.focus();
368
+ return document.activeElement === input;
369
+ })()`,
370
+ ],
371
+ 5000,
372
+ ).catch(() => false);
373
+ if (_retryReady === "true") break;
374
+ }
375
+ } else {
376
+ await new Promise((r) => setTimeout(r, 600));
377
+ }
378
+ }
379
+ // Handle verification challenges (Cloudflare Turnstile, etc.)
380
+ const verifyResult = await handleVerification(tab, cdp, 10000);
381
+ env.verificationResult = verifyResult;
382
+ if (verifyResult === "needs-human") {
383
+ throw new Error(
384
+ "Perplexity verification required — please solve it manually in the browser window",
385
+ );
386
+ }
387
+ await dismissConsent(tab, cdp);
388
+
389
+ // After verification, page may have redirected — wait for it to settle
390
+ // then re-navigate to homepage if we ended up somewhere else.
391
+ if (verifyResult === "clicked") {
392
+ await new Promise((r) => setTimeout(r, TIMING.afterVerify));
393
+ const postVerifyUrl = await cdp([
394
+ "eval",
395
+ tab,
396
+ "document.location.href",
397
+ ]).catch(() => "");
398
+ let onPerplexityAfter = false;
399
+ try {
400
+ const host = new URL(postVerifyUrl).hostname.toLowerCase();
401
+ onPerplexityAfter =
402
+ host === "perplexity.ai" || host.endsWith(".perplexity.ai");
403
+ } catch {}
404
+ if (!onPerplexityAfter) {
405
+ await cdp(["nav", tab, "https://www.perplexity.ai/"], 20000);
406
+ await new Promise((r) => setTimeout(r, 800));
407
+ await dismissConsent(tab, cdp);
408
+ }
409
+ }
410
+
411
+ // Wait for React app to mount input (up to 15s — gives CF redirect + hydration time)
412
+ // Note: we no longer fast-fail on Cloudflare detection here because the
413
+ // new CDP-pierce + browser-level-click path in handleVerification can
414
+ // auto-clear the Turnstile checkbox from a fresh headless session. The
415
+ // handleVerification() call above either clicks through or surfaces
416
+ // needs-human (which throws) before this point is reached.
417
+ const inputReady = await waitForSelector(tab, S.input, 15000, 400);
418
+ env.inputReady = inputReady;
419
+
420
+ if (!inputReady) {
421
+ throw new Error(
422
+ "Perplexity input not found — page may not have loaded or is in unexpected state",
423
+ );
424
+ }
425
+
426
+ await new Promise((r) => setTimeout(r, jitter(300)));
427
+
428
+ await injectClipboardInterceptor(tab, GLOBAL_VAR);
429
+ await cdp(["click", tab, S.input]);
430
+ await new Promise((r) => setTimeout(r, jitter(400)));
431
+
432
+ // Type via execCommand + focus. This triggers React's onChange
433
+ // (via the synthetic input event) in a way that Input.insertText
434
+ // cannot — Input.insertText sends raw text but doesn't dispatch
435
+ // the events that React's controlled-input system listens for.
436
+ // Using Input.insertText causes the query to not register in all-mode under CDP contention.
437
+ // Retry up to 3 times — execCommand can fail if the input isn't
438
+ // fully focused yet (common under CDP contention in all-mode).
439
+ let typeResult;
440
+ for (let attempt = 0; attempt < 3; attempt++) {
441
+ typeResult = await cdp(
442
+ [
443
+ "eval",
444
+ tab,
445
+ `(() => {
446
+ try {
447
+ const input = document.querySelector('${S.input}');
448
+ if (!input) return 'no-input';
449
+ input.focus();
450
+ if (document.activeElement !== input) {
451
+ const activeTag = document.activeElement?.tagName || 'none';
452
+ const activeClass = (document.activeElement?.className || '').slice(0, 80);
453
+ return 'not-focused:active=' + activeTag + '.' + activeClass;
454
+ }
455
+ // execCommand('insertText') dispatches the proper input
456
+ // event that React's onChange listens for
457
+ const ok = document.execCommand('insertText', false, ${JSON.stringify(query)});
458
+ return ok ? 'ok' : 'exec-failed';
459
+ } catch (e) { return 'err:' + e.message; }
460
+ })()`,
461
+ ],
462
+ 5000,
463
+ );
464
+ if (typeResult === "ok") break;
465
+ // On not-focused, try clicking the input first to force focus
466
+ if (String(typeResult).startsWith("not-focused")) {
467
+ await cdp(["click", tab, S.input]).catch(() => {});
468
+ }
469
+ await new Promise((r) => setTimeout(r, 800));
470
+ }
471
+ if (typeResult !== "ok") {
472
+ throw new Error(`Perplexity type failed: ${typeResult}`);
473
+ }
474
+ await new Promise((r) => setTimeout(r, jitter(400)));
475
+
476
+ // Submit with Enter — use a real KeyboardEvent on the input so React's
477
+ // keydown handler fires. keyCode:13 is needed for compatibility.
478
+ await cdp([
479
+ "eval",
480
+ tab,
481
+ `(() => {
482
+ const input = document.querySelector('${S.input}');
483
+ if (!input) return 'no-input';
484
+ input.focus();
485
+ const ev = new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', keyCode: 13, which: 13, bubbles: true, cancelable: true });
486
+ input.dispatchEvent(ev);
487
+ return 'ok';
488
+ })()`,
489
+ ]);
490
+
491
+ await waitForStreamComplete(tab, {
492
+ timeout: 20000,
493
+ interval: 600,
494
+ stableRounds: 5,
495
+ minLength: 50,
496
+ selector: "document.body",
497
+ });
498
+
499
+ // Detect Perplexity's free-search-limit wall. Shown as a [dialog]
500
+ // in the accessibility tree after hitting the rate limit. The wall
501
+ // text is localized (Greek, English, etc.) so we detect the
502
+ // structural [dialog] marker combined with the Upgrade button
503
+ // (αναβάθμιση/upgrade/Pro). Visible-mode cookies can't bypass
504
+ // this — it's account-level, not session-level.
505
+ if (process.env.GREEDY_SEARCH_HEADLESS === "1") {
506
+ const postSnap = await cdp(["snap", tab]).catch(() => "");
507
+ // [dialog] + upgrade-related button + no answer prose = rate-limit wall
508
+ if (
509
+ /\[dialog\]/i.test(postSnap) &&
510
+ /Pro|αναβάθμιση|upgrade/i.test(postSnap) &&
511
+ !/\.prose|\[article\]/i.test(postSnap)
512
+ ) {
513
+ console.error(
514
+ "[perplexity] Rate Limited — skipping (visible retry won't help)",
515
+ );
516
+ env.blockedBy = "rate-limit";
517
+ throw new Error(
518
+ "Rate Limited — Perplexity free search limit reached. Wait a few hours.",
519
+ );
520
+ }
521
+ }
522
+
523
+ const { answer, sources } = await extractAnswer(tab, env);
524
+
525
+ if (!answer)
526
+ throw new Error(
527
+ "No answer extracted — Perplexity may not have responded",
528
+ );
529
+
530
+ const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
531
+ () => "",
532
+ );
533
+ env.durationMs = Date.now() - startTime;
534
+ outputJson({
535
+ query,
536
+ url: finalUrl,
537
+ answer: formatAnswer(answer, short),
538
+ sources,
539
+ _envelope: buildEnvelope(env),
540
+ });
541
+ } catch (e) {
542
+ env.durationMs = Date.now() - startTime;
543
+ handleError(e, buildEnvelope(env));
544
+ }
545
+ }
546
+
547
+ main();