@apmantza/greedysearch-pi 1.8.2 → 1.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/README.md +10 -1
- package/bin/launch.mjs +366 -366
- package/bin/search.mjs +388 -388
- package/extractors/common.mjs +291 -291
- package/extractors/gemini.mjs +146 -146
- package/extractors/google-ai.mjs +125 -125
- package/extractors/perplexity.mjs +147 -145
- package/extractors/selectors.mjs +54 -54
- package/index.ts +256 -278
- package/package.json +1 -1
- package/src/github.mjs +237 -237
- package/src/reddit.mjs +210 -0
- package/src/search/chrome.mjs +222 -222
- package/src/search/constants.mjs +37 -37
- package/src/search/defaults.mjs +14 -14
- package/src/search/engines.mjs +62 -62
- package/src/search/fetch-source.mjs +35 -3
- package/src/search/output.mjs +58 -58
- package/src/search/sources.mjs +445 -445
- package/src/search/synthesis-runner.mjs +63 -63
- package/src/search/synthesis.mjs +223 -223
- package/src/tools/deep-research-handler.ts +36 -36
- package/src/tools/greedy-search-handler.ts +53 -57
- package/src/tools/shared.ts +135 -130
- package/src/types.ts +103 -103
- package/test.mjs +423 -377
|
@@ -1,145 +1,147 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
// extractors/perplexity.mjs
|
|
4
|
-
// Navigate Perplexity, wait for streaming to complete, return clean answer + sources.
|
|
5
|
-
//
|
|
6
|
-
// Usage:
|
|
7
|
-
// node extractors/perplexity.mjs "<query>" [--tab <prefix>]
|
|
8
|
-
//
|
|
9
|
-
// Output (stdout): JSON { answer, sources, query, url }
|
|
10
|
-
// Errors go to stderr only — stdout is always clean JSON for piping.
|
|
11
|
-
//
|
|
12
|
-
// TODO: Refactor - this file has 42 lines duplicated with google-ai.mjs (line 28)
|
|
13
|
-
|
|
14
|
-
import {
|
|
15
|
-
cdp,
|
|
16
|
-
formatAnswer,
|
|
17
|
-
getOrOpenTab,
|
|
18
|
-
handleError,
|
|
19
|
-
injectClipboardInterceptor,
|
|
20
|
-
outputJson,
|
|
21
|
-
parseArgs,
|
|
22
|
-
parseSourcesFromMarkdown,
|
|
23
|
-
validateQuery,
|
|
24
|
-
waitForStreamComplete,
|
|
25
|
-
} from "./common.mjs";
|
|
26
|
-
import { dismissConsent } from "./consent.mjs";
|
|
27
|
-
import { SELECTORS } from "./selectors.mjs";
|
|
28
|
-
|
|
29
|
-
const S = SELECTORS.perplexity;
|
|
30
|
-
const GLOBAL_VAR = "__pplxClipboard";
|
|
31
|
-
|
|
32
|
-
// ============================================================================
|
|
33
|
-
// Language-agnostic copy button finder
|
|
34
|
-
// ============================================================================
|
|
35
|
-
|
|
36
|
-
function findCopyButtonJsExpression() {
|
|
37
|
-
// Perplexity uses SVG icons via <use xlink:href="#pplx-icon-copy">
|
|
38
|
-
// This works across all locales since it doesn't depend on aria-label text
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
// ============================================================================
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
// ============================================================================
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
await
|
|
105
|
-
|
|
106
|
-
await
|
|
107
|
-
await cdp(["
|
|
108
|
-
await new Promise((r) => setTimeout(r, 400));
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
)
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// extractors/perplexity.mjs
|
|
4
|
+
// Navigate Perplexity, wait for streaming to complete, return clean answer + sources.
|
|
5
|
+
//
|
|
6
|
+
// Usage:
|
|
7
|
+
// node extractors/perplexity.mjs "<query>" [--tab <prefix>]
|
|
8
|
+
//
|
|
9
|
+
// Output (stdout): JSON { answer, sources, query, url }
|
|
10
|
+
// Errors go to stderr only — stdout is always clean JSON for piping.
|
|
11
|
+
//
|
|
12
|
+
// TODO: Refactor - this file has 42 lines duplicated with google-ai.mjs (line 28)
|
|
13
|
+
|
|
14
|
+
import {
|
|
15
|
+
cdp,
|
|
16
|
+
formatAnswer,
|
|
17
|
+
getOrOpenTab,
|
|
18
|
+
handleError,
|
|
19
|
+
injectClipboardInterceptor,
|
|
20
|
+
outputJson,
|
|
21
|
+
parseArgs,
|
|
22
|
+
parseSourcesFromMarkdown,
|
|
23
|
+
validateQuery,
|
|
24
|
+
waitForStreamComplete,
|
|
25
|
+
} from "./common.mjs";
|
|
26
|
+
import { dismissConsent } from "./consent.mjs";
|
|
27
|
+
import { SELECTORS } from "./selectors.mjs";
|
|
28
|
+
|
|
29
|
+
const S = SELECTORS.perplexity;
|
|
30
|
+
const GLOBAL_VAR = "__pplxClipboard";
|
|
31
|
+
|
|
32
|
+
// ============================================================================
|
|
33
|
+
// Language-agnostic copy button finder
|
|
34
|
+
// ============================================================================
|
|
35
|
+
|
|
36
|
+
/**
 * Build the in-page JavaScript expression that locates Perplexity's answer
 * copy button.
 *
 * The returned text references `document`, so it is meant to be evaluated
 * inside the page rather than in Node. Buttons are matched by the
 * `#pplx-icon-copy` SVG icon reference in their markup instead of by
 * aria-label text, which keeps the lookup independent of the UI locale.
 *
 * @returns {string} JS source that evaluates to the last matching button, or
 *   to `undefined` when no button matches.
 */
function findCopyButtonJsExpression() {
  const iconRef = "#pplx-icon-copy";
  const everyButton = "Array.from(document.querySelectorAll('button'))";
  const hasCopyIcon = `b => b.innerHTML.includes('${iconRef}')`;

  // The first match is the question's copy button; the answer's copy button
  // is the last one, hence the trailing .pop().
  return `${everyButton}.filter(${hasCopyIcon}).pop()`;
}
|
|
43
|
+
|
|
44
|
+
// ============================================================================
|
|
45
|
+
// Extraction
|
|
46
|
+
// ============================================================================
|
|
47
|
+
|
|
48
|
+
/**
 * Click the answer's copy button in the page and read back the captured text.
 *
 * The text is read from `window[GLOBAL_VAR]`, the same global name that
 * `main` hands to `injectClipboardInterceptor` (presumably the interceptor
 * stores the copied text there; confirm in common.mjs).
 *
 * @param {*} tab - Tab handle forwarded unchanged to every `cdp` call.
 * @returns {Promise<{answer: string, sources: *}>} The trimmed answer text and
 *   whatever `parseSourcesFromMarkdown` returns for the untrimmed text.
 * @throws {Error} When the captured text is still empty after one retry.
 */
async function extractAnswer(tab) {
  const clickCopyExpr = `${findCopyButtonJsExpression()}?.click()`;
  const readCapturedExpr = `window.${GLOBAL_VAR} || ''`;

  // One attempt: click the copy button, give the page `waitMs` to write,
  // then read whatever was captured.
  const copyThenRead = async (waitMs) => {
    await cdp(["eval", tab, clickCopyExpr]);
    await new Promise((resolve) => setTimeout(resolve, waitMs));
    return cdp(["eval", tab, readCapturedExpr]);
  };

  let answer = await copyThenRead(400);

  if (!answer) {
    // Perplexity can be slow to write; try once more with a longer wait.
    console.error("[perplexity] Clipboard empty, retrying in 2s...");
    answer = await copyThenRead(2000);
  }

  if (!answer) {
    throw new Error("Clipboard interceptor returned empty text");
  }

  // Sources are parsed from the raw text before it is trimmed for output.
  const sources = parseSourcesFromMarkdown(answer);
  const trimmed = answer.trim();
  return { answer: trimmed, sources };
}
|
|
69
|
+
|
|
70
|
+
// ============================================================================
|
|
71
|
+
// Main
|
|
72
|
+
// ============================================================================
|
|
73
|
+
|
|
74
|
+
const USAGE =
|
|
75
|
+
'Usage: node extractors/perplexity.mjs "<query>" [--tab <prefix>]\n';
|
|
76
|
+
|
|
77
|
+
/**
 * CLI entry point: submit the query on the Perplexity homepage, wait for the
 * response to settle, then emit `{ query, url, answer, sources }` through
 * `outputJson`.
 *
 * Argument validation and parsing run before the `try`, so anything they
 * throw is not routed through `handleError`; every later failure is.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const argv = process.argv.slice(2);
  validateQuery(argv, USAGE);

  const { query, tabPrefix, short } = parseArgs(argv);
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  try {
    // Refresh page list so the cached tab list is current.
    await cdp(["list"]);

    const tab = await getOrOpenTab(tabPrefix);

    // Go through the homepage search box: direct ?q= URLs trigger a bot redirect.
    await cdp(["nav", tab, "https://www.perplexity.ai/"], 35000);
    await dismissConsent(tab, cdp);

    // Poll for up to 8s until the React app has mounted the input. A failed
    // probe counts as "not there yet"; running out of time is not an error
    // here, the flow simply continues.
    const inputExistsExpr = `!!document.querySelector('${S.input}')`;
    const mountDeadline = Date.now() + 8000;
    let inputMounted = false;
    while (!inputMounted && Date.now() < mountDeadline) {
      const probe = await cdp(["eval", tab, inputExistsExpr]).catch(
        () => "false",
      );
      inputMounted = probe === "true";
      if (!inputMounted) {
        await pause(400);
      }
    }
    await pause(300);

    // Install the interceptor before interacting with the page, then focus
    // the input and type the query.
    await injectClipboardInterceptor(tab, GLOBAL_VAR);
    await cdp(["click", tab, S.input]);
    await pause(400);
    await cdp(["type", tab, query]);
    await pause(400);

    // Submit with a synthetic Enter keydown (most reliable across Chrome instances).
    const pressEnterExpr = `document.querySelector('${S.input}')?.dispatchEvent(new KeyboardEvent('keydown',{key:'Enter',bubbles:true,keyCode:13})), 'ok'`;
    await cdp(["eval", tab, pressEnterExpr]);

    const streamOptions = {
      timeout: 30000,
      interval: 600,
      stableRounds: 3,
      selector: "document.body",
    };
    await waitForStreamComplete(tab, streamOptions);

    const extracted = await extractAnswer(tab);
    const { answer, sources } = extracted;

    if (!answer) {
      throw new Error(
        "No answer extracted — Perplexity may not have responded",
      );
    }

    // The final URL is optional: a failed lookup yields an empty string.
    const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
      () => "",
    );

    outputJson({
      query,
      url: finalUrl,
      answer: formatAnswer(answer, short),
      sources,
    });
  } catch (err) {
    handleError(err);
  }
}
|
|
146
|
+
|
|
147
|
+
main();
|
package/extractors/selectors.mjs
CHANGED
|
@@ -1,54 +1,54 @@
|
|
|
1
|
-
// extractors/selectors.mjs
|
|
2
|
-
// Centralized CSS selectors for all engines.
|
|
3
|
-
// Update selectors here when a site changes its UI.
|
|
4
|
-
|
|
5
|
-
export const SELECTORS = {
|
|
6
|
-
// ──────────────────────────────────────────────
|
|
7
|
-
// Perplexity (perplexity.ai)
|
|
8
|
-
// ──────────────────────────────────────────────
|
|
9
|
-
perplexity: {
|
|
10
|
-
input: "#ask-input",
|
|
11
|
-
// Note: copy button found via JS in extractor (language-agnostic)
|
|
12
|
-
copyButton: null,
|
|
13
|
-
sourceItem: "[data-pplx-citation-url]",
|
|
14
|
-
sourceLink: "a",
|
|
15
|
-
consent: "#onetrust-accept-btn-handler",
|
|
16
|
-
},
|
|
17
|
-
|
|
18
|
-
// ──────────────────────────────────────────────
|
|
19
|
-
// Bing Copilot (copilot.microsoft.com)
|
|
20
|
-
// ──────────────────────────────────────────────
|
|
21
|
-
bing: {
|
|
22
|
-
input: "#userInput",
|
|
23
|
-
copyButton: 'button[data-testid="copy-ai-message-button"]',
|
|
24
|
-
sourceLink: 'a[href^="http"][target="_blank"]',
|
|
25
|
-
sourceExclude: "copilot.microsoft.com",
|
|
26
|
-
consent: "#onetrust-accept-btn-handler",
|
|
27
|
-
},
|
|
28
|
-
|
|
29
|
-
// ──────────────────────────────────────────────
|
|
30
|
-
// Google AI Mode (google.com/search?udm=50)
|
|
31
|
-
// ──────────────────────────────────────────────
|
|
32
|
-
google: {
|
|
33
|
-
answerContainer: ".pWvJNd",
|
|
34
|
-
sourceLink: 'a[href^="http"]',
|
|
35
|
-
sourceExclude: ["google.", "gstatic", "googleapis"],
|
|
36
|
-
sourceHeadingParent: "[data-snhf]",
|
|
37
|
-
consent: '#L2AGLb, button[jsname="b3VHJd"], .tHlp8d',
|
|
38
|
-
},
|
|
39
|
-
|
|
40
|
-
// ──────────────────────────────────────────────
|
|
41
|
-
// Gemini (gemini.google.com/app)
|
|
42
|
-
// ──────────────────────────────────────────────
|
|
43
|
-
gemini: {
|
|
44
|
-
input: "rich-textarea .ql-editor",
|
|
45
|
-
// Language-agnostic: use Material icon data attributes (work across locales)
|
|
46
|
-
copyButton: 'button:has(mat-icon[data-mat-icon-name="content_copy"])',
|
|
47
|
-
sendButton: 'button:has(mat-icon[data-mat-icon-name="send"]), .send-button',
|
|
48
|
-
sourcesSidebarButton: "button.legacy-sources-sidebar-button",
|
|
49
|
-
sourcesExclude: ["gemini.google", "gstatic", "google.com/search"],
|
|
50
|
-
citationButtonPattern: 'button[aria-label*="citation from"]',
|
|
51
|
-
// For parsing citation aria-labels: "View source details for citation from {name}. Opens side panel."
|
|
52
|
-
citationNameRegex: /from\s+(.+?)\.\s/,
|
|
53
|
-
},
|
|
54
|
-
};
|
|
1
|
+
// extractors/selectors.mjs
|
|
2
|
+
// Centralized CSS selectors for all engines.
|
|
3
|
+
// Update selectors here when a site changes its UI.
|
|
4
|
+
|
|
5
|
+
// Perplexity (perplexity.ai)
const PERPLEXITY_SELECTORS = {
  input: "#ask-input",
  // No CSS selector on purpose: the extractor finds the copy button with a
  // JS expression so the lookup stays language-agnostic.
  copyButton: null,
  sourceItem: "[data-pplx-citation-url]",
  sourceLink: "a",
  consent: "#onetrust-accept-btn-handler",
};

// Bing Copilot (copilot.microsoft.com)
const BING_SELECTORS = {
  input: "#userInput",
  copyButton: 'button[data-testid="copy-ai-message-button"]',
  sourceLink: 'a[href^="http"][target="_blank"]',
  sourceExclude: "copilot.microsoft.com",
  consent: "#onetrust-accept-btn-handler",
};

// Google AI Mode (google.com/search?udm=50)
const GOOGLE_SELECTORS = {
  answerContainer: ".pWvJNd",
  sourceLink: 'a[href^="http"]',
  sourceExclude: ["google.", "gstatic", "googleapis"],
  sourceHeadingParent: "[data-snhf]",
  consent: '#L2AGLb, button[jsname="b3VHJd"], .tHlp8d',
};

// Gemini (gemini.google.com/app)
const GEMINI_SELECTORS = {
  input: "rich-textarea .ql-editor",
  // Buttons are matched through Material icon data attributes rather than
  // label text, so they work across locales.
  copyButton: 'button:has(mat-icon[data-mat-icon-name="content_copy"])',
  sendButton: 'button:has(mat-icon[data-mat-icon-name="send"]), .send-button',
  sourcesSidebarButton: "button.legacy-sources-sidebar-button",
  sourcesExclude: ["gemini.google", "gstatic", "google.com/search"],
  citationButtonPattern: 'button[aria-label*="citation from"]',
  // Pulls {name} out of citation aria-labels of the form
  // "View source details for citation from {name}. Opens side panel."
  citationNameRegex: /from\s+(.+?)\.\s/,
};

/**
 * Centralized selectors for every supported engine, keyed by engine name.
 * Update the matching per-engine table above when a site changes its UI.
 */
export const SELECTORS = {
  perplexity: PERPLEXITY_SELECTORS,
  bing: BING_SELECTORS,
  google: GOOGLE_SELECTORS,
  gemini: GEMINI_SELECTORS,
};
|