@apmantza/greedysearch-pi 1.8.9 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +503 -446
- package/bin/cdp.mjs +15 -2
- package/bin/search.mjs +679 -668
- package/extractors/bing-copilot.mjs +68 -11
- package/extractors/common.mjs +37 -2
- package/extractors/consent.mjs +388 -294
- package/extractors/gemini.mjs +217 -150
- package/extractors/perplexity.mjs +56 -7
- package/package.json +1 -1
- package/src/search/chrome.mjs +62 -1
- package/src/search/constants.mjs +1 -6
- package/src/search/engines.mjs +76 -67
- package/src/search/file-sources.mjs +46 -0
- package/src/search/query.mjs +49 -0
- package/src/search/recovery.mjs +20 -1
- package/src/search/sources.mjs +37 -21
- package/src/search/synthesis.mjs +27 -16
- package/extractors/bing-aria.mjs +0 -539
- package/extractors/google-search.mjs +0 -234
package/extractors/gemini.mjs
CHANGED
|
@@ -1,150 +1,217 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
// extractors/gemini.mjs
|
|
4
|
-
// Navigate gemini.google.com/app, submit query, wait for answer, return clean answer + sources.
|
|
5
|
-
//
|
|
6
|
-
// Usage:
|
|
7
|
-
// node extractors/gemini.mjs "<query>" [--tab <prefix>]
|
|
8
|
-
//
|
|
9
|
-
// Output (stdout): JSON { answer, sources, query, url }
|
|
10
|
-
// Errors go to stderr only — stdout is always clean JSON for piping.
|
|
11
|
-
|
|
12
|
-
import {
|
|
13
|
-
cdp,
|
|
14
|
-
formatAnswer,
|
|
15
|
-
getOrOpenTab,
|
|
16
|
-
handleError,
|
|
17
|
-
injectClipboardInterceptor,
|
|
18
|
-
jitter,
|
|
19
|
-
outputJson,
|
|
20
|
-
parseArgs,
|
|
21
|
-
parseSourcesFromMarkdown,
|
|
22
|
-
prepareArgs,
|
|
23
|
-
TIMING,
|
|
24
|
-
validateQuery,
|
|
25
|
-
waitForSelector,
|
|
26
|
-
waitForStreamComplete,
|
|
27
|
-
} from "./common.mjs";
|
|
28
|
-
import { dismissConsent, handleVerification } from "./consent.mjs";
|
|
29
|
-
import { SELECTORS } from "./selectors.mjs";
|
|
30
|
-
|
|
31
|
-
const S = SELECTORS.gemini;
|
|
32
|
-
const GLOBAL_VAR = "__geminiClipboard";
|
|
33
|
-
|
|
34
|
-
// ============================================================================
|
|
35
|
-
// Gemini-specific helpers
|
|
36
|
-
// ============================================================================
|
|
37
|
-
|
|
38
|
-
async function typeIntoGemini(tab, text) {
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
async function
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// extractors/gemini.mjs
|
|
4
|
+
// Navigate gemini.google.com/app, submit query, wait for answer, return clean answer + sources.
|
|
5
|
+
//
|
|
6
|
+
// Usage:
|
|
7
|
+
// node extractors/gemini.mjs "<query>" [--tab <prefix>]
|
|
8
|
+
//
|
|
9
|
+
// Output (stdout): JSON { answer, sources, query, url }
|
|
10
|
+
// Errors go to stderr only — stdout is always clean JSON for piping.
|
|
11
|
+
|
|
12
|
+
import {
|
|
13
|
+
cdp,
|
|
14
|
+
formatAnswer,
|
|
15
|
+
getOrOpenTab,
|
|
16
|
+
handleError,
|
|
17
|
+
injectClipboardInterceptor,
|
|
18
|
+
jitter,
|
|
19
|
+
outputJson,
|
|
20
|
+
parseArgs,
|
|
21
|
+
parseSourcesFromMarkdown,
|
|
22
|
+
prepareArgs,
|
|
23
|
+
TIMING,
|
|
24
|
+
validateQuery,
|
|
25
|
+
waitForSelector,
|
|
26
|
+
waitForStreamComplete,
|
|
27
|
+
} from "./common.mjs";
|
|
28
|
+
import { dismissConsent, handleVerification } from "./consent.mjs";
|
|
29
|
+
import { SELECTORS } from "./selectors.mjs";
|
|
30
|
+
|
|
31
|
+
const S = SELECTORS.gemini;
|
|
32
|
+
const GLOBAL_VAR = "__geminiClipboard";
|
|
33
|
+
|
|
34
|
+
// ============================================================================
|
|
35
|
+
// Gemini-specific helpers
|
|
36
|
+
// ============================================================================
|
|
37
|
+
|
|
38
|
+
async function typeIntoGemini(tab, text) {
|
|
39
|
+
// 1. Focus the input area via click (more reliable than eval focus for shadow-DOM editors)
|
|
40
|
+
await cdp(["click", tab, S.input]);
|
|
41
|
+
await new Promise((r) => setTimeout(r, jitter(200)));
|
|
42
|
+
|
|
43
|
+
// 2. Type using CDP Input.insertText (more reliable than document.execCommand)
|
|
44
|
+
await cdp(["type", tab, text]);
|
|
45
|
+
await new Promise((r) => setTimeout(r, jitter(300)));
|
|
46
|
+
|
|
47
|
+
// 3. Verify the text was actually inserted
|
|
48
|
+
const inserted = await cdp([
|
|
49
|
+
"eval",
|
|
50
|
+
tab,
|
|
51
|
+
`(function() {
|
|
52
|
+
var el = document.querySelector('${S.input}');
|
|
53
|
+
if (!el) return false;
|
|
54
|
+
var content = el.innerText || el.textContent || '';
|
|
55
|
+
return content.trim().length >= ${Math.floor(text.length * 0.8)};
|
|
56
|
+
})()`,
|
|
57
|
+
]);
|
|
58
|
+
if (inserted !== "true") {
|
|
59
|
+
throw new Error(
|
|
60
|
+
"Gemini input field did not accept text — input verification failed",
|
|
61
|
+
);
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
async function scrollToBottom(tab) {
|
|
66
|
+
await cdp([
|
|
67
|
+
"eval",
|
|
68
|
+
tab,
|
|
69
|
+
`(function() {
|
|
70
|
+
const chat = document.querySelector('chat-window, [role="main"], main') || document.body;
|
|
71
|
+
chat.scrollTo ? chat.scrollTo({ top: chat.scrollHeight, behavior: 'smooth' }) : window.scrollTo(0, document.body.scrollHeight);
|
|
72
|
+
})()`,
|
|
73
|
+
]);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
async function extractAnswer(tab, query = "") {
|
|
77
|
+
const queryNorm = query.toLowerCase().trim();
|
|
78
|
+
|
|
79
|
+
// Wait for the assistant response copy button to appear.
|
|
80
|
+
// A fresh conversation has 1 copy button (user message); after the
|
|
81
|
+
// assistant responds there are 2+. This prevents clicking the user's
|
|
82
|
+
// copy button before React hydrates the assistant's.
|
|
83
|
+
let copyReady = false;
|
|
84
|
+
const copyDeadline = Date.now() + 12000;
|
|
85
|
+
while (Date.now() < copyDeadline) {
|
|
86
|
+
const count = await cdp([
|
|
87
|
+
"eval",
|
|
88
|
+
tab,
|
|
89
|
+
`document.querySelectorAll('${S.copyButton}').length`,
|
|
90
|
+
]);
|
|
91
|
+
if (parseInt(count, 10) >= 2) {
|
|
92
|
+
copyReady = true;
|
|
93
|
+
break;
|
|
94
|
+
}
|
|
95
|
+
await new Promise((r) => setTimeout(r, 800));
|
|
96
|
+
}
|
|
97
|
+
if (!copyReady) {
|
|
98
|
+
console.error("[gemini] Warning: assistant copy button did not appear");
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Click the LAST copy button (assistant's response at the bottom)
|
|
102
|
+
await cdp([
|
|
103
|
+
"eval",
|
|
104
|
+
tab,
|
|
105
|
+
`(() => {
|
|
106
|
+
const buttons = document.querySelectorAll('${S.copyButton}');
|
|
107
|
+
buttons[buttons.length - 1]?.click();
|
|
108
|
+
})()`,
|
|
109
|
+
]);
|
|
110
|
+
await new Promise((r) => setTimeout(r, 600));
|
|
111
|
+
|
|
112
|
+
let answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
|
|
113
|
+
|
|
114
|
+
// Retry once if clipboard contains the user's query instead of the response.
|
|
115
|
+
// This can happen when the assistant response hasn't rendered its copy button yet.
|
|
116
|
+
if (
|
|
117
|
+
answer &&
|
|
118
|
+
queryNorm &&
|
|
119
|
+
(answer.toLowerCase().trim() === queryNorm ||
|
|
120
|
+
answer.trim().length < queryNorm.length)
|
|
121
|
+
) {
|
|
122
|
+
console.error("[gemini] Clipboard echoed query, retrying in 2s...");
|
|
123
|
+
await new Promise((r) => setTimeout(r, 2000));
|
|
124
|
+
await cdp([
|
|
125
|
+
"eval",
|
|
126
|
+
tab,
|
|
127
|
+
`(() => {
|
|
128
|
+
const buttons = document.querySelectorAll('${S.copyButton}');
|
|
129
|
+
buttons[buttons.length - 1]?.click();
|
|
130
|
+
})()`,
|
|
131
|
+
]);
|
|
132
|
+
await new Promise((r) => setTimeout(r, 600));
|
|
133
|
+
answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
if (!answer) throw new Error("Clipboard interceptor returned empty text");
|
|
137
|
+
|
|
138
|
+
const sources = parseSourcesFromMarkdown(answer);
|
|
139
|
+
return { answer: answer.trim(), sources };
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// ============================================================================
|
|
143
|
+
// Main
|
|
144
|
+
// ============================================================================
|
|
145
|
+
|
|
146
|
+
const USAGE = 'Usage: node extractors/gemini.mjs "<query>" [--tab <prefix>]\n';
|
|
147
|
+
|
|
148
|
+
async function main() {
|
|
149
|
+
const args = await prepareArgs(process.argv.slice(2));
|
|
150
|
+
validateQuery(args, USAGE);
|
|
151
|
+
|
|
152
|
+
const { query, tabPrefix, short } = parseArgs(args);
|
|
153
|
+
|
|
154
|
+
try {
|
|
155
|
+
await cdp(["list"]);
|
|
156
|
+
const tab = await getOrOpenTab(tabPrefix);
|
|
157
|
+
|
|
158
|
+
// Skip navigation if tab was pre-seeded to Gemini (e.g. by search.mjs
|
|
159
|
+
// opening the tab in parallel with source fetch to save ~4s nav time).
|
|
160
|
+
const currentUrl = await cdp(["eval", tab, "document.location.href"]).catch(() => "");
|
|
161
|
+
let onGemini = false;
|
|
162
|
+
try {
|
|
163
|
+
const host = new URL(currentUrl).hostname.toLowerCase();
|
|
164
|
+
onGemini = host === "gemini.google.com" || host.endsWith(".gemini.google.com");
|
|
165
|
+
} catch {}
|
|
166
|
+
|
|
167
|
+
if (!onGemini) {
|
|
168
|
+
await cdp(["nav", tab, "https://gemini.google.com/app"], 20000);
|
|
169
|
+
await new Promise((r) => setTimeout(r, 600));
|
|
170
|
+
}
|
|
171
|
+
await dismissConsent(tab, cdp);
|
|
172
|
+
await handleVerification(tab, cdp, 10000);
|
|
173
|
+
|
|
174
|
+
// Wait for input to be ready
|
|
175
|
+
await waitForSelector(tab, S.input, 8000, TIMING.inputPoll);
|
|
176
|
+
await new Promise((r) => setTimeout(r, jitter(TIMING.postClick)));
|
|
177
|
+
|
|
178
|
+
await injectClipboardInterceptor(tab, GLOBAL_VAR);
|
|
179
|
+
await typeIntoGemini(tab, query);
|
|
180
|
+
await new Promise((r) => setTimeout(r, jitter(TIMING.postType)));
|
|
181
|
+
|
|
182
|
+
await cdp([
|
|
183
|
+
"eval",
|
|
184
|
+
tab,
|
|
185
|
+
`document.querySelector('${S.sendButton}')?.click()`,
|
|
186
|
+
]);
|
|
187
|
+
|
|
188
|
+
// Wait for Gemini's response to finish streaming before extracting.
|
|
189
|
+
// Periodic scrolling keeps lazy-loaded content triggered in the viewport.
|
|
190
|
+
let pollTick = 0;
|
|
191
|
+
const scrollInterval = setInterval(() => {
|
|
192
|
+
if (++pollTick % 10 === 0) scrollToBottom(tab).catch(() => null);
|
|
193
|
+
}, 6000);
|
|
194
|
+
try {
|
|
195
|
+
await waitForStreamComplete(tab, { timeout: 45000, minLength: 50 });
|
|
196
|
+
} finally {
|
|
197
|
+
clearInterval(scrollInterval);
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
const { answer, sources } = await extractAnswer(tab, query);
|
|
201
|
+
if (!answer) throw new Error("No answer captured from Gemini clipboard");
|
|
202
|
+
|
|
203
|
+
const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
|
|
204
|
+
() => "https://gemini.google.com/app",
|
|
205
|
+
);
|
|
206
|
+
outputJson({
|
|
207
|
+
query,
|
|
208
|
+
url: finalUrl,
|
|
209
|
+
answer: formatAnswer(answer, short),
|
|
210
|
+
sources,
|
|
211
|
+
});
|
|
212
|
+
} catch (e) {
|
|
213
|
+
handleError(e);
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
main();
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
// TODO: Refactor - this file has 42 lines duplicated with google-ai.mjs (line 28)
|
|
13
13
|
|
|
14
14
|
import {
|
|
15
|
+
buildEnvelope,
|
|
15
16
|
cdp,
|
|
16
17
|
formatAnswer,
|
|
17
18
|
getOrOpenTab,
|
|
@@ -49,13 +50,14 @@ function findCopyButtonJsExpression() {
|
|
|
49
50
|
// Extraction
|
|
50
51
|
// ============================================================================
|
|
51
52
|
|
|
52
|
-
async function extractAnswer(tab) {
|
|
53
|
+
async function extractAnswer(tab, env) {
|
|
53
54
|
const copyBtnExpr = findCopyButtonJsExpression();
|
|
54
55
|
|
|
55
56
|
await cdp(["eval", tab, `${copyBtnExpr}?.click()`]);
|
|
56
57
|
await new Promise((r) => setTimeout(r, 400));
|
|
57
58
|
|
|
58
59
|
let answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
|
|
60
|
+
env.clipboardEmpty = !answer;
|
|
59
61
|
|
|
60
62
|
// Retry once if clipboard is empty (Perplexity might be slow to write)
|
|
61
63
|
if (!answer) {
|
|
@@ -63,6 +65,7 @@ async function extractAnswer(tab) {
|
|
|
63
65
|
await cdp(["eval", tab, `${copyBtnExpr}?.click()`]);
|
|
64
66
|
await new Promise((r) => setTimeout(r, 2000));
|
|
65
67
|
answer = await cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
|
|
68
|
+
env.clipboardEmpty = !answer;
|
|
66
69
|
}
|
|
67
70
|
|
|
68
71
|
if (!answer) throw new Error("Clipboard interceptor returned empty text");
|
|
@@ -83,6 +86,19 @@ async function main() {
|
|
|
83
86
|
validateQuery(args, USAGE);
|
|
84
87
|
|
|
85
88
|
const { query, tabPrefix, short } = parseArgs(args);
|
|
89
|
+
const startTime = Date.now();
|
|
90
|
+
const mode =
|
|
91
|
+
process.env.GREEDY_SEARCH_VISIBLE === "1" ? "visible" : "headless";
|
|
92
|
+
|
|
93
|
+
const env = {
|
|
94
|
+
engine: "perplexity",
|
|
95
|
+
mode,
|
|
96
|
+
clipboardEmpty: null,
|
|
97
|
+
fallbackUsed: null,
|
|
98
|
+
blockedBy: null,
|
|
99
|
+
verificationResult: null,
|
|
100
|
+
inputReady: null,
|
|
101
|
+
};
|
|
86
102
|
|
|
87
103
|
try {
|
|
88
104
|
// Only refresh page list when creating a fresh tab (no prefix provided)
|
|
@@ -107,6 +123,7 @@ async function main() {
|
|
|
107
123
|
}
|
|
108
124
|
// Handle verification challenges (Cloudflare Turnstile, etc.)
|
|
109
125
|
const verifyResult = await handleVerification(tab, cdp, 10000);
|
|
126
|
+
env.verificationResult = verifyResult;
|
|
110
127
|
if (verifyResult === "needs-human") {
|
|
111
128
|
throw new Error(
|
|
112
129
|
"Perplexity verification required — please solve it manually in the browser window",
|
|
@@ -114,14 +131,43 @@ async function main() {
|
|
|
114
131
|
}
|
|
115
132
|
await dismissConsent(tab, cdp);
|
|
116
133
|
|
|
117
|
-
// After verification, page may have redirected
|
|
134
|
+
// After verification, page may have redirected — wait for it to settle
|
|
135
|
+
// then re-navigate to homepage if we ended up somewhere else.
|
|
118
136
|
if (verifyResult === "clicked") {
|
|
119
137
|
await new Promise((r) => setTimeout(r, TIMING.afterVerify));
|
|
120
|
-
await
|
|
138
|
+
const postVerifyUrl = await cdp(["eval", tab, "document.location.href"]).catch(() => "");
|
|
139
|
+
let onPerplexityAfter = false;
|
|
140
|
+
try {
|
|
141
|
+
const host = new URL(postVerifyUrl).hostname.toLowerCase();
|
|
142
|
+
onPerplexityAfter = host === "perplexity.ai" || host.endsWith(".perplexity.ai");
|
|
143
|
+
} catch {}
|
|
144
|
+
if (!onPerplexityAfter) {
|
|
145
|
+
await cdp(["nav", tab, "https://www.perplexity.ai/"], 20000);
|
|
146
|
+
await new Promise((r) => setTimeout(r, 800));
|
|
147
|
+
await dismissConsent(tab, cdp);
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
// In headless mode: snap the accessibility tree to detect Cloudflare
|
|
152
|
+
// before burning the selector wait. Perplexity is CF-protected in headless
|
|
153
|
+
// just like Bing — fast-fail triggers the visible retry.
|
|
154
|
+
if (process.env.GREEDY_SEARCH_HEADLESS === "1") {
|
|
155
|
+
const snap = await cdp(["snap", tab]).catch(() => "");
|
|
156
|
+
if (/cloudflare|challenge|security check/i.test(snap)) {
|
|
157
|
+
console.error("[perplexity] Cloudflare challenge in snap — fast-failing to visible retry");
|
|
158
|
+
env.blockedBy = "cloudflare";
|
|
159
|
+
throw new Error("Cloudflare challenge detected — headless blocked");
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// Wait for React app to mount input (up to 15s — gives CF redirect + hydration time)
|
|
164
|
+
const inputReady = await waitForSelector(tab, S.input, 15000, 400);
|
|
165
|
+
env.inputReady = inputReady;
|
|
166
|
+
|
|
167
|
+
if (!inputReady) {
|
|
168
|
+
throw new Error("Perplexity input not found — page may not have loaded or is in unexpected state");
|
|
121
169
|
}
|
|
122
170
|
|
|
123
|
-
// Wait for React app to mount input (up to 5s)
|
|
124
|
-
await waitForSelector(tab, S.input, 5000, 400);
|
|
125
171
|
await new Promise((r) => setTimeout(r, jitter(300)));
|
|
126
172
|
|
|
127
173
|
await injectClipboardInterceptor(tab, GLOBAL_VAR);
|
|
@@ -144,7 +190,7 @@ async function main() {
|
|
|
144
190
|
selector: "document.body",
|
|
145
191
|
});
|
|
146
192
|
|
|
147
|
-
const { answer, sources } = await extractAnswer(tab);
|
|
193
|
+
const { answer, sources } = await extractAnswer(tab, env);
|
|
148
194
|
|
|
149
195
|
if (!answer)
|
|
150
196
|
throw new Error(
|
|
@@ -154,14 +200,17 @@ async function main() {
|
|
|
154
200
|
const finalUrl = await cdp(["eval", tab, "document.location.href"]).catch(
|
|
155
201
|
() => "",
|
|
156
202
|
);
|
|
203
|
+
env.durationMs = Date.now() - startTime;
|
|
157
204
|
outputJson({
|
|
158
205
|
query,
|
|
159
206
|
url: finalUrl,
|
|
160
207
|
answer: formatAnswer(answer, short),
|
|
161
208
|
sources,
|
|
209
|
+
_envelope: buildEnvelope(env),
|
|
162
210
|
});
|
|
163
211
|
} catch (e) {
|
|
164
|
-
|
|
212
|
+
env.durationMs = Date.now() - startTime;
|
|
213
|
+
handleError(e, buildEnvelope(env));
|
|
165
214
|
}
|
|
166
215
|
}
|
|
167
216
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@apmantza/greedysearch-pi",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.9.0",
|
|
4
4
|
"description": "Headless multi-engine AI search (Perplexity, Bing Copilot, Google AI) via browser automation -- NO API KEYS needed. Extracts answers with sources, optional synthesis. Grounded AI answers from real browser interactions.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"keywords": [
|
package/src/search/chrome.mjs
CHANGED
|
@@ -113,6 +113,65 @@ function getPortPid() {
|
|
|
113
113
|
}
|
|
114
114
|
}
|
|
115
115
|
|
|
116
|
+
/**
|
|
117
|
+
* Send Browser.close via CDP WebSocket so Chrome flushes its cookie DB to disk
|
|
118
|
+
* before we force-kill it. Gives the process up to `graceMs` to exit on its own.
|
|
119
|
+
* Falls back to force-kill if Chrome is still running after the grace period.
|
|
120
|
+
* Returns true if the process is gone after the call.
|
|
121
|
+
*/
|
|
122
|
+
async function gracefulCloseChrome(graceMs = 1500) {
|
|
123
|
+
try {
|
|
124
|
+
const version = await new Promise((resolve, reject) => {
|
|
125
|
+
const req = http.get(
|
|
126
|
+
`http://localhost:${GREEDY_PORT}/json/version`,
|
|
127
|
+
(res) => {
|
|
128
|
+
let body = "";
|
|
129
|
+
res.on("data", (d) => (body += d));
|
|
130
|
+
res.on("end", () => {
|
|
131
|
+
try {
|
|
132
|
+
resolve(JSON.parse(body));
|
|
133
|
+
} catch {
|
|
134
|
+
reject(new Error("bad JSON"));
|
|
135
|
+
}
|
|
136
|
+
});
|
|
137
|
+
},
|
|
138
|
+
);
|
|
139
|
+
req.on("error", reject);
|
|
140
|
+
req.setTimeout(1000, () => {
|
|
141
|
+
req.destroy();
|
|
142
|
+
reject(new Error("timeout"));
|
|
143
|
+
});
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
const ws = new globalThis.WebSocket(version.webSocketDebuggerUrl);
|
|
147
|
+
await new Promise((resolve) => {
|
|
148
|
+
ws.onopen = () => {
|
|
149
|
+
ws.send(JSON.stringify({ id: 1, method: "Browser.close" }));
|
|
150
|
+
// Give Chrome a moment to receive the command before we close the socket
|
|
151
|
+
setTimeout(() => {
|
|
152
|
+
ws.close();
|
|
153
|
+
resolve();
|
|
154
|
+
}, 200);
|
|
155
|
+
};
|
|
156
|
+
ws.onerror = () => resolve();
|
|
157
|
+
setTimeout(resolve, 1000);
|
|
158
|
+
});
|
|
159
|
+
} catch {
|
|
160
|
+
// Chrome not reachable — skip to force-kill
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// Wait for Chrome to exit gracefully (flushes SQLite cookie DB)
|
|
164
|
+
const deadline = Date.now() + graceMs;
|
|
165
|
+
while (Date.now() < deadline) {
|
|
166
|
+
const pid = getPortPid();
|
|
167
|
+
if (!pid) return true; // already gone
|
|
168
|
+
await new Promise((r) => setTimeout(r, 150));
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
// Still running — force-kill
|
|
172
|
+
return killProcessOnPort();
|
|
173
|
+
}
|
|
174
|
+
|
|
116
175
|
/**
|
|
117
176
|
* Force-kill whatever process is listening on GREEDY_PORT.
|
|
118
177
|
* Uses OS tools to find the PID (not the PID file — handles ghost processes).
|
|
@@ -159,7 +218,9 @@ export async function killChrome() {
|
|
|
159
218
|
return false;
|
|
160
219
|
}
|
|
161
220
|
|
|
162
|
-
|
|
221
|
+
// Graceful close: sends Browser.close so Chrome flushes its cookie DB,
|
|
222
|
+
// then force-kills if it doesn't exit within the grace period.
|
|
223
|
+
const killed = await gracefulCloseChrome(1500);
|
|
163
224
|
|
|
164
225
|
// Clean up tracking files regardless of kill success
|
|
165
226
|
try {
|
package/src/search/constants.mjs
CHANGED
|
@@ -21,23 +21,18 @@ export const ENGINE_DOMAINS = {
|
|
|
21
21
|
|
|
22
22
|
export const ENGINES = {
|
|
23
23
|
perplexity: "perplexity.mjs",
|
|
24
|
-
pplx: "perplexity.mjs",
|
|
25
24
|
p: "perplexity.mjs",
|
|
26
25
|
bing: "bing-copilot.mjs",
|
|
27
|
-
bing2: "bing-aria.mjs",
|
|
28
|
-
copilot: "bing-copilot.mjs",
|
|
29
26
|
b: "bing-copilot.mjs",
|
|
30
27
|
google: "google-ai.mjs",
|
|
31
28
|
g: "google-ai.mjs",
|
|
32
29
|
gemini: "gemini.mjs",
|
|
33
30
|
gem: "gemini.mjs",
|
|
34
|
-
googlesearch: "google-search.mjs",
|
|
35
|
-
gs: "google-search.mjs",
|
|
36
31
|
};
|
|
37
32
|
|
|
38
33
|
export const SOURCE_FETCH_CONCURRENCY = Math.max(
|
|
39
34
|
1,
|
|
40
|
-
Number.parseInt(process.env.GREEDY_FETCH_CONCURRENCY || "
|
|
35
|
+
Number.parseInt(process.env.GREEDY_FETCH_CONCURRENCY || "5", 10) || 5,
|
|
41
36
|
);
|
|
42
37
|
|
|
43
38
|
// Tell cdp.mjs to prefer the GreedySearch Chrome profile's DevToolsActivePort
|