@apmantza/greedysearch-pi 1.8.2 → 1.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/README.md +10 -1
- package/bin/launch.mjs +366 -366
- package/bin/search.mjs +388 -388
- package/extractors/common.mjs +291 -291
- package/extractors/gemini.mjs +146 -146
- package/extractors/google-ai.mjs +125 -125
- package/extractors/perplexity.mjs +147 -145
- package/extractors/selectors.mjs +54 -54
- package/index.ts +256 -278
- package/package.json +1 -1
- package/src/github.mjs +237 -237
- package/src/reddit.mjs +210 -0
- package/src/search/chrome.mjs +222 -222
- package/src/search/constants.mjs +37 -37
- package/src/search/defaults.mjs +14 -14
- package/src/search/engines.mjs +62 -62
- package/src/search/fetch-source.mjs +35 -3
- package/src/search/output.mjs +58 -58
- package/src/search/sources.mjs +445 -445
- package/src/search/synthesis-runner.mjs +63 -63
- package/src/search/synthesis.mjs +223 -223
- package/src/tools/deep-research-handler.ts +36 -36
- package/src/tools/greedy-search-handler.ts +53 -57
- package/src/tools/shared.ts +135 -130
- package/src/types.ts +103 -103
- package/test.mjs +423 -377
package/bin/search.mjs
CHANGED
|
@@ -1,388 +1,388 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
// search.mjs - unified CLI for GreedySearch extractors
|
|
4
|
-
//
|
|
5
|
-
// Usage:
|
|
6
|
-
// node search.mjs <engine> "<query>"
|
|
7
|
-
// node search.mjs all "<query>"
|
|
8
|
-
//
|
|
9
|
-
// Engines:
|
|
10
|
-
// perplexity | pplx | p
|
|
11
|
-
// bing | copilot | b
|
|
12
|
-
// google | g
|
|
13
|
-
// gemini | gem
|
|
14
|
-
// all - fan-out to all engines in parallel
|
|
15
|
-
//
|
|
16
|
-
// Output: JSON to stdout, errors to stderr
|
|
17
|
-
//
|
|
18
|
-
// Examples:
|
|
19
|
-
// node search.mjs p "what is memoization"
|
|
20
|
-
// node search.mjs gem "latest React features"
|
|
21
|
-
// node search.mjs all "how does TCP congestion control work"
|
|
22
|
-
|
|
23
|
-
import { existsSync, readFileSync } from "node:fs";
|
|
24
|
-
// Config file for user defaults
|
|
25
|
-
import { homedir } from "node:os";
|
|
26
|
-
import { join } from "node:path";
|
|
27
|
-
import {
|
|
28
|
-
activateTab,
|
|
29
|
-
cdp,
|
|
30
|
-
closeTab,
|
|
31
|
-
closeTabs,
|
|
32
|
-
ensureChrome,
|
|
33
|
-
openNewTab,
|
|
34
|
-
} from "../src/search/chrome.mjs";
|
|
35
|
-
import { ALL_ENGINES, ENGINES } from "../src/search/constants.mjs";
|
|
36
|
-
import { runExtractor } from "../src/search/engines.mjs";
|
|
37
|
-
import {
|
|
38
|
-
fetchMultipleSources,
|
|
39
|
-
fetchTopSource,
|
|
40
|
-
} from "../src/search/fetch-source.mjs";
|
|
41
|
-
import { writeOutput } from "../src/search/output.mjs";
|
|
42
|
-
import {
|
|
43
|
-
buildSourceRegistry,
|
|
44
|
-
mergeFetchDataIntoSources,
|
|
45
|
-
} from "../src/search/sources.mjs";
|
|
46
|
-
import { buildConfidence } from "../src/search/synthesis.mjs";
|
|
47
|
-
import { synthesizeWithGemini } from "../src/search/synthesis-runner.mjs";
|
|
48
|
-
|
|
49
|
-
const CONFIG_DIR = join(homedir(), ".config", "greedysearch");
|
|
50
|
-
const CONFIG_FILE = join(CONFIG_DIR, "config.json");
|
|
51
|
-
|
|
52
|
-
function loadUserConfig() {
|
|
53
|
-
try {
|
|
54
|
-
if (existsSync(CONFIG_FILE)) {
|
|
55
|
-
return JSON.parse(readFileSync(CONFIG_FILE, "utf8"));
|
|
56
|
-
}
|
|
57
|
-
} catch {
|
|
58
|
-
// Ignore errors
|
|
59
|
-
}
|
|
60
|
-
return {};
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
// ─── Main ──────────────────────────────────────────────────────────────────
|
|
64
|
-
|
|
65
|
-
async function main() {
|
|
66
|
-
const args = process.argv.slice(2);
|
|
67
|
-
if (args.length < 2 || args[0] === "--help") {
|
|
68
|
-
process.stderr.write(
|
|
69
|
-
`${[
|
|
70
|
-
'Usage: node search.mjs <engine> "<query>"',
|
|
71
|
-
"",
|
|
72
|
-
"Engines: perplexity (p), bing (b), google (g), gemini (gem), all",
|
|
73
|
-
"",
|
|
74
|
-
"Flags:",
|
|
75
|
-
" --fast Quick mode: no source fetching or synthesis",
|
|
76
|
-
" --synthesize Deprecated: synthesis is now default for multi-engine",
|
|
77
|
-
" --deep-research Deprecated: source fetching is now default",
|
|
78
|
-
" --fetch-top-source Fetch content from top source",
|
|
79
|
-
" --inline Output JSON to stdout (for piping)",
|
|
80
|
-
" --locale <lang> Force results language (en, de, fr, etc.)",
|
|
81
|
-
"",
|
|
82
|
-
"Environment:",
|
|
83
|
-
" GREEDY_SEARCH_LOCALE Default locale (default: en)",
|
|
84
|
-
" GREEDY_SEARCH_VISIBLE Set to 1 to show Chrome window",
|
|
85
|
-
"",
|
|
86
|
-
"Examples:",
|
|
87
|
-
' node search.mjs all "Node.js streams" # Default: sources + synthesis',
|
|
88
|
-
' node search.mjs all "quick check" --fast # Fast: no sources/synthesis',
|
|
89
|
-
' node search.mjs p "what is memoization" # Single engine: fast mode',
|
|
90
|
-
].join("\n")}\n`,
|
|
91
|
-
);
|
|
92
|
-
process.exit(1);
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
await ensureChrome();
|
|
96
|
-
|
|
97
|
-
// Depth modes: fast (no synthesis/fetch), standard (synthesis+fetch 5 sources)
|
|
98
|
-
const depthIdx = args.indexOf("--depth");
|
|
99
|
-
let depth = "standard"; // DEFAULT: synthesis + source fetch
|
|
100
|
-
|
|
101
|
-
if (depthIdx !== -1 && args[depthIdx + 1]) {
|
|
102
|
-
depth = args[depthIdx + 1];
|
|
103
|
-
} else if (args.includes("--fast")) {
|
|
104
|
-
depth = "fast"; // Explicit fast mode requested
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
// For single engine (not "all"), default to fast unless explicit
|
|
108
|
-
const engineArg = args.find((a) => !a.startsWith("--"))?.toLowerCase();
|
|
109
|
-
if (engineArg !== "all" && depthIdx === -1 && !args.includes("--fast")) {
|
|
110
|
-
depth = "fast";
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
// --deep-research / --deep flags map to deep mode (backward compat)
|
|
114
|
-
if (args.includes("--deep-research")) {
|
|
115
|
-
depth = "standard";
|
|
116
|
-
process.stderr.write(
|
|
117
|
-
"[greedysearch] --deep-research is deprecated; use --depth standard (now default)\n",
|
|
118
|
-
);
|
|
119
|
-
}
|
|
120
|
-
if (args.includes("--deep")) {
|
|
121
|
-
depth = "deep";
|
|
122
|
-
}
|
|
123
|
-
if (args.includes("--synthesize")) {
|
|
124
|
-
process.stderr.write(
|
|
125
|
-
"[greedysearch] --synthesize is deprecated; synthesis is now default for multi-engine\n",
|
|
126
|
-
);
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
const full = args.includes("--full");
|
|
130
|
-
const short = !full;
|
|
131
|
-
const fetchSource = args.includes("--fetch-top-source");
|
|
132
|
-
const inline = args.includes("--inline");
|
|
133
|
-
const outIdx = args.indexOf("--out");
|
|
134
|
-
const outFile = outIdx !== -1 ? args[outIdx + 1] : null;
|
|
135
|
-
|
|
136
|
-
// Locale handling: CLI flag > env var > config file > default (en)
|
|
137
|
-
const localeIdx = args.indexOf("--locale");
|
|
138
|
-
const envLocale = process.env.GREEDY_SEARCH_LOCALE;
|
|
139
|
-
const userConfig = loadUserConfig();
|
|
140
|
-
let locale = "en"; // Default to English
|
|
141
|
-
|
|
142
|
-
if (localeIdx !== -1 && args[localeIdx + 1]) {
|
|
143
|
-
locale = args[localeIdx + 1];
|
|
144
|
-
} else if (envLocale) {
|
|
145
|
-
locale = envLocale;
|
|
146
|
-
} else if (userConfig.locale) {
|
|
147
|
-
locale = userConfig.locale;
|
|
148
|
-
}
|
|
149
|
-
const rest = args.filter(
|
|
150
|
-
(a, i) =>
|
|
151
|
-
a !== "--full" &&
|
|
152
|
-
a !== "--short" &&
|
|
153
|
-
a !== "--fast" &&
|
|
154
|
-
a !== "--fetch-top-source" &&
|
|
155
|
-
a !== "--synthesize" &&
|
|
156
|
-
a !== "--deep-research" &&
|
|
157
|
-
a !== "--deep" &&
|
|
158
|
-
a !== "--inline" &&
|
|
159
|
-
a !== "--depth" &&
|
|
160
|
-
a !== "--out" &&
|
|
161
|
-
a !== "--help" &&
|
|
162
|
-
(depthIdx === -1 || i !== depthIdx + 1) &&
|
|
163
|
-
(outIdx === -1 || i !== outIdx + 1),
|
|
164
|
-
);
|
|
165
|
-
const engine = rest[0].toLowerCase();
|
|
166
|
-
const query = rest.slice(1).join(" ");
|
|
167
|
-
|
|
168
|
-
if (engine === "all") {
|
|
169
|
-
await cdp(["list"]); // refresh pages cache
|
|
170
|
-
|
|
171
|
-
// Create fresh tabs for each engine to avoid race conditions
|
|
172
|
-
const engineTabs = [];
|
|
173
|
-
for (let i = 0; i < ALL_ENGINES.length; i++) {
|
|
174
|
-
if (i > 0) await new Promise((r) => setTimeout(r, 300));
|
|
175
|
-
const tab = await openNewTab();
|
|
176
|
-
engineTabs.push(tab);
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
try {
|
|
180
|
-
const results = await Promise.allSettled(
|
|
181
|
-
ALL_ENGINES.map((e, i) =>
|
|
182
|
-
runExtractor(ENGINES[e], query, engineTabs[i], short, null, locale)
|
|
183
|
-
.then((r) => {
|
|
184
|
-
process.stderr.write(`PROGRESS:${e}:done\n`);
|
|
185
|
-
return { engine: e, ...r };
|
|
186
|
-
})
|
|
187
|
-
.catch((err) => {
|
|
188
|
-
process.stderr.write(`PROGRESS:${e}:error\n`);
|
|
189
|
-
throw err;
|
|
190
|
-
}),
|
|
191
|
-
),
|
|
192
|
-
);
|
|
193
|
-
|
|
194
|
-
const out = {};
|
|
195
|
-
for (let i = 0; i < results.length; i++) {
|
|
196
|
-
const r = results[i];
|
|
197
|
-
if (r.status === "fulfilled") {
|
|
198
|
-
out[r.value.engine] = r.value;
|
|
199
|
-
} else {
|
|
200
|
-
out[ALL_ENGINES[i]] = { error: r.reason?.message || "unknown error" };
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
// Build a canonical source registry across all engines
|
|
205
|
-
out._sources = buildSourceRegistry(out, query);
|
|
206
|
-
|
|
207
|
-
// Source fetching: default for all "all" searches
|
|
208
|
-
if (depth !== "fast" && out._sources.length > 0) {
|
|
209
|
-
process.stderr.write("PROGRESS:source-fetch:start\n");
|
|
210
|
-
const fetchedSources = await fetchMultipleSources(
|
|
211
|
-
out._sources,
|
|
212
|
-
5,
|
|
213
|
-
8000,
|
|
214
|
-
);
|
|
215
|
-
|
|
216
|
-
out._sources = mergeFetchDataIntoSources(out._sources, fetchedSources);
|
|
217
|
-
out._fetchedSources = fetchedSources;
|
|
218
|
-
process.stderr.write("PROGRESS:source-fetch:done\n");
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
// Synthesize with Gemini for all non-fast modes
|
|
222
|
-
if (depth !== "fast") {
|
|
223
|
-
process.stderr.write("PROGRESS:synthesis:start\n");
|
|
224
|
-
process.stderr.write(
|
|
225
|
-
"[greedysearch] Synthesizing results with Gemini...\n",
|
|
226
|
-
);
|
|
227
|
-
try {
|
|
228
|
-
const geminiTab = await openNewTab();
|
|
229
|
-
await activateTab(geminiTab);
|
|
230
|
-
const synthesis = await synthesizeWithGemini(query, out, {
|
|
231
|
-
grounded: depth === "deep",
|
|
232
|
-
tabPrefix: geminiTab,
|
|
233
|
-
});
|
|
234
|
-
out._synthesis = {
|
|
235
|
-
...synthesis,
|
|
236
|
-
synthesized: true,
|
|
237
|
-
};
|
|
238
|
-
await closeTab(geminiTab);
|
|
239
|
-
process.stderr.write("PROGRESS:synthesis:done\n");
|
|
240
|
-
} catch (e) {
|
|
241
|
-
process.stderr.write(
|
|
242
|
-
`[greedysearch] Synthesis failed: ${e.message}\n`,
|
|
243
|
-
);
|
|
244
|
-
out._synthesis = { error: e.message, synthesized: false };
|
|
245
|
-
}
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
if (fetchSource) {
|
|
249
|
-
const top = pickTopSource(out);
|
|
250
|
-
if (top)
|
|
251
|
-
out._topSource = await fetchTopSource(top.canonicalUrl || top.url);
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
// Always include confidence metrics for non-fast searches
|
|
255
|
-
if (depth !== "fast") out._confidence = buildConfidence(out);
|
|
256
|
-
|
|
257
|
-
writeOutput(out, outFile, {
|
|
258
|
-
inline,
|
|
259
|
-
synthesize: depth !== "fast",
|
|
260
|
-
query,
|
|
261
|
-
});
|
|
262
|
-
return;
|
|
263
|
-
} finally {
|
|
264
|
-
await closeTabs(engineTabs);
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
// Single engine
|
|
269
|
-
const script = ENGINES[engine];
|
|
270
|
-
if (!script) {
|
|
271
|
-
process.stderr.write(
|
|
272
|
-
`Unknown engine: "${engine}"\nAvailable: ${Object.keys(ENGINES).join(", ")}\n`,
|
|
273
|
-
);
|
|
274
|
-
process.exit(1);
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
try {
|
|
278
|
-
const result = await runExtractor(script, query, null, short, null, locale);
|
|
279
|
-
if (fetchSource && result.sources?.length > 0) {
|
|
280
|
-
result.topSource = await fetchTopSource(result.sources[0].url);
|
|
281
|
-
}
|
|
282
|
-
writeOutput(result, outFile, { inline, synthesize: false, query });
|
|
283
|
-
} catch (e) {
|
|
284
|
-
process.stderr.write(`Error: ${e.message}\n`);
|
|
285
|
-
process.exit(1);
|
|
286
|
-
}
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
function pickTopSource(out) {
|
|
290
|
-
if (Array.isArray(out._sources) && out._sources.length > 0)
|
|
291
|
-
return out._sources[0];
|
|
292
|
-
for (const engine of ["perplexity", "google", "bing"]) {
|
|
293
|
-
const r = out[engine];
|
|
294
|
-
if (r?.sources?.length > 0) return r.sources[0];
|
|
295
|
-
}
|
|
296
|
-
return null;
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
/**
|
|
300
|
-
* Minimize Chrome window via CDP after search completes.
|
|
301
|
-
* Called at the end of search to keep window minimized.
|
|
302
|
-
*/
|
|
303
|
-
async function minimizeChrome() {
|
|
304
|
-
if (process.env.GREEDY_SEARCH_VISIBLE === "1") return;
|
|
305
|
-
|
|
306
|
-
try {
|
|
307
|
-
const http = await import("node:http");
|
|
308
|
-
const version = await new Promise((resolve, reject) => {
|
|
309
|
-
http
|
|
310
|
-
.get(`http://localhost:9222/json/version`, (res) => {
|
|
311
|
-
let body = "";
|
|
312
|
-
res.on("data", (d) => (body += d));
|
|
313
|
-
res.on("end", () => resolve(JSON.parse(body)));
|
|
314
|
-
})
|
|
315
|
-
.on("error", reject);
|
|
316
|
-
});
|
|
317
|
-
|
|
318
|
-
const wsUrl = version.webSocketDebuggerUrl;
|
|
319
|
-
const WebSocket = globalThis.WebSocket;
|
|
320
|
-
if (!WebSocket) return;
|
|
321
|
-
|
|
322
|
-
const ws = new WebSocket(wsUrl);
|
|
323
|
-
let requestId = 0;
|
|
324
|
-
const pending = new Map();
|
|
325
|
-
|
|
326
|
-
ws.onopen = () => {
|
|
327
|
-
const id = ++requestId;
|
|
328
|
-
pending.set(id, {
|
|
329
|
-
resolve: (result) => {
|
|
330
|
-
const targets = result.targetInfos || [];
|
|
331
|
-
const pageTarget = targets.find((t) => t.type === "page");
|
|
332
|
-
if (!pageTarget) {
|
|
333
|
-
ws.close();
|
|
334
|
-
return;
|
|
335
|
-
}
|
|
336
|
-
|
|
337
|
-
const winId = ++requestId;
|
|
338
|
-
pending.set(winId, {
|
|
339
|
-
resolve: (winResult) => {
|
|
340
|
-
const windowId = winResult.windowId;
|
|
341
|
-
const minId = ++requestId;
|
|
342
|
-
pending.set(minId, { resolve: () => {}, reject: () => {} });
|
|
343
|
-
ws.send(
|
|
344
|
-
JSON.stringify({
|
|
345
|
-
id: minId,
|
|
346
|
-
method: "Browser.setWindowBounds",
|
|
347
|
-
params: { windowId, bounds: { windowState: "minimized" } },
|
|
348
|
-
}),
|
|
349
|
-
);
|
|
350
|
-
setTimeout(() => ws.close(), 500);
|
|
351
|
-
},
|
|
352
|
-
reject: () => ws.close(),
|
|
353
|
-
});
|
|
354
|
-
ws.send(
|
|
355
|
-
JSON.stringify({
|
|
356
|
-
id: winId,
|
|
357
|
-
method: "Browser.getWindowForTarget",
|
|
358
|
-
params: { targetId: pageTarget.targetId },
|
|
359
|
-
}),
|
|
360
|
-
);
|
|
361
|
-
},
|
|
362
|
-
reject: () => ws.close(),
|
|
363
|
-
});
|
|
364
|
-
ws.send(JSON.stringify({ id, method: "Target.getTargets", params: {} }));
|
|
365
|
-
};
|
|
366
|
-
|
|
367
|
-
ws.onmessage = (event) => {
|
|
368
|
-
const msg = JSON.parse(event.data);
|
|
369
|
-
if (msg.id && pending.has(msg.id)) {
|
|
370
|
-
const { resolve, reject } = pending.get(msg.id);
|
|
371
|
-
pending.delete(msg.id);
|
|
372
|
-
if (msg.error) reject?.(msg.error);
|
|
373
|
-
else resolve?.(msg.result);
|
|
374
|
-
}
|
|
375
|
-
};
|
|
376
|
-
|
|
377
|
-
setTimeout(() => ws.close(), 3000);
|
|
378
|
-
} catch {
|
|
379
|
-
// Best-effort
|
|
380
|
-
}
|
|
381
|
-
}
|
|
382
|
-
|
|
383
|
-
main().finally(async () => {
|
|
384
|
-
// Ensure window is minimized after search completes
|
|
385
|
-
await minimizeChrome();
|
|
386
|
-
// Give minimize time to complete before exit
|
|
387
|
-
await new Promise((r) => setTimeout(r, 1500));
|
|
388
|
-
});
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// search.mjs - unified CLI for GreedySearch extractors
|
|
4
|
+
//
|
|
5
|
+
// Usage:
|
|
6
|
+
// node search.mjs <engine> "<query>"
|
|
7
|
+
// node search.mjs all "<query>"
|
|
8
|
+
//
|
|
9
|
+
// Engines:
|
|
10
|
+
// perplexity | pplx | p
|
|
11
|
+
// bing | copilot | b
|
|
12
|
+
// google | g
|
|
13
|
+
// gemini | gem
|
|
14
|
+
// all - fan-out to all engines in parallel
|
|
15
|
+
//
|
|
16
|
+
// Output: JSON to stdout, errors to stderr
|
|
17
|
+
//
|
|
18
|
+
// Examples:
|
|
19
|
+
// node search.mjs p "what is memoization"
|
|
20
|
+
// node search.mjs gem "latest React features"
|
|
21
|
+
// node search.mjs all "how does TCP congestion control work"
|
|
22
|
+
|
|
23
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
24
|
+
// Config file for user defaults
|
|
25
|
+
import { homedir } from "node:os";
|
|
26
|
+
import { join } from "node:path";
|
|
27
|
+
import {
|
|
28
|
+
activateTab,
|
|
29
|
+
cdp,
|
|
30
|
+
closeTab,
|
|
31
|
+
closeTabs,
|
|
32
|
+
ensureChrome,
|
|
33
|
+
openNewTab,
|
|
34
|
+
} from "../src/search/chrome.mjs";
|
|
35
|
+
import { ALL_ENGINES, ENGINES } from "../src/search/constants.mjs";
|
|
36
|
+
import { runExtractor } from "../src/search/engines.mjs";
|
|
37
|
+
import {
|
|
38
|
+
fetchMultipleSources,
|
|
39
|
+
fetchTopSource,
|
|
40
|
+
} from "../src/search/fetch-source.mjs";
|
|
41
|
+
import { writeOutput } from "../src/search/output.mjs";
|
|
42
|
+
import {
|
|
43
|
+
buildSourceRegistry,
|
|
44
|
+
mergeFetchDataIntoSources,
|
|
45
|
+
} from "../src/search/sources.mjs";
|
|
46
|
+
import { buildConfidence } from "../src/search/synthesis.mjs";
|
|
47
|
+
import { synthesizeWithGemini } from "../src/search/synthesis-runner.mjs";
|
|
48
|
+
|
|
49
|
+
const CONFIG_DIR = join(homedir(), ".config", "greedysearch");
|
|
50
|
+
const CONFIG_FILE = join(CONFIG_DIR, "config.json");
|
|
51
|
+
|
|
52
|
+
/**
 * Load optional user defaults from the JSON config file.
 *
 * Best-effort: a missing, unreadable, or malformed file never aborts the CLI.
 * The parsed value is only returned when it is a plain object, so callers can
 * safely read properties such as `.locale` (a file containing `null`, a
 * number, a string, or an array yields `{}` instead).
 *
 * @param {string} [configFile=CONFIG_FILE] - Path of the JSON config file.
 * @returns {object} The parsed config object, or `{}` when unavailable.
 */
function loadUserConfig(configFile = CONFIG_FILE) {
  try {
    if (existsSync(configFile)) {
      const parsed = JSON.parse(readFileSync(configFile, "utf8"));
      const isPlainObject =
        parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
      if (isPlainObject) {
        return parsed;
      }
    }
  } catch {
    // Deliberately ignored: user config is optional and must not break a search.
  }
  return {};
}
|
|
62
|
+
|
|
63
|
+
// ─── Main ──────────────────────────────────────────────────────────────────
|
|
64
|
+
|
|
65
|
+
/**
 * CLI entry point: parse arguments, run one engine or fan out to all engines,
 * optionally fetch sources and synthesize, then write the result.
 *
 * Argument handling:
 *  - Boolean flags and the value flags `--depth`, `--out`, `--locale` (plus the
 *    token following each) are stripped; the first remaining token is the
 *    engine and the rest is joined into the query.
 *  - Depth: `--depth <v>` wins, then `--fast`; a single engine defaults to
 *    "fast", "all" defaults to "standard". `--deep-research` forces
 *    "standard" (deprecated), `--deep` forces "deep".
 *  - Locale precedence: `--locale` > GREEDY_SEARCH_LOCALE > config file > "en".
 *
 * Exits the process with code 1 on usage errors, an unknown engine, or a
 * single-engine extraction failure.
 */
async function main() {
  const args = process.argv.slice(2);
  if (args.length < 2 || args[0] === "--help") {
    process.stderr.write(
      `${[
        'Usage: node search.mjs <engine> "<query>"',
        "",
        "Engines: perplexity (p), bing (b), google (g), gemini (gem), all",
        "",
        "Flags:",
        " --fast Quick mode: no source fetching or synthesis",
        " --synthesize Deprecated: synthesis is now default for multi-engine",
        " --deep-research Deprecated: source fetching is now default",
        " --fetch-top-source Fetch content from top source",
        " --inline Output JSON to stdout (for piping)",
        " --locale <lang> Force results language (en, de, fr, etc.)",
        "",
        "Environment:",
        " GREEDY_SEARCH_LOCALE Default locale (default: en)",
        " GREEDY_SEARCH_VISIBLE Set to 1 to show Chrome window",
        "",
        "Examples:",
        ' node search.mjs all "Node.js streams" # Default: sources + synthesis',
        ' node search.mjs all "quick check" --fast # Fast: no sources/synthesis',
        ' node search.mjs p "what is memoization" # Single engine: fast mode',
      ].join("\n")}\n`,
    );
    process.exit(1);
  }

  await ensureChrome();

  // ── Positional arguments ────────────────────────────────────────────────
  const depthIdx = args.indexOf("--depth");
  const outIdx = args.indexOf("--out");
  const localeIdx = args.indexOf("--locale");

  const flagNames = new Set([
    "--full",
    "--short",
    "--fast",
    "--fetch-top-source",
    "--synthesize",
    "--deep-research",
    "--deep",
    "--inline",
    "--depth",
    "--out",
    "--locale",
    "--help",
  ]);
  // The token right after a value flag is that flag's value, not part of the
  // query. `--locale` was previously missing here, so its value leaked into
  // the query (or was mistaken for the engine).
  const flagValuePositions = new Set(
    [depthIdx, outIdx, localeIdx].filter((i) => i !== -1).map((i) => i + 1),
  );
  const rest = args.filter(
    (a, i) => !flagNames.has(a) && !flagValuePositions.has(i),
  );

  if (rest.length === 0) {
    process.stderr.write(
      'Missing engine. Usage: node search.mjs <engine> "<query>"\n',
    );
    process.exit(1);
  }
  const engine = rest[0].toLowerCase();
  const query = rest.slice(1).join(" ");

  // ── Depth ───────────────────────────────────────────────────────────────
  // Depth modes: fast (no synthesis/fetch), standard (synthesis+fetch 5 sources)
  const fastRequested = args.includes("--fast");
  let depth = "standard"; // DEFAULT: synthesis + source fetch

  if (depthIdx !== -1 && args[depthIdx + 1]) {
    depth = args[depthIdx + 1];
  } else if (fastRequested) {
    depth = "fast"; // Explicit fast mode requested
  }

  // For single engine (not "all"), default to fast unless explicit. Uses the
  // parsed engine so flag values (e.g. the --out path) are never mistaken for it.
  if (engine !== "all" && depthIdx === -1 && !fastRequested) {
    depth = "fast";
  }

  // Backward compat: --deep-research maps to standard, --deep maps to deep.
  if (args.includes("--deep-research")) {
    depth = "standard";
    process.stderr.write(
      "[greedysearch] --deep-research is deprecated; use --depth standard (now default)\n",
    );
  }
  if (args.includes("--deep")) {
    depth = "deep";
  }
  if (args.includes("--synthesize")) {
    process.stderr.write(
      "[greedysearch] --synthesize is deprecated; synthesis is now default for multi-engine\n",
    );
  }

  // ── Remaining options ───────────────────────────────────────────────────
  const shortMode = !args.includes("--full");
  const fetchSource = args.includes("--fetch-top-source");
  const inline = args.includes("--inline");
  const outFile = outIdx !== -1 ? args[outIdx + 1] : null;

  // Locale handling: CLI flag > env var > config file > default (en)
  const envLocale = process.env.GREEDY_SEARCH_LOCALE;
  const userConfig = loadUserConfig();
  let locale = "en"; // Default to English

  if (localeIdx !== -1 && args[localeIdx + 1]) {
    locale = args[localeIdx + 1];
  } else if (envLocale) {
    locale = envLocale;
  } else if (userConfig.locale) {
    locale = userConfig.locale;
  }

  if (engine === "all") {
    await cdp(["list"]); // refresh pages cache

    const engineTabs = [];
    try {
      // Create fresh tabs for each engine to avoid race conditions. Done
      // inside the try so tabs already opened are closed if a later open fails.
      for (let i = 0; i < ALL_ENGINES.length; i++) {
        if (i > 0) await new Promise((r) => setTimeout(r, 300));
        engineTabs.push(await openNewTab());
      }

      const results = await Promise.allSettled(
        ALL_ENGINES.map((e, i) =>
          runExtractor(ENGINES[e], query, engineTabs[i], shortMode, null, locale)
            .then((r) => {
              process.stderr.write(`PROGRESS:${e}:done\n`);
              return { engine: e, ...r };
            })
            .catch((err) => {
              process.stderr.write(`PROGRESS:${e}:error\n`);
              throw err;
            }),
        ),
      );

      const out = {};
      results.forEach((r, i) => {
        if (r.status === "fulfilled") {
          out[r.value.engine] = r.value;
        } else {
          out[ALL_ENGINES[i]] = { error: r.reason?.message || "unknown error" };
        }
      });

      // Build a canonical source registry across all engines
      out._sources = buildSourceRegistry(out, query);

      // Source fetching: default for all "all" searches
      if (depth !== "fast" && out._sources.length > 0) {
        process.stderr.write("PROGRESS:source-fetch:start\n");
        const fetchedSources = await fetchMultipleSources(
          out._sources,
          5,
          8000,
        );

        out._sources = mergeFetchDataIntoSources(out._sources, fetchedSources);
        out._fetchedSources = fetchedSources;
        process.stderr.write("PROGRESS:source-fetch:done\n");
      }

      // Synthesize with Gemini for all non-fast modes
      if (depth !== "fast") {
        process.stderr.write("PROGRESS:synthesis:start\n");
        process.stderr.write(
          "[greedysearch] Synthesizing results with Gemini...\n",
        );
        let geminiTab = null;
        try {
          geminiTab = await openNewTab();
          await activateTab(geminiTab);
          const synthesis = await synthesizeWithGemini(query, out, {
            grounded: depth === "deep",
            tabPrefix: geminiTab,
          });
          out._synthesis = {
            ...synthesis,
            synthesized: true,
          };
          process.stderr.write("PROGRESS:synthesis:done\n");
        } catch (e) {
          process.stderr.write(
            `[greedysearch] Synthesis failed: ${e.message}\n`,
          );
          out._synthesis = { error: e.message, synthesized: false };
        } finally {
          // Close the synthesis tab on failure too, so it is never leaked.
          if (geminiTab) {
            try {
              await closeTab(geminiTab);
            } catch {
              // Best-effort cleanup; the search result is already decided.
            }
          }
        }
      }

      if (fetchSource) {
        const top = pickTopSource(out);
        if (top) {
          out._topSource = await fetchTopSource(top.canonicalUrl || top.url);
        }
      }

      // Always include confidence metrics for non-fast searches
      if (depth !== "fast") out._confidence = buildConfidence(out);

      writeOutput(out, outFile, {
        inline,
        synthesize: depth !== "fast",
        query,
      });
      return;
    } finally {
      await closeTabs(engineTabs);
    }
  }

  // Single engine
  const script = ENGINES[engine];
  if (!script) {
    process.stderr.write(
      `Unknown engine: "${engine}"\nAvailable: ${Object.keys(ENGINES).join(", ")}\n`,
    );
    process.exit(1);
  }

  try {
    const result = await runExtractor(
      script,
      query,
      null,
      shortMode,
      null,
      locale,
    );
    if (fetchSource && result.sources?.length > 0) {
      result.topSource = await fetchTopSource(result.sources[0].url);
    }
    writeOutput(result, outFile, { inline, synthesize: false, query });
  } catch (e) {
    process.stderr.write(`Error: ${e.message}\n`);
    process.exit(1);
  }
}
|
|
288
|
+
|
|
289
|
+
/**
 * Choose the single best source from a multi-engine result object.
 *
 * The first entry of the merged `_sources` registry is preferred. When that
 * registry is absent or empty, the first source of the first engine (checked
 * in the order perplexity, google, bing) that reported any sources is used.
 *
 * @param {object} out - Result object keyed by engine name, optionally with `_sources`.
 * @returns {*} The chosen source entry, or `null` when none is available.
 */
function pickTopSource(out) {
  const registry = out._sources;
  if (Array.isArray(registry) && registry.length > 0) {
    return registry[0];
  }

  const firstWithSources = ["perplexity", "google", "bing"]
    .map((name) => out[name])
    .find((engineResult) => engineResult?.sources?.length > 0);

  return firstWithSources ? firstWithSources.sources[0] : null;
}
|
|
298
|
+
|
|
299
|
+
/**
 * Minimize the Chrome window via CDP after a search completes (best-effort).
 *
 * Does nothing when GREEDY_SEARCH_VISIBLE is "1". Otherwise it reads the
 * browser WebSocket URL from http://localhost:9222/json/version, then sends
 * Target.getTargets -> Browser.getWindowForTarget -> Browser.setWindowBounds
 * (windowState "minimized") for the first target of type "page".
 *
 * The returned promise settles once the socket has closed or after a 3 s
 * ceiling, whichever comes first, and all timers are cleared so nothing keeps
 * the process alive afterwards. Any failure is swallowed on purpose:
 * minimizing is cosmetic and must never affect the search outcome.
 *
 * @returns {Promise<void>}
 */
async function minimizeChrome() {
  if (process.env.GREEDY_SEARCH_VISIBLE === "1") return;

  try {
    const http = await import("node:http");
    const version = await new Promise((resolve, reject) => {
      http
        .get(`http://localhost:9222/json/version`, (res) => {
          let body = "";
          res.on("data", (d) => (body += d));
          res.on("error", reject);
          res.on("end", () => {
            // Parse inside try/catch: a throw from an event listener would
            // bypass the surrounding promise and surface as an uncaught exception.
            try {
              resolve(JSON.parse(body));
            } catch (err) {
              reject(err);
            }
          });
        })
        .on("error", reject);
    });

    const wsUrl = version?.webSocketDebuggerUrl;
    const WebSocket = globalThis.WebSocket;
    if (!WebSocket || !wsUrl) return;

    await new Promise((done) => {
      const ws = new WebSocket(wsUrl);
      const pending = new Map();
      let requestId = 0;
      let hardTimer = null;
      let graceTimer = null;

      const closeSocket = () => {
        try {
          ws.close();
        } catch {
          // Socket already unusable; nothing left to release.
        }
      };

      // Idempotent: may be reached from onclose, onerror, and the hard timeout.
      const finish = () => {
        clearTimeout(hardTimer);
        clearTimeout(graceTimer);
        done();
      };

      // Send one CDP command and register its reply handler.
      const send = (method, params, onResult) => {
        const id = ++requestId;
        pending.set(id, { resolve: onResult, reject: closeSocket });
        ws.send(JSON.stringify({ id, method, params }));
      };

      ws.onopen = () => {
        send("Target.getTargets", {}, (result) => {
          const targets = result.targetInfos || [];
          const pageTarget = targets.find((t) => t.type === "page");
          if (!pageTarget) {
            closeSocket();
            return;
          }

          send(
            "Browser.getWindowForTarget",
            { targetId: pageTarget.targetId },
            (winResult) => {
              send(
                "Browser.setWindowBounds",
                {
                  windowId: winResult.windowId,
                  bounds: { windowState: "minimized" },
                },
                closeSocket,
              );
              // Close even if the browser never acknowledges the request.
              graceTimer = setTimeout(closeSocket, 500);
            },
          );
        });
      };

      ws.onmessage = (event) => {
        try {
          const msg = JSON.parse(event.data);
          const handlers = msg.id ? pending.get(msg.id) : undefined;
          if (!handlers) return;
          pending.delete(msg.id);
          if (msg.error) handlers.reject?.(msg.error);
          else handlers.resolve?.(msg.result);
        } catch {
          // Malformed frame or unexpected reply shape: give up quietly.
          closeSocket();
        }
      };

      ws.onerror = () => {
        closeSocket();
        finish();
      };
      ws.onclose = finish;

      // Hard ceiling so a stalled connection cannot delay process exit.
      hardTimer = setTimeout(() => {
        closeSocket();
        finish();
      }, 3000);
    });
  } catch {
    // Best-effort
  }
}
|
|
382
|
+
|
|
383
|
+
// Run the CLI. A rejection from main() is reported in the same "Error: ..."
// form used for single-engine failures and turned into a non-zero exit code,
// instead of escaping as an unhandled rejection after the cleanup step.
main()
  .catch((err) => {
    process.stderr.write(`Error: ${err?.message ?? err}\n`);
    process.exitCode = 1;
  })
  .finally(async () => {
    // Ensure window is minimized after search completes
    await minimizeChrome();
    // Give minimize time to complete before exit
    await new Promise((r) => setTimeout(r, 1500));
  });
|