@apmantza/greedysearch-pi 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -7
- package/cdp.mjs +16 -15
- package/index.ts +169 -53
- package/launch.mjs +33 -77
- package/newfeaturesideas.md +105 -0
- package/package.json +1 -1
- package/search.mjs +516 -129
package/README.md
CHANGED
|
@@ -4,12 +4,13 @@ Pi extension that adds a `greedy_search` tool — fans out queries to Perplexity
|
|
|
4
4
|
|
|
5
5
|
Forked from [GreedySearch-claude](https://github.com/apmantza/GreedySearch-claude).
|
|
6
6
|
|
|
7
|
-
## What's New (v1.
|
|
7
|
+
## What's New (v1.4.0)
|
|
8
8
|
|
|
9
|
-
- **
|
|
10
|
-
- **
|
|
11
|
-
- **
|
|
12
|
-
- **
|
|
9
|
+
- **Grounded synthesis** — Gemini now receives a normalized source registry with stable source IDs, agreement summaries, caveats, and cited claims
|
|
10
|
+
- **Real deep research** — top sources are fetched before synthesis so deep research answers are grounded in fetched evidence, not just engine summaries
|
|
11
|
+
- **Richer source metadata** — source output now includes canonical URLs, domains, source types, per-engine attribution, and confidence metadata
|
|
12
|
+
- **Cleaner tab lifecycle** — temporary Perplexity, Bing, and Google tabs are closed after each fan-out search, and synthesis finishes on the Gemini tab
|
|
13
|
+
- **Isolated Chrome targeting** — GreedySearch now refuses to fall back to your normal Chrome session, preventing stray remote-debugging prompts
|
|
13
14
|
|
|
14
15
|
## Install
|
|
15
16
|
|
|
@@ -69,7 +70,15 @@ For complex research questions, use `synthesize: true` with `engine: "all"`:
|
|
|
69
70
|
greedy_search({ query: "best auth patterns for SaaS in 2026", engine: "all", synthesize: true })
|
|
70
71
|
```
|
|
71
72
|
|
|
72
|
-
This deduplicates sources across engines and feeds
|
|
73
|
+
This deduplicates sources across engines, builds a normalized source registry, and feeds that context to Gemini for one clean synthesized answer. Adds ~30s but now returns agreement summaries, caveats, key claims, and better-labeled top sources.
|
|
74
|
+
|
|
75
|
+
For the most grounded mode, use deep research from the CLI:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
node search.mjs all "best auth patterns for SaaS in 2026" --deep-research
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Deep research fetches top source pages before synthesis and reports source confidence metadata such as agreement level, fetched-source success rate, and source mix.
|
|
73
82
|
|
|
74
83
|
**Use synthesis when:**
|
|
75
84
|
- You need one definitive answer, not multiple perspectives
|
|
@@ -112,7 +121,7 @@ greedy_search({ query: "Error: Cannot find module 'react-dom/client' Next.js 15"
|
|
|
112
121
|
|
|
113
122
|
## Requirements
|
|
114
123
|
|
|
115
|
-
- **Chrome** — must be installed. The extension auto-launches a dedicated Chrome instance on port 9222
|
|
124
|
+
- **Chrome** — must be installed. The extension auto-launches a dedicated Chrome instance on port 9222 with its own isolated profile and DevTools port file, separate from your main browser session.
|
|
116
125
|
- **Node.js 22+** — for built-in `fetch` and WebSocket support.
|
|
117
126
|
|
|
118
127
|
## Setup (first time)
|
package/cdp.mjs
CHANGED
|
@@ -37,21 +37,22 @@ function getDevToolsActivePortPath() {
|
|
|
37
37
|
return join(homedir(), '.config', 'google-chrome', 'DevToolsActivePort');
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
-
function getWsUrl() {
|
|
41
|
-
// If CDP_PROFILE_DIR is set (by search.mjs), prefer that profile's port file
|
|
42
|
-
// so GreedySearch targets its own Chrome, not the user's main session.
|
|
43
|
-
const profileDir = process.env.CDP_PROFILE_DIR;
|
|
44
|
-
if (profileDir) {
|
|
45
|
-
const p = profileDir.replace(/\\/g, '/') + '/DevToolsActivePort';
|
|
46
|
-
if (existsSync(p)) {
|
|
47
|
-
const lines = readFileSync(p, 'utf8').trim().split('\n');
|
|
48
|
-
return `ws://127.0.0.1:${lines[0]}${lines[1]}`;
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
const
|
|
53
|
-
|
|
54
|
-
}
|
|
40
|
+
function getWsUrl() {
|
|
41
|
+
// If CDP_PROFILE_DIR is set (by search.mjs), prefer that profile's port file
|
|
42
|
+
// so GreedySearch targets its own Chrome, not the user's main session.
|
|
43
|
+
const profileDir = process.env.CDP_PROFILE_DIR;
|
|
44
|
+
if (profileDir) {
|
|
45
|
+
const p = profileDir.replace(/\\/g, '/') + '/DevToolsActivePort';
|
|
46
|
+
if (existsSync(p)) {
|
|
47
|
+
const lines = readFileSync(p, 'utf8').trim().split('\n');
|
|
48
|
+
return `ws://127.0.0.1:${lines[0]}${lines[1]}`;
|
|
49
|
+
}
|
|
50
|
+
throw new Error(`GreedySearch DevToolsActivePort not found at ${p}. Refusing to fall back to the main Chrome session.`);
|
|
51
|
+
}
|
|
52
|
+
const portFile = getDevToolsActivePortPath();
|
|
53
|
+
const lines = readFileSync(portFile, 'utf8').trim().split('\n');
|
|
54
|
+
return `ws://127.0.0.1:${lines[0]}${lines[1]}`;
|
|
55
|
+
}
|
|
55
56
|
|
|
56
57
|
const sleep = (ms) => new Promise(r => setTimeout(r, ms));
|
|
57
58
|
|
package/index.ts
CHANGED
|
@@ -68,31 +68,164 @@ function runSearch(
|
|
|
68
68
|
});
|
|
69
69
|
}
|
|
70
70
|
|
|
71
|
+
function formatEngineName(engine: string): string {
|
|
72
|
+
if (engine === "bing") return "Bing Copilot";
|
|
73
|
+
if (engine === "google") return "Google AI";
|
|
74
|
+
return engine.charAt(0).toUpperCase() + engine.slice(1);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
function humanizeSourceType(sourceType: string): string {
|
|
78
|
+
if (!sourceType) return "";
|
|
79
|
+
if (sourceType === "official-docs") return "official docs";
|
|
80
|
+
return sourceType.replace(/-/g, " ");
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
function sourceUrl(source: Record<string, unknown>): string {
|
|
84
|
+
return String(source.displayUrl || source.canonicalUrl || source.url || "");
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
function sourceLabel(source: Record<string, unknown>): string {
|
|
88
|
+
return String(source.title || source.domain || sourceUrl(source) || "Untitled source");
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
function sourceConsensus(source: Record<string, unknown>): number {
|
|
92
|
+
if (typeof source.engineCount === "number") return source.engineCount;
|
|
93
|
+
const engines = Array.isArray(source.engines) ? (source.engines as string[]) : [];
|
|
94
|
+
return engines.length;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
function formatAgreementLevel(level: string): string {
|
|
98
|
+
if (!level) return "Mixed";
|
|
99
|
+
return level.charAt(0).toUpperCase() + level.slice(1);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
function getSourceMap(sources: Array<Record<string, unknown>>): Map<string, Record<string, unknown>> {
|
|
103
|
+
return new Map(
|
|
104
|
+
sources
|
|
105
|
+
.map((source) => [String(source.id || ""), source] as const)
|
|
106
|
+
.filter(([id]) => id),
|
|
107
|
+
);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
function formatSourceLine(source: Record<string, unknown>): string {
|
|
111
|
+
const id = String(source.id || "?");
|
|
112
|
+
const url = sourceUrl(source);
|
|
113
|
+
const title = sourceLabel(source);
|
|
114
|
+
const domain = String(source.domain || "");
|
|
115
|
+
const engines = Array.isArray(source.engines) ? (source.engines as string[]) : [];
|
|
116
|
+
const consensus = sourceConsensus(source);
|
|
117
|
+
const typeLabel = humanizeSourceType(String(source.sourceType || ""));
|
|
118
|
+
const fetch = source.fetch as Record<string, unknown> | undefined;
|
|
119
|
+
const fetchStatus = fetch?.ok ? `fetched ${fetch.status || 200}` : fetch?.attempted ? "fetch failed" : "";
|
|
120
|
+
const pieces = [
|
|
121
|
+
`${id} - [${title}](${url})`,
|
|
122
|
+
domain,
|
|
123
|
+
typeLabel,
|
|
124
|
+
engines.length ? `cited by ${engines.map(formatEngineName).join(", ")} (${consensus}/3)` : `${consensus}/3`,
|
|
125
|
+
fetchStatus,
|
|
126
|
+
].filter(Boolean);
|
|
127
|
+
return `- ${pieces.join(" - ")}`;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
function renderSourceEvidence(lines: string[], source: Record<string, unknown>): void {
|
|
131
|
+
const fetch = source.fetch as Record<string, unknown> | undefined;
|
|
132
|
+
if (!fetch?.attempted) return;
|
|
133
|
+
|
|
134
|
+
const snippet = String(fetch.snippet || "").trim();
|
|
135
|
+
const lastModified = String(fetch.lastModified || "").trim();
|
|
136
|
+
if (snippet) lines.push(` Evidence: ${snippet}`);
|
|
137
|
+
if (lastModified) lines.push(` Last-Modified: ${lastModified}`);
|
|
138
|
+
if (fetch.error) lines.push(` Fetch error: ${String(fetch.error)}`);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
function pickSources(
|
|
142
|
+
sources: Array<Record<string, unknown>>,
|
|
143
|
+
recommendedIds: string[] = [],
|
|
144
|
+
max = 6,
|
|
145
|
+
): Array<Record<string, unknown>> {
|
|
146
|
+
if (!sources.length) return [];
|
|
147
|
+
const sourceMap = getSourceMap(sources);
|
|
148
|
+
const recommended = recommendedIds
|
|
149
|
+
.map((id) => sourceMap.get(id))
|
|
150
|
+
.filter((source): source is Record<string, unknown> => Boolean(source));
|
|
151
|
+
if (recommended.length > 0) return recommended.slice(0, max);
|
|
152
|
+
return sources.slice(0, max);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
function renderSynthesis(
|
|
156
|
+
lines: string[],
|
|
157
|
+
synthesis: Record<string, unknown>,
|
|
158
|
+
sources: Array<Record<string, unknown>>,
|
|
159
|
+
maxSources = 6,
|
|
160
|
+
): void {
|
|
161
|
+
if (synthesis.answer) {
|
|
162
|
+
lines.push("## Answer");
|
|
163
|
+
lines.push(String(synthesis.answer));
|
|
164
|
+
lines.push("");
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
const agreement = synthesis.agreement as Record<string, unknown> | undefined;
|
|
168
|
+
const agreementSummary = String(agreement?.summary || "").trim();
|
|
169
|
+
const agreementLevel = String(agreement?.level || "").trim();
|
|
170
|
+
if (agreementSummary || agreementLevel) {
|
|
171
|
+
lines.push("## Consensus");
|
|
172
|
+
lines.push(`- ${formatAgreementLevel(agreementLevel)}${agreementSummary ? ` - ${agreementSummary}` : ""}`);
|
|
173
|
+
lines.push("");
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
const differences = Array.isArray(synthesis.differences) ? (synthesis.differences as string[]) : [];
|
|
177
|
+
if (differences.length > 0) {
|
|
178
|
+
lines.push("## Where Engines Differ");
|
|
179
|
+
for (const difference of differences) lines.push(`- ${difference}`);
|
|
180
|
+
lines.push("");
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
const caveats = Array.isArray(synthesis.caveats) ? (synthesis.caveats as string[]) : [];
|
|
184
|
+
if (caveats.length > 0) {
|
|
185
|
+
lines.push("## Caveats");
|
|
186
|
+
for (const caveat of caveats) lines.push(`- ${caveat}`);
|
|
187
|
+
lines.push("");
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
const claims = Array.isArray(synthesis.claims)
|
|
191
|
+
? (synthesis.claims as Array<Record<string, unknown>>)
|
|
192
|
+
: [];
|
|
193
|
+
if (claims.length > 0) {
|
|
194
|
+
lines.push("## Key Claims");
|
|
195
|
+
for (const claim of claims) {
|
|
196
|
+
const sourceIds = Array.isArray(claim.sourceIds) ? (claim.sourceIds as string[]) : [];
|
|
197
|
+
const support = String(claim.support || "moderate");
|
|
198
|
+
lines.push(`- ${String(claim.claim || "")} [${support}${sourceIds.length ? `; ${sourceIds.join(", ")}` : ""}]`);
|
|
199
|
+
}
|
|
200
|
+
lines.push("");
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
const recommendedIds = Array.isArray(synthesis.recommendedSources)
|
|
204
|
+
? (synthesis.recommendedSources as string[])
|
|
205
|
+
: [];
|
|
206
|
+
const topSources = pickSources(sources, recommendedIds, maxSources);
|
|
207
|
+
if (topSources.length > 0) {
|
|
208
|
+
lines.push("## Top Sources");
|
|
209
|
+
for (const source of topSources) lines.push(formatSourceLine(source));
|
|
210
|
+
lines.push("");
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
|
|
71
214
|
function formatResults(engine: string, data: Record<string, unknown>): string {
|
|
72
215
|
const lines: string[] = [];
|
|
73
216
|
|
|
74
217
|
if (engine === "all") {
|
|
75
|
-
// Synthesized output: prefer _synthesis + _sources
|
|
76
218
|
const synthesis = data._synthesis as Record<string, unknown> | undefined;
|
|
77
219
|
const dedupedSources = data._sources as Array<Record<string, unknown>> | undefined;
|
|
78
220
|
if (synthesis?.answer) {
|
|
79
|
-
lines
|
|
80
|
-
lines.push(
|
|
81
|
-
if (dedupedSources?.length) {
|
|
82
|
-
lines.push("\n**Top sources by consensus:**");
|
|
83
|
-
for (const s of dedupedSources.slice(0, 6)) {
|
|
84
|
-
const engines = (s.engines as string[]) || [];
|
|
85
|
-
lines.push(`- [${s.title || s.url}](${s.url}) [${engines.length}/3]`);
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
lines.push("\n---\n*Synthesized from Perplexity, Bing Copilot, and Google AI*");
|
|
221
|
+
renderSynthesis(lines, synthesis, dedupedSources || [], 6);
|
|
222
|
+
lines.push("*Synthesized from Perplexity, Bing Copilot, and Google AI*\n");
|
|
89
223
|
return lines.join("\n").trim();
|
|
90
224
|
}
|
|
91
225
|
|
|
92
|
-
// Standard output: per-engine answers
|
|
93
226
|
for (const [eng, result] of Object.entries(data)) {
|
|
94
227
|
if (eng.startsWith("_")) continue;
|
|
95
|
-
lines.push(`\n## ${
|
|
228
|
+
lines.push(`\n## ${formatEngineName(eng)}`);
|
|
96
229
|
const r = result as Record<string, unknown>;
|
|
97
230
|
if (r.error) {
|
|
98
231
|
lines.push(`Error: ${r.error}`);
|
|
@@ -128,33 +261,42 @@ function formatResults(engine: string, data: Record<string, unknown>): string {
|
|
|
128
261
|
function formatDeepResearch(data: Record<string, unknown>): string {
|
|
129
262
|
const lines: string[] = [];
|
|
130
263
|
const confidence = data._confidence as Record<string, unknown> | undefined;
|
|
131
|
-
const fetchedSources = data._fetchedSources as Array<Record<string, unknown>> | undefined;
|
|
132
264
|
const dedupedSources = data._sources as Array<Record<string, unknown>> | undefined;
|
|
265
|
+
const synthesis = data._synthesis as Record<string, unknown> | undefined;
|
|
133
266
|
|
|
134
267
|
lines.push("# Deep Research Report\n");
|
|
135
268
|
|
|
136
|
-
// Confidence summary
|
|
137
269
|
if (confidence) {
|
|
138
270
|
const enginesResponded = (confidence.enginesResponded as string[]) || [];
|
|
139
271
|
const enginesFailed = (confidence.enginesFailed as string[]) || [];
|
|
140
|
-
const
|
|
272
|
+
const agreementLevel = String(confidence.agreementLevel || "mixed");
|
|
273
|
+
const firstPartySourceCount = Number(confidence.firstPartySourceCount || 0);
|
|
274
|
+
const sourceTypeBreakdown = confidence.sourceTypeBreakdown as Record<string, number> | undefined;
|
|
141
275
|
|
|
142
276
|
lines.push("## Confidence\n");
|
|
143
|
-
lines.push(`-
|
|
277
|
+
lines.push(`- Agreement: ${formatAgreementLevel(agreementLevel)}`);
|
|
278
|
+
lines.push(`- Engines responded: ${enginesResponded.map(formatEngineName).join(", ") || "none"}`);
|
|
144
279
|
if (enginesFailed.length > 0) {
|
|
145
|
-
lines.push(`-
|
|
280
|
+
lines.push(`- Engines failed: ${enginesFailed.map(formatEngineName).join(", ")}`);
|
|
281
|
+
}
|
|
282
|
+
lines.push(`- Top source consensus: ${confidence.topSourceConsensus || 0}/3 engines`);
|
|
283
|
+
lines.push(`- Total unique sources: ${confidence.sourcesCount || 0}`);
|
|
284
|
+
lines.push(`- Official sources: ${confidence.officialSourceCount || 0}`);
|
|
285
|
+
lines.push(`- First-party sources: ${firstPartySourceCount}`);
|
|
286
|
+
lines.push(`- Fetch success rate: ${confidence.fetchedSourceSuccessRate || 0}`);
|
|
287
|
+
if (sourceTypeBreakdown && Object.keys(sourceTypeBreakdown).length > 0) {
|
|
288
|
+
lines.push(`- Source mix: ${Object.entries(sourceTypeBreakdown).map(([type, count]) => `${humanizeSourceType(type)} ${count}`).join(", ")}`);
|
|
146
289
|
}
|
|
147
|
-
lines.push(`- **Top source consensus:** ${consensusScore}/3 engines`);
|
|
148
|
-
lines.push(`- **Total unique sources:** ${confidence.sourcesCount || 0}`);
|
|
149
290
|
lines.push("");
|
|
150
291
|
}
|
|
151
292
|
|
|
152
|
-
|
|
153
|
-
|
|
293
|
+
if (synthesis?.answer) renderSynthesis(lines, synthesis, dedupedSources || [], 8);
|
|
294
|
+
|
|
295
|
+
lines.push("## Engine Perspectives\n");
|
|
154
296
|
for (const engine of ["perplexity", "bing", "google"]) {
|
|
155
297
|
const r = data[engine] as Record<string, unknown> | undefined;
|
|
156
298
|
if (!r) continue;
|
|
157
|
-
lines.push(`### ${
|
|
299
|
+
lines.push(`### ${formatEngineName(engine)}`);
|
|
158
300
|
if (r.error) {
|
|
159
301
|
lines.push(`⚠️ Error: ${r.error}`);
|
|
160
302
|
} else if (r.answer) {
|
|
@@ -163,41 +305,15 @@ function formatDeepResearch(data: Record<string, unknown>): string {
|
|
|
163
305
|
lines.push("");
|
|
164
306
|
}
|
|
165
307
|
|
|
166
|
-
// Synthesis
|
|
167
|
-
const synthesis = data._synthesis as Record<string, unknown> | undefined;
|
|
168
|
-
if (synthesis?.answer) {
|
|
169
|
-
lines.push("## Synthesized Answer\n");
|
|
170
|
-
lines.push(String(synthesis.answer));
|
|
171
|
-
lines.push("");
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
// Deduplicated sources by consensus
|
|
175
308
|
if (dedupedSources && dedupedSources.length > 0) {
|
|
176
|
-
lines.push("##
|
|
177
|
-
for (const
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
lines.push(`- **[${consensus}/3]** [${s.title || "Untitled"}](${s.url})`);
|
|
309
|
+
lines.push("## Source Registry\n");
|
|
310
|
+
for (const source of dedupedSources) {
|
|
311
|
+
lines.push(formatSourceLine(source));
|
|
312
|
+
renderSourceEvidence(lines, source);
|
|
181
313
|
}
|
|
182
314
|
lines.push("");
|
|
183
315
|
}
|
|
184
316
|
|
|
185
|
-
// Fetched source content
|
|
186
|
-
if (fetchedSources && fetchedSources.length > 0) {
|
|
187
|
-
lines.push("## Source Content (Top Matches)\n");
|
|
188
|
-
for (const fs of fetchedSources) {
|
|
189
|
-
lines.push(`### ${fs.title || fs.url}`);
|
|
190
|
-
lines.push(`*Source: ${fs.url}*`);
|
|
191
|
-
lines.push("");
|
|
192
|
-
if (fs.content) {
|
|
193
|
-
lines.push(String(fs.content).slice(0, 3000));
|
|
194
|
-
} else if (fs.error) {
|
|
195
|
-
lines.push(`⚠️ Could not fetch: ${fs.error}`);
|
|
196
|
-
}
|
|
197
|
-
lines.push("\n---\n");
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
|
|
201
317
|
return lines.join("\n").trim();
|
|
202
318
|
}
|
|
203
319
|
|
package/launch.mjs
CHANGED
|
@@ -5,9 +5,8 @@
|
|
|
5
5
|
// the "Allow remote debugging?" dialog entirely. It runs on port 9222 so it doesn't
|
|
6
6
|
// conflict with your main Chrome session (which may use port 9223).
|
|
7
7
|
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
// The original file is restored on --kill.
|
|
8
|
+
// search.mjs passes CDP_PROFILE_DIR so cdp.mjs targets this dedicated Chrome
|
|
9
|
+
// without ever touching the user's main Chrome DevToolsActivePort file.
|
|
11
10
|
//
|
|
12
11
|
// Usage:
|
|
13
12
|
// node launch.mjs — launch (or report if already running)
|
|
@@ -15,8 +14,8 @@
|
|
|
15
14
|
// node launch.mjs --status — check if running
|
|
16
15
|
|
|
17
16
|
import { spawn } from 'child_process';
|
|
18
|
-
import { existsSync, writeFileSync, readFileSync,
|
|
19
|
-
import { tmpdir,
|
|
17
|
+
import { existsSync, writeFileSync, readFileSync, mkdirSync, unlinkSync } from 'fs';
|
|
18
|
+
import { tmpdir, platform } from 'os';
|
|
20
19
|
import { join } from 'path';
|
|
21
20
|
import http from 'http';
|
|
22
21
|
|
|
@@ -43,18 +42,8 @@ function findChrome() {
|
|
|
43
42
|
return candidates.find(existsSync) || null;
|
|
44
43
|
}
|
|
45
44
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
if (os === 'win32') return join(homedir(), 'AppData', 'Local', 'Google', 'Chrome', 'User Data', 'DevToolsActivePort');
|
|
49
|
-
if (os === 'darwin') return join(homedir(), 'Library', 'Application Support', 'Google', 'Chrome', 'DevToolsActivePort');
|
|
50
|
-
return join(homedir(), '.config', 'google-chrome', 'DevToolsActivePort');
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
const SYSTEM_PORT = systemPortPath();
|
|
54
|
-
const SYSTEM_BACKUP = SYSTEM_PORT + '.bak';
|
|
55
|
-
|
|
56
|
-
const CHROME_FLAGS = [
|
|
57
|
-
`--remote-debugging-port=${PORT}`,
|
|
45
|
+
const CHROME_FLAGS = [
|
|
46
|
+
`--remote-debugging-port=${PORT}`,
|
|
58
47
|
'--disable-features=DevToolsPrivacyUI', // suppresses "Allow remote debugging?" dialog
|
|
59
48
|
'--no-first-run',
|
|
60
49
|
'--no-default-browser-check',
|
|
@@ -108,52 +97,21 @@ async function writePortFile(timeoutMs = 15000) {
|
|
|
108
97
|
return false;
|
|
109
98
|
}
|
|
110
99
|
|
|
111
|
-
|
|
112
|
-
// Back up system DevToolsActivePort (user's main Chrome)
|
|
113
|
-
if (existsSync(SYSTEM_PORT) && !existsSync(SYSTEM_BACKUP)) {
|
|
114
|
-
copyFileSync(SYSTEM_PORT, SYSTEM_BACKUP);
|
|
115
|
-
}
|
|
116
|
-
// Point cdp.mjs to our dedicated Chrome's port
|
|
117
|
-
// On Windows, main Chrome may hold a lock on SYSTEM_PORT (EBUSY).
|
|
118
|
-
// Fall back to writeFileSync which uses CreateFile/WriteFile instead of CopyFile.
|
|
119
|
-
try {
|
|
120
|
-
copyFileSync(ACTIVE_PORT, SYSTEM_PORT);
|
|
121
|
-
} catch (e) {
|
|
122
|
-
if (e.code !== 'EBUSY') throw e;
|
|
123
|
-
try {
|
|
124
|
-
writeFileSync(SYSTEM_PORT, readFileSync(ACTIVE_PORT, 'utf8'), 'utf8');
|
|
125
|
-
} catch {
|
|
126
|
-
console.warn('Warning: could not redirect DevToolsActivePort (file busy) — cdp.mjs will use existing port.');
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
function restoreCdpToMainChrome() {
|
|
132
|
-
if (existsSync(SYSTEM_BACKUP)) {
|
|
133
|
-
copyFileSync(SYSTEM_BACKUP, SYSTEM_PORT);
|
|
134
|
-
console.log('Restored DevToolsActivePort to main Chrome.');
|
|
135
|
-
} else if (existsSync(SYSTEM_PORT)) {
|
|
136
|
-
// No backup means main Chrome wasn't using CDP — remove our file
|
|
137
|
-
try { unlinkSync(SYSTEM_PORT); } catch {}
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
// ---------------------------------------------------------------------------
|
|
100
|
+
// ---------------------------------------------------------------------------
|
|
142
101
|
|
|
143
102
|
async function main() {
|
|
144
103
|
const arg = process.argv[2];
|
|
145
104
|
|
|
146
|
-
if (arg === '--kill') {
|
|
147
|
-
const pid = isRunning();
|
|
148
|
-
if (pid) {
|
|
149
|
-
try { process.kill(pid, 'SIGTERM'); console.log(`Stopped Chrome (pid ${pid}).`); }
|
|
150
|
-
catch (e) { console.error(`Failed: ${e.message}`); }
|
|
151
|
-
} else {
|
|
152
|
-
console.log('GreedySearch Chrome is not running.');
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
}
|
|
105
|
+
if (arg === '--kill') {
|
|
106
|
+
const pid = isRunning();
|
|
107
|
+
if (pid) {
|
|
108
|
+
try { process.kill(pid, 'SIGTERM'); console.log(`Stopped Chrome (pid ${pid}).`); }
|
|
109
|
+
catch (e) { console.error(`Failed: ${e.message}`); }
|
|
110
|
+
} else {
|
|
111
|
+
console.log('GreedySearch Chrome is not running.');
|
|
112
|
+
}
|
|
113
|
+
return;
|
|
114
|
+
}
|
|
157
115
|
|
|
158
116
|
if (arg === '--status') {
|
|
159
117
|
const pid = isRunning();
|
|
@@ -165,13 +123,12 @@ async function main() {
|
|
|
165
123
|
// Already running?
|
|
166
124
|
const existing = isRunning();
|
|
167
125
|
if (existing) {
|
|
168
|
-
const ready = await writePortFile(5000);
|
|
169
|
-
if (ready) {
|
|
170
|
-
console.log(`GreedySearch Chrome already running (pid ${existing}, port ${PORT}).`);
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
}
|
|
126
|
+
const ready = await writePortFile(5000);
|
|
127
|
+
if (ready) {
|
|
128
|
+
console.log(`GreedySearch Chrome already running (pid ${existing}, port ${PORT}).`);
|
|
129
|
+
console.log('Dedicated GreedySearch DevToolsActivePort is ready.');
|
|
130
|
+
return;
|
|
131
|
+
}
|
|
175
132
|
// Stale PID — process alive but not Chrome on port 9222. Fall through to fresh launch.
|
|
176
133
|
console.log(`Stale PID ${existing} detected (not Chrome on port ${PORT}) — launching fresh.`);
|
|
177
134
|
try { unlinkSync(PID_FILE); } catch {}
|
|
@@ -195,16 +152,15 @@ async function main() {
|
|
|
195
152
|
proc.unref();
|
|
196
153
|
writeFileSync(PID_FILE, String(proc.pid));
|
|
197
154
|
|
|
198
|
-
// Wait for Chrome HTTP endpoint
|
|
199
|
-
const portFileReady = await writePortFile();
|
|
200
|
-
if (!portFileReady) {
|
|
201
|
-
console.error('Chrome did not become ready within 15s.');
|
|
202
|
-
process.exit(1);
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
console.log(
|
|
207
|
-
|
|
208
|
-
}
|
|
155
|
+
// Wait for Chrome HTTP endpoint and build the dedicated DevToolsActivePort file
|
|
156
|
+
const portFileReady = await writePortFile();
|
|
157
|
+
if (!portFileReady) {
|
|
158
|
+
console.error('Chrome did not become ready within 15s.');
|
|
159
|
+
process.exit(1);
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
console.log(`Ready. No more "Allow remote debugging?" dialogs.`);
|
|
163
|
+
console.log('GreedySearch now uses its own isolated DevToolsActivePort file.');
|
|
164
|
+
}
|
|
209
165
|
|
|
210
166
|
main();
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# New Feature Ideas
|
|
2
|
+
|
|
3
|
+
Ideas for future features — thinking from the perspective of an AI assistant using these tools.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## 1. Source Verification
|
|
8
|
+
|
|
9
|
+
**Problem:** I get sources but can't verify if they're live, updated, or actually support the claimed content.
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
verify_sources({ urls: ["https://...", "https://..."] })
|
|
13
|
+
→ [{ url, status: 200, title, snippet, lastModified, claim: "supports X" }]
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
**Use cases:**
|
|
17
|
+
- Before citing a source, verify it's not 404
|
|
18
|
+
- Check if a page actually contains the claimed information
|
|
19
|
+
- Get last-modified dates to assess freshness
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## 2. Incremental / Continuation Research
|
|
24
|
+
|
|
25
|
+
**Problem:** After deep_research on "RAG vs fine-tuning", going deeper on just RAG means re-running everything with a new query and losing original context.
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
deep_research({ query: "RAG vs fine-tuning", ... }) // initial
|
|
29
|
+
continue_research({ previousId: "...", query: "production RAG architectures" }) // goes deeper on RAG
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
**Use cases:**
|
|
33
|
+
- Drill into a specific aspect after initial broad research
|
|
34
|
+
- Build on previous results without re-fetching everything
|
|
35
|
+
- Progressive disclosure of complex topics
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## 3. Multi-Query Synthesis
|
|
40
|
+
|
|
41
|
+
**Problem:** One query isn't enough for complex research. I chain multiple greedy_search calls manually.
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
multi_research({
|
|
45
|
+
queries: ["auth best practices", "NextAuth vs Clerk vs Lucia", "Next.js auth security"],
|
|
46
|
+
synthesize: true
|
|
47
|
+
})
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**Use cases:**
|
|
51
|
+
- "Best auth for Next.js" needs multiple angles
|
|
52
|
+
- Research with different facets (comparison, security, performance)
|
|
53
|
+
- Casting a wider net when single query returns narrow results
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## 4. Structured Extraction
|
|
58
|
+
|
|
59
|
+
**Problem:** When researching "which libraries are maintained", I want tables (name, stars, last commit, license), not prose.
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
extract_structured({
|
|
63
|
+
query: "Python HTTP client libraries 2026",
|
|
64
|
+
schema: { name: "string", stars: "number", lastUpdated: "date", async: "boolean" }
|
|
65
|
+
})
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**Use cases:**
|
|
69
|
+
- Library comparisons as structured data
|
|
70
|
+
- Dependency audits
|
|
71
|
+
- Feature matrices for tools/frameworks
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## 5. Confidence Scoring on Specific Claims
|
|
76
|
+
|
|
77
|
+
**Problem:** I say "high confidence" but it's hand-wavy. What if I could ask: "how confident are we that library X is actively maintained?"
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
verify_claim({
|
|
81
|
+
claim: "Prisma is actively maintained",
|
|
82
|
+
evidence: ["last commit: 2 weeks ago", "open issues: 45", "npm downloads: 2M/week"]
|
|
83
|
+
})
|
|
84
|
+
→ { confidence: 0.95, reasoning: "..." }
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## 6. Research Cache / History
|
|
90
|
+
|
|
91
|
+
**Problem:** I do expensive deep_research, then the user asks a follow-up. I have to re-run everything.
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
get_research({ id: "..." }) // retrieve previous results
|
|
95
|
+
list_research({ query: "RAG" }) // find related previous research
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Priority
|
|
101
|
+
|
|
102
|
+
1. **Source verification** — high value, relatively simple, fixes trust gap
|
|
103
|
+
2. **Multi-query synthesis** — high value, complex but powerful
|
|
104
|
+
3. **Incremental research** — medium value, nice UX improvement
|
|
105
|
+
4. **Structured extraction** — medium value, specialized use cases
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@apmantza/greedysearch-pi",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.4.0",
|
|
4
4
|
"description": "Pi extension: browser-automation tool that searches Perplexity, Bing Copilot, and Google AI in parallel, extracts answers and sources via CDP, with optional Gemini synthesis — grounded AI answers from real browser interactions.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"keywords": [
|
package/search.mjs
CHANGED
|
@@ -54,12 +54,400 @@ const ENGINE_DOMAINS = {
|
|
|
54
54
|
gemini: 'gemini.google.com',
|
|
55
55
|
};
|
|
56
56
|
|
|
57
|
-
|
|
57
|
+
const TRACKING_PARAMS = [
|
|
58
|
+
'fbclid',
|
|
59
|
+
'gclid',
|
|
60
|
+
'ref',
|
|
61
|
+
'ref_src',
|
|
62
|
+
'ref_url',
|
|
63
|
+
'source',
|
|
64
|
+
'utm_campaign',
|
|
65
|
+
'utm_content',
|
|
66
|
+
'utm_medium',
|
|
67
|
+
'utm_source',
|
|
68
|
+
'utm_term',
|
|
69
|
+
];
|
|
70
|
+
|
|
71
|
+
const COMMUNITY_HOSTS = [
|
|
72
|
+
'dev.to',
|
|
73
|
+
'hashnode.com',
|
|
74
|
+
'medium.com',
|
|
75
|
+
'reddit.com',
|
|
76
|
+
'stackoverflow.com',
|
|
77
|
+
'stackexchange.com',
|
|
78
|
+
'substack.com',
|
|
79
|
+
];
|
|
80
|
+
|
|
81
|
+
const NEWS_HOSTS = [
|
|
82
|
+
'arstechnica.com',
|
|
83
|
+
'techcrunch.com',
|
|
84
|
+
'theverge.com',
|
|
85
|
+
'venturebeat.com',
|
|
86
|
+
'wired.com',
|
|
87
|
+
'zdnet.com',
|
|
88
|
+
];
|
|
89
|
+
|
|
90
|
+
function trimText(text = '', maxChars = 240) {
|
|
91
|
+
const clean = String(text).replace(/\s+/g, ' ').trim();
|
|
92
|
+
if (clean.length <= maxChars) return clean;
|
|
93
|
+
return clean.slice(0, maxChars).replace(/\s+\S*$/, '') + '...';
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
function normalizeSourceTitle(title = '') {
|
|
97
|
+
const clean = trimText(title, 180);
|
|
98
|
+
if (!clean) return '';
|
|
99
|
+
if (/^https?:\/\//i.test(clean)) return '';
|
|
100
|
+
|
|
101
|
+
const wordCount = clean.split(/\s+/).filter(Boolean).length;
|
|
102
|
+
const hasUppercase = /[A-Z]/.test(clean);
|
|
103
|
+
const hasDigit = /\d/.test(clean);
|
|
104
|
+
const looksLikeFragment = clean === clean.toLowerCase() && wordCount <= 4 && !hasUppercase && !hasDigit;
|
|
105
|
+
return looksLikeFragment ? '' : clean;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
 * Choose the better of two candidate titles for the same source.
 *
 * Both candidates are first passed through normalizeSourceTitle(). If only one
 * survives it wins; if both survive, the longer one wins and ties keep the
 * current title.
 *
 * normalizeSourceTitle() already returns '' for anything starting with
 * http:// or https://, so no separate URL-shape comparison is needed here.
 *
 * @param {string} [currentTitle=''] Title held so far.
 * @param {string} [nextTitle=''] Newly seen title.
 * @returns {string} The preferred normalized title, or '' if neither is usable.
 */
function pickPreferredTitle(currentTitle = '', nextTitle = '') {
  const current = normalizeSourceTitle(currentTitle);
  const next = normalizeSourceTitle(nextTitle);
  if (!next) return current;
  if (!current) return next;
  return next.length > current.length ? next : current;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Reduce a URL to a canonical string so the same page reported by different
 * engines compares equal.
 *
 * Steps: parse, reject non-http(s) schemes, drop the fragment, lower-case the
 * host, clear an explicit default port, delete tracking query keys (anything
 * in TRACKING_PARAMS or starting with "utm_"), sort the remaining query keys,
 * and strip trailing slashes from the path.
 *
 * @param {string} rawUrl URL as reported by an engine.
 * @returns {string|null} The canonical URL, or null when `rawUrl` is empty,
 *   unparseable, or not http/https.
 */
function normalizeUrl(rawUrl) {
  if (!rawUrl) return null;

  try {
    const parsed = new URL(rawUrl);
    const isHttps = parsed.protocol === 'https:';
    const isHttp = parsed.protocol === 'http:';
    if (!isHttp && !isHttps) return null;

    parsed.hash = '';
    parsed.hostname = parsed.hostname.toLowerCase();
    if ((isHttps && parsed.port === '443') || (isHttp && parsed.port === '80')) {
      parsed.port = '';
    }

    // Collect first, delete afterwards: the key list is a snapshot, so the
    // deletions cannot disturb the iteration.
    const trackingKeys = Array.from(parsed.searchParams.keys()).filter(name => {
      const lowered = name.toLowerCase();
      return TRACKING_PARAMS.includes(lowered) || lowered.startsWith('utm_');
    });
    trackingKeys.forEach(name => parsed.searchParams.delete(name));
    parsed.searchParams.sort();

    const path = parsed.pathname.replace(/\/+$/, '') || '/';
    parsed.pathname = path;

    const serialized = parsed.toString();
    if (path !== '/') return serialized;
    // Root path: also drop the lone trailing slash ("https://a.com/" -> "https://a.com").
    return serialized.replace(/\/$/, '');
  } catch {
    return null;
  }
}
|
|
145
|
+
|
|
146
|
+
/**
 * Extract the lower-cased host name of a URL, without a leading "www.".
 *
 * @param {string} rawUrl URL to inspect.
 * @returns {string} The host name, or '' when `rawUrl` cannot be parsed.
 */
function getDomain(rawUrl) {
  let host;
  try {
    host = new URL(rawUrl).hostname;
  } catch {
    return '';
  }
  const lowered = host.toLowerCase();
  return lowered.startsWith('www.') ? lowered.slice(4) : lowered;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Report whether `domain` is one of `hosts` or a subdomain of one of them.
 *
 * @param {string} domain Host name to test.
 * @param {string[]} hosts Candidate parent hosts.
 * @returns {boolean} True on an exact match or a ".host" suffix match.
 */
function matchesDomain(domain, hosts) {
  for (const host of hosts) {
    if (domain === host) return true;
    // The leading dot keeps "notreddit.com" from matching "reddit.com".
    if (domain.endsWith('.' + host)) return true;
  }
  return false;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Classify a source into one of: 'repo', 'community', 'news',
 * 'official-docs', 'maintainer-blog', or 'website'.
 *
 * Checks run in that order, so domain-based categories win over the title and
 * path heuristics.
 *
 * @param {string} domain Host name without "www." (see getDomain()).
 * @param {string} [title=''] Source title; searched case-insensitively.
 * @param {string} [rawUrl=''] Source URL; searched case-insensitively.
 * @returns {string} The source type label.
 */
function classifySourceType(domain, title = '', rawUrl = '') {
  const lowerTitle = title.toLowerCase();
  const lowerUrl = rawUrl.toLowerCase();

  // Match a whole path segment that is followed by "/", "?", "#", or the end
  // of the URL. normalizeUrl() strips trailing slashes, so a canonical URL
  // such as "https://example.com/docs" has no "/docs/" substring to find.
  const docsPath = /\/(?:docs|reference|api)(?:[\/?#]|$)/;
  const blogPath = /\/blog(?:[\/?#]|$)/;

  if (domain === 'github.com' || domain === 'gitlab.com') return 'repo';
  if (matchesDomain(domain, COMMUNITY_HOSTS)) return 'community';
  if (matchesDomain(domain, NEWS_HOSTS)) return 'news';
  if (
    domain.startsWith('docs.') ||
    domain.startsWith('developer.') ||
    domain.startsWith('developers.') ||
    domain.startsWith('api.') ||
    lowerTitle.includes('documentation') ||
    lowerTitle.includes('docs') ||
    lowerTitle.includes('reference') ||
    docsPath.test(lowerUrl)
  ) {
    return 'official-docs';
  }
  if (domain.startsWith('blog.') || blogPath.test(lowerUrl)) return 'maintainer-blog';
  return 'website';
}
|
|
183
|
+
|
|
184
|
+
/**
 * Numeric sort weight for a source type; higher means more authoritative.
 *
 * @param {string} sourceType Label produced by classifySourceType().
 * @returns {number} 5 for 'official-docs' down to 0 for 'news'; unknown
 *   labels also score 0.
 */
function sourceTypePriority(sourceType) {
  // Position in this list is the priority.
  const ascending = ['news', 'community', 'website', 'maintainer-blog', 'repo', 'official-docs'];
  const position = ascending.indexOf(sourceType);
  return position < 0 ? 0 : position;
}
|
|
195
|
+
|
|
196
|
+
/**
 * Best (lowest) rank a source achieved in any engine.
 *
 * @param {object} source Registry entry; `perEngine` maps engine name to
 *   an object with a `rank`.
 * @returns {number} The smallest rank found. Missing or falsy ranks count as
 *   99, and 99 is also returned when there are no per-engine entries.
 */
function bestRank(source) {
  const entries = Object.values(source.perEngine || {});
  if (entries.length === 0) return 99;
  return entries.reduce((lowest, entry) => Math.min(lowest, entry?.rank || 99), Infinity);
}
|
|
200
|
+
|
|
201
|
+
/**
 * Merge the per-engine source lists into one de-duplicated, ranked registry.
 *
 * Sources are keyed by their canonical URL (normalizeUrl()). Each entry
 * records which engines cited it, the rank and title each engine gave it, and
 * a source type. The registry is sorted by: number of citing engines (desc),
 * source-type priority (desc), best per-engine rank (asc), then domain. It is
 * capped at 12 entries, which receive the IDs "S1".."S12" in sorted order.
 *
 * @param {object} out Results keyed by engine name ('perplexity', 'bing',
 *   'google'); each may carry a `sources` array of `{ url, title }`.
 * @returns {object[]} The ranked registry entries.
 */
function buildSourceRegistry(out) {
  const seen = new Map();
  const engineOrder = ['perplexity', 'bing', 'google'];

  for (const engine of engineOrder) {
    const result = out[engine];
    if (!result?.sources) continue;

    for (let i = 0; i < result.sources.length; i++) {
      const source = result.sources[i];
      // `source?.` tolerates null/undefined holes in an engine's list.
      const canonicalUrl = normalizeUrl(source?.url);
      if (!canonicalUrl || canonicalUrl.length < 10) continue;

      const title = normalizeSourceTitle(source.title || '');
      const domain = getDomain(canonicalUrl);
      const sourceType = classifySourceType(domain, title, canonicalUrl);
      const existing = seen.get(canonicalUrl) || {
        id: '',
        canonicalUrl,
        displayUrl: source.url || canonicalUrl,
        domain,
        title: '',
        engines: [],
        engineCount: 0,
        perEngine: {},
        sourceType,
        isOfficial: sourceType === 'official-docs',
      };

      existing.title = pickPreferredTitle(existing.title, title);

      // A later occurrence with a more telling title can reveal that the page
      // is documentation. Promote the type together with the flag so the two
      // never disagree.
      if (sourceType === 'official-docs') {
        existing.sourceType = 'official-docs';
        existing.isOfficial = true;
      }

      if (!existing.engines.includes(engine)) {
        existing.engines.push(engine);
      }

      // If one engine lists the same page more than once, keep its best
      // (lowest) position instead of letting the last occurrence win.
      const previous = existing.perEngine[engine];
      existing.perEngine[engine] = {
        rank: previous ? Math.min(previous.rank, i + 1) : i + 1,
        title: pickPreferredTitle(previous?.title || '', title),
      };

      seen.set(canonicalUrl, existing);
    }
  }

  const sources = Array.from(seen.values())
    .map(source => ({
      ...source,
      engineCount: source.engines.length,
    }))
    .sort((a, b) => {
      if (b.engineCount !== a.engineCount) return b.engineCount - a.engineCount;
      const typeDelta = sourceTypePriority(b.sourceType) - sourceTypePriority(a.sourceType);
      if (typeDelta !== 0) return typeDelta;
      const rankDelta = bestRank(a) - bestRank(b);
      if (rankDelta !== 0) return rankDelta;
      return a.domain.localeCompare(b.domain);
    })
    .slice(0, 12)
    .map((source, index) => ({
      ...source,
      id: `S${index + 1}`,
      // Fall back to something printable when no engine supplied a usable title.
      title: source.title || source.domain || source.canonicalUrl,
    }));

  return sources;
}
|
|
269
|
+
|
|
270
|
+
/**
 * Attach fetch results to the registry entries they belong to.
 *
 * Entries are matched by `id`. Entries without a fetch result are returned
 * unchanged; matched entries are copied with a `fetch` summary and, when the
 * fetched page title is preferable (see pickPreferredTitle()), an updated
 * title.
 *
 * @param {object[]} sources Registry entries.
 * @param {object[]} fetchedSources Fetch results, each carrying the `id` of
 *   the registry entry it was fetched for.
 * @returns {object[]} A new array in the same order as `sources`.
 */
function mergeFetchDataIntoSources(sources, fetchedSources) {
  const fetchedById = new Map();
  for (const entry of fetchedSources) {
    fetchedById.set(entry.id, entry);
  }

  return sources.map(source => {
    const fetched = fetchedById.get(source.id);
    if (!fetched) return source;

    const preferredTitle = pickPreferredTitle(source.title, fetched.title || '');
    const fetchSummary = {
      attempted: true,
      ok: !fetched.error,
      status: fetched.status || null,
      finalUrl: fetched.finalUrl || fetched.url || source.canonicalUrl,
      contentType: fetched.contentType || '',
      lastModified: fetched.lastModified || '',
      title: fetched.title || '',
      snippet: fetched.snippet || '',
      contentChars: fetched.contentChars || 0,
      error: fetched.error || '',
    };

    return {
      ...source,
      title: preferredTitle || source.title,
      fetch: fetchSummary,
    };
  });
}
|
|
295
|
+
|
|
296
|
+
/**
 * Best-effort extraction of a JSON value from model output.
 *
 * Three readings of the text are tried in order and the first one that parses
 * is returned: the trimmed text as-is, the text with a leading/trailing
 * Markdown code fence removed, and the span from the first "{" to the
 * last "}".
 *
 * @param {*} text Raw model output.
 * @returns {*} The parsed JSON value, or null when `text` is falsy or no
 *   reading parses.
 */
function parseStructuredJson(text) {
  if (!text) return null;

  const raw = String(text).trim();
  const unfenced = raw
    .replace(/^```json\s*/i, '')
    .replace(/^```\s*/i, '')
    .replace(/```$/i, '')
    .trim();
  const embedded = raw.match(/\{[\s\S]*\}/);
  const attempts = embedded ? [raw, unfenced, embedded[0]] : [raw, unfenced];

  for (const attempt of attempts) {
    try {
      return JSON.parse(attempt);
    } catch {
      // not valid JSON in this form; fall through to the next reading
    }
  }
  return null;
}
|
|
316
|
+
|
|
317
|
+
/**
 * Coerce a parsed synthesis payload (model output, therefore untrusted) into
 * the fixed shape the rest of the pipeline expects.
 *
 * - Text fields are whitespace-normalized and length-capped via trimText().
 *   Only strings and numbers are accepted as text; objects, arrays and other
 *   values are treated as missing rather than stringified.
 * - `agreement.level` and each claim's `support` fall back to 'mixed' and
 *   'moderate' respectively when not one of the allowed values.
 * - Source ID lists keep only IDs present in `sources`, without duplicates.
 * - `differences` and `caveats` keep at most 5 items, `recommendedSources`
 *   at most 6; claims with empty text are dropped.
 *
 * @param {*} payload Parsed model output; may be null or malformed.
 * @param {object[]} sources Registry entries whose `id`s are the valid IDs.
 * @param {string} [fallbackAnswer=''] Used when the payload has no usable answer.
 * @returns {{answer: string, agreement: {level: string, summary: string},
 *   differences: string[], caveats: string[], claims: object[],
 *   recommendedSources: string[]}}
 */
function normalizeSynthesisPayload(payload, sources, fallbackAnswer = '') {
  const sourceIds = new Set(sources.map(source => source.id));

  // Accept only scalar text; anything else would stringify to noise such as
  // "[object Object]".
  const toText = value =>
    typeof value === 'string' || typeof value === 'number' ? String(value) : '';

  // Keep only registry IDs, preserving first-seen order and dropping repeats.
  const knownIds = ids =>
    Array.isArray(ids) ? [...new Set(ids.filter(id => sourceIds.has(id)))] : [];

  const textList = items =>
    Array.isArray(items)
      ? items.map(item => trimText(toText(item), 220)).filter(Boolean).slice(0, 5)
      : [];

  const agreementLevel = ['high', 'medium', 'low', 'mixed', 'conflicting'].includes(payload?.agreement?.level)
    ? payload.agreement.level
    : 'mixed';

  const claims = Array.isArray(payload?.claims)
    ? payload.claims
        .map(claim => ({
          claim: trimText(toText(claim?.claim), 260),
          support: ['strong', 'moderate', 'weak', 'conflicting'].includes(claim?.support)
            ? claim.support
            : 'moderate',
          sourceIds: knownIds(claim?.sourceIds),
        }))
        .filter(claim => claim.claim)
    : [];

  return {
    answer: trimText(toText(payload?.answer) || fallbackAnswer, 4000),
    agreement: {
      level: agreementLevel,
      summary: trimText(toText(payload?.agreement?.summary), 280),
    },
    differences: textList(payload?.differences),
    caveats: textList(payload?.caveats),
    claims,
    recommendedSources: knownIds(payload?.recommendedSources).slice(0, 6),
  };
}
|
|
349
|
+
|
|
350
|
+
/**
 * Build the text prompt that asks the synthesis model for a JSON summary of
 * the engine results.
 *
 * The prompt contains fixed instructions, the expected JSON schema, the user
 * query, a per-engine summary (status, trimmed answer, up to 6 source IDs in
 * that engine's rank order) and a source registry limited to the first 8
 * entries (10 when `grounded`). In grounded mode each registry entry that was
 * fetched also carries a fetch summary with a snippet of at most 700 chars.
 *
 * @param {string} query The user's query.
 * @param {object} results Results keyed by engine name.
 * @param {object[]} sources Ranked registry entries.
 * @param {{grounded?: boolean}} [options] `grounded` enables fetched-source
 *   evidence and longer answer excerpts.
 * @returns {string} The prompt text.
 */
function buildSynthesisPrompt(query, results, sources, { grounded = false } = {}) {
  const sourceRegistry = sources.slice(0, grounded ? 10 : 8).map(source => ({
    id: source.id,
    title: source.title,
    domain: source.domain,
    canonicalUrl: source.canonicalUrl,
    sourceType: source.sourceType,
    isOfficial: source.isOfficial,
    engines: source.engines,
    engineCount: source.engineCount,
    perEngine: source.perEngine,
    fetch: grounded && source.fetch?.attempted ? {
      ok: source.fetch.ok,
      status: source.fetch.status,
      lastModified: source.fetch.lastModified,
      snippet: trimText(source.fetch.snippet || '', 700),
    } : undefined,
  }));

  // The model is told to cite only IDs from the registry, so the per-engine
  // lists must not mention sources that were cut from it.
  const registryIds = new Set(sourceRegistry.map(entry => entry.id));

  const engineSummaries = {};
  for (const engine of ['perplexity', 'bing', 'google']) {
    const result = results[engine];
    if (!result) continue;
    if (result.error) {
      engineSummaries[engine] = { status: 'error', error: String(result.error) };
      continue;
    }

    engineSummaries[engine] = {
      status: 'ok',
      answer: trimText(result.answer || '', grounded ? 4500 : 2200),
      sourceIds: sources
        .filter(source => registryIds.has(source.id) && source.engines.includes(engine))
        .sort((a, b) => (a.perEngine[engine]?.rank || 99) - (b.perEngine[engine]?.rank || 99))
        .map(source => source.id)
        .slice(0, 6),
    };
  }

  return [
    'You are synthesizing results from Perplexity, Bing Copilot, and Google AI.',
    grounded
      ? 'Use the fetched source snippets as the strongest evidence. Use engine answers for perspective and conflict detection.'
      : 'Use the engine answers for perspective. Use the source registry for provenance and citations.',
    'Prefer official docs, release notes, repositories, and maintainer-authored sources when available.',
    'If the engines disagree, say so explicitly.',
    'Do not invent sources. Only reference source IDs from the source registry.',
    'Return valid JSON only. No markdown fences, no prose outside the JSON object.',
    '',
    'JSON schema:',
    '{',
    ' "answer": "short direct answer",',
    ' "agreement": { "level": "high|medium|low|mixed|conflicting", "summary": "..." },',
    ' "differences": ["..."],',
    ' "caveats": ["..."],',
    ' "claims": [',
    ' { "claim": "...", "support": "strong|moderate|weak|conflicting", "sourceIds": ["S1"] }',
    ' ],',
    ' "recommendedSources": ["S1", "S2"]',
    '}',
    '',
    `User query: ${query}`,
    '',
    `Engine results:\n${JSON.stringify(engineSummaries, null, 2)}`,
    '',
    `Source registry:\n${JSON.stringify(sourceRegistry, null, 2)}`,
  ].join('\n');
}
|
|
418
|
+
|
|
419
|
+
/**
 * Summarize how much trust the combined result deserves.
 *
 * Reads `out._sources` (the ranked registry), `out._synthesis` and the
 * per-engine results, and reports source counts, the engine count of the
 * top-ranked source, an agreement level, which engines answered or failed,
 * the share of attempted fetches that succeeded, and a per-type source tally.
 *
 * The agreement level is taken from the synthesis when present; otherwise it
 * is derived from the top source's engine count (3+ = 'high', 2 = 'medium',
 * else 'low').
 *
 * @param {object} out Combined result object.
 * @returns {object} The confidence summary.
 */
function buildConfidence(out) {
  const registry = Array.isArray(out._sources) ? out._sources : [];

  let officialSourceCount = 0;
  let firstPartySourceCount = 0;
  let fetchedAttempted = 0;
  let fetchedSucceeded = 0;
  const sourceTypeBreakdown = {};

  for (const entry of registry) {
    if (entry.isOfficial) officialSourceCount += 1;
    if (entry.isOfficial || entry.sourceType === 'maintainer-blog') firstPartySourceCount += 1;
    if (entry.fetch?.attempted) fetchedAttempted += 1;
    if (entry.fetch?.ok) fetchedSucceeded += 1;
    sourceTypeBreakdown[entry.sourceType] = (sourceTypeBreakdown[entry.sourceType] || 0) + 1;
  }

  // The registry is read as already ranked, so its first entry is the top source.
  const topConsensus = registry[0]?.engineCount || 0;

  let derivedLevel = 'low';
  if (topConsensus >= 3) derivedLevel = 'high';
  else if (topConsensus >= 2) derivedLevel = 'medium';

  const successRate = fetchedAttempted > 0
    ? Number((fetchedSucceeded / fetchedAttempted).toFixed(2))
    : 0;

  return {
    sourcesCount: registry.length,
    topSourceConsensus: topConsensus,
    agreementLevel: out._synthesis?.agreement?.level || derivedLevel,
    enginesResponded: ALL_ENGINES.filter(engine => out[engine]?.answer && !out[engine]?.error),
    enginesFailed: ALL_ENGINES.filter(engine => out[engine]?.error),
    officialSourceCount,
    firstPartySourceCount,
    fetchedSourceSuccessRate: successRate,
    sourceTypeBreakdown,
  };
}
|
|
444
|
+
|
|
445
|
+
/**
 * Look up an already-open tab for `engine` in the pages cache file.
 *
 * The cache at PAGES_CACHE is read as a JSON array of page records; the first
 * record whose `url` contains the engine's domain (ENGINE_DOMAINS) wins.
 *
 * @param {string} engine Engine key into ENGINE_DOMAINS.
 * @returns {string|null} The matching record's `targetId`, or null when the
 *   cache file is missing, unreadable, malformed, or has no matching page.
 */
function getFullTabFromCache(engine) {
  try {
    if (!existsSync(PAGES_CACHE)) return null;

    const cachedPages = JSON.parse(readFileSync(PAGES_CACHE, 'utf8'));
    const match = cachedPages.find(page => page.url.includes(ENGINE_DOMAINS[engine]));
    if (!match) return null;
    return match.targetId;
  } catch {
    // Any read/parse/shape problem is treated as "no cached tab".
    return null;
  }
}
|
|
65
453
|
|
|
@@ -108,6 +496,31 @@ async function openNewTab() {
|
|
|
108
496
|
return targetId;
|
|
109
497
|
}
|
|
110
498
|
|
|
499
|
+
/**
 * Return a tab target ID for `engine`, reusing a cached tab when one exists
 * and opening a new tab otherwise.
 *
 * @param {string} engine Engine key understood by getFullTabFromCache().
 * @returns {Promise<string>} The target ID of the tab to use.
 */
async function getOrOpenEngineTab(engine) {
  // NOTE(review): presumably `list` refreshes the pages cache that
  // getFullTabFromCache() reads — confirm against the cdp helper.
  await cdp(['list']);

  const cachedTargetId = getFullTabFromCache(engine);
  if (cachedTargetId) return cachedTargetId;
  return openNewTab();
}
|
|
503
|
+
|
|
504
|
+
/**
 * Ask the browser to bring the given tab to the foreground.
 *
 * Sends `Target.activateTarget` through whichever tab getAnyTab() returns.
 * Failures are swallowed on purpose: activation is best-effort.
 *
 * @param {string} targetId Target ID of the tab to activate.
 * @returns {Promise<void>}
 */
async function activateTab(targetId) {
  try {
    const anchorTab = await getAnyTab();
    const params = JSON.stringify({ targetId });
    await cdp(['evalraw', anchorTab, 'Target.activateTarget', params]);
  } catch {
    // best-effort
  }
}
|
|
512
|
+
|
|
513
|
+
/**
 * Close several tabs one after another, then re-list pages.
 *
 * Falsy IDs are skipped. When the list is non-empty, the function waits
 * 300 ms after closing and then runs the `list` command, ignoring any
 * failure from it.
 *
 * @param {string[]} [targetIds=[]] Target IDs of the tabs to close.
 * @returns {Promise<void>}
 */
async function closeTabs(targetIds = []) {
  for (const id of targetIds) {
    if (id) await closeTab(id);
  }

  if (!(targetIds.length > 0)) return;

  await new Promise(resolve => setTimeout(resolve, 300));
  await cdp(['list']).catch(() => null);
}
|
|
523
|
+
|
|
111
524
|
async function closeTab(targetId) {
|
|
112
525
|
try {
|
|
113
526
|
const anchor = await getAnyTab();
|
|
@@ -200,10 +613,22 @@ async function fetchSourceContent(url, maxChars = 5000) {
|
|
|
200
613
|
// Extract title
|
|
201
614
|
const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
|
|
202
615
|
const title = titleMatch ? titleMatch[1].trim() : '';
|
|
616
|
+
const finalUrl = res.url || url;
|
|
617
|
+
const snippet = trimText(content, 320);
|
|
203
618
|
|
|
204
|
-
return {
|
|
619
|
+
return {
|
|
620
|
+
url,
|
|
621
|
+
finalUrl,
|
|
622
|
+
status: res.status,
|
|
623
|
+
contentType: res.headers.get('content-type') || '',
|
|
624
|
+
lastModified: res.headers.get('last-modified') || '',
|
|
625
|
+
title,
|
|
626
|
+
snippet,
|
|
627
|
+
content,
|
|
628
|
+
contentChars: content.length,
|
|
629
|
+
};
|
|
205
630
|
} catch (e) {
|
|
206
|
-
return { url, title: '', content: null, error: e.message };
|
|
631
|
+
return { url, title: '', content: null, snippet: '', contentChars: 0, error: e.message };
|
|
207
632
|
}
|
|
208
633
|
}
|
|
209
634
|
|
|
@@ -216,16 +641,17 @@ async function fetchMultipleSources(sources, maxSources = 5, maxChars = 5000) {
|
|
|
216
641
|
|
|
217
642
|
for (let i = 0; i < toFetch.length; i++) {
|
|
218
643
|
const s = toFetch[i];
|
|
219
|
-
process.stderr.write(`[greedysearch] Fetching ${i + 1}/${toFetch.length}: ${s.url.slice(0, 60)}...\n`);
|
|
644
|
+
process.stderr.write(`[greedysearch] Fetching ${i + 1}/${toFetch.length}: ${(s.canonicalUrl || s.url).slice(0, 60)}...\n`);
|
|
220
645
|
try {
|
|
221
|
-
const result = await fetchSourceContent(s.url, maxChars);
|
|
646
|
+
const result = await fetchSourceContent(s.canonicalUrl || s.url, maxChars);
|
|
647
|
+
fetched.push({ id: s.id, ...result });
|
|
222
648
|
if (result.content && result.content.length > 100) {
|
|
223
|
-
fetched.push(result);
|
|
224
649
|
process.stderr.write(`[greedysearch] ✓ Got ${result.content.length} chars\n`);
|
|
225
650
|
} else {
|
|
226
651
|
process.stderr.write(`[greedysearch] ✗ Empty or too short\n`);
|
|
227
652
|
}
|
|
228
653
|
} catch (e) {
|
|
654
|
+
fetched.push({ id: s.id, url: s.canonicalUrl || s.url, error: e.message });
|
|
229
655
|
process.stderr.write(`[greedysearch] ✗ Failed: ${e.message.slice(0, 80)}\n`);
|
|
230
656
|
}
|
|
231
657
|
process.stderr.write(`PROGRESS:fetch:${i + 1}/${toFetch.length}\n`);
|
|
@@ -235,6 +661,7 @@ async function fetchMultipleSources(sources, maxSources = 5, maxChars = 5000) {
|
|
|
235
661
|
}
|
|
236
662
|
|
|
237
663
|
function pickTopSource(out) {
|
|
664
|
+
if (Array.isArray(out._sources) && out._sources.length > 0) return out._sources[0];
|
|
238
665
|
for (const engine of ['perplexity', 'google', 'bing']) {
|
|
239
666
|
const r = out[engine];
|
|
240
667
|
if (r?.sources?.length > 0) return r.sources[0];
|
|
@@ -242,59 +669,13 @@ function pickTopSource(out) {
|
|
|
242
669
|
return null;
|
|
243
670
|
}
|
|
244
671
|
|
|
245
|
-
function
|
|
246
|
-
const
|
|
247
|
-
const
|
|
248
|
-
|
|
249
|
-
for (const engine of engineOrder) {
|
|
250
|
-
const r = out[engine];
|
|
251
|
-
if (!r?.sources) continue;
|
|
252
|
-
for (const s of r.sources) {
|
|
253
|
-
const url = s.url?.split('#')[0]?.replace(/\/$/, '');
|
|
254
|
-
if (!url || url.length < 10) continue;
|
|
255
|
-
if (!seen.has(url)) {
|
|
256
|
-
seen.set(url, { url: s.url, title: s.title || '', engines: [engine] });
|
|
257
|
-
} else {
|
|
258
|
-
const existing = seen.get(url);
|
|
259
|
-
if (!existing.engines.includes(engine)) {
|
|
260
|
-
existing.engines.push(engine);
|
|
261
|
-
}
|
|
262
|
-
if (!existing.title && s.title) existing.title = s.title;
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
|
|
267
|
-
// Sort by consensus (most engines = highest confidence)
|
|
268
|
-
return Array.from(seen.values())
|
|
269
|
-
.sort((a, b) => b.engines.length - a.engines.length)
|
|
270
|
-
.slice(0, 10);
|
|
271
|
-
}
|
|
672
|
+
async function synthesizeWithGemini(query, results, { grounded = false, tabPrefix = null } = {}) {
|
|
673
|
+
const sources = Array.isArray(results._sources) ? results._sources : buildSourceRegistry(results);
|
|
674
|
+
const prompt = buildSynthesisPrompt(query, results, sources, { grounded });
|
|
272
675
|
|
|
273
|
-
async function synthesizeWithGemini(query, results) {
|
|
274
|
-
// Build a prompt that includes all engine results
|
|
275
|
-
const sources = deduplicateSources(results);
|
|
276
|
-
|
|
277
|
-
let prompt = `Based on the following search results from multiple AI engines, provide a single, synthesized answer to the user's question. Combine the information, resolve any conflicts, and present the most accurate and complete answer.\n\n`;
|
|
278
|
-
prompt += `User's question: "${query}"\n\n`;
|
|
279
|
-
|
|
280
|
-
for (const engine of ['perplexity', 'bing', 'google']) {
|
|
281
|
-
const r = results[engine];
|
|
282
|
-
if (r?.error) {
|
|
283
|
-
prompt += `## ${engine} (failed)\nError: ${r.error}\n\n`;
|
|
284
|
-
} else if (r?.answer) {
|
|
285
|
-
prompt += `## ${engine}\n${r.answer}\n\n`;
|
|
286
|
-
}
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
prompt += `Provide a synthesized answer that:\n`;
|
|
290
|
-
prompt += `1. Combines the best information from all sources\n`;
|
|
291
|
-
prompt += `2. Notes where sources agree or disagree\n`;
|
|
292
|
-
prompt += `3. Is clear and well-structured\n`;
|
|
293
|
-
prompt += `4. Includes key sources at the end\n`;
|
|
294
|
-
|
|
295
|
-
// Run the query through Gemini extractor
|
|
296
676
|
return new Promise((resolve, reject) => {
|
|
297
|
-
const
|
|
677
|
+
const extraArgs = tabPrefix ? ['--tab', String(tabPrefix)] : [];
|
|
678
|
+
const proc = spawn('node', [join(__dir, 'extractors', 'gemini.mjs'), prompt, ...extraArgs], {
|
|
298
679
|
stdio: ['ignore', 'pipe', 'pipe'],
|
|
299
680
|
});
|
|
300
681
|
let out = '';
|
|
@@ -309,8 +690,18 @@ async function synthesizeWithGemini(query, results) {
|
|
|
309
690
|
clearTimeout(t);
|
|
310
691
|
if (code !== 0) reject(new Error(err.trim() || 'gemini extractor failed'));
|
|
311
692
|
else {
|
|
312
|
-
try {
|
|
313
|
-
|
|
693
|
+
try {
|
|
694
|
+
const raw = JSON.parse(out.trim());
|
|
695
|
+
const structured = parseStructuredJson(raw.answer || '');
|
|
696
|
+
resolve({
|
|
697
|
+
...normalizeSynthesisPayload(structured, sources, raw.answer || ''),
|
|
698
|
+
rawAnswer: raw.answer || '',
|
|
699
|
+
geminiSources: raw.sources || [],
|
|
700
|
+
});
|
|
701
|
+
}
|
|
702
|
+
catch {
|
|
703
|
+
reject(new Error(`bad JSON from gemini: ${out.slice(0, 100)}`));
|
|
704
|
+
}
|
|
314
705
|
}
|
|
315
706
|
});
|
|
316
707
|
});
|
|
@@ -509,83 +900,79 @@ async function main() {
|
|
|
509
900
|
}
|
|
510
901
|
|
|
511
902
|
// All tabs assigned — run extractors in parallel
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
903
|
+
try {
|
|
904
|
+
const results = await Promise.allSettled(
|
|
905
|
+
ALL_ENGINES.map((e, i) =>
|
|
906
|
+
runExtractor(ENGINES[e], query, tabs[i], short)
|
|
907
|
+
.then(r => {
|
|
908
|
+
process.stderr.write(`PROGRESS:${e}:done\n`);
|
|
909
|
+
return { engine: e, ...r };
|
|
910
|
+
})
|
|
911
|
+
.catch(err => {
|
|
912
|
+
process.stderr.write(`PROGRESS:${e}:error\n`);
|
|
913
|
+
throw err;
|
|
914
|
+
})
|
|
915
|
+
)
|
|
916
|
+
);
|
|
917
|
+
|
|
918
|
+
const out = {};
|
|
919
|
+
for (let i = 0; i < results.length; i++) {
|
|
920
|
+
const r = results[i];
|
|
921
|
+
if (r.status === 'fulfilled') {
|
|
922
|
+
out[r.value.engine] = r.value;
|
|
923
|
+
} else {
|
|
924
|
+
out[ALL_ENGINES[i]] = { error: r.reason?.message || 'unknown error' };
|
|
925
|
+
}
|
|
533
926
|
}
|
|
534
|
-
}
|
|
535
927
|
|
|
536
|
-
|
|
537
|
-
out._sources = deduplicateSources(out);
|
|
928
|
+
await closeTabs(tabs);
|
|
538
929
|
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
const
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
process.stderr.write('PROGRESS:
|
|
551
|
-
} catch (e) {
|
|
552
|
-
process.stderr.write(`[greedysearch] Synthesis failed: ${e.message}\n`);
|
|
553
|
-
out._synthesis = { error: e.message, synthesized: false };
|
|
930
|
+
// Build a canonical source registry across all engines
|
|
931
|
+
out._sources = buildSourceRegistry(out);
|
|
932
|
+
|
|
933
|
+
if (deepResearch) {
|
|
934
|
+
process.stderr.write('PROGRESS:deep-research:start\n');
|
|
935
|
+
const fetchedSources = out._sources.length > 0
|
|
936
|
+
? await fetchMultipleSources(out._sources, 5, 8000)
|
|
937
|
+
: [];
|
|
938
|
+
|
|
939
|
+
out._sources = mergeFetchDataIntoSources(out._sources, fetchedSources);
|
|
940
|
+
out._fetchedSources = fetchedSources;
|
|
941
|
+
process.stderr.write(out._sources.length > 0 ? 'PROGRESS:deep-research:done\n' : 'PROGRESS:deep-research:no-sources\n');
|
|
554
942
|
}
|
|
555
|
-
}
|
|
556
943
|
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
944
|
+
// Synthesize with Gemini if requested
|
|
945
|
+
if (synthesize) {
|
|
946
|
+
process.stderr.write('PROGRESS:synthesis:start\n');
|
|
947
|
+
process.stderr.write('[greedysearch] Synthesizing results with Gemini...\n');
|
|
948
|
+
try {
|
|
949
|
+
const geminiTab = await getOrOpenEngineTab('gemini');
|
|
950
|
+
await activateTab(geminiTab);
|
|
951
|
+
const synthesis = await synthesizeWithGemini(query, out, { grounded: deepResearch, tabPrefix: geminiTab });
|
|
952
|
+
await activateTab(geminiTab);
|
|
953
|
+
out._synthesis = {
|
|
954
|
+
...synthesis,
|
|
955
|
+
synthesized: true,
|
|
956
|
+
};
|
|
957
|
+
process.stderr.write('PROGRESS:synthesis:done\n');
|
|
958
|
+
} catch (e) {
|
|
959
|
+
process.stderr.write(`[greedysearch] Synthesis failed: ${e.message}\n`);
|
|
960
|
+
out._synthesis = { error: e.message, synthesized: false };
|
|
961
|
+
}
|
|
962
|
+
}
|
|
561
963
|
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
// Get top sources by consensus
|
|
567
|
-
const topSources = out._sources || [];
|
|
568
|
-
|
|
569
|
-
if (topSources.length > 0) {
|
|
570
|
-
// Fetch content from top sources
|
|
571
|
-
out._fetchedSources = await fetchMultipleSources(topSources, 5, 8000);
|
|
572
|
-
process.stderr.write('PROGRESS:deep-research:done\n');
|
|
573
|
-
} else {
|
|
574
|
-
out._fetchedSources = [];
|
|
575
|
-
process.stderr.write('PROGRESS:deep-research:no-sources\n');
|
|
964
|
+
if (fetchSource) {
|
|
965
|
+
const top = pickTopSource(out);
|
|
966
|
+
if (top) out._topSource = await fetchTopSource(top.canonicalUrl || top.url);
|
|
576
967
|
}
|
|
577
|
-
|
|
578
|
-
// Build confidence scores
|
|
579
|
-
out._confidence = {
|
|
580
|
-
sourcesCount: topSources.length,
|
|
581
|
-
consensusScore: topSources.length > 0 ? topSources[0]?.engines?.length || 0 : 0,
|
|
582
|
-
enginesResponded: ALL_ENGINES.filter(e => out[e]?.answer && !out[e]?.error),
|
|
583
|
-
enginesFailed: ALL_ENGINES.filter(e => out[e]?.error),
|
|
584
|
-
};
|
|
585
|
-
}
|
|
586
968
|
|
|
587
|
-
|
|
588
|
-
|
|
969
|
+
if (deepResearch) out._confidence = buildConfidence(out);
|
|
970
|
+
|
|
971
|
+
writeOutput(out, outFile, { inline, synthesize, query });
|
|
972
|
+
return;
|
|
973
|
+
} finally {
|
|
974
|
+
await closeTabs(tabs);
|
|
975
|
+
}
|
|
589
976
|
}
|
|
590
977
|
|
|
591
978
|
const script = ENGINES[engine];
|