@apmantza/greedysearch-pi 1.7.7 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/bin/coding-task.mjs +27 -1
- package/bin/search.mjs +260 -1539
- package/index.ts +134 -421
- package/package.json +1 -1
- package/src/github.mjs +6 -1
- package/src/search/chrome.mjs +223 -0
- package/src/search/constants.mjs +38 -0
- package/src/search/defaults.mjs +15 -0
- package/src/search/engines.mjs +58 -0
- package/src/search/fetch-source.mjs +230 -0
- package/src/search/output.mjs +59 -0
- package/src/search/sources.mjs +446 -0
- package/src/search/synthesis-runner.mjs +64 -0
- package/src/search/synthesis.mjs +212 -0
- package/src/tools/deep-research-handler.ts +37 -0
- package/src/tools/greedy-search-handler.ts +58 -0
- package/src/tools/shared.ts +131 -0
- package/src/types.ts +104 -0
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
// src/search/sources.mjs — Source registry, URL normalization, domain inference, classification
|
|
2
|
+
//
|
|
3
|
+
// Responsible for: deduplicating sources across engines, normalizing URLs,
|
|
4
|
+
// classifying source types, inferring preferred domains from queries, and
|
|
5
|
+
// merging fetch data into source objects.
|
|
6
|
+
|
|
7
|
+
// Lower-cased query-parameter names that normalizeUrl() deletes from a URL
// before it is used as a canonical key. normalizeUrl() additionally removes
// every parameter whose name starts with "utm_", so the utm_* entries here
// are covered twice.
export const TRACKING_PARAMS = [
  "fbclid",
  "gclid",
  "ref",
  "ref_src",
  "ref_url",
  "source",
  "utm_campaign",
  "utm_content",
  "utm_medium",
  "utm_source",
  "utm_term",
];
|
|
20
|
+
|
|
21
|
+
// Hosts that classifySourceType() labels as "community". Matching is done
// through matchesDomain(), so subdomains of these hosts are included too.
export const COMMUNITY_HOSTS = [
  "dev.to",
  "hashnode.com",
  "medium.com",
  "reddit.com",
  "stackoverflow.com",
  "stackexchange.com",
  "substack.com",
];
|
|
30
|
+
|
|
31
|
+
// Hosts that classifySourceType() labels as "news". Matching is done through
// matchesDomain(), so subdomains of these hosts are included too.
export const NEWS_HOSTS = [
  "arstechnica.com",
  "techcrunch.com",
  "theverge.com",
  "venturebeat.com",
  "wired.com",
  "zdnet.com",
];
|
|
39
|
+
|
|
40
|
+
/**
 * Collapse all whitespace runs to single spaces, trim, and truncate the
 * result to at most `maxChars` characters (plus a trailing "...").
 *
 * Truncation never leaves a partial word when it can be avoided: if the cut
 * falls inside a word, that word is dropped; if the cut falls exactly between
 * two words, the last complete word is kept.
 *
 * @param {*} [text=""] - Value to clean; coerced with String(). null and
 *   undefined are treated as the empty string.
 * @param {number} [maxChars=240] - Maximum number of characters kept before
 *   the "..." suffix is appended.
 * @returns {string} The cleaned, possibly truncated text.
 */
export function trimText(text = "", maxChars = 240) {
  // The default parameter only covers `undefined`; without `??` a null value
  // would be stringified to the literal "null".
  const clean = String(text ?? "")
    .replace(/\s+/g, " ")
    .trim();
  if (clean.length <= maxChars) return clean;

  const head = clean.slice(0, maxChars);
  // Whitespace is already collapsed, so a space right after the cut means the
  // slice ends on a complete word and nothing needs to be dropped.
  const cutOnWordBoundary = clean[maxChars] === " ";
  const body = cutOnWordBoundary ? head : head.replace(/\s+\S*$/, "");
  return `${body}...`;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Clean a source title and reject values that are not useful as titles.
 *
 * The title is whitespace-normalized and capped at 180 characters via
 * trimText(). An empty string is returned when the cleaned title is empty,
 * starts with "http://" or "https://", or looks like a fragment (at most
 * four words, entirely lower-case, and without any digit).
 *
 * @param {string} [title=""] - Raw title as reported by an engine or fetch.
 * @returns {string} The cleaned title, or "" when it was rejected.
 */
export function normalizeSourceTitle(title = "") {
  const text = trimText(title, 180);
  if (text.length === 0 || /^https?:\/\//i.test(text)) return "";

  const words = text.split(/\s+/).filter(Boolean);
  const isAllLowercase = text === text.toLowerCase() && !/[A-Z]/.test(text);
  const containsDigit = /\d/.test(text);
  const isFragment = isAllLowercase && words.length <= 4 && !containsDigit;

  if (isFragment) return "";
  return text;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Choose the better of two candidate titles for the same source.
 *
 * Both candidates are first passed through normalizeSourceTitle(). If only
 * one survives, it wins. Otherwise a non-URL title beats a URL-looking one,
 * and as a final tie-break the longer title wins (the current title is kept
 * when lengths are equal).
 *
 * @param {string} [currentTitle=""] - Title already stored for the source.
 * @param {string} [nextTitle=""] - Newly observed title.
 * @returns {string} The preferred normalized title ("" if neither is usable).
 */
export function pickPreferredTitle(currentTitle = "", nextTitle = "") {
  const urlPattern = /^https?:\/\//i;
  const current = normalizeSourceTitle(currentTitle);
  const next = normalizeSourceTitle(nextTitle);

  // At least one side is empty: return whichever is non-empty (or "").
  if (!current || !next) return next || current;

  // Defensive: normalizeSourceTitle() already blanks URL-looking titles.
  const currentIsUrl = urlPattern.test(current);
  const nextIsUrl = urlPattern.test(next);
  if (currentIsUrl !== nextIsUrl) {
    return currentIsUrl ? next : current;
  }

  if (next.length > current.length) return next;
  return current;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Produce a canonical string for an http(s) URL so that equivalent links
 * compare equal.
 *
 * Steps: drop the fragment, lower-case the hostname, clear an explicit
 * default port, delete tracking parameters (TRACKING_PARAMS and any "utm_*"
 * key), sort the remaining query parameters, and strip trailing slashes from
 * the path. When the path is just "/", a trailing "/" at the very end of the
 * serialized URL is removed as well.
 *
 * @param {string} rawUrl - URL to normalize.
 * @returns {string|null} The canonical URL, or null when the input is empty,
 *   cannot be parsed, or does not use the http/https scheme.
 */
export function normalizeUrl(rawUrl) {
  if (!rawUrl) return null;
  try {
    const parsed = new URL(rawUrl);
    const isHttp = parsed.protocol === "http:";
    const isHttps = parsed.protocol === "https:";
    if (!isHttp && !isHttps) return null;

    parsed.hash = "";
    parsed.hostname = parsed.hostname.toLowerCase();

    const defaultPort = isHttps ? "443" : "80";
    if (parsed.port === defaultPort) {
      parsed.port = "";
    }

    // Collect the keys first so deletion does not disturb the iteration.
    const trackingKeys = Array.from(parsed.searchParams.keys()).filter(
      (key) => {
        const lowered = key.toLowerCase();
        return TRACKING_PARAMS.includes(lowered) || lowered.startsWith("utm_");
      },
    );
    for (const key of trackingKeys) {
      parsed.searchParams.delete(key);
    }
    parsed.searchParams.sort();

    const path = parsed.pathname.replace(/\/+$/, "") || "/";
    parsed.pathname = path;

    const href = parsed.toString();
    if (path !== "/") return href;
    return href.replace(/\/$/, "");
  } catch {
    return null;
  }
}
|
|
102
|
+
|
|
103
|
+
/**
 * Extract the lower-cased hostname of a URL, without a leading "www.".
 *
 * @param {string} rawUrl - Absolute URL to inspect.
 * @returns {string} The hostname, or "" when the URL cannot be parsed.
 */
export function getDomain(rawUrl) {
  let hostname;
  try {
    hostname = new URL(rawUrl).hostname;
  } catch {
    return "";
  }
  const lowered = hostname.toLowerCase();
  return lowered.startsWith("www.") ? lowered.slice(4) : lowered;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Test whether a domain equals, or is a subdomain of, any host in a list.
 *
 * @param {string} domain - Domain to test.
 * @param {string[]} hosts - Candidate hosts.
 * @returns {boolean} True when at least one host matches.
 */
export function matchesDomain(domain, hosts) {
  const isHostOrSubdomain = (host) => {
    if (domain === host) return true;
    return domain.endsWith(`.${host}`);
  };
  return hosts.some(isHostOrSubdomain);
}
|
|
115
|
+
|
|
116
|
+
/**
 * Classify a source into one of: "repo", "community", "news",
 * "official-docs", "maintainer-blog" or "website".
 *
 * Checks run in that order and the first match wins. Documentation is
 * detected from the domain prefix, from keywords in the title, or from path
 * segments in the URL; blogs from a "blog." domain prefix or a "/blog/" path.
 *
 * @param {string} domain - Domain of the source.
 * @param {string} [title=""] - Source title.
 * @param {string} [rawUrl=""] - Source URL.
 * @returns {string} The source type label.
 */
export function classifySourceType(domain, title = "", rawUrl = "") {
  const lowerTitle = title.toLowerCase();
  const lowerUrl = rawUrl.toLowerCase();

  if (domain === "github.com" || domain === "gitlab.com") return "repo";
  if (matchesDomain(domain, COMMUNITY_HOSTS)) return "community";
  if (matchesDomain(domain, NEWS_HOSTS)) return "news";

  const docsDomainPrefixes = ["docs.", "developer.", "developers.", "api."];
  const docsTitleKeywords = ["documentation", "docs", "reference"];
  const docsPathSegments = ["/docs/", "/reference/", "/api/"];

  const looksLikeDocs =
    docsDomainPrefixes.some((prefix) => domain.startsWith(prefix)) ||
    docsTitleKeywords.some((keyword) => lowerTitle.includes(keyword)) ||
    docsPathSegments.some((segment) => lowerUrl.includes(segment));
  if (looksLikeDocs) return "official-docs";

  const looksLikeBlog =
    domain.startsWith("blog.") || lowerUrl.includes("/blog/");
  return looksLikeBlog ? "maintainer-blog" : "website";
}
|
|
141
|
+
|
|
142
|
+
/**
 * Map a source type label to its numeric sort priority (higher sorts first).
 *
 * @param {string} sourceType - Label as produced by classifySourceType().
 * @returns {number} Priority from 5 ("official-docs") down to 0 ("news");
 *   unrecognized labels also yield 0.
 */
export function sourceTypePriority(sourceType) {
  const priorities = new Map([
    ["official-docs", 5],
    ["repo", 4],
    ["maintainer-blog", 3],
    ["website", 2],
    ["community", 1],
    ["news", 0],
  ]);
  const priority = priorities.get(sourceType);
  return priority === undefined ? 0 : priority;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Return the best (lowest) rank a source achieved across all engines.
 *
 * Entries without a usable rank count as 99, and a source with no per-engine
 * entries at all also yields 99.
 *
 * @param {{perEngine?: Object<string, {rank?: number}>}} source - Source
 *   whose `perEngine` map is inspected.
 * @returns {number} The lowest rank found, or 99.
 */
export function bestRank(source) {
  const entries = Object.values(source.perEngine || {});
  if (entries.length === 0) return 99;

  let lowest = Infinity;
  for (const entry of entries) {
    lowest = Math.min(lowest, entry?.rank || 99);
  }
  return lowest;
}
|
|
165
|
+
|
|
166
|
+
// Keyword → preferred-domain rules, evaluated in order against the
// lower-cased query. Short or ambiguous keywords are anchored with word
// boundaries so they do not fire inside unrelated words (e.g. "ts" in
// "tests", "bun" in "bundle", "rust" in "trust", "aws" in "laws").
// None of the patterns use the /g flag, so reusing them with .test() is safe.
const PREFERRED_DOMAIN_RULES = [
  [/openai|gpt/, ["openai.com", "platform.openai.com", "help.openai.com"]],
  [/anthropic|claude/, ["anthropic.com", "docs.anthropic.com"]],
  [/\bbun(?:\.?js)?\b/, ["bun.sh", "bun.com"]],
  [/next\.js|nextjs/, ["nextjs.org", "vercel.com"]],
  [/playwright/, ["playwright.dev"]],
  [/supabase/, ["supabase.com", "supabase.io"]],
  [/prisma/, ["prisma.io"]],
  [/tailwind/, ["tailwindcss.com"]],
  [/\bvite(?:js|st)?\b/, ["vitejs.dev", "vite.dev"]],
  [/\bastro(?:js)?\b/, ["astro.build"]],
  [/svelte/, ["svelte.dev"]],
  [/\bsolid(?:[-.]?js)?\b/, ["solidjs.com"]],
  [/\bvue|nuxt/, ["vuejs.org", "nuxt.com"]],
  [/\breact(?:\.?js|native)?\b/, ["react.dev", "reactnative.dev"]],
  [/\bangular/, ["angular.io", "angular.dev"]],
  [/node\.js|nodejs/, ["nodejs.org", "nodejs.dev", "npmjs.com"]],
  [/\bgo\b|golang/, ["go.dev", "golang.org", "pkg.go.dev"]],
  [/\bdeno\b/, ["deno.land", "deno.com"]],
  [/\bfresh\b/, ["fresh.deno.dev"]],
  [/typescript|\bts\b/, ["typescriptlang.org"]],
  [/python/, ["python.org", "docs.python.org"]],
  [/\brust/, ["rust-lang.org", "docs.rs", "crates.io"]],
  [/\bzig(?:lang)?\b/, ["ziglang.org"]],
  [/docker/, ["docker.com", "docs.docker.com", "hub.docker.com"]],
  [/kubernetes|k8s/, ["kubernetes.io", "k8s.io"]],
  [/postgres/, ["postgresql.org", "neon.tech", "supabase.com"]],
  [/\bredis\b/, ["redis.io"]],
  [/sqlite/, ["sqlite.org"]],
  [/cloudflare/, ["developers.cloudflare.com", "cloudflare.com"]],
  [/vercel/, ["vercel.com", "nextjs.org"]],
  [/netlify/, ["netlify.com", "docs.netlify.com"]],
  [/\bstripe\b/, ["stripe.com", "docs.stripe.com"]],
  [/github/, ["github.com", "docs.github.com"]],
  [/gitlab/, ["gitlab.com", "docs.gitlab.com"]],
  [/\baws\b/, ["aws.amazon.com", "docs.aws.amazon.com"]],
  [/azure/, ["azure.microsoft.com", "learn.microsoft.com"]],
  [/\bgcp\b|google cloud/, ["cloud.google.com", "developers.google.com"]],
  [/gemini|\bgoogle ai\b/, ["ai.google.dev", "developers.google.com"]],
];

/**
 * Infer which documentation domains are most relevant to a search query by
 * looking for known technology keywords in it.
 *
 * @param {string} query - Free-text search query. null, undefined and
 *   non-string values are coerced to a string instead of throwing.
 * @returns {string[]} De-duplicated domains in rule order; empty when no
 *   keyword matches.
 */
export function inferPreferredDomains(query) {
  const normalized = String(query ?? "").toLowerCase();
  const matches = [];

  for (const [pattern, domains] of PREFERRED_DOMAIN_RULES) {
    if (pattern.test(normalized)) {
      matches.push(...domains);
    }
  }

  // Several rules share domains (e.g. vercel.com); keep first occurrences.
  return [...new Set(matches)];
}
|
|
291
|
+
|
|
292
|
+
/**
 * Test whether a hostname is the candidate domain itself or one of its
 * subdomains.
 *
 * @param {string} hostname - Hostname to test.
 * @param {string} candidate - Domain to compare against.
 * @returns {boolean} True on an exact or subdomain match.
 */
export function domainMatches(hostname, candidate) {
  if (hostname === candidate) return true;
  const subdomainSuffix = `.${candidate}`;
  return hostname.endsWith(subdomainSuffix);
}
|
|
295
|
+
|
|
296
|
+
/**
 * Build a de-duplicated, ranked registry of sources from per-engine results.
 *
 * Sources from the "perplexity", "bing" and "google" entries of `out` are
 * keyed by their normalized URL, merged across engines, scored, sorted and
 * truncated to the top 12. Each surviving source receives an id "S1".."S12".
 *
 * Sort order: smart score (query-aware boosts), then number of engines that
 * returned the source, then source-type priority, then best per-engine rank,
 * then domain name.
 *
 * @param {Object} out - Results keyed by engine name; each engine entry may
 *   carry a `sources` array of `{ url, title }` objects.
 * @param {string} [query=""] - Search query, used to infer preferred domains.
 * @returns {Object[]} Up to 12 merged source records.
 */
export function buildSourceRegistry(out, query = "") {
  const seen = new Map();
  const engineOrder = ["perplexity", "bing", "google"];
  const maxSources = 12;

  // Domains considered authoritative for the technologies named in the query.
  const preferredDomains = inferPreferredDomains(query);

  for (const engine of engineOrder) {
    const engineSources = out?.[engine]?.sources;
    // Skip engines that are missing or returned a malformed source list.
    if (!Array.isArray(engineSources)) continue;

    for (let i = 0; i < engineSources.length; i++) {
      const source = engineSources[i];
      // `source?.` tolerates null/undefined entries in the list.
      const canonicalUrl = normalizeUrl(source?.url);
      if (!canonicalUrl || canonicalUrl.length < 10) continue;

      const title = normalizeSourceTitle(source.title || "");
      const domain = getDomain(canonicalUrl);
      const sourceType = classifySourceType(domain, title, canonicalUrl);

      let smartScore = 0;

      // Strong boost for domains inferred from the query.
      if (preferredDomains.some((pd) => domainMatches(domain, pd))) {
        smartScore += 10;
      }

      // Boost documentation-type sources.
      if (sourceType === "official-docs") {
        smartScore += 3;
      }

      // Boost URLs whose path (or .dev host) looks documentation-like.
      const lowerUrl = canonicalUrl.toLowerCase();
      if (
        /\/docs\/|\/documentation\/|\.dev\/|\/api\/|\/reference\//.test(
          lowerUrl,
        )
      ) {
        smartScore += 2;
      }

      // Penalize community sites only when the query named a known technology.
      if (sourceType === "community" && preferredDomains.length > 0) {
        smartScore -= 2;
      }

      const existing = seen.get(canonicalUrl) || {
        id: "",
        canonicalUrl,
        displayUrl: source.url || canonicalUrl,
        domain,
        title: "",
        engines: [],
        engineCount: 0,
        perEngine: {},
        sourceType,
        isOfficial: sourceType === "official-docs",
        smartScore: 0,
      };

      existing.title = pickPreferredTitle(existing.title, title);
      existing.displayUrl = existing.displayUrl || source.url || canonicalUrl;
      existing.sourceType = existing.sourceType || sourceType;
      existing.isOfficial =
        existing.isOfficial || sourceType === "official-docs";
      existing.smartScore = Math.max(existing.smartScore, smartScore);

      if (!existing.engines.includes(engine)) {
        existing.engines.push(engine);
      }

      // An engine can list the same canonical URL more than once (e.g. links
      // differing only by tracking params). Keep the best (lowest) rank
      // instead of letting the later duplicate overwrite it.
      const rank = i + 1;
      const previous = existing.perEngine[engine];
      existing.perEngine[engine] = {
        rank: previous ? Math.min(previous.rank, rank) : rank,
        title: pickPreferredTitle(previous?.title || "", title),
      };

      seen.set(canonicalUrl, existing);
    }
  }

  const sources = Array.from(seen.values())
    .map((source) => ({
      ...source,
      engineCount: source.engines.length,
    }))
    .sort((a, b) => {
      // Primary: smart score (query-aware domain boosting).
      if (b.smartScore !== a.smartScore) return b.smartScore - a.smartScore;

      // Secondary: consensus (sources found by more engines).
      if (b.engineCount !== a.engineCount) return b.engineCount - a.engineCount;

      // Tertiary: source type priority.
      const typeDelta =
        sourceTypePriority(b.sourceType) - sourceTypePriority(a.sourceType);
      if (typeDelta !== 0) return typeDelta;

      // Quaternary: best rank across engines (lower is better).
      const rankDelta = bestRank(a) - bestRank(b);
      if (rankDelta !== 0) return rankDelta;

      return a.domain.localeCompare(b.domain);
    })
    .slice(0, maxSources)
    .map((source, index) => ({
      ...source,
      id: `S${index + 1}`,
      title: source.title || source.domain || source.canonicalUrl,
    }));

  return sources;
}
|
|
415
|
+
|
|
416
|
+
/**
 * Attach fetch results to the matching source records.
 *
 * Fetched entries are matched to sources by `id`. A matched source gets a
 * `fetch` object describing the attempt and, where pickPreferredTitle()
 * favors it, the fetched title. Sources without a fetched entry are returned
 * unchanged.
 *
 * @param {Object[]} sources - Source records (each with an `id`).
 * @param {Object[]} fetchedSources - Fetch results (each with an `id`).
 * @returns {Object[]} New array of sources; matched entries are new objects.
 */
export function mergeFetchDataIntoSources(sources, fetchedSources) {
  const fetchedById = new Map();
  for (const item of fetchedSources) {
    fetchedById.set(item.id, item);
  }

  return sources.map((source) => {
    const fetched = fetchedById.get(source.id);
    if (!fetched) return source;

    const preferredTitle = pickPreferredTitle(
      source.title,
      fetched.title || "",
    );

    const fetchInfo = {
      attempted: true,
      // A fetch counts as successful only without an error and with more
      // than 100 characters of content.
      ok: !fetched.error && fetched.contentChars > 100,
      status: fetched.status || null,
      finalUrl: fetched.finalUrl || fetched.url || source.canonicalUrl,
      contentType: fetched.contentType || "",
      lastModified: fetched.lastModified || "",
      publishedTime: fetched.publishedTime || "",
      byline: fetched.byline || "",
      siteName: fetched.siteName || "",
      lang: fetched.lang || "",
      title: fetched.title || "",
      snippet: fetched.snippet || "",
      contentChars: fetched.contentChars || 0,
      source: fetched.source || "unknown", // "http" | "browser"
      duration: fetched.duration || 0,
      error: fetched.error || "",
    };

    return {
      ...source,
      title: preferredTitle || source.title,
      fetch: fetchInfo,
    };
  });
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
// src/search/synthesis-runner.mjs — Run Gemini synthesis via CDP
|
|
2
|
+
//
|
|
3
|
+
// Extracted from search.mjs.
|
|
4
|
+
|
|
5
|
+
import { spawn } from "node:child_process";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
import { GREEDY_PROFILE_DIR } from "./constants.mjs";
|
|
8
|
+
import { parseStructuredJson, normalizeSynthesisPayload, buildSynthesisPrompt } from "./synthesis.mjs";
|
|
9
|
+
import { cdp, openNewTab, closeTab, activateTab } from "./chrome.mjs";
|
|
10
|
+
import { trimText } from "./sources.mjs";
|
|
11
|
+
|
|
12
|
+
// Directory containing this module. `import.meta.dirname` is used when the
// runtime provides it; otherwise the directory is derived from the module
// URL. The URL pathname is percent-encoded, so it is decoded first (paths
// with spaces or non-ASCII characters would otherwise be wrong), and the
// leading "/" before a Windows drive letter ("/C:/...") is removed for both
// upper- and lower-case drive letters.
const __dir =
  import.meta.dirname ||
  decodeURIComponent(new URL(".", import.meta.url).pathname).replace(
    /^\/([A-Za-z]:)/,
    "$1",
  );
|
|
13
|
+
|
|
14
|
+
/**
 * Run the Gemini extractor script in a child process to synthesize an answer
 * from the per-engine search results.
 *
 * The prompt is built with buildSynthesisPrompt() and passed to
 * `extractors/gemini.mjs` as a command-line argument. The child's stdout is
 * expected to be a JSON object with an `answer` string and an optional
 * `sources` array.
 *
 * @param {string} query - The user's search query.
 * @param {Object} results - Per-engine results; `results._sources` is used as
 *   the source registry when it is an array, otherwise one is built.
 * @param {Object} [options]
 * @param {boolean} [options.grounded=false] - Forwarded to the prompt builder.
 * @param {string|number|null} [options.tabPrefix=null] - When truthy, passed
 *   to the extractor as `--tab <value>`.
 * @returns {Promise<Object>} The normalized synthesis payload plus
 *   `rawAnswer` and `geminiSources`.
 * @throws {Error} (as a rejection) when the extractor cannot be started,
 *   exits non-zero, exceeds 180 seconds, or its output cannot be processed.
 */
export async function synthesizeWithGemini(
  query,
  results,
  { grounded = false, tabPrefix = null } = {},
) {
  // Pass the query to the fallback registry so it gets the same query-aware
  // ranking as a registry built by the caller.
  const sources = Array.isArray(results._sources)
    ? results._sources
    : buildSourceRegistry(results, typeof query === "string" ? query : "");
  const prompt = buildSynthesisPrompt(query, results, sources, { grounded });

  return new Promise((resolve, reject) => {
    const extraArgs = tabPrefix ? ["--tab", String(tabPrefix)] : [];
    const proc = spawn(
      "node",
      [join(__dir, "..", "..", "extractors", "gemini.mjs"), prompt, ...extraArgs],
      {
        stdio: ["ignore", "pipe", "pipe"],
        env: { ...process.env, CDP_PROFILE_DIR: GREEDY_PROFILE_DIR },
      },
    );

    let out = "";
    let err = "";
    proc.stdout.on("data", (d) => (out += d));
    proc.stderr.on("data", (d) => (err += d));

    const timer = setTimeout(() => {
      proc.kill();
      reject(new Error("Gemini synthesis timed out after 180s"));
    }, 180000);

    // Without an "error" listener a failed spawn (e.g. `node` not on PATH)
    // is emitted as an unhandled "error" event instead of rejecting.
    proc.on("error", (spawnError) => {
      clearTimeout(timer);
      reject(
        new Error(`failed to start gemini extractor: ${spawnError.message}`, {
          cause: spawnError,
        }),
      );
    });

    proc.on("close", (code) => {
      clearTimeout(timer);
      if (code !== 0) {
        // Also reached after a timeout kill; the promise is already settled
        // then, so this rejection is a no-op.
        reject(new Error(err.trim() || "gemini extractor failed"));
        return;
      }
      try {
        const raw = JSON.parse(out.trim());
        const structured = parseStructuredJson(raw.answer || "");
        resolve({
          ...normalizeSynthesisPayload(structured, sources, raw.answer || ""),
          rawAnswer: raw.answer || "",
          geminiSources: raw.sources || [],
        });
      } catch (parseError) {
        // Keep the original failure attached for diagnosis.
        reject(
          new Error(`bad JSON from gemini: ${out.slice(0, 100)}`, {
            cause: parseError,
          }),
        );
      }
    });
  });
}
|
|
62
|
+
|
|
63
|
+
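// buildSourceRegistry is the fallback used by synthesizeWithGemini when
// results._sources is not an array. ES module imports are hoisted, so
// declaring this import at the end of the file is valid.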
|
|
64
|
+
import { buildSourceRegistry } from "./sources.mjs";
|