@apmantza/greedysearch-pi 1.9.1 → 1.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -13
- package/README.md +11 -1
- package/bin/launch.mjs +2 -0
- package/bin/search.mjs +757 -674
- package/extractors/bing-copilot.mjs +490 -374
- package/extractors/common.mjs +703 -645
- package/extractors/consent.mjs +421 -388
- package/index.ts +2 -1
- package/package.json +8 -4
- package/skills/greedy-search/skill.md +5 -14
- package/src/search/research.mjs +1581 -0
- package/src/search/sources.mjs +26 -4
- package/src/search/synthesis-runner.mjs +52 -46
- package/src/tools/greedy-search-handler.ts +85 -13
- package/test.mjs +971 -534
package/src/search/sources.mjs
CHANGED
|
@@ -37,6 +37,16 @@ export const NEWS_HOSTS = [
|
|
|
37
37
|
"zdnet.com",
|
|
38
38
|
];
|
|
39
39
|
|
|
40
|
+
export const SOCIAL_HOSTS = [
|
|
41
|
+
"facebook.com",
|
|
42
|
+
"instagram.com",
|
|
43
|
+
"linkedin.com",
|
|
44
|
+
"pinterest.com",
|
|
45
|
+
"tiktok.com",
|
|
46
|
+
"twitter.com",
|
|
47
|
+
"x.com",
|
|
48
|
+
];
|
|
49
|
+
|
|
40
50
|
export function trimText(text = "", maxChars = 240) {
|
|
41
51
|
const clean = String(text).replaceAll(/\s+/g, " ").trim();
|
|
42
52
|
if (clean.length <= maxChars) return clean;
|
|
@@ -122,6 +132,7 @@ export function classifySourceType(domain, title = "", rawUrl = "") {
|
|
|
122
132
|
const lowerUrl = rawUrl.toLowerCase();
|
|
123
133
|
|
|
124
134
|
if (domain === "github.com" || domain === "gitlab.com") return "repo";
|
|
135
|
+
if (matchesDomain(domain, SOCIAL_HOSTS)) return "social";
|
|
125
136
|
if (matchesDomain(domain, COMMUNITY_HOSTS)) return "community";
|
|
126
137
|
if (matchesDomain(domain, NEWS_HOSTS)) return "news";
|
|
127
138
|
if (
|
|
@@ -157,6 +168,8 @@ export function sourceTypePriority(sourceType) {
|
|
|
157
168
|
return 1;
|
|
158
169
|
case "news":
|
|
159
170
|
return 0;
|
|
171
|
+
case "social":
|
|
172
|
+
return -6;
|
|
160
173
|
default:
|
|
161
174
|
return 0;
|
|
162
175
|
}
|
|
@@ -308,6 +321,10 @@ export function inferPreferredDomains(query) {
|
|
|
308
321
|
if (normalized.includes("gemini") || normalized.includes("google ai")) {
|
|
309
322
|
matches.push("ai.google.dev", "developers.google.com");
|
|
310
323
|
}
|
|
324
|
+
for (const socialHost of SOCIAL_HOSTS) {
|
|
325
|
+
const bareName = socialHost.replace(/\.com$/, "");
|
|
326
|
+
if (normalized.includes(bareName)) matches.push(socialHost);
|
|
327
|
+
}
|
|
311
328
|
|
|
312
329
|
return [...new Set(matches)];
|
|
313
330
|
}
|
|
@@ -359,10 +376,15 @@ export function buildSourceRegistry(out, query = "") {
|
|
|
359
376
|
smartScore += 2;
|
|
360
377
|
}
|
|
361
378
|
|
|
362
|
-
// Penalize discussion
|
|
363
|
-
//
|
|
364
|
-
//
|
|
365
|
-
|
|
379
|
+
// Penalize discussion/social sites for technical queries — high noise,
|
|
380
|
+
// hard to fetch cleanly, and rarely canonical. Q&A sites (StackOverflow,
|
|
381
|
+
// StackExchange) are excluded from the community penalty.
|
|
382
|
+
const queryTargetsSocialHost = preferredDomains.some((pd) =>
|
|
383
|
+
domainMatches(domain, pd),
|
|
384
|
+
);
|
|
385
|
+
if (sourceType === "social" && !queryTargetsSocialHost) {
|
|
386
|
+
smartScore -= 12;
|
|
387
|
+
}
|
|
366
388
|
if (preferredDomains.length > 0) {
|
|
367
389
|
if (matchesDomain(domain, DISCUSSION_HOSTS)) {
|
|
368
390
|
smartScore -= 3;
|
|
@@ -15,16 +15,10 @@ const __dir =
|
|
|
15
15
|
import.meta.dirname ||
|
|
16
16
|
new URL(".", import.meta.url).pathname.replace(/^\/([A-Z]:)/, "$1");
|
|
17
17
|
|
|
18
|
-
export async function
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
{ grounded = false, tabPrefix = null } = {},
|
|
18
|
+
export async function runGeminiPrompt(
|
|
19
|
+
prompt,
|
|
20
|
+
{ tabPrefix = null, timeoutMs = 180000 } = {},
|
|
22
21
|
) {
|
|
23
|
-
const sources = Array.isArray(results._sources)
|
|
24
|
-
? results._sources
|
|
25
|
-
: buildSourceRegistry(results);
|
|
26
|
-
const prompt = buildSynthesisPrompt(query, results, sources, { grounded });
|
|
27
|
-
|
|
28
22
|
return new Promise((resolve, reject) => {
|
|
29
23
|
const extraArgs = tabPrefix ? ["--tab", String(tabPrefix)] : [];
|
|
30
24
|
const proc = spawn(
|
|
@@ -39,7 +33,7 @@ export async function synthesizeWithGemini(
|
|
|
39
33
|
env: { ...process.env, CDP_PROFILE_DIR: GREEDY_PROFILE_DIR },
|
|
40
34
|
},
|
|
41
35
|
);
|
|
42
|
-
// Pipe
|
|
36
|
+
// Pipe prompts via stdin to avoid leaking them in process tables.
|
|
43
37
|
proc.stdin.write(prompt);
|
|
44
38
|
proc.stdin.end();
|
|
45
39
|
let out = "";
|
|
@@ -48,49 +42,61 @@ export async function synthesizeWithGemini(
|
|
|
48
42
|
proc.stderr.on("data", (d) => (err += d));
|
|
49
43
|
const t = setTimeout(() => {
|
|
50
44
|
proc.kill();
|
|
51
|
-
reject(new Error(
|
|
52
|
-
},
|
|
45
|
+
reject(new Error(`Gemini prompt timed out after ${timeoutMs / 1000}s`));
|
|
46
|
+
}, timeoutMs);
|
|
53
47
|
proc.on("close", (code) => {
|
|
54
48
|
clearTimeout(t);
|
|
55
|
-
if (code !== 0)
|
|
49
|
+
if (code !== 0) {
|
|
56
50
|
reject(new Error(err.trim() || "gemini extractor failed"));
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
// Happens when Gemini can't synthesize (e.g. only 1 engine responded) and
|
|
64
|
-
// echoes the prompt JSON. The engine summary JSON has per-engine keys
|
|
65
|
-
// (perplexity/bing/google) but no synthesis fields (answer/agreement).
|
|
66
|
-
const SYNTHESIS_FIELDS = [
|
|
67
|
-
"answer",
|
|
68
|
-
"agreement",
|
|
69
|
-
"claims",
|
|
70
|
-
"differences",
|
|
71
|
-
"caveats",
|
|
72
|
-
];
|
|
73
|
-
const hasSynthesisFields =
|
|
74
|
-
structured && SYNTHESIS_FIELDS.some((f) => f in structured);
|
|
75
|
-
const hasEngineKeys =
|
|
76
|
-
structured &&
|
|
77
|
-
["perplexity", "bing", "google"].some((e) => e in structured);
|
|
78
|
-
if (hasEngineKeys && !hasSynthesisFields) {
|
|
79
|
-
structured = null; // Treat as parse failure — Gemini echoed input
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
resolve({
|
|
83
|
-
...normalizeSynthesisPayload(structured, sources, raw.answer || ""),
|
|
84
|
-
rawAnswer: raw.answer || "",
|
|
85
|
-
geminiSources: raw.sources || [],
|
|
86
|
-
});
|
|
87
|
-
} catch {
|
|
88
|
-
reject(new Error(`bad JSON from gemini: ${out.slice(0, 100)}`));
|
|
89
|
-
}
|
|
51
|
+
return;
|
|
52
|
+
}
|
|
53
|
+
try {
|
|
54
|
+
resolve(JSON.parse(out.trim()));
|
|
55
|
+
} catch {
|
|
56
|
+
reject(new Error(`bad JSON from gemini: ${out.slice(0, 100)}`));
|
|
90
57
|
}
|
|
91
58
|
});
|
|
92
59
|
});
|
|
93
60
|
}
|
|
94
61
|
|
|
62
|
+
export async function synthesizeWithGemini(
|
|
63
|
+
query,
|
|
64
|
+
results,
|
|
65
|
+
{ grounded = false, tabPrefix = null } = {},
|
|
66
|
+
) {
|
|
67
|
+
const sources = Array.isArray(results._sources)
|
|
68
|
+
? results._sources
|
|
69
|
+
: buildSourceRegistry(results);
|
|
70
|
+
const prompt = buildSynthesisPrompt(query, results, sources, { grounded });
|
|
71
|
+
|
|
72
|
+
const raw = await runGeminiPrompt(prompt, { tabPrefix, timeoutMs: 180000 });
|
|
73
|
+
let structured = parseStructuredJson(raw.answer || "");
|
|
74
|
+
|
|
75
|
+
// Detect if Gemini echoed back the engine summaries instead of a synthesis.
|
|
76
|
+
// Happens when Gemini can't synthesize (e.g. only 1 engine responded) and
|
|
77
|
+
// echoes the prompt JSON. The engine summary JSON has per-engine keys
|
|
78
|
+
// (perplexity/bing/google) but no synthesis fields (answer/agreement).
|
|
79
|
+
const SYNTHESIS_FIELDS = [
|
|
80
|
+
"answer",
|
|
81
|
+
"agreement",
|
|
82
|
+
"claims",
|
|
83
|
+
"differences",
|
|
84
|
+
"caveats",
|
|
85
|
+
];
|
|
86
|
+
const hasSynthesisFields =
|
|
87
|
+
structured && SYNTHESIS_FIELDS.some((f) => f in structured);
|
|
88
|
+
const hasEngineKeys =
|
|
89
|
+
structured && ["perplexity", "bing", "google"].some((e) => e in structured);
|
|
90
|
+
if (hasEngineKeys && !hasSynthesisFields) {
|
|
91
|
+
structured = null; // Treat as parse failure — Gemini echoed input
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
return {
|
|
95
|
+
...normalizeSynthesisPayload(structured, sources, raw.answer || ""),
|
|
96
|
+
rawAnswer: raw.answer || "",
|
|
97
|
+
geminiSources: raw.sources || [],
|
|
98
|
+
};
|
|
99
|
+
}
|
|
100
|
+
|
|
95
101
|
// Need to import buildSourceRegistry for fallback
|
|
96
102
|
import { buildSourceRegistry } from "./sources.mjs";
|
|
@@ -2,9 +2,11 @@
|
|
|
2
2
|
* greedy_search tool handler — multi-engine AI web search
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
6
|
-
import { Text } from "@earendil-works/pi-tui";
|
|
7
5
|
import { Type } from "@sinclair/typebox";
|
|
6
|
+
|
|
7
|
+
type ExtensionAPI = {
|
|
8
|
+
registerTool(tool: Record<string, unknown>): void;
|
|
9
|
+
};
|
|
8
10
|
import { formatResults } from "../formatters/results.js";
|
|
9
11
|
import {
|
|
10
12
|
ALL_ENGINES,
|
|
@@ -16,6 +18,35 @@ import {
|
|
|
16
18
|
stripQuotes,
|
|
17
19
|
} from "./shared.js";
|
|
18
20
|
|
|
21
|
+
class Text {
|
|
22
|
+
constructor(
|
|
23
|
+
private text: string,
|
|
24
|
+
private paddingX = 0,
|
|
25
|
+
private paddingY = 0,
|
|
26
|
+
) {}
|
|
27
|
+
|
|
28
|
+
render(width: number): string[] {
|
|
29
|
+
const horizontal = " ".repeat(this.paddingX);
|
|
30
|
+
const blank = "";
|
|
31
|
+
const contentWidth = Math.max(1, width - this.paddingX * 2);
|
|
32
|
+
const lines = this.text.split("\n").flatMap((line) => {
|
|
33
|
+
if (line.length <= contentWidth) return [`${horizontal}${line}`];
|
|
34
|
+
const wrapped: string[] = [];
|
|
35
|
+
for (let i = 0; i < line.length; i += contentWidth) {
|
|
36
|
+
wrapped.push(`${horizontal}${line.slice(i, i + contentWidth)}`);
|
|
37
|
+
}
|
|
38
|
+
return wrapped;
|
|
39
|
+
});
|
|
40
|
+
return [
|
|
41
|
+
...Array.from({ length: this.paddingY }, () => blank),
|
|
42
|
+
...lines,
|
|
43
|
+
...Array.from({ length: this.paddingY }, () => blank),
|
|
44
|
+
];
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
invalidate() {}
|
|
48
|
+
}
|
|
49
|
+
|
|
19
50
|
export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
|
|
20
51
|
pi.registerTool({
|
|
21
52
|
name: "greedy_search",
|
|
@@ -35,9 +66,29 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
|
|
|
35
66
|
}),
|
|
36
67
|
depth: Type.String({
|
|
37
68
|
description:
|
|
38
|
-
'Search depth: "fast" (no synthesis/source fetch, ~15-30s), "standard" (synthesis + sources, ~30-90s), "deep" (
|
|
69
|
+
'Search depth: "fast" (no synthesis/source fetch, ~15-30s), "standard" (synthesis + sources, ~30-90s), "deep" (stronger grounding, ~60-180s), "research" (iterative query/learnings loop; slowest). Default: "standard". Note: single-engine searches default to fast unless depth is "research".',
|
|
39
70
|
default: "standard",
|
|
40
71
|
}),
|
|
72
|
+
breadth: Type.Optional(
|
|
73
|
+
Type.Number({
|
|
74
|
+
description:
|
|
75
|
+
'Only for depth="research": number of parallel research directions per round, 1-5 (default: 3).',
|
|
76
|
+
default: 3,
|
|
77
|
+
}),
|
|
78
|
+
),
|
|
79
|
+
iterations: Type.Optional(
|
|
80
|
+
Type.Number({
|
|
81
|
+
description:
|
|
82
|
+
'Only for depth="research": number of iterative research rounds, 1-3 (default: 2).',
|
|
83
|
+
default: 2,
|
|
84
|
+
}),
|
|
85
|
+
),
|
|
86
|
+
maxSources: Type.Optional(
|
|
87
|
+
Type.Number({
|
|
88
|
+
description:
|
|
89
|
+
'Only for depth="research": maximum fetched sources for the final report, 3-12.',
|
|
90
|
+
}),
|
|
91
|
+
),
|
|
41
92
|
fullAnswer: Type.Optional(
|
|
42
93
|
Type.Boolean({
|
|
43
94
|
description:
|
|
@@ -71,7 +122,10 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
|
|
|
71
122
|
const { query, fullAnswer: fullAnswerParam } = params as {
|
|
72
123
|
query: string;
|
|
73
124
|
engine: string;
|
|
74
|
-
depth?: "fast" | "standard" | "deep";
|
|
125
|
+
depth?: "fast" | "standard" | "deep" | "research";
|
|
126
|
+
breadth?: number;
|
|
127
|
+
iterations?: number;
|
|
128
|
+
maxSources?: number;
|
|
75
129
|
fullAnswer?: boolean;
|
|
76
130
|
headless?: boolean;
|
|
77
131
|
visible?: boolean;
|
|
@@ -79,7 +133,8 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
|
|
|
79
133
|
};
|
|
80
134
|
const engine = stripQuotes((params as any).engine ?? "all") || "all";
|
|
81
135
|
const depth = (stripQuotes((params as any).depth ?? "standard") ||
|
|
82
|
-
"standard") as "fast" | "standard" | "deep";
|
|
136
|
+
"standard") as "fast" | "standard" | "deep" | "research";
|
|
137
|
+
const effectiveEngine = depth === "research" ? "all" : engine;
|
|
83
138
|
const visible =
|
|
84
139
|
(params as any).visible === true ||
|
|
85
140
|
(params as any).alwaysVisible === true ||
|
|
@@ -91,21 +146,34 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
|
|
|
91
146
|
if (!cdpAvailable(baseDir)) return cdpMissingResult();
|
|
92
147
|
|
|
93
148
|
const flags: string[] = [];
|
|
94
|
-
const fullAnswer = fullAnswerParam ??
|
|
149
|
+
const fullAnswer = fullAnswerParam ?? effectiveEngine !== "all";
|
|
95
150
|
if (fullAnswer) flags.push("--full");
|
|
96
|
-
if (depth === "
|
|
151
|
+
if (depth === "research") {
|
|
152
|
+
flags.push("--depth", "research");
|
|
153
|
+
if (typeof (params as any).breadth === "number")
|
|
154
|
+
flags.push("--breadth", String((params as any).breadth));
|
|
155
|
+
if (typeof (params as any).iterations === "number")
|
|
156
|
+
flags.push("--iterations", String((params as any).iterations));
|
|
157
|
+
if (typeof (params as any).maxSources === "number")
|
|
158
|
+
flags.push("--max-sources", String((params as any).maxSources));
|
|
159
|
+
} else if (depth === "deep") flags.push("--depth", "deep");
|
|
97
160
|
else if (depth === "fast") flags.push("--fast");
|
|
98
161
|
else if (depth === "standard" && engine === "all")
|
|
99
162
|
flags.push("--synthesize");
|
|
100
163
|
|
|
101
164
|
const onProgress =
|
|
102
|
-
|
|
103
|
-
? makeProgressTracker(
|
|
165
|
+
effectiveEngine === "all"
|
|
166
|
+
? makeProgressTracker(
|
|
167
|
+
ALL_ENGINES,
|
|
168
|
+
onUpdate,
|
|
169
|
+
depth === "research" ? "Researching" : "Searching",
|
|
170
|
+
depth,
|
|
171
|
+
)
|
|
104
172
|
: undefined;
|
|
105
173
|
|
|
106
174
|
try {
|
|
107
175
|
const data = await runSearch(
|
|
108
|
-
|
|
176
|
+
effectiveEngine,
|
|
109
177
|
query,
|
|
110
178
|
flags,
|
|
111
179
|
`${baseDir}/bin/search.mjs`,
|
|
@@ -113,7 +181,7 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
|
|
|
113
181
|
onProgress,
|
|
114
182
|
headless,
|
|
115
183
|
);
|
|
116
|
-
const text = formatResults(
|
|
184
|
+
const text = formatResults(effectiveEngine, data);
|
|
117
185
|
return {
|
|
118
186
|
content: [{ type: "text", text: text || "No results returned." }],
|
|
119
187
|
details: { raw: data },
|
|
@@ -139,7 +207,9 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
|
|
|
139
207
|
|
|
140
208
|
renderResult(result, { expanded, isPartial }, theme) {
|
|
141
209
|
if (isPartial) {
|
|
142
|
-
const progressText = (
|
|
210
|
+
const progressText = (
|
|
211
|
+
result.content.find((c) => c.type === "text") as any
|
|
212
|
+
)?.text as string | undefined;
|
|
143
213
|
const display = progressText
|
|
144
214
|
? progressText.replace(/\*\*/g, "")
|
|
145
215
|
: "Searching...";
|
|
@@ -170,7 +240,9 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
|
|
|
170
240
|
const sources = raw?._sources as Array<unknown> | undefined;
|
|
171
241
|
if (synthesis) {
|
|
172
242
|
const sourceCount = Array.isArray(sources) ? sources.length : 0;
|
|
173
|
-
const agreement = (
|
|
243
|
+
const agreement = (
|
|
244
|
+
synthesis.agreement as Record<string, unknown> | undefined
|
|
245
|
+
)?.level as string | undefined;
|
|
174
246
|
let summary = " → Synthesized";
|
|
175
247
|
if (sourceCount > 0)
|
|
176
248
|
summary += ` · ${sourceCount} source${sourceCount > 1 ? "s" : ""}`;
|