@apmantza/greedysearch-pi 1.9.1 → 1.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,6 +37,16 @@ export const NEWS_HOSTS = [
37
37
  "zdnet.com",
38
38
  ];
39
39
 
40
+ export const SOCIAL_HOSTS = [
41
+ "facebook.com",
42
+ "instagram.com",
43
+ "linkedin.com",
44
+ "pinterest.com",
45
+ "tiktok.com",
46
+ "twitter.com",
47
+ "x.com",
48
+ ];
49
+
40
50
  export function trimText(text = "", maxChars = 240) {
41
51
  const clean = String(text).replaceAll(/\s+/g, " ").trim();
42
52
  if (clean.length <= maxChars) return clean;
@@ -122,6 +132,7 @@ export function classifySourceType(domain, title = "", rawUrl = "") {
122
132
  const lowerUrl = rawUrl.toLowerCase();
123
133
 
124
134
  if (domain === "github.com" || domain === "gitlab.com") return "repo";
135
+ if (matchesDomain(domain, SOCIAL_HOSTS)) return "social";
125
136
  if (matchesDomain(domain, COMMUNITY_HOSTS)) return "community";
126
137
  if (matchesDomain(domain, NEWS_HOSTS)) return "news";
127
138
  if (
@@ -157,6 +168,8 @@ export function sourceTypePriority(sourceType) {
157
168
  return 1;
158
169
  case "news":
159
170
  return 0;
171
+ case "social":
172
+ return -6;
160
173
  default:
161
174
  return 0;
162
175
  }
@@ -308,6 +321,10 @@ export function inferPreferredDomains(query) {
308
321
  if (normalized.includes("gemini") || normalized.includes("google ai")) {
309
322
  matches.push("ai.google.dev", "developers.google.com");
310
323
  }
324
// Treat a social host as preferred only when the query names it, so the
// social-source penalty is lifted just for queries that target that site.
for (const socialHost of SOCIAL_HOSTS) {
  const bareName = socialHost.replace(/\.com$/, "");
  // A one-letter bare name ("x" from "x.com") is a substring of almost every
  // query, so short names must appear as the full host instead.
  // NOTE(review): assumes `normalized` keeps the dot in host names — confirm.
  const needle = bareName.length > 1 ? bareName : socialHost;
  if (normalized.includes(needle)) matches.push(socialHost);
}
311
328
 
312
329
  return [...new Set(matches)];
313
330
  }
@@ -359,10 +376,15 @@ export function buildSourceRegistry(out, query = "") {
359
376
  smartScore += 2;
360
377
  }
361
378
 
362
- // Penalize discussion forums for technical queries — high noise, rarely canonical.
363
- // Q&A sites (stackoverflow, stackexchange) are excluded: they often have the
364
- // best practical answer and shouldn't be penalised just because an official
365
- // domain also exists.
379
+ // Penalize discussion/social sites for technical queries — high noise,
380
+ // hard to fetch cleanly, and rarely canonical. Q&A sites (StackOverflow,
381
+ // StackExchange) are excluded from the community penalty.
382
+ const queryTargetsSocialHost = preferredDomains.some((pd) =>
383
+ domainMatches(domain, pd),
384
+ );
385
+ if (sourceType === "social" && !queryTargetsSocialHost) {
386
+ smartScore -= 12;
387
+ }
366
388
  if (preferredDomains.length > 0) {
367
389
  if (matchesDomain(domain, DISCUSSION_HOSTS)) {
368
390
  smartScore -= 3;
@@ -15,16 +15,10 @@ const __dir =
15
15
  import.meta.dirname ||
16
16
  new URL(".", import.meta.url).pathname.replace(/^\/([A-Z]:)/, "$1");
17
17
 
18
- export async function synthesizeWithGemini(
19
- query,
20
- results,
21
- { grounded = false, tabPrefix = null } = {},
18
+ export async function runGeminiPrompt(
19
+ prompt,
20
+ { tabPrefix = null, timeoutMs = 180000 } = {},
22
21
  ) {
23
- const sources = Array.isArray(results._sources)
24
- ? results._sources
25
- : buildSourceRegistry(results);
26
- const prompt = buildSynthesisPrompt(query, results, sources, { grounded });
27
-
28
22
  return new Promise((resolve, reject) => {
29
23
  const extraArgs = tabPrefix ? ["--tab", String(tabPrefix)] : [];
30
24
  const proc = spawn(
@@ -39,7 +33,7 @@ export async function synthesizeWithGemini(
39
33
  env: { ...process.env, CDP_PROFILE_DIR: GREEDY_PROFILE_DIR },
40
34
  },
41
35
  );
42
- // Pipe synthesis prompt via stdin to avoid leaking the full prompt in process table
36
+ // Pipe prompts via stdin to avoid leaking them in process tables.
43
37
  proc.stdin.write(prompt);
44
38
  proc.stdin.end();
45
39
  let out = "";
@@ -48,49 +42,61 @@ export async function synthesizeWithGemini(
48
42
  proc.stderr.on("data", (d) => (err += d));
49
43
  const t = setTimeout(() => {
50
44
  proc.kill();
51
- reject(new Error("Gemini synthesis timed out after 180s"));
52
- }, 180000);
45
+ reject(new Error(`Gemini prompt timed out after ${timeoutMs / 1000}s`));
46
+ }, timeoutMs);
53
47
  proc.on("close", (code) => {
54
48
  clearTimeout(t);
55
- if (code !== 0)
49
+ if (code !== 0) {
56
50
  reject(new Error(err.trim() || "gemini extractor failed"));
57
- else {
58
- try {
59
- const raw = JSON.parse(out.trim());
60
- let structured = parseStructuredJson(raw.answer || "");
61
-
62
- // Detect if Gemini echoed back the engine summaries instead of a synthesis.
63
- // Happens when Gemini can't synthesize (e.g. only 1 engine responded) and
64
- // echoes the prompt JSON. The engine summary JSON has per-engine keys
65
- // (perplexity/bing/google) but no synthesis fields (answer/agreement).
66
- const SYNTHESIS_FIELDS = [
67
- "answer",
68
- "agreement",
69
- "claims",
70
- "differences",
71
- "caveats",
72
- ];
73
- const hasSynthesisFields =
74
- structured && SYNTHESIS_FIELDS.some((f) => f in structured);
75
- const hasEngineKeys =
76
- structured &&
77
- ["perplexity", "bing", "google"].some((e) => e in structured);
78
- if (hasEngineKeys && !hasSynthesisFields) {
79
- structured = null; // Treat as parse failure — Gemini echoed input
80
- }
81
-
82
- resolve({
83
- ...normalizeSynthesisPayload(structured, sources, raw.answer || ""),
84
- rawAnswer: raw.answer || "",
85
- geminiSources: raw.sources || [],
86
- });
87
- } catch {
88
- reject(new Error(`bad JSON from gemini: ${out.slice(0, 100)}`));
89
- }
51
+ return;
52
+ }
53
+ try {
54
+ resolve(JSON.parse(out.trim()));
55
+ } catch {
56
+ reject(new Error(`bad JSON from gemini: ${out.slice(0, 100)}`));
90
57
  }
91
58
  });
92
59
  });
93
60
  }
94
61
 
62
/**
 * Ask Gemini to merge the per-engine results into one structured synthesis.
 *
 * Builds the synthesis prompt, runs it through `runGeminiPrompt`, parses the
 * structured JSON carried in the reply's `answer`, and normalizes it against
 * the source registry.
 *
 * @param {string} query - Query handed to the prompt builder.
 * @param {object} results - Per-engine results; a `_sources` array on it is
 *   reused, otherwise the registry is built from `results`.
 * @param {{ grounded?: boolean, tabPrefix?: string | number | null }} [options]
 *   `grounded` is forwarded to the prompt builder, `tabPrefix` to the runner.
 * @returns {Promise<object>} The normalized synthesis payload plus `rawAnswer`
 *   and `geminiSources`.
 * @throws {Error} When the Gemini run rejects (non-zero exit, timeout,
 *   unparseable output) or its output is JSON but not an object.
 */
export async function synthesizeWithGemini(
  query,
  results,
  { grounded = false, tabPrefix = null } = {},
) {
  const sources = Array.isArray(results._sources)
    ? results._sources
    : buildSourceRegistry(results);
  const prompt = buildSynthesisPrompt(query, results, sources, { grounded });

  const raw = await runGeminiPrompt(prompt, { tabPrefix, timeoutMs: 180000 });
  // JSON.parse also accepts `null`, numbers and strings. Reject those here so
  // the caller sees the same "bad JSON" error as for unparseable output rather
  // than a TypeError from reading `.answer` off a non-object.
  if (raw === null || typeof raw !== "object") {
    throw new Error(
      `bad JSON from gemini: ${String(JSON.stringify(raw)).slice(0, 100)}`,
    );
  }

  const rawAnswer = raw.answer || "";
  let structured = parseStructuredJson(rawAnswer);

  // Detect if Gemini echoed back the engine summaries instead of a synthesis.
  // Happens when Gemini can't synthesize (e.g. only 1 engine responded) and
  // echoes the prompt JSON. The engine summary JSON has per-engine keys
  // (perplexity/bing/google) but no synthesis fields (answer/agreement).
  const SYNTHESIS_FIELDS = [
    "answer",
    "agreement",
    "claims",
    "differences",
    "caveats",
  ];
  const hasSynthesisFields =
    structured && SYNTHESIS_FIELDS.some((f) => f in structured);
  const hasEngineKeys =
    structured && ["perplexity", "bing", "google"].some((e) => e in structured);
  if (hasEngineKeys && !hasSynthesisFields) {
    structured = null; // Treat as parse failure — Gemini echoed input
  }

  return {
    ...normalizeSynthesisPayload(structured, sources, rawAnswer),
    rawAnswer,
    geminiSources: raw.sources || [],
  };
}
100
+
95
101
  // Need to import buildSourceRegistry for fallback
96
102
  import { buildSourceRegistry } from "./sources.mjs";
@@ -2,9 +2,11 @@
2
2
  * greedy_search tool handler — multi-engine AI web search
3
3
  */
4
4
 
5
- import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
6
- import { Text } from "@earendil-works/pi-tui";
7
5
  import { Type } from "@sinclair/typebox";
6
+
7
+ type ExtensionAPI = {
8
+ registerTool(tool: Record<string, unknown>): void;
9
+ };
8
10
  import { formatResults } from "../formatters/results.js";
9
11
  import {
10
12
  ALL_ENGINES,
@@ -16,6 +18,35 @@ import {
16
18
  stripQuotes,
17
19
  } from "./shared.js";
18
20
 
21
/**
 * Local stand-in for the `Text` component that used to be imported from
 * `@earendil-works/pi-tui`: renders a string as left-padded, hard-wrapped
 * lines with optional blank lines above and below.
 */
class Text {
  constructor(
    private readonly text: string,
    private readonly paddingX = 0,
    private readonly paddingY = 0,
  ) {}

  /**
   * Lay the text out for the given width.
   *
   * @param width - Total columns available. `paddingX` is reserved on both
   *   sides when computing the wrap width, but only the left padding is
   *   emitted as spaces.
   * @returns `paddingY` blank lines, the wrapped content lines, then
   *   `paddingY` blank lines.
   */
  render(width: number): string[] {
    const padX = Text.toCount(this.paddingX);
    const padY = Text.toCount(this.paddingY);
    const indent = " ".repeat(padX);

    // Always leave room for at least one character per line. A NaN width
    // falls back to that minimum instead of silently dropping content.
    const available = Math.floor(width - padX * 2);
    const contentWidth = Number.isNaN(available) ? 1 : Math.max(1, available);

    const lines = this.text
      .split("\n")
      .flatMap((line) =>
        Text.wrapLine(line, contentWidth).map((chunk) => `${indent}${chunk}`),
      );
    const margin = Array.from({ length: padY }, () => "");
    return [...margin, ...lines, ...margin];
  }

  /** No-op: `render` recomputes its output on every call, nothing is cached. */
  invalidate() {}

  /** Coerce a padding value to a non-negative integer (`repeat` throws on negatives). */
  private static toCount(value: number): number {
    return Number.isFinite(value) && value > 0 ? Math.floor(value) : 0;
  }

  /** Hard-wrap one line into chunks of at most `contentWidth` UTF-16 units. */
  private static wrapLine(line: string, contentWidth: number): string[] {
    if (line.length <= contentWidth) return [line];
    const chunks: string[] = [];
    let start = 0;
    while (start < line.length) {
      let end = Math.min(start + contentWidth, line.length);
      if (end < line.length && Text.splitsSurrogatePair(line, end)) {
        // Never cut through a surrogate pair: end the chunk before it, or,
        // when the pair is the only thing in the chunk, keep it whole.
        end = end - 1 > start ? end - 1 : end + 1;
      }
      chunks.push(line.slice(start, end));
      start = end;
    }
    return chunks;
  }

  /** True when cutting `line` at `index` would separate a high/low surrogate pair. */
  private static splitsSurrogatePair(line: string, index: number): boolean {
    const before = line.charCodeAt(index - 1);
    const after = line.charCodeAt(index);
    return (
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff
    );
  }
}
49
+
19
50
  export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
20
51
  pi.registerTool({
21
52
  name: "greedy_search",
@@ -35,9 +66,29 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
35
66
  }),
36
67
  depth: Type.String({
37
68
  description:
38
- 'Search depth: "fast" (no synthesis/source fetch, ~15-30s), "standard" (synthesis + sources, ~30-90s), "deep" (synthesis + source fetching + confidence, ~60-180s). Default: "standard". Note: single-engine searches always run in fast mode regardless of this setting — synthesis requires multiple engines.',
69
+ 'Search depth: "fast" (no synthesis/source fetch, ~15-30s), "standard" (synthesis + sources, ~30-90s), "deep" (stronger grounding, ~60-180s), "research" (iterative query/learnings loop; slowest). Default: "standard". Note: single-engine searches default to fast unless depth is "research".',
39
70
  default: "standard",
40
71
  }),
72
+ breadth: Type.Optional(
73
+ Type.Number({
74
+ description:
75
+ 'Only for depth="research": number of parallel research directions per round, 1-5 (default: 3).',
76
+ default: 3,
77
+ }),
78
+ ),
79
+ iterations: Type.Optional(
80
+ Type.Number({
81
+ description:
82
+ 'Only for depth="research": number of iterative research rounds, 1-3 (default: 2).',
83
+ default: 2,
84
+ }),
85
+ ),
86
+ maxSources: Type.Optional(
87
+ Type.Number({
88
+ description:
89
+ 'Only for depth="research": maximum fetched sources for the final report, 3-12.',
90
+ }),
91
+ ),
41
92
  fullAnswer: Type.Optional(
42
93
  Type.Boolean({
43
94
  description:
@@ -71,7 +122,10 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
71
122
  const { query, fullAnswer: fullAnswerParam } = params as {
72
123
  query: string;
73
124
  engine: string;
74
- depth?: "fast" | "standard" | "deep";
125
+ depth?: "fast" | "standard" | "deep" | "research";
126
+ breadth?: number;
127
+ iterations?: number;
128
+ maxSources?: number;
75
129
  fullAnswer?: boolean;
76
130
  headless?: boolean;
77
131
  visible?: boolean;
@@ -79,7 +133,8 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
79
133
  };
80
134
  const engine = stripQuotes((params as any).engine ?? "all") || "all";
81
135
  const depth = (stripQuotes((params as any).depth ?? "standard") ||
82
- "standard") as "fast" | "standard" | "deep";
136
+ "standard") as "fast" | "standard" | "deep" | "research";
137
+ const effectiveEngine = depth === "research" ? "all" : engine;
83
138
  const visible =
84
139
  (params as any).visible === true ||
85
140
  (params as any).alwaysVisible === true ||
@@ -91,21 +146,34 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
91
146
  if (!cdpAvailable(baseDir)) return cdpMissingResult();
92
147
 
93
148
  const flags: string[] = [];
94
- const fullAnswer = fullAnswerParam ?? engine !== "all";
149
+ const fullAnswer = fullAnswerParam ?? effectiveEngine !== "all";
95
150
  if (fullAnswer) flags.push("--full");
96
- if (depth === "deep") flags.push("--depth", "deep");
151
+ if (depth === "research") {
152
+ flags.push("--depth", "research");
153
+ if (typeof (params as any).breadth === "number")
154
+ flags.push("--breadth", String((params as any).breadth));
155
+ if (typeof (params as any).iterations === "number")
156
+ flags.push("--iterations", String((params as any).iterations));
157
+ if (typeof (params as any).maxSources === "number")
158
+ flags.push("--max-sources", String((params as any).maxSources));
159
+ } else if (depth === "deep") flags.push("--depth", "deep");
97
160
  else if (depth === "fast") flags.push("--fast");
98
161
  else if (depth === "standard" && engine === "all")
99
162
  flags.push("--synthesize");
100
163
 
101
164
  const onProgress =
102
- engine === "all"
103
- ? makeProgressTracker(ALL_ENGINES, onUpdate, "Searching", depth)
165
+ effectiveEngine === "all"
166
+ ? makeProgressTracker(
167
+ ALL_ENGINES,
168
+ onUpdate,
169
+ depth === "research" ? "Researching" : "Searching",
170
+ depth,
171
+ )
104
172
  : undefined;
105
173
 
106
174
  try {
107
175
  const data = await runSearch(
108
- engine,
176
+ effectiveEngine,
109
177
  query,
110
178
  flags,
111
179
  `${baseDir}/bin/search.mjs`,
@@ -113,7 +181,7 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
113
181
  onProgress,
114
182
  headless,
115
183
  );
116
- const text = formatResults(engine, data);
184
+ const text = formatResults(effectiveEngine, data);
117
185
  return {
118
186
  content: [{ type: "text", text: text || "No results returned." }],
119
187
  details: { raw: data },
@@ -139,7 +207,9 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
139
207
 
140
208
  renderResult(result, { expanded, isPartial }, theme) {
141
209
  if (isPartial) {
142
- const progressText = (result.content.find((c) => c.type === "text") as any)?.text as string | undefined;
210
+ const progressText = (
211
+ result.content.find((c) => c.type === "text") as any
212
+ )?.text as string | undefined;
143
213
  const display = progressText
144
214
  ? progressText.replace(/\*\*/g, "")
145
215
  : "Searching...";
@@ -170,7 +240,9 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
170
240
  const sources = raw?._sources as Array<unknown> | undefined;
171
241
  if (synthesis) {
172
242
  const sourceCount = Array.isArray(sources) ? sources.length : 0;
173
- const agreement = (synthesis.agreement as Record<string, unknown> | undefined)?.level as string | undefined;
243
+ const agreement = (
244
+ synthesis.agreement as Record<string, unknown> | undefined
245
+ )?.level as string | undefined;
174
246
  let summary = " → Synthesized";
175
247
  if (sourceCount > 0)
176
248
  summary += ` · ${sourceCount} source${sourceCount > 1 ? "s" : ""}`;