@apmantza/greedysearch-pi 1.8.0 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,64 +1,64 @@
1
- // src/search/synthesis-runner.mjs — Run Gemini synthesis via CDP
2
- //
3
- // Extracted from search.mjs.
4
-
5
- import { spawn } from "node:child_process";
6
- import { join } from "node:path";
7
- import { GREEDY_PROFILE_DIR } from "./constants.mjs";
8
- import { parseStructuredJson, normalizeSynthesisPayload, buildSynthesisPrompt } from "./synthesis.mjs";
9
- import { cdp, openNewTab, closeTab, activateTab } from "./chrome.mjs";
10
- import { trimText } from "./sources.mjs";
11
-
12
- const __dir = import.meta.dirname || new URL(".", import.meta.url).pathname.replace(/^\/([A-Z]:)/, "$1");
13
-
14
- export async function synthesizeWithGemini(
15
- query,
16
- results,
17
- { grounded = false, tabPrefix = null } = {},
18
- ) {
19
- const sources = Array.isArray(results._sources)
20
- ? results._sources
21
- : buildSourceRegistry(results);
22
- const prompt = buildSynthesisPrompt(query, results, sources, { grounded });
23
-
24
- return new Promise((resolve, reject) => {
25
- const extraArgs = tabPrefix ? ["--tab", String(tabPrefix)] : [];
26
- const proc = spawn(
27
- "node",
28
- [join(__dir, "..", "..", "extractors", "gemini.mjs"), prompt, ...extraArgs],
29
- {
30
- stdio: ["ignore", "pipe", "pipe"],
31
- env: { ...process.env, CDP_PROFILE_DIR: GREEDY_PROFILE_DIR },
32
- },
33
- );
34
- let out = "";
35
- let err = "";
36
- proc.stdout.on("data", (d) => (out += d));
37
- proc.stderr.on("data", (d) => (err += d));
38
- const t = setTimeout(() => {
39
- proc.kill();
40
- reject(new Error("Gemini synthesis timed out after 180s"));
41
- }, 180000);
42
- proc.on("close", (code) => {
43
- clearTimeout(t);
44
- if (code !== 0)
45
- reject(new Error(err.trim() || "gemini extractor failed"));
46
- else {
47
- try {
48
- const raw = JSON.parse(out.trim());
49
- const structured = parseStructuredJson(raw.answer || "");
50
- resolve({
51
- ...normalizeSynthesisPayload(structured, sources, raw.answer || ""),
52
- rawAnswer: raw.answer || "",
53
- geminiSources: raw.sources || [],
54
- });
55
- } catch {
56
- reject(new Error(`bad JSON from gemini: ${out.slice(0, 100)}`));
57
- }
58
- }
59
- });
60
- });
61
- }
62
-
63
- // Need to import buildSourceRegistry for fallback
1
+ // src/search/synthesis-runner.mjs — Run Gemini synthesis via CDP
2
+ //
3
+ // Extracted from search.mjs.
4
+
5
+ import { spawn } from "node:child_process";
6
+ import { join } from "node:path";
7
+ import { GREEDY_PROFILE_DIR } from "./constants.mjs";
8
+ import { parseStructuredJson, normalizeSynthesisPayload, buildSynthesisPrompt } from "./synthesis.mjs";
9
+ import { cdp, openNewTab, closeTab, activateTab } from "./chrome.mjs";
10
+ import { trimText } from "./sources.mjs";
11
+
12
+ const __dir = import.meta.dirname || new URL(".", import.meta.url).pathname.replace(/^\/([A-Z]:)/, "$1");
13
+
14
+ export async function synthesizeWithGemini(
15
+ query,
16
+ results,
17
+ { grounded = false, tabPrefix = null } = {},
18
+ ) {
19
+ const sources = Array.isArray(results._sources)
20
+ ? results._sources
21
+ : buildSourceRegistry(results);
22
+ const prompt = buildSynthesisPrompt(query, results, sources, { grounded });
23
+
24
+ return new Promise((resolve, reject) => {
25
+ const extraArgs = tabPrefix ? ["--tab", String(tabPrefix)] : [];
26
+ const proc = spawn(
27
+ "node",
28
+ [join(__dir, "..", "..", "extractors", "gemini.mjs"), prompt, ...extraArgs],
29
+ {
30
+ stdio: ["ignore", "pipe", "pipe"],
31
+ env: { ...process.env, CDP_PROFILE_DIR: GREEDY_PROFILE_DIR },
32
+ },
33
+ );
34
+ let out = "";
35
+ let err = "";
36
+ proc.stdout.on("data", (d) => (out += d));
37
+ proc.stderr.on("data", (d) => (err += d));
38
+ const t = setTimeout(() => {
39
+ proc.kill();
40
+ reject(new Error("Gemini synthesis timed out after 180s"));
41
+ }, 180000);
42
+ proc.on("close", (code) => {
43
+ clearTimeout(t);
44
+ if (code !== 0)
45
+ reject(new Error(err.trim() || "gemini extractor failed"));
46
+ else {
47
+ try {
48
+ const raw = JSON.parse(out.trim());
49
+ const structured = parseStructuredJson(raw.answer || "");
50
+ resolve({
51
+ ...normalizeSynthesisPayload(structured, sources, raw.answer || ""),
52
+ rawAnswer: raw.answer || "",
53
+ geminiSources: raw.sources || [],
54
+ });
55
+ } catch {
56
+ reject(new Error(`bad JSON from gemini: ${out.slice(0, 100)}`));
57
+ }
58
+ }
59
+ });
60
+ });
61
+ }
62
+
63
+ // Need to import buildSourceRegistry for fallback
64
64
  import { buildSourceRegistry } from "./sources.mjs";
@@ -3,12 +3,26 @@
3
3
  //
4
4
  // Extracted from search.mjs to reduce file complexity.
5
5
 
6
- import { trimText } from "./sources.mjs";
7
6
  import { ALL_ENGINES } from "./constants.mjs";
7
+ import { trimText } from "./sources.mjs";
8
8
 
9
9
  export function parseStructuredJson(text) {
10
10
  if (!text) return null;
11
- const trimmed = String(text).trim();
11
+ let trimmed = String(text).trim();
12
+
13
+ // Look for BEGIN_JSON/END_JSON markers first
14
+ const beginIdx = trimmed.indexOf("BEGIN_JSON");
15
+ const endIdx = trimmed.indexOf("END_JSON");
16
+ if (beginIdx !== -1 && endIdx !== -1 && beginIdx < endIdx) {
17
+ trimmed = trimmed.slice(beginIdx + "BEGIN_JSON".length, endIdx).trim();
18
+ } else {
19
+ // Strip out common LLM preamble text before the actual JSON
20
+ const jsonStart = trimmed.indexOf("{");
21
+ if (jsonStart > 0) {
22
+ trimmed = trimmed.slice(jsonStart);
23
+ }
24
+ }
25
+
12
26
  const candidates = [
13
27
  trimmed,
14
28
  trimmed
@@ -18,7 +32,7 @@ export function parseStructuredJson(text) {
18
32
  .trim(),
19
33
  ];
20
34
 
21
- const objectMatch = trimmed.match(/\{[\s\S]*\}/);
35
+ const objectMatch = trimmed.match(/\{[\s\S]*\}$/);
22
36
  if (objectMatch) candidates.push(objectMatch[0]);
23
37
 
24
38
  for (const candidate of candidates) {
@@ -31,7 +45,11 @@ export function parseStructuredJson(text) {
31
45
  return null;
32
46
  }
33
47
 
34
- export function normalizeSynthesisPayload(payload, sources, fallbackAnswer = "") {
48
+ export function normalizeSynthesisPayload(
49
+ payload,
50
+ sources,
51
+ fallbackAnswer = "",
52
+ ) {
35
53
  const sourceIds = new Set(sources.map((source) => source.id));
36
54
  const agreementLevel = [
37
55
  "high",
@@ -61,8 +79,13 @@ export function normalizeSynthesisPayload(payload, sources, fallbackAnswer = "")
61
79
  ? payload.recommendedSources.filter((id) => sourceIds.has(id)).slice(0, 6)
62
80
  : [];
63
81
 
82
+ // Clean up fallback answer if it contains preamble text
83
+ const cleanFallback = fallbackAnswer
84
+ ? fallbackAnswer.replace(/^[\s\S]*?\{/m, "{").replace(/}\s*[\s\S]*$/m, "}")
85
+ : "";
86
+
64
87
  return {
65
- answer: trimText(payload?.answer || fallbackAnswer, 4000),
88
+ answer: trimText(payload?.answer || cleanFallback || fallbackAnswer, 4000),
66
89
  agreement: {
67
90
  level: agreementLevel,
68
91
  summary: trimText(payload?.agreement?.summary || "", 280),
@@ -127,44 +150,32 @@ export function buildSynthesisPrompt(
127
150
  engines: source.engines,
128
151
  engineCount: source.engineCount,
129
152
  perEngine: source.perEngine,
130
- fetch:
131
- source.fetch?.attempted
132
- ? {
133
- ok: source.fetch.ok,
134
- status: source.fetch.status,
135
- publishedTime: source.fetch.publishedTime || "",
136
- lastModified: source.fetch.lastModified || "",
137
- byline: source.fetch.byline || "",
138
- siteName: source.fetch.siteName || "",
139
- ...(grounded ? { snippet: trimText(source.fetch.snippet || "", 700) } : {}),
140
- }
141
- : undefined,
153
+ fetch: source.fetch?.attempted
154
+ ? {
155
+ ok: source.fetch.ok,
156
+ status: source.fetch.status,
157
+ publishedTime: source.fetch.publishedTime || "",
158
+ lastModified: source.fetch.lastModified || "",
159
+ byline: source.fetch.byline || "",
160
+ siteName: source.fetch.siteName || "",
161
+ ...(grounded
162
+ ? { snippet: trimText(source.fetch.snippet || "", 700) }
163
+ : {}),
164
+ }
165
+ : undefined,
142
166
  }));
143
167
 
144
168
  return [
145
- "You are synthesizing results from Perplexity, Bing Copilot, and Google AI.",
146
- grounded
147
- ? "Use the fetched source snippets as the strongest evidence. Use engine answers for perspective and conflict detection."
148
- : "Use the engine answers for perspective. Use the source registry for provenance and citations.",
149
- "Prefer official docs, release notes, repositories, and maintainer-authored sources when available.",
150
- "When publishedTime or lastModified is available, flag sources older than 2 years as potentially stale in caveats.",
151
- "If the engines disagree, say so explicitly.",
152
- "Do not invent sources. Only reference source IDs from the source registry.",
153
- "Return valid JSON only. No markdown fences, no prose outside the JSON object.",
169
+ "Synthesize the following search results into a concise answer.",
170
+ "Compare the three engine responses (Perplexity, Bing, Google) and identify:",
171
+ "1. The main answer to the query",
172
+ "2. Where the engines agree",
173
+ "3. Where they disagree (if anywhere)",
174
+ "4. Any caveats or limitations",
175
+ "Use source IDs like S1, S2 when citing sources.",
176
+ "Format: Start with a brief answer, then list key points.",
154
177
  "",
155
- "JSON schema:",
156
- "{",
157
- ' "answer": "short direct answer",',
158
- ' "agreement": { "level": "high|medium|low|mixed|conflicting", "summary": "..." },',
159
- ' "differences": ["..."],',
160
- ' "caveats": ["..."],',
161
- ' "claims": [',
162
- ' { "claim": "...", "support": "strong|moderate|weak|conflicting", "sourceIds": ["S1"] }',
163
- " ],",
164
- ' "recommendedSources": ["S1", "S2"]',
165
- "}",
166
- "",
167
- `User query: ${query}`,
178
+ `Query: ${query}`,
168
179
  "",
169
180
  `Engine results:\n${JSON.stringify(engineSummaries, null, 2)}`,
170
181
  "",
@@ -209,4 +220,4 @@ export function buildConfidence(out) {
209
220
  : 0,
210
221
  sourceTypeBreakdown,
211
222
  };
212
- }
223
+ }
@@ -1,37 +1,37 @@
1
- /**
2
- * deep_research tool handler — legacy alias to greedy_search with depth: deep
3
- */
4
-
5
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
6
- import { Type } from "@sinclair/typebox";
7
- import { formatDeepResearch } from "../formatters/results.js";
8
- import { ALL_ENGINES, cdpAvailable, cdpMissingResult, errorResult, makeProgressTracker, runSearch } from "./shared.js";
9
-
10
- export function registerDeepResearchTool(pi: ExtensionAPI, baseDir: string) {
11
- pi.registerTool({
12
- name: "deep_research",
13
- label: "Deep Research (legacy)",
14
- description:
15
- "DEPRECATED — Use greedy_search with depth: 'deep' instead. " +
16
- "Comprehensive multi-engine research with source fetching and synthesis.",
17
- promptSnippet: "Deep multi-engine research (legacy alias to greedy_search)",
18
- parameters: Type.Object({
19
- query: Type.String({ description: "The research question" }),
20
- }),
21
- execute: async (_toolCallId, params, signal, onUpdate) => {
22
- const { query } = params as { query: string };
23
-
24
- if (!cdpAvailable(baseDir)) return cdpMissingResult();
25
-
26
- const onProgress = makeProgressTracker(ALL_ENGINES, onUpdate, "Researching", "standard");
27
-
28
- try {
29
- const data = await runSearch("all", query, ["--deep"], `${baseDir}/bin/search.mjs`, signal, onProgress);
30
- const text = formatDeepResearch(data);
31
- return { content: [{ type: "text", text: text || "No results returned." }], details: { raw: data } };
32
- } catch (e) {
33
- return errorResult("Deep research failed", e);
34
- }
35
- },
36
- });
1
+ /**
2
+ * deep_research tool handler — legacy alias to greedy_search with depth: deep
3
+ */
4
+
5
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
6
+ import { Type } from "@sinclair/typebox";
7
+ import { formatDeepResearch } from "../formatters/results.js";
8
+ import { ALL_ENGINES, cdpAvailable, cdpMissingResult, errorResult, makeProgressTracker, runSearch } from "./shared.js";
9
+
10
+ export function registerDeepResearchTool(pi: ExtensionAPI, baseDir: string) {
11
+ pi.registerTool({
12
+ name: "deep_research",
13
+ label: "Deep Research (legacy)",
14
+ description:
15
+ "DEPRECATED — Use greedy_search with depth: 'deep' instead. " +
16
+ "Comprehensive multi-engine research with source fetching and synthesis.",
17
+ promptSnippet: "Deep multi-engine research (legacy alias to greedy_search)",
18
+ parameters: Type.Object({
19
+ query: Type.String({ description: "The research question" }),
20
+ }),
21
+ execute: async (_toolCallId, params, signal, onUpdate) => {
22
+ const { query } = params as { query: string };
23
+
24
+ if (!cdpAvailable(baseDir)) return cdpMissingResult();
25
+
26
+ const onProgress = makeProgressTracker(ALL_ENGINES, onUpdate, "Researching", "standard");
27
+
28
+ try {
29
+ const data = await runSearch("all", query, ["--deep"], `${baseDir}/bin/search.mjs`, signal, onProgress);
30
+ const text = formatDeepResearch(data);
31
+ return { content: [{ type: "text", text: text || "No results returned." }], details: { raw: data } };
32
+ } catch (e) {
33
+ return errorResult("Deep research failed", e);
34
+ }
35
+ },
36
+ });
37
37
  }
@@ -1,58 +1,58 @@
1
- /**
2
- * greedy_search tool handler — multi-engine AI web search
3
- */
4
-
5
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
6
- import { Type } from "@sinclair/typebox";
7
- import { formatResults } from "../formatters/results.js";
8
- import { ALL_ENGINES, cdpAvailable, cdpMissingResult, errorResult, makeProgressTracker, runSearch } from "./shared.js";
9
-
10
- export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
11
- pi.registerTool({
12
- name: "greedy_search",
13
- label: "Greedy Search",
14
- description:
15
- "WEB SEARCH ONLY — searches live web via Perplexity, Bing Copilot, and Google AI in parallel. " +
16
- "Optionally synthesizes results with Gemini, deduplicates sources by consensus. " +
17
- "Use for: library docs, recent framework changes, error messages, best practices, current events. " +
18
- "Reports streaming progress as each engine completes.",
19
- promptSnippet: "Multi-engine AI web search with streaming progress",
20
- parameters: Type.Object({
21
- query: Type.String({ description: "The search query" }),
22
- engine: Type.Union(
23
- [Type.Literal("all"), Type.Literal("perplexity"), Type.Literal("bing"), Type.Literal("google"), Type.Literal("gemini"), Type.Literal("gem")],
24
- { description: 'Engine to use. "all" fans out to Perplexity, Bing, and Google in parallel (default).', default: "all" },
25
- ),
26
- depth: Type.Union(
27
- [Type.Literal("fast"), Type.Literal("standard"), Type.Literal("deep")],
28
- { description: "Search depth: fast (single engine, ~15-30s), standard (3 engines + synthesis, ~30-90s), deep (3 engines + source fetching + synthesis + confidence, ~60-180s). Default: standard.", default: "standard" },
29
- ),
30
- fullAnswer: Type.Optional(Type.Boolean({ description: "When true, returns the complete answer instead of a truncated preview (default: false, answers are shortened to ~300 chars to save tokens).", default: false })),
31
- }),
32
- execute: async (_toolCallId, params, signal, onUpdate) => {
33
- const { query, engine = "all", depth = "standard", fullAnswer: fullAnswerParam } = params as {
34
- query: string; engine: string; depth?: "fast" | "standard" | "deep"; fullAnswer?: boolean;
35
- };
36
-
37
- if (!cdpAvailable(baseDir)) return cdpMissingResult();
38
-
39
- const flags: string[] = [];
40
- const fullAnswer = fullAnswerParam ?? (engine !== "all");
41
- if (fullAnswer) flags.push("--full");
42
- if (depth === "deep") flags.push("--depth", "deep");
43
- else if (depth === "standard" && engine === "all") flags.push("--synthesize");
44
-
45
- const onProgress = engine === "all"
46
- ? makeProgressTracker(ALL_ENGINES, onUpdate, "Searching", depth)
47
- : undefined;
48
-
49
- try {
50
- const data = await runSearch(engine, query, flags, `${baseDir}/bin/search.mjs`, signal, onProgress);
51
- const text = formatResults(engine, data);
52
- return { content: [{ type: "text", text: text || "No results returned." }], details: { raw: data } };
53
- } catch (e) {
54
- return errorResult("Search failed", e);
55
- }
56
- },
57
- });
1
+ /**
2
+ * greedy_search tool handler — multi-engine AI web search
3
+ */
4
+
5
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
6
+ import { Type } from "@sinclair/typebox";
7
+ import { formatResults } from "../formatters/results.js";
8
+ import { ALL_ENGINES, cdpAvailable, cdpMissingResult, errorResult, makeProgressTracker, runSearch } from "./shared.js";
9
+
10
+ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
11
+ pi.registerTool({
12
+ name: "greedy_search",
13
+ label: "Greedy Search",
14
+ description:
15
+ "WEB SEARCH ONLY — searches live web via Perplexity, Bing Copilot, and Google AI in parallel. " +
16
+ "Optionally synthesizes results with Gemini, deduplicates sources by consensus. " +
17
+ "Use for: library docs, recent framework changes, error messages, best practices, current events. " +
18
+ "Reports streaming progress as each engine completes.",
19
+ promptSnippet: "Multi-engine AI web search with streaming progress",
20
+ parameters: Type.Object({
21
+ query: Type.String({ description: "The search query" }),
22
+ engine: Type.Union(
23
+ [Type.Literal("all"), Type.Literal("perplexity"), Type.Literal("bing"), Type.Literal("google"), Type.Literal("gemini"), Type.Literal("gem")],
24
+ { description: 'Engine to use. "all" fans out to Perplexity, Bing, and Google in parallel (default).', default: "all" },
25
+ ),
26
+ depth: Type.Union(
27
+ [Type.Literal("fast"), Type.Literal("standard"), Type.Literal("deep")],
28
+ { description: "Search depth: fast (single engine, ~15-30s), standard (3 engines + synthesis, ~30-90s), deep (3 engines + source fetching + synthesis + confidence, ~60-180s). Default: standard.", default: "standard" },
29
+ ),
30
+ fullAnswer: Type.Optional(Type.Boolean({ description: "When true, returns the complete answer instead of a truncated preview (default: false, answers are shortened to ~300 chars to save tokens).", default: false })),
31
+ }),
32
+ execute: async (_toolCallId, params, signal, onUpdate) => {
33
+ const { query, engine = "all", depth = "standard", fullAnswer: fullAnswerParam } = params as {
34
+ query: string; engine: string; depth?: "fast" | "standard" | "deep"; fullAnswer?: boolean;
35
+ };
36
+
37
+ if (!cdpAvailable(baseDir)) return cdpMissingResult();
38
+
39
+ const flags: string[] = [];
40
+ const fullAnswer = fullAnswerParam ?? (engine !== "all");
41
+ if (fullAnswer) flags.push("--full");
42
+ if (depth === "deep") flags.push("--depth", "deep");
43
+ else if (depth === "standard" && engine === "all") flags.push("--synthesize");
44
+
45
+ const onProgress = engine === "all"
46
+ ? makeProgressTracker(ALL_ENGINES, onUpdate, "Searching", depth)
47
+ : undefined;
48
+
49
+ try {
50
+ const data = await runSearch(engine, query, flags, `${baseDir}/bin/search.mjs`, signal, onProgress);
51
+ const text = formatResults(engine, data);
52
+ return { content: [{ type: "text", text: text || "No results returned." }], details: { raw: data } };
53
+ } catch (e) {
54
+ return errorResult("Search failed", e);
55
+ }
56
+ },
57
+ });
58
58
  }