npm - @apmantza/greedysearch-pi - Versions diffs - 1.8.0 → 1.8.2 - Mend

@apmantza/greedysearch-pi 1.8.0 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/CHANGELOG.md +10 -0
package/README.md +17 -1
package/bin/launch.mjs +366 -288
package/bin/search.mjs +148 -20
package/extractors/common.mjs +291 -279
package/extractors/gemini.mjs +146 -145
package/extractors/google-ai.mjs +125 -124
package/extractors/perplexity.mjs +145 -141
package/extractors/selectors.mjs +54 -52
package/index.ts +179 -35
package/package.json +53 -46
package/src/github.mjs +237 -237
package/src/search/chrome.mjs +222 -222
package/src/search/constants.mjs +37 -37
package/src/search/defaults.mjs +14 -14
package/src/search/engines.mjs +6 -2
package/src/search/fetch-source.mjs +229 -229
package/src/search/output.mjs +58 -58
package/src/search/sources.mjs +445 -445
package/src/search/synthesis-runner.mjs +63 -63
package/src/search/synthesis.mjs +51 -40
package/src/tools/deep-research-handler.ts +36 -36
package/src/tools/greedy-search-handler.ts +57 -57
package/src/tools/shared.ts +130 -130
package/src/types.ts +103 -103
package/test.mjs +377 -0

package/src/search/synthesis-runner.mjs CHANGED

@@ -1,64 +1,64 @@
-// src/search/synthesis-runner.mjs — Run Gemini synthesis via CDP
-//
-// Extracted from search.mjs.
-import { spawn } from "node:child_process";
-import { join } from "node:path";
-import { GREEDY_PROFILE_DIR } from "./constants.mjs";
-import { parseStructuredJson, normalizeSynthesisPayload, buildSynthesisPrompt } from "./synthesis.mjs";
-import { cdp, openNewTab, closeTab, activateTab } from "./chrome.mjs";
-import { trimText } from "./sources.mjs";
-const __dir = import.meta.dirname || new URL(".", import.meta.url).pathname.replace(/^\/([A-Z]:)/, "$1");
-export async function synthesizeWithGemini(
-	query,
-	results,
-	{ grounded = false, tabPrefix = null } = {},
-) {
-	const sources = Array.isArray(results._sources)
-		? results._sources
-		: buildSourceRegistry(results);
-	const prompt = buildSynthesisPrompt(query, results, sources, { grounded });
-	return new Promise((resolve, reject) => {
-		const extraArgs = tabPrefix ? ["--tab", String(tabPrefix)] : [];
-		const proc = spawn(
-			"node",
-			[join(__dir, "..", "..", "extractors", "gemini.mjs"), prompt, ...extraArgs],
-			{
-				stdio: ["ignore", "pipe", "pipe"],
-				env: { ...process.env, CDP_PROFILE_DIR: GREEDY_PROFILE_DIR },
-			},
-		);
-		let out = "";
-		let err = "";
-		proc.stdout.on("data", (d) => (out += d));
-		proc.stderr.on("data", (d) => (err += d));
-		const t = setTimeout(() => {
-			proc.kill();
-			reject(new Error("Gemini synthesis timed out after 180s"));
-		}, 180000);
-		proc.on("close", (code) => {
-			clearTimeout(t);
-			if (code !== 0)
-				reject(new Error(err.trim() || "gemini extractor failed"));
-			else {
-				try {
-					const raw = JSON.parse(out.trim());
-					const structured = parseStructuredJson(raw.answer || "");
-					resolve({
-						...normalizeSynthesisPayload(structured, sources, raw.answer || ""),
-						rawAnswer: raw.answer || "",
-						geminiSources: raw.sources || [],
-					});
-				} catch {
-					reject(new Error(`bad JSON from gemini: ${out.slice(0, 100)}`));
-				}
-			}
-		});
-	});
-}
-// Need to import buildSourceRegistry for fallback
+// src/search/synthesis-runner.mjs — Run Gemini synthesis via CDP
+//
+// Extracted from search.mjs.
+import { spawn } from "node:child_process";
+import { join } from "node:path";
+import { GREEDY_PROFILE_DIR } from "./constants.mjs";
+import { parseStructuredJson, normalizeSynthesisPayload, buildSynthesisPrompt } from "./synthesis.mjs";
+import { cdp, openNewTab, closeTab, activateTab } from "./chrome.mjs";
+import { trimText } from "./sources.mjs";
+const __dir = import.meta.dirname || new URL(".", import.meta.url).pathname.replace(/^\/([A-Z]:)/, "$1");
+export async function synthesizeWithGemini(
+	query,
+	results,
+	{ grounded = false, tabPrefix = null } = {},
+) {
+	const sources = Array.isArray(results._sources)
+		? results._sources
+		: buildSourceRegistry(results);
+	const prompt = buildSynthesisPrompt(query, results, sources, { grounded });
+	return new Promise((resolve, reject) => {
+		const extraArgs = tabPrefix ? ["--tab", String(tabPrefix)] : [];
+		const proc = spawn(
+			"node",
+			[join(__dir, "..", "..", "extractors", "gemini.mjs"), prompt, ...extraArgs],
+			{
+				stdio: ["ignore", "pipe", "pipe"],
+				env: { ...process.env, CDP_PROFILE_DIR: GREEDY_PROFILE_DIR },
+			},
+		);
+		let out = "";
+		let err = "";
+		proc.stdout.on("data", (d) => (out += d));
+		proc.stderr.on("data", (d) => (err += d));
+		const t = setTimeout(() => {
+			proc.kill();
+			reject(new Error("Gemini synthesis timed out after 180s"));
+		}, 180000);
+		proc.on("close", (code) => {
+			clearTimeout(t);
+			if (code !== 0)
+				reject(new Error(err.trim() || "gemini extractor failed"));
+			else {
+				try {
+					const raw = JSON.parse(out.trim());
+					const structured = parseStructuredJson(raw.answer || "");
+					resolve({
+						...normalizeSynthesisPayload(structured, sources, raw.answer || ""),
+						rawAnswer: raw.answer || "",
+						geminiSources: raw.sources || [],
+					});
+				} catch {
+					reject(new Error(`bad JSON from gemini: ${out.slice(0, 100)}`));
+				}
+			}
+		});
+	});
+}
+// Need to import buildSourceRegistry for fallback
 import { buildSourceRegistry } from "./sources.mjs";

package/src/search/synthesis.mjs CHANGED

@@ -3,12 +3,26 @@
 //
 // Extracted from search.mjs to reduce file complexity.
-import { trimText } from "./sources.mjs";
 import { ALL_ENGINES } from "./constants.mjs";
+import { trimText } from "./sources.mjs";
 export function parseStructuredJson(text) {
 	if (!text) return null;
-	const trimmed = String(text).trim();
+	let trimmed = String(text).trim();
+	// Look for BEGIN_JSON/END_JSON markers first
+	const beginIdx = trimmed.indexOf("BEGIN_JSON");
+	const endIdx = trimmed.indexOf("END_JSON");
+	if (beginIdx !== -1 && endIdx !== -1 && beginIdx < endIdx) {
+		trimmed = trimmed.slice(beginIdx + "BEGIN_JSON".length, endIdx).trim();
+	} else {
+		// Strip out common LLM preamble text before the actual JSON
+		const jsonStart = trimmed.indexOf("{");
+		if (jsonStart > 0) {
+			trimmed = trimmed.slice(jsonStart);
+		}
+	}
 	const candidates = [
 		trimmed,
 		trimmed
@@ -18,7 +32,7 @@ export function parseStructuredJson(text) {
 			.trim(),
 	];
-	const objectMatch = trimmed.match(/\{[\s\S]*\}/);
+	const objectMatch = trimmed.match(/\{[\s\S]*\}$/);
 	if (objectMatch) candidates.push(objectMatch[0]);
 	for (const candidate of candidates) {
@@ -31,7 +45,11 @@ export function parseStructuredJson(text) {
 	return null;
 }
-export function normalizeSynthesisPayload(payload, sources, fallbackAnswer = "") {
+export function normalizeSynthesisPayload(
+	payload,
+	sources,
+	fallbackAnswer = "",
+) {
 	const sourceIds = new Set(sources.map((source) => source.id));
 	const agreementLevel = [
 		"high",
@@ -61,8 +79,13 @@ export function normalizeSynthesisPayload(payload, sources, fallbackAnswer = "")
 		? payload.recommendedSources.filter((id) => sourceIds.has(id)).slice(0, 6)
 		: [];
+	// Clean up fallback answer if it contains preamble text
+	const cleanFallback = fallbackAnswer
+		? fallbackAnswer.replace(/^[\s\S]*?\{/m, "{").replace(/}\s*[\s\S]*$/m, "}")
+		: "";
 	return {
-		answer: trimText(payload?.answer || fallbackAnswer, 4000),
+		answer: trimText(payload?.answer || cleanFallback || fallbackAnswer, 4000),
 		agreement: {
 			level: agreementLevel,
 			summary: trimText(payload?.agreement?.summary || "", 280),
@@ -127,44 +150,32 @@ export function buildSynthesisPrompt(
 		engines: source.engines,
 		engineCount: source.engineCount,
 		perEngine: source.perEngine,
-		fetch:
-			source.fetch?.attempted
-				? {
-						ok: source.fetch.ok,
-						status: source.fetch.status,
-						publishedTime: source.fetch.publishedTime || "",
-						lastModified: source.fetch.lastModified || "",
-						byline: source.fetch.byline || "",
-						siteName: source.fetch.siteName || "",
-						...(grounded ? { snippet: trimText(source.fetch.snippet || "", 700) } : {}),
-					}
-				: undefined,
+		fetch: source.fetch?.attempted
+			? {
+					ok: source.fetch.ok,
+					status: source.fetch.status,
+					publishedTime: source.fetch.publishedTime || "",
+					lastModified: source.fetch.lastModified || "",
+					byline: source.fetch.byline || "",
+					siteName: source.fetch.siteName || "",
+					...(grounded
+						? { snippet: trimText(source.fetch.snippet || "", 700) }
+						: {}),
+				}
+			: undefined,
 	}));
 	return [
-		"You are synthesizing results from Perplexity, Bing Copilot, and Google AI.",
-		grounded
-			? "Use the fetched source snippets as the strongest evidence. Use engine answers for perspective and conflict detection."
-			: "Use the engine answers for perspective. Use the source registry for provenance and citations.",
-		"Prefer official docs, release notes, repositories, and maintainer-authored sources when available.",
-		"When publishedTime or lastModified is available, flag sources older than 2 years as potentially stale in caveats.",
-		"If the engines disagree, say so explicitly.",
-		"Do not invent sources. Only reference source IDs from the source registry.",
-		"Return valid JSON only. No markdown fences, no prose outside the JSON object.",
+		"Synthesize the following search results into a concise answer.",
+		"Compare the three engine responses (Perplexity, Bing, Google) and identify:",
+		"1. The main answer to the query",
+		"2. Where the engines agree",
+		"3. Where they disagree (if anywhere)",
+		"4. Any caveats or limitations",
+		"Use source IDs like S1, S2 when citing sources.",
+		"Format: Start with a brief answer, then list key points.",
 		"",
-		"JSON schema:",
-		"{",
-		'  "answer": "short direct answer",',
-		'  "agreement": { "level": "high|medium|low|mixed|conflicting", "summary": "..." },',
-		'  "differences": ["..."],',
-		'  "caveats": ["..."],',
-		'  "claims": [',
-		'    { "claim": "...", "support": "strong|moderate|weak|conflicting", "sourceIds": ["S1"] }',
-		"  ],",
-		'  "recommendedSources": ["S1", "S2"]',
-		"}",
-		"",
-		`User query: ${query}`,
+		`Query: ${query}`,
 		"",
 		`Engine results:\n${JSON.stringify(engineSummaries, null, 2)}`,
 		"",
@@ -209,4 +220,4 @@ export function buildConfidence(out) {
 				: 0,
 		sourceTypeBreakdown,
 	};
-}
+}

package/src/tools/deep-research-handler.ts CHANGED

@@ -1,37 +1,37 @@
-/**
- * deep_research tool handler — legacy alias to greedy_search with depth: deep
- */
-import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
-import { Type } from "@sinclair/typebox";
-import { formatDeepResearch } from "../formatters/results.js";
-import { ALL_ENGINES, cdpAvailable, cdpMissingResult, errorResult, makeProgressTracker, runSearch } from "./shared.js";
-export function registerDeepResearchTool(pi: ExtensionAPI, baseDir: string) {
-	pi.registerTool({
-		name: "deep_research",
-		label: "Deep Research (legacy)",
-		description:
-			"DEPRECATED — Use greedy_search with depth: 'deep' instead. " +
-			"Comprehensive multi-engine research with source fetching and synthesis.",
-		promptSnippet: "Deep multi-engine research (legacy alias to greedy_search)",
-		parameters: Type.Object({
-			query: Type.String({ description: "The research question" }),
-		}),
-		execute: async (_toolCallId, params, signal, onUpdate) => {
-			const { query } = params as { query: string };
-			if (!cdpAvailable(baseDir)) return cdpMissingResult();
-			const onProgress = makeProgressTracker(ALL_ENGINES, onUpdate, "Researching", "standard");
-			try {
-				const data = await runSearch("all", query, ["--deep"], `${baseDir}/bin/search.mjs`, signal, onProgress);
-				const text = formatDeepResearch(data);
-				return { content: [{ type: "text", text: text || "No results returned." }], details: { raw: data } };
-			} catch (e) {
-				return errorResult("Deep research failed", e);
-			}
-		},
-	});
+/**
+ * deep_research tool handler — legacy alias to greedy_search with depth: deep
+ */
+import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
+import { Type } from "@sinclair/typebox";
+import { formatDeepResearch } from "../formatters/results.js";
+import { ALL_ENGINES, cdpAvailable, cdpMissingResult, errorResult, makeProgressTracker, runSearch } from "./shared.js";
+export function registerDeepResearchTool(pi: ExtensionAPI, baseDir: string) {
+	pi.registerTool({
+		name: "deep_research",
+		label: "Deep Research (legacy)",
+		description:
+			"DEPRECATED — Use greedy_search with depth: 'deep' instead. " +
+			"Comprehensive multi-engine research with source fetching and synthesis.",
+		promptSnippet: "Deep multi-engine research (legacy alias to greedy_search)",
+		parameters: Type.Object({
+			query: Type.String({ description: "The research question" }),
+		}),
+		execute: async (_toolCallId, params, signal, onUpdate) => {
+			const { query } = params as { query: string };
+			if (!cdpAvailable(baseDir)) return cdpMissingResult();
+			const onProgress = makeProgressTracker(ALL_ENGINES, onUpdate, "Researching", "standard");
+			try {
+				const data = await runSearch("all", query, ["--deep"], `${baseDir}/bin/search.mjs`, signal, onProgress);
+				const text = formatDeepResearch(data);
+				return { content: [{ type: "text", text: text || "No results returned." }], details: { raw: data } };
+			} catch (e) {
+				return errorResult("Deep research failed", e);
+			}
+		},
+	});
 }

package/src/tools/greedy-search-handler.ts CHANGED

@@ -1,58 +1,58 @@
-/**
- * greedy_search tool handler — multi-engine AI web search
- */
-import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
-import { Type } from "@sinclair/typebox";
-import { formatResults } from "../formatters/results.js";
-import { ALL_ENGINES, cdpAvailable, cdpMissingResult, errorResult, makeProgressTracker, runSearch } from "./shared.js";
-export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
-	pi.registerTool({
-		name: "greedy_search",
-		label: "Greedy Search",
-		description:
-			"WEB SEARCH ONLY — searches live web via Perplexity, Bing Copilot, and Google AI in parallel. " +
-			"Optionally synthesizes results with Gemini, deduplicates sources by consensus. " +
-			"Use for: library docs, recent framework changes, error messages, best practices, current events. " +
-			"Reports streaming progress as each engine completes.",
-		promptSnippet: "Multi-engine AI web search with streaming progress",
-		parameters: Type.Object({
-			query: Type.String({ description: "The search query" }),
-			engine: Type.Union(
-				[Type.Literal("all"), Type.Literal("perplexity"), Type.Literal("bing"), Type.Literal("google"), Type.Literal("gemini"), Type.Literal("gem")],
-				{ description: 'Engine to use. "all" fans out to Perplexity, Bing, and Google in parallel (default).', default: "all" },
-			),
-			depth: Type.Union(
-				[Type.Literal("fast"), Type.Literal("standard"), Type.Literal("deep")],
-				{ description: "Search depth: fast (single engine, ~15-30s), standard (3 engines + synthesis, ~30-90s), deep (3 engines + source fetching + synthesis + confidence, ~60-180s). Default: standard.", default: "standard" },
-			),
-			fullAnswer: Type.Optional(Type.Boolean({ description: "When true, returns the complete answer instead of a truncated preview (default: false, answers are shortened to ~300 chars to save tokens).", default: false })),
-		}),
-		execute: async (_toolCallId, params, signal, onUpdate) => {
-			const { query, engine = "all", depth = "standard", fullAnswer: fullAnswerParam } = params as {
-				query: string; engine: string; depth?: "fast" | "standard" | "deep"; fullAnswer?: boolean;
-			};
-			if (!cdpAvailable(baseDir)) return cdpMissingResult();
-			const flags: string[] = [];
-			const fullAnswer = fullAnswerParam ?? (engine !== "all");
-			if (fullAnswer) flags.push("--full");
-			if (depth === "deep") flags.push("--depth", "deep");
-			else if (depth === "standard" && engine === "all") flags.push("--synthesize");
-			const onProgress = engine === "all"
-				? makeProgressTracker(ALL_ENGINES, onUpdate, "Searching", depth)
-				: undefined;
-			try {
-				const data = await runSearch(engine, query, flags, `${baseDir}/bin/search.mjs`, signal, onProgress);
-				const text = formatResults(engine, data);
-				return { content: [{ type: "text", text: text || "No results returned." }], details: { raw: data } };
-			} catch (e) {
-				return errorResult("Search failed", e);
-			}
-		},
-	});
+/**
+ * greedy_search tool handler — multi-engine AI web search
+ */
+import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
+import { Type } from "@sinclair/typebox";
+import { formatResults } from "../formatters/results.js";
+import { ALL_ENGINES, cdpAvailable, cdpMissingResult, errorResult, makeProgressTracker, runSearch } from "./shared.js";
+export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
+	pi.registerTool({
+		name: "greedy_search",
+		label: "Greedy Search",
+		description:
+			"WEB SEARCH ONLY — searches live web via Perplexity, Bing Copilot, and Google AI in parallel. " +
+			"Optionally synthesizes results with Gemini, deduplicates sources by consensus. " +
+			"Use for: library docs, recent framework changes, error messages, best practices, current events. " +
+			"Reports streaming progress as each engine completes.",
+		promptSnippet: "Multi-engine AI web search with streaming progress",
+		parameters: Type.Object({
+			query: Type.String({ description: "The search query" }),
+			engine: Type.Union(
+				[Type.Literal("all"), Type.Literal("perplexity"), Type.Literal("bing"), Type.Literal("google"), Type.Literal("gemini"), Type.Literal("gem")],
+				{ description: 'Engine to use. "all" fans out to Perplexity, Bing, and Google in parallel (default).', default: "all" },
+			),
+			depth: Type.Union(
+				[Type.Literal("fast"), Type.Literal("standard"), Type.Literal("deep")],
+				{ description: "Search depth: fast (single engine, ~15-30s), standard (3 engines + synthesis, ~30-90s), deep (3 engines + source fetching + synthesis + confidence, ~60-180s). Default: standard.", default: "standard" },
+			),
+			fullAnswer: Type.Optional(Type.Boolean({ description: "When true, returns the complete answer instead of a truncated preview (default: false, answers are shortened to ~300 chars to save tokens).", default: false })),
+		}),
+		execute: async (_toolCallId, params, signal, onUpdate) => {
+			const { query, engine = "all", depth = "standard", fullAnswer: fullAnswerParam } = params as {
+				query: string; engine: string; depth?: "fast" | "standard" | "deep"; fullAnswer?: boolean;
+			};
+			if (!cdpAvailable(baseDir)) return cdpMissingResult();
+			const flags: string[] = [];
+			const fullAnswer = fullAnswerParam ?? (engine !== "all");
+			if (fullAnswer) flags.push("--full");
+			if (depth === "deep") flags.push("--depth", "deep");
+			else if (depth === "standard" && engine === "all") flags.push("--synthesize");
+			const onProgress = engine === "all"
+				? makeProgressTracker(ALL_ENGINES, onUpdate, "Searching", depth)
+				: undefined;
+			try {
+				const data = await runSearch(engine, query, flags, `${baseDir}/bin/search.mjs`, signal, onProgress);
+				const text = formatResults(engine, data);
+				return { content: [{ type: "text", text: text || "No results returned." }], details: { raw: data } };
+			} catch (e) {
+				return errorResult("Search failed", e);
+			}
+		},
+	});
 }