npm - gsd-pi - Versions diffs - 2.3.4 → 2.3.5 - Mend

gsd-pi 2.3.4 → 2.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "gsd-pi",
-  "version": "2.3.4",
+  "version": "2.3.5",
   "description": "GSD — Get Shit Done coding agent",
   "license": "MIT",
   "repository": {

package/src/resources/extensions/google-search/index.ts ADDED Viewed

@@ -0,0 +1,323 @@
+/**
+ * Google Search Extension
+ *
+ * Provides a `google_search` tool that performs web searches via Gemini's
+ * Google Search grounding feature. Uses the user's existing GEMINI_API_KEY
+ * and Google Cloud GenAI credits.
+ *
+ * The tool sends queries to Gemini Flash with `googleSearch: {}` enabled.
+ * Gemini internally performs Google searches, synthesizes an answer, and
+ * returns it with source URLs from grounding metadata.
+ */
+import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
+import {
+	DEFAULT_MAX_BYTES,
+	DEFAULT_MAX_LINES,
+	formatSize,
+	truncateHead,
+} from "@mariozechner/pi-coding-agent";
+import { Text } from "@mariozechner/pi-tui";
+import { Type } from "@sinclair/typebox";
+import { GoogleGenAI } from "@google/genai";
+// ── Types ────────────────────────────────────────────────────────────────────
+/** One web source taken from a grounding chunk of the Gemini response. */
+interface SearchSource {
+	/** Chunk title; "Untitled" when the chunk carries none. */
+	title: string;
+	/** Chunk URI; empty string when the chunk carries none. */
+	uri: string;
+	/** Chunk domain when present, otherwise the title is used. */
+	domain: string;
+}
+/** Parsed outcome of one grounded search; this is the value stored in the cache. */
+interface SearchResult {
+	/** Response text; empty string when the response has no text. */
+	answer: string;
+	/** Sources collected from the grounding chunks. */
+	sources: SearchSource[];
+	/** Web search queries listed in the grounding metadata. */
+	searchQueries: string[];
+	/** Set to false when the result is built from an API response. */
+	cached: boolean;
+}
+/** Metadata attached to every tool result as `details` and read by `renderResult`. */
+interface SearchDetails {
+	/** Query string exactly as passed to the tool. */
+	query: string;
+	/** Number of sources in the result; 0 on error. */
+	sourceCount: number;
+	/** True when the result was served from the in-session cache. */
+	cached: boolean;
+	/** Milliseconds elapsed since `execute` started. */
+	durationMs: number;
+	/** Present on failure only, formatted as "<errorType>: <message>". */
+	error?: string;
+}
+// ── Lazy singleton client ────────────────────────────────────────────────────
+let client: GoogleGenAI | null = null;
+/**
+ * Returns the module-wide GoogleGenAI client, constructing it on first use
+ * from the GEMINI_API_KEY environment variable and reusing it afterwards.
+ */
+function getClient(): GoogleGenAI {
+	if (client !== null) {
+		return client;
+	}
+	const created = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! });
+	client = created;
+	return created;
+}
+// ── In-session cache ─────────────────────────────────────────────────────────
+const resultCache = new Map<string, SearchResult>();
+/**
+ * Normalizes a query into its cache key: lower-cased, with leading and
+ * trailing whitespace removed.
+ */
+function cacheKey(query: string): string {
+	const lowered = query.toLowerCase();
+	return lowered.trim();
+}
+// ── Extension ────────────────────────────────────────────────────────────────
+/**
+ * Registers the `google_search` tool and a `session_start` warning on the
+ * extension API.
+ *
+ * The tool sends the query to Gemini with Google Search grounding enabled,
+ * extracts the answer text, the grounding sources and the performed search
+ * queries from the response, stores the parsed result in `resultCache`, and
+ * returns the formatted (and, if necessary, truncated) text. Failures are
+ * returned as error-shaped tool results rather than thrown.
+ *
+ * @param pi - Extension API used to register the tool and the event handler.
+ */
+export default function (pi: ExtensionAPI) {
+	const defaultSourceCount = 5;
+	const maxSourceCount = 10;
+	const previewLineCount = 8;
+	/**
+	 * Clamps the requested source count to an integer in [1, maxSourceCount].
+	 * The schema accepts any number, so fractional values are floored to keep
+	 * slicing and the "omitted" count consistent.
+	 */
+	const clampSources = (requested: number | undefined): number => {
+		const value = requested ?? defaultSourceCount;
+		if (!Number.isFinite(value)) return defaultSourceCount;
+		return Math.min(Math.max(Math.floor(value), 1), maxSourceCount);
+	};
+	/**
+	 * Formats a result and applies the line/byte truncation limits. Used for
+	 * both fresh and cached results so the two paths return the same text.
+	 */
+	const renderOutput = (result: SearchResult, maxSources: number): string => {
+		const truncation = truncateHead(formatOutput(result, maxSources), {
+			maxLines: DEFAULT_MAX_LINES,
+			maxBytes: DEFAULT_MAX_BYTES,
+		});
+		if (!truncation.truncated) return truncation.content;
+		return (
+			truncation.content +
+			`\n\n[Truncated: showing ${truncation.outputLines}/${truncation.totalLines} lines` +
+			` (${formatSize(truncation.outputBytes)} of ${formatSize(truncation.totalBytes)})]`
+		);
+	};
+	/** Builds the error-shaped tool result shared by every failure path. */
+	const failure = (query: string, startTime: number, text: string, error: string) => {
+		const details: SearchDetails = {
+			query,
+			sourceCount: 0,
+			cached: false,
+			durationMs: Date.now() - startTime,
+			error,
+		};
+		return { content: [{ type: "text" as const, text }], isError: true, details };
+	};
+	/** Builds the success-shaped tool result. */
+	const success = (query: string, startTime: number, text: string, sourceCount: number, cached: boolean) => {
+		const details: SearchDetails = {
+			query,
+			sourceCount,
+			cached,
+			durationMs: Date.now() - startTime,
+		};
+		return { content: [{ type: "text" as const, text }], details };
+	};
+	pi.registerTool({
+		name: "google_search",
+		label: "Google Search",
+		description:
+			"Search the web using Google Search via Gemini. " +
+			"Returns an AI-synthesized answer grounded in Google Search results, plus source URLs. " +
+			"Use this when you need current information from the web: recent events, documentation, " +
+			"product details, technical references, news, etc. " +
+			"Requires GEMINI_API_KEY. Alternative to Brave-based search tools for users with Google Cloud credits.",
+		promptSnippet: "Search the web via Google Search to get current information with sources",
+		promptGuidelines: [
+			"Use google_search when you need up-to-date web information that isn't in your training data.",
+			"Be specific with queries for better results, e.g. 'Next.js 15 app router migration guide' not just 'Next.js'.",
+			"The tool returns both an answer and source URLs. Cite sources when sharing results with the user.",
+			"Results are cached per-session, so repeated identical queries are free.",
+			"You can still use fetch_page to read a specific URL if needed after getting results from google_search.",
+		],
+		parameters: Type.Object({
+			query: Type.String({
+				description: "The search query, e.g. 'latest Node.js LTS version' or 'how to configure Tailwind v4'",
+			}),
+			maxSources: Type.Optional(
+				Type.Number({
+					description: "Maximum number of source URLs to include (default 5, max 10).",
+					minimum: 1,
+					maximum: 10,
+				}),
+			),
+		}),
+		async execute(_toolCallId, params, signal, _onUpdate, _ctx) {
+			const startTime = Date.now();
+			const maxSources = clampSources(params.maxSources);
+			// Without a key the request cannot succeed; report it before doing any work.
+			if (!process.env.GEMINI_API_KEY) {
+				return failure(
+					params.query,
+					startTime,
+					"Error: GEMINI_API_KEY is not set. Please set this environment variable to use Google Search.\n\nExample: export GEMINI_API_KEY=your_key",
+					"auth_error: GEMINI_API_KEY not set",
+				);
+			}
+			// Serve repeated queries from the in-session cache.
+			const key = cacheKey(params.query);
+			const cachedResult = resultCache.get(key);
+			if (cachedResult !== undefined) {
+				return success(
+					params.query,
+					startTime,
+					renderOutput(cachedResult, maxSources),
+					cachedResult.sources.length,
+					true,
+				);
+			}
+			// Call Gemini with Google Search grounding
+			let result: SearchResult;
+			try {
+				const ai = getClient();
+				const response = await ai.models.generateContent({
+					model: "gemini-3-flash-preview",
+					contents: params.query,
+					config: {
+						tools: [{ googleSearch: {} }],
+						abortSignal: signal,
+					},
+				});
+				const answer = response.text ?? "";
+				const grounding = response.candidates?.[0]?.groundingMetadata;
+				// Parse sources from grounding chunks
+				const sources: SearchSource[] = [];
+				const seenKeys = new Set<string>();
+				for (const chunk of grounding?.groundingChunks ?? []) {
+					const web = chunk.web;
+					if (!web) continue;
+					const uri = web.uri ?? "";
+					const title = web.title ?? "Untitled";
+					// Dedupe by title since URIs are redirect URLs that differ per call.
+					// Chunks without a title are keyed by URI instead, so they do not
+					// all collapse into a single "Untitled" entry.
+					const dedupeKey = web.title ? `title:${web.title}` : `uri:${uri}`;
+					if (seenKeys.has(dedupeKey)) continue;
+					seenKeys.add(dedupeKey);
+					// Use the chunk's domain when the response provides one; otherwise
+					// fall back to the title (typically the domain name, e.g. "wikipedia.org").
+					sources.push({ title, uri, domain: web.domain ?? title });
+				}
+				// Search queries Gemini actually performed
+				const searchQueries = grounding?.webSearchQueries ?? [];
+				result = { answer, sources, searchQueries, cached: false };
+			} catch (err: unknown) {
+				const msg = err instanceof Error ? err.message : String(err);
+				// Classify by substrings of the error message.
+				let errorType = "api_error";
+				if (msg.includes("401") || msg.includes("UNAUTHENTICATED")) {
+					errorType = "auth_error";
+				} else if (msg.includes("429") || msg.includes("RESOURCE_EXHAUSTED") || msg.includes("quota")) {
+					errorType = "rate_limit";
+				}
+				return failure(
+					params.query,
+					startTime,
+					`Google Search failed (${errorType}): ${msg}`,
+					`${errorType}: ${msg}`,
+				);
+			}
+			// Only successful responses reach this point and get cached.
+			resultCache.set(key, result);
+			return success(params.query, startTime, renderOutput(result, maxSources), result.sources.length, false);
+		},
+		renderCall(args, theme) {
+			let text = theme.fg("toolTitle", theme.bold("google_search "));
+			text += theme.fg("accent", `"${args.query}"`);
+			return new Text(text, 0, 0);
+		},
+		renderResult(result, { isPartial, expanded }, theme) {
+			const d = result.details as SearchDetails | undefined;
+			if (isPartial) return new Text(theme.fg("warning", "Searching Google..."), 0, 0);
+			if (result.isError || d?.error) {
+				return new Text(theme.fg("error", `Error: ${d?.error ?? "unknown"}`), 0, 0);
+			}
+			let text = theme.fg("success", `${d?.sourceCount ?? 0} sources`);
+			text += theme.fg("dim", ` (${d?.durationMs ?? 0}ms)`);
+			if (d?.cached) text += theme.fg("dim", " · cached");
+			if (expanded) {
+				const content = result.content[0];
+				if (content?.type === "text") {
+					// Show only the first few lines; mark the rest with an ellipsis.
+					const allLines = content.text.split("\n");
+					text += "\n\n" + theme.fg("dim", allLines.slice(0, previewLineCount).join("\n"));
+					if (allLines.length > previewLineCount) {
+						text += "\n" + theme.fg("muted", "...");
+					}
+				}
+			}
+			return new Text(text, 0, 0);
+		},
+	});
+	// ── Startup notification ─────────────────────────────────────────────────
+	pi.on("session_start", async (_event, ctx) => {
+		if (!process.env.GEMINI_API_KEY) {
+			ctx.ui.notify(
+				"Google Search: No GEMINI_API_KEY set. The google_search tool will not work until this is configured.",
+				"warning",
+			);
+		}
+	});
+}
+// ── Output formatting ────────────────────────────────────────────────────────
+/**
+ * Renders a search result as plain text: the answer (or a placeholder when
+ * it is empty), a numbered source list limited to `maxSources` entries with
+ * a note about any omitted ones, and the list of searches performed.
+ *
+ * @param result - Parsed search result to render.
+ * @param maxSources - Maximum number of sources to list.
+ * @returns The rendered text, lines joined with "\n".
+ */
+function formatOutput(result: SearchResult, maxSources: number): string {
+	const { answer, sources, searchQueries } = result;
+	const out: string[] = [answer ? answer : "(No answer text returned from search)", ""];
+	if (sources.length === 0) {
+		out.push("(No source URLs found in grounding metadata)");
+	} else {
+		out.push("Sources:");
+		sources.slice(0, maxSources).forEach((src, idx) => {
+			out.push(`[${idx + 1}] ${src.title} - ${src.domain}`, `    ${src.uri}`);
+		});
+		const omitted = sources.length - maxSources;
+		if (omitted > 0) {
+			out.push(`(${omitted} more sources omitted)`);
+		}
+	}
+	if (searchQueries.length > 0) {
+		const quoted = searchQueries.map((q) => `"${q}"`).join(", ");
+		out.push("", `Searches performed: ${quoted}`);
+	}
+	return out.join("\n");
+}

package/src/resources/extensions/google-search/package.json ADDED Viewed

@@ -0,0 +1,9 @@
+{
+  "name": "pi-extension-google-search",
+  "private": true,
+  "version": "1.0.0",
+  "type": "module",
+  "pi": {
+    "extensions": ["./index.ts"]
+  }
+}

package/src/resources/extensions/voice/index.ts CHANGED Viewed

@@ -26,7 +26,6 @@ export default function (pi: ExtensionAPI) {
 	let active = false;
 	let recognizerProcess: ChildProcess | null = null;
-	let finalized = "";
 	let flashOn = true;
 	let flashTimer: ReturnType<typeof setInterval> | null = null;
 	let footerTui: { requestRender: () => void } | null = null;
@@ -122,7 +121,6 @@ export default function (pi: ExtensionAPI) {
 		}
 		active = true;
-		finalized = "";
 		setVoiceFooter(ctx, true);
 		await runVoiceSession(ctx);
 	}
@@ -161,14 +159,15 @@ export default function (pi: ExtensionAPI) {
 	async function runVoiceSession(ctx: ExtensionContext): Promise<void> {
 		return new Promise<void>((resolve) => {
+			// The Swift recognizer handles accumulation across pause-induced
+			// transcription resets. Both PARTIAL and FINAL messages contain
+			// the full accumulated text, so we just pass them through.
 			startRecognizer(
 				(text) => {
-					const full = finalized + (finalized && text ? " " : "") + text;
-					ctx.ui.setEditorText(full);
+					ctx.ui.setEditorText(text);
 				},
 				(text) => {
-					finalized = (finalized ? finalized + " " : "") + text;
-					ctx.ui.setEditorText(finalized);
+					ctx.ui.setEditorText(text);
 				},
 				(msg) => ctx.ui.notify(`Voice: ${msg}`, "error"),
 				() => {},

package/src/resources/extensions/voice/speech-recognizer.swift CHANGED Viewed

@@ -45,15 +45,93 @@ do {
     exit(1)
 }
-var lastText = ""
+// Accumulated finalized text from previous recognition segments.
+// On-device recognition (especially macOS/iOS 18+) can reset
+// bestTranscription.formattedString after a pause, discarding
+// previous text. We detect this by comparing each new partial with
+// the previous one by word count: a reset is assumed when the count
+// drops below half of the previous count, or when it drops at all and
+// the first word differs. When that happens we treat the previous
+// partial as finalized and start accumulating the new segment on top.
+var accumulated = ""
+var lastPartialText = ""
+var lastEmitted = ""
 recognizer.recognitionTask(with: request) { result, error in
     if let result = result {
         let text = result.bestTranscription.formattedString
-        if text != lastText {
-            lastText = text
-            let prefix = result.isFinal ? "FINAL" : "PARTIAL"
-            print("\(prefix):\(text)")
+        if result.isFinal {
+            // True final from the recognizer — commit everything
+            let full: String
+            // Check if the final text already includes accumulated content
+            // (some OS versions give cumulative finals, others reset)
+            if !accumulated.isEmpty && !text.lowercased().hasPrefix(accumulated.lowercased()) {
+                full = accumulated + " " + text
+            } else if !accumulated.isEmpty && text.count < accumulated.count {
+                // Final is shorter than what we accumulated — use accumulated + new
+                full = accumulated + " " + text
+            } else {
+                full = text
+            }
+            accumulated = ""
+            lastPartialText = ""
+            if full != lastEmitted {
+                lastEmitted = full
+                print("FINAL:\(full)")
+            }
+            return
+        }
+        // Detect transcription reset: if the new partial text is significantly
+        // shorter than what we had, or doesn't start with the previous text,
+        // the recognizer has reset after a pause. Finalize what we had.
+        let prevText = lastPartialText
+        if !prevText.isEmpty && !text.isEmpty {
+            let prevWords = prevText.split(separator: " ")
+            let newWords = text.split(separator: " ")
+            // Reset detection: either the word count dropped below half of
+            // the previous count, or it dropped at all AND the first word
+            // differs (i.e. it's truly new speech, not just the recognizer
+            // revising the last word)
+            let looksLikeReset: Bool
+            if newWords.count < prevWords.count / 2 {
+                // Significant drop in word count — likely a reset
+                looksLikeReset = true
+            } else if newWords.count < prevWords.count &&
+                      !prevWords.isEmpty && !newWords.isEmpty &&
+                      newWords[0] != prevWords[0] {
+                // Different starting word + fewer words — reset
+                looksLikeReset = true
+            } else {
+                looksLikeReset = false
+            }
+            if looksLikeReset {
+                // Commit the previous partial text to accumulated
+                if accumulated.isEmpty {
+                    accumulated = prevText
+                } else {
+                    accumulated = accumulated + " " + prevText
+                }
+                // Emit a FINAL for the committed text so the TS side updates
+                print("FINAL:\(accumulated)")
+                lastEmitted = accumulated
+            }
+        }
+        lastPartialText = text
+        // Build the full display text
+        let displayText: String
+        if accumulated.isEmpty {
+            displayText = text
+        } else {
+            displayText = accumulated + " " + text
+        }
+        if displayText != lastEmitted {
+            lastEmitted = displayText
+            print("PARTIAL:\(displayText)")
         }
     }
     if let error = error {