gsd-pi 2.3.4 → 2.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gsd-pi",
3
- "version": "2.3.4",
3
+ "version": "2.3.5",
4
4
  "description": "GSD — Get Shit Done coding agent",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -0,0 +1,323 @@
1
+ /**
2
+ * Google Search Extension
3
+ *
4
+ * Provides a `google_search` tool that performs web searches via Gemini's
5
+ * Google Search grounding feature. Uses the user's existing GEMINI_API_KEY
6
+ * and Google Cloud GenAI credits.
7
+ *
8
+ * The tool sends queries to Gemini Flash with `googleSearch: {}` enabled.
9
+ * Gemini internally performs Google searches, synthesizes an answer, and
10
+ * returns it with source URLs from grounding metadata.
11
+ */
12
+
13
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
14
+ import {
15
+ DEFAULT_MAX_BYTES,
16
+ DEFAULT_MAX_LINES,
17
+ formatSize,
18
+ truncateHead,
19
+ } from "@mariozechner/pi-coding-agent";
20
+ import { Text } from "@mariozechner/pi-tui";
21
+ import { Type } from "@sinclair/typebox";
22
+ import { GoogleGenAI } from "@google/genai";
23
+
24
+ // ── Types ────────────────────────────────────────────────────────────────────
25
+
26
/** One web page cited in the grounding metadata of a search response. */
interface SearchSource {
  /** Title reported for the page ("Untitled" is substituted when absent). */
  title: string;
  /** URL reported for the page (empty string when absent). */
  uri: string;
  /** Domain reported for the page; falls back to the title when absent. */
  domain: string;
}

/** Parsed outcome of one grounded search, as stored in the in-session cache. */
interface SearchResult {
  /** Answer text returned by the model (may be empty). */
  answer: string;
  /** Deduplicated sources extracted from the grounding chunks. */
  sources: SearchSource[];
  /** Search queries listed in the grounding metadata. */
  searchQueries: string[];
  /** Cache flag carried with the result. */
  cached: boolean;
}

/** Metadata attached to every tool result and read back by `renderResult`. */
interface SearchDetails {
  /** The query exactly as the caller supplied it. */
  query: string;
  /** Number of sources in the result (0 on error). */
  sourceCount: number;
  /** True when the result was served from the in-session cache. */
  cached: boolean;
  /** Wall-clock time spent inside `execute`, in milliseconds. */
  durationMs: number;
  /** Present only on failure, formatted as "<error type>: <message>". */
  error?: string;
}
46
+
47
+ // ── Lazy singleton client ────────────────────────────────────────────────────
48
+
49
/** Shared Gemini client; remains null until `getClient()` is first called. */
let client: GoogleGenAI | null = null;

/**
 * Returns the shared Gemini client, constructing it on first use with the
 * value of the GEMINI_API_KEY environment variable.
 *
 * The key is read only when the client is constructed, and the non-null
 * assertion means this function does not itself verify that the variable is
 * set.
 */
function getClient(): GoogleGenAI {
  if (client !== null) {
    return client;
  }
  const apiKey = process.env.GEMINI_API_KEY!;
  client = new GoogleGenAI({ apiKey });
  return client;
}
57
+
58
+ // ── In-session cache ─────────────────────────────────────────────────────────
59
+
60
/** Search results keyed by normalized query; lives as long as the module. */
const resultCache = new Map<string, SearchResult>();

/**
 * Builds the cache key for a query by lowercasing it and stripping leading
 * and trailing whitespace, so queries that differ only in letter case or
 * surrounding whitespace share one cache entry.
 */
function cacheKey(query: string): string {
  const lowered = query.toLowerCase();
  return lowered.trim();
}
65
+
66
+ // ── Extension ────────────────────────────────────────────────────────────────
67
+
68
/**
 * Maps an error message from the Gemini call onto a coarse error category:
 * "auth_error", "rate_limit", or the catch-all "api_error".
 */
function classifySearchError(message: string): string {
  if (message.includes("401") || message.includes("UNAUTHENTICATED")) {
    return "auth_error";
  }
  if (message.includes("429") || message.includes("RESOURCE_EXHAUSTED") || message.includes("quota")) {
    return "rate_limit";
  }
  return "api_error";
}

/**
 * Applies the default line/byte limits to tool output and, when anything was
 * cut, appends a footer stating how much of the text is shown.
 */
function truncateOutput(text: string): string {
  const truncation = truncateHead(text, {
    maxLines: DEFAULT_MAX_LINES,
    maxBytes: DEFAULT_MAX_BYTES,
  });
  if (!truncation.truncated) {
    return truncation.content;
  }
  return (
    truncation.content +
    `\n\n[Truncated: showing ${truncation.outputLines}/${truncation.totalLines} lines` +
    ` (${formatSize(truncation.outputBytes)} of ${formatSize(truncation.totalBytes)})]`
  );
}

/**
 * Extension entry point.
 *
 * Registers the `google_search` tool, which sends the query to Gemini with
 * Google Search grounding enabled and returns the answer text plus the
 * sources and search queries found in the grounding metadata. Successful
 * results are cached in memory by normalized query. Also registers a
 * `session_start` handler that warns when GEMINI_API_KEY is not set.
 *
 * @param pi Extension API used to register the tool and the event handler.
 */
export default function (pi: ExtensionAPI) {
  pi.registerTool({
    name: "google_search",
    label: "Google Search",
    description:
      "Search the web using Google Search via Gemini. " +
      "Returns an AI-synthesized answer grounded in Google Search results, plus source URLs. " +
      "Use this when you need current information from the web: recent events, documentation, " +
      "product details, technical references, news, etc. " +
      "Requires GEMINI_API_KEY. Alternative to Brave-based search tools for users with Google Cloud credits.",
    promptSnippet: "Search the web via Google Search to get current information with sources",
    promptGuidelines: [
      "Use google_search when you need up-to-date web information that isn't in your training data.",
      "Be specific with queries for better results, e.g. 'Next.js 15 app router migration guide' not just 'Next.js'.",
      "The tool returns both an answer and source URLs. Cite sources when sharing results with the user.",
      "Results are cached per-session, so repeated identical queries are free.",
      "You can still use fetch_page to read a specific URL if needed after getting results from google_search.",
    ],
    parameters: Type.Object({
      query: Type.String({
        description: "The search query, e.g. 'latest Node.js LTS version' or 'how to configure Tailwind v4'",
      }),
      maxSources: Type.Optional(
        Type.Number({
          description: "Maximum number of source URLs to include (default 5, max 10).",
          minimum: 1,
          maximum: 10,
        }),
      ),
    }),

    async execute(_toolCallId, params, signal, _onUpdate, _ctx) {
      const startTime = Date.now();
      // Clamp defensively even though the schema already declares 1..10.
      const maxSources = Math.min(Math.max(params.maxSources ?? 5, 1), 10);

      // Without a key there is nothing to call; report it as an auth error.
      if (!process.env.GEMINI_API_KEY) {
        const details: SearchDetails = {
          query: params.query,
          sourceCount: 0,
          cached: false,
          durationMs: Date.now() - startTime,
          error: "auth_error: GEMINI_API_KEY not set",
        };
        return {
          content: [
            {
              type: "text",
              text: "Error: GEMINI_API_KEY is not set. Please set this environment variable to use Google Search.\n\nExample: export GEMINI_API_KEY=your_key",
            },
          ],
          isError: true,
          details,
        };
      }

      // Serve repeated queries from the cache. The same truncation as the
      // fresh path is applied so both paths return equally bounded output.
      const key = cacheKey(params.query);
      const cached = resultCache.get(key);
      if (cached) {
        const details: SearchDetails = {
          query: params.query,
          sourceCount: cached.sources.length,
          cached: true,
          durationMs: Date.now() - startTime,
        };
        return {
          content: [{ type: "text", text: truncateOutput(formatOutput(cached, maxSources)) }],
          details,
        };
      }

      // Call Gemini with Google Search grounding
      let result: SearchResult;
      try {
        const ai = getClient();
        const response = await ai.models.generateContent({
          model: "gemini-3-flash-preview",
          contents: params.query,
          config: {
            tools: [{ googleSearch: {} }],
            abortSignal: signal,
          },
        });

        const answer = response.text ?? "";
        const grounding = response.candidates?.[0]?.groundingMetadata;

        // Parse sources from grounding chunks
        const sources: SearchSource[] = [];
        const seenKeys = new Set<string>();
        for (const chunk of grounding?.groundingChunks ?? []) {
          const web = chunk.web;
          if (!web) continue;

          const uri = web.uri ?? "";
          const title = web.title ?? "Untitled";
          // Dedupe by title since URIs are redirect URLs that differ per call.
          // Chunks without a title are keyed by URI instead, so that distinct
          // untitled pages are not collapsed into a single "Untitled" entry.
          const dedupeKey = web.title ? `title:${web.title}` : `uri:${uri}`;
          if (seenKeys.has(dedupeKey)) continue;
          seenKeys.add(dedupeKey);

          // domain field is not available via Gemini API, use title as fallback
          // (title is typically the domain name, e.g. "wikipedia.org")
          sources.push({ title, uri, domain: web.domain ?? title });
        }

        // Search queries Gemini actually performed
        const searchQueries = grounding?.webSearchQueries ?? [];

        result = { answer, sources, searchQueries, cached: false };
      } catch (err: unknown) {
        const msg = err instanceof Error ? err.message : String(err);
        const errorType = classifySearchError(msg);
        const details: SearchDetails = {
          query: params.query,
          sourceCount: 0,
          cached: false,
          durationMs: Date.now() - startTime,
          error: `${errorType}: ${msg}`,
        };
        return {
          content: [
            {
              type: "text",
              text: `Google Search failed (${errorType}): ${msg}`,
            },
          ],
          isError: true,
          details,
        };
      }

      // Only successful results are cached; failures are retried on the next call.
      resultCache.set(key, result);

      const details: SearchDetails = {
        query: params.query,
        sourceCount: result.sources.length,
        cached: false,
        durationMs: Date.now() - startTime,
      };
      return {
        content: [{ type: "text", text: truncateOutput(formatOutput(result, maxSources)) }],
        details,
      };
    },

    renderCall(args, theme) {
      let text = theme.fg("toolTitle", theme.bold("google_search "));
      text += theme.fg("accent", `"${args.query}"`);
      return new Text(text, 0, 0);
    },

    renderResult(result, { isPartial, expanded }, theme) {
      const d = result.details as SearchDetails | undefined;

      if (isPartial) return new Text(theme.fg("warning", "Searching Google..."), 0, 0);
      if (result.isError || d?.error) {
        return new Text(theme.fg("error", `Error: ${d?.error ?? "unknown"}`), 0, 0);
      }

      let text = theme.fg("success", `${d?.sourceCount ?? 0} sources`);
      text += theme.fg("dim", ` (${d?.durationMs ?? 0}ms)`);
      if (d?.cached) text += theme.fg("dim", " · cached");

      if (expanded) {
        const content = result.content[0];
        if (content?.type === "text") {
          // Show the first 8 lines as a preview, with an ellipsis if more follow.
          const contentLines = content.text.split("\n");
          text += "\n\n" + theme.fg("dim", contentLines.slice(0, 8).join("\n"));
          if (contentLines.length > 8) {
            text += "\n" + theme.fg("muted", "...");
          }
        }
      }

      return new Text(text, 0, 0);
    },
  });

  // ── Startup notification ─────────────────────────────────────────────────

  pi.on("session_start", async (_event, ctx) => {
    if (!process.env.GEMINI_API_KEY) {
      ctx.ui.notify(
        "Google Search: No GEMINI_API_KEY set. The google_search tool will not work until this is configured.",
        "warning",
      );
    }
  });
}
285
+
286
+ // ── Output formatting ────────────────────────────────────────────────────────
287
+
288
/**
 * Renders a search result as the plain-text payload returned by the tool.
 *
 * The output is the answer (or a placeholder when it is empty), followed by a
 * numbered "Sources:" list limited to `maxSources` entries with a note on how
 * many were left out, followed by the search queries that were performed.
 *
 * @param result Parsed search result to render.
 * @param maxSources Upper bound on listed sources. Fractional values are
 *   rounded down and negative values are treated as 0, so the "omitted" count
 *   always matches what is actually listed. Non-finite values (NaN, Infinity)
 *   impose no limit.
 * @returns The formatted text, with lines joined by "\n".
 */
function formatOutput(result: SearchResult, maxSources: number): string {
  // The tool schema accepts any number, so normalize to a whole,
  // non-negative count before slicing and before computing the remainder.
  const limit = Number.isFinite(maxSources)
    ? Math.max(0, Math.floor(maxSources))
    : result.sources.length;

  const lines: string[] = [result.answer || "(No answer text returned from search)"];

  if (result.sources.length > 0) {
    lines.push("", "Sources:");
    const shown = result.sources.slice(0, limit);
    shown.forEach((source, index) => {
      lines.push(`[${index + 1}] ${source.title} - ${source.domain}`);
      lines.push(` ${source.uri}`);
    });
    // Derive the remainder from what was listed, not from the raw limit.
    const omitted = result.sources.length - shown.length;
    if (omitted > 0) {
      lines.push(`(${omitted} more sources omitted)`);
    }
  } else {
    lines.push("", "(No source URLs found in grounding metadata)");
  }

  if (result.searchQueries.length > 0) {
    const quoted = result.searchQueries.map((q) => `"${q}"`).join(", ");
    lines.push("", `Searches performed: ${quoted}`);
  }

  return lines.join("\n");
}
@@ -0,0 +1,9 @@
1
+ {
2
+ "name": "pi-extension-google-search",
3
+ "private": true,
4
+ "version": "1.0.0",
5
+ "type": "module",
6
+ "pi": {
7
+ "extensions": ["./index.ts"]
8
+ }
9
+ }
@@ -26,7 +26,6 @@ export default function (pi: ExtensionAPI) {
26
26
 
27
27
  let active = false;
28
28
  let recognizerProcess: ChildProcess | null = null;
29
- let finalized = "";
30
29
  let flashOn = true;
31
30
  let flashTimer: ReturnType<typeof setInterval> | null = null;
32
31
  let footerTui: { requestRender: () => void } | null = null;
@@ -122,7 +121,6 @@ export default function (pi: ExtensionAPI) {
122
121
  }
123
122
 
124
123
  active = true;
125
- finalized = "";
126
124
  setVoiceFooter(ctx, true);
127
125
  await runVoiceSession(ctx);
128
126
  }
@@ -161,14 +159,15 @@ export default function (pi: ExtensionAPI) {
161
159
 
162
160
  async function runVoiceSession(ctx: ExtensionContext): Promise<void> {
163
161
  return new Promise<void>((resolve) => {
162
+ // The Swift recognizer handles accumulation across pause-induced
163
+ // transcription resets. Both PARTIAL and FINAL messages contain
164
+ // the full accumulated text, so we just pass them through.
164
165
  startRecognizer(
165
166
  (text) => {
166
- const full = finalized + (finalized && text ? " " : "") + text;
167
- ctx.ui.setEditorText(full);
167
+ ctx.ui.setEditorText(text);
168
168
  },
169
169
  (text) => {
170
- finalized = (finalized ? finalized + " " : "") + text;
171
- ctx.ui.setEditorText(finalized);
170
+ ctx.ui.setEditorText(text);
172
171
  },
173
172
  (msg) => ctx.ui.notify(`Voice: ${msg}`, "error"),
174
173
  () => {},
@@ -45,15 +45,93 @@ do {
45
45
  exit(1)
46
46
  }
47
47
 
48
- var lastText = ""
48
+ // Accumulated finalized text from previous recognition segments.
49
+ // On-device recognition (especially macOS/iOS 18+) can reset
50
+ // bestTranscription.formattedString after a pause, discarding
51
+ // previous text. We detect this by tracking the last known good
52
+ // text and noticing when the new text is shorter / doesn't start
53
+ // with the previous text. When that happens we treat the previous
54
+ // text as finalized and start accumulating the new segment on top.
55
+ var accumulated = ""
56
+ var lastPartialText = ""
57
+ var lastEmitted = ""
49
58
 
50
59
  recognizer.recognitionTask(with: request) { result, error in
51
60
  if let result = result {
52
61
  let text = result.bestTranscription.formattedString
53
- if text != lastText {
54
- lastText = text
55
- let prefix = result.isFinal ? "FINAL" : "PARTIAL"
56
- print("\(prefix):\(text)")
62
+
63
+ if result.isFinal {
64
+ // True final from the recognizer — commit everything
65
+ let full: String
66
+ // Check if the final text already includes accumulated content
67
+ // (some OS versions give cumulative finals, others reset)
68
+ if !accumulated.isEmpty && !text.lowercased().hasPrefix(accumulated.lowercased()) {
69
+ full = accumulated + " " + text
70
+ } else if !accumulated.isEmpty && text.count < accumulated.count {
71
+ // Final is shorter than what we accumulated — use accumulated + new
72
+ full = accumulated + " " + text
73
+ } else {
74
+ full = text
75
+ }
76
+ accumulated = ""
77
+ lastPartialText = ""
78
+ if full != lastEmitted {
79
+ lastEmitted = full
80
+ print("FINAL:\(full)")
81
+ }
82
+ return
83
+ }
84
+
85
+ // Detect transcription reset: if the new partial text is significantly
86
+ // shorter than what we had, or doesn't start with the previous text,
87
+ // the recognizer has reset after a pause. Finalize what we had.
88
+ let prevText = lastPartialText
89
+ if !prevText.isEmpty && !text.isEmpty {
90
+ let prevWords = prevText.split(separator: " ")
91
+ let newWords = text.split(separator: " ")
92
+
93
+ // Reset detection: either the word count dropped to less than half of the
94
+ // previous count, or there are fewer words AND the first word differs
95
+ // (i.e. it's truly new speech, not just the recognizer revising the last word)
96
+ let looksLikeReset: Bool
97
+ if newWords.count < prevWords.count / 2 {
98
+ // Significant drop in word count — likely a reset
99
+ looksLikeReset = true
100
+ } else if newWords.count < prevWords.count &&
101
+ !prevWords.isEmpty && !newWords.isEmpty &&
102
+ newWords[0] != prevWords[0] {
103
+ // Different starting word + fewer words — reset
104
+ looksLikeReset = true
105
+ } else {
106
+ looksLikeReset = false
107
+ }
108
+
109
+ if looksLikeReset {
110
+ // Commit the previous partial text to accumulated
111
+ if accumulated.isEmpty {
112
+ accumulated = prevText
113
+ } else {
114
+ accumulated = accumulated + " " + prevText
115
+ }
116
+ // Emit a FINAL for the committed text so the TS side updates
117
+ print("FINAL:\(accumulated)")
118
+ lastEmitted = accumulated
119
+ }
120
+ }
121
+
122
+ lastPartialText = text
123
+
124
+ // Build the full display text
125
+ let displayText: String
126
+ if accumulated.isEmpty {
127
+ displayText = text
128
+ } else {
129
+ displayText = accumulated + " " + text
130
+ }
131
+
132
+ if displayText != lastEmitted {
133
+ lastEmitted = displayText
134
+ print("PARTIAL:\(displayText)")
57
135
  }
58
136
  }
59
137
  if let error = error {