@codexstar/pi-listen 1.0.12 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,8 @@ export interface VoiceConfig {
31
31
  scope: VoiceSettingsScope;
32
32
  btwEnabled: boolean;
33
33
  onboarding: VoiceOnboardingState;
34
+ /** Deepgram API key — stored in config so it's available even when env var isn't set */
35
+ deepgramApiKey?: string;
34
36
  }
35
37
 
36
38
  export interface LoadedVoiceConfig {
@@ -60,6 +62,7 @@ export const DEFAULT_CONFIG: VoiceConfig = {
60
62
  model: "small",
61
63
  scope: "global",
62
64
  btwEnabled: true,
65
+ deepgramApiKey: undefined,
63
66
  onboarding: {
64
67
  completed: false,
65
68
  schemaVersion: VOICE_CONFIG_VERSION,
@@ -121,6 +124,7 @@ function migrateConfig(rawVoice: any, source: VoiceConfigSource): VoiceConfig {
121
124
  model: typeof rawVoice.model === "string" ? rawVoice.model : DEFAULT_CONFIG.model,
122
125
  scope: (rawVoice.scope as VoiceSettingsScope | undefined) ?? (source === "project" ? "project" : "global"),
123
126
  btwEnabled: typeof rawVoice.btwEnabled === "boolean" ? rawVoice.btwEnabled : DEFAULT_CONFIG.btwEnabled,
127
+ deepgramApiKey: typeof rawVoice.deepgramApiKey === "string" ? rawVoice.deepgramApiKey : undefined,
124
128
  onboarding: normalizeOnboarding(rawVoice.onboarding, fallbackCompleted),
125
129
  };
126
130
  }
@@ -1,22 +1,27 @@
1
1
  /**
2
- * pi-voice — Voice input + BTW side conversations for Pi CLI.
2
+ * pi-voice — Deepgram WebSocket streaming STT for Pi CLI.
3
3
  *
4
- * Features:
5
- * 1. Hold-spacebar to talk (Kitty protocol key release detection)
6
- * Fallback: Ctrl+Shift+V toggle for non-Kitty terminals
7
- * 2. BTW side conversations (/btw <msg>, /btw:new, /btw:clear, /btw:inject, /btw:summarize)
8
- * 3. Voice BTW glue: Ctrl+Shift+B = hold to record → auto-send as /btw
4
+ * Architecture (modeled after Claude Code's voice pipeline):
5
+ * 1. SoX `rec` captures mic audio as raw PCM (16kHz, mono, 16-bit)
6
+ * and pipes it to stdout (no file).
7
+ * 2. Raw PCM chunks are streamed over a WebSocket to Deepgram Nova 3.
8
+ * 3. Deepgram returns interim + final transcripts in real-time.
9
+ * 4. Interim transcripts update a live widget above the editor.
10
+ * 5. On key-release (or toggle stop), a CloseStream message is sent;
11
+ * final transcript is injected into the editor.
9
12
  *
10
- * Records audio via SoX, transcribes via persistent daemon (daemon.py) or fallback subprocess.
11
- * STT backends: faster-whisper, moonshine, whisper.cpp, deepgram, parakeet.
13
+ * Activation:
14
+ * - Hold SPACE (empty editor) → release to finalize
15
+ * - Ctrl+Shift+V → toggle start/stop (fallback for non-Kitty terminals)
16
+ * - Ctrl+Shift+B → hold to record → auto-send as /btw
12
17
  *
13
- * Config in ~/.pi/agent/settings.json or <project>/.pi/settings.json:
18
+ * Config in ~/.pi/agent/settings.json:
14
19
  * {
15
20
  * "voice": {
16
21
  * "enabled": true,
17
22
  * "language": "en",
18
- * "backend": "faster-whisper",
19
- * "model": "small"
23
+ * "backend": "deepgram",
24
+ * "model": "nova-3"
20
25
  * }
21
26
  * }
22
27
  */
@@ -65,6 +70,14 @@ interface BtwExchange {
65
70
  // ─── Constants ───────────────────────────────────────────────────────────────
66
71
 
67
72
  const SAMPLE_RATE = 16000;
73
+ const CHANNELS = 1;
74
+ const ENCODING = "linear16";
75
+ const DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
76
+ const KEEPALIVE_INTERVAL_MS = 8000;
77
+ const FINALIZE_SAFETY_TIMEOUT_MS = 5000;
78
+ const FINALIZE_NO_DATA_TIMEOUT_MS = 1500;
79
+ const MAX_RECORDING_SECS = 120; // 2 minutes safety cap (streaming is efficient)
80
+
68
81
  const EXT_DIR = path.dirname(new URL(import.meta.url).pathname);
69
82
  const PROJECT_ROOT = path.join(EXT_DIR, "..");
70
83
  const DAEMON_SCRIPT = path.join(PROJECT_ROOT, "daemon.py");
@@ -74,7 +87,7 @@ function commandExists(cmd: string): boolean {
74
87
  return spawnSync("which", [cmd], { stdio: "pipe", timeout: 3000 }).status === 0;
75
88
  }
76
89
 
77
- // ─── Daemon Communication ────────────────────────────────────────────────────
90
+ // ─── Daemon Communication (kept for non-deepgram local backends) ─────────────
78
91
 
79
92
  let activeSocketPath = getSocketPath({
80
93
  scope: DEFAULT_CONFIG.scope,
@@ -135,8 +148,6 @@ async function isDaemonRunning(socketPath = activeSocketPath): Promise<boolean>
135
148
  async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
136
149
  if (await isDaemonRunning(activeSocketPath)) {
137
150
  const status = await daemonSend({ cmd: "status" }, 3000, activeSocketPath);
138
- // When backend is 'auto', accept any loaded backend — the daemon already
139
- // resolved 'auto' to a concrete backend, so we don't need to reload.
140
151
  if (config.backend === "auto" || (status.backend === config.backend && status.model === config.model)) return true;
141
152
  const reloaded = await daemonSend({
142
153
  cmd: "load",
@@ -175,7 +186,6 @@ async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
175
186
 
176
187
  proc.on("error", () => resolve(false));
177
188
 
178
- // Timeout: if daemon doesn't start in 10s, kill orphan and fall back
179
189
  setTimeout(() => {
180
190
  if (!started) {
181
191
  try { proc.kill(); } catch {}
@@ -185,46 +195,40 @@ async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
185
195
  });
186
196
  }
187
197
 
188
- // ─── Audio Recording ─────────────────────────────────────────────────────────
198
+ // ─── Legacy file-based transcription (for non-deepgram backends) ─────────────
189
199
 
190
- let recProcess: ChildProcess | null = null;
200
+ let legacyRecProcess: ChildProcess | null = null;
191
201
 
192
- function startRecordingToFile(outPath: string): boolean {
193
- if (recProcess) {
194
- recProcess.kill("SIGTERM");
195
- recProcess = null;
202
/**
 * Begin a file-based SoX (`rec`) recording for the non-streaming backends.
 *
 * Any legacy recorder that is still tracked is sent SIGTERM first, so at most
 * one legacy recording is tracked at a time.
 *
 * @param outPath File that `rec` writes the captured audio to.
 * @returns false when the `rec` command is not available, true once the
 *          recorder process has been spawned.
 */
function startLegacyRecordingToFile(outPath: string): boolean {
  const previous = legacyRecProcess;
  if (previous) {
    previous.kill("SIGTERM");
    legacyRecProcess = null;
  }

  const hasRec = commandExists("rec");
  if (!hasRec) return false;

  // -q quiet, -r sample rate, -c 1 channel, -b 16 bits per sample.
  const recArgs = ["-q", "-r", String(SAMPLE_RATE), "-c", "1", "-b", "16", outPath];
  const recorder = spawn("rec", recArgs, { stdio: ["pipe", "pipe", "pipe"] });
  legacyRecProcess = recorder;

  // Drain stderr so SoX warnings are discarded.
  recorder.stderr?.on("data", () => {});
  // A spawn/runtime error means there is no recorder to track any more.
  recorder.on("error", () => { legacyRecProcess = null; });
  return true;
}
208
215
 
209
- function stopRecording(): Promise<void> {
216
/**
 * Stop the legacy file-based recorder and wait for it to exit.
 *
 * Sends SIGTERM and resolves when the process emits "close". If it has not
 * closed within 2 seconds it is sent SIGKILL and the promise resolves anyway.
 * Resolves immediately when no legacy recorder is running.
 *
 * The process being stopped is captured locally, and the escalation timer is
 * cancelled as soon as the process closes. Without that, the stale timer would
 * SIGKILL whatever recorder happened to be stored in `legacyRecProcess` two
 * seconds later — i.e. a new recording started right after this one stopped.
 */
function stopLegacyRecording(): Promise<void> {
  return new Promise((resolve) => {
    const proc = legacyRecProcess;
    if (!proc) { resolve(); return; }

    let killTimer: ReturnType<typeof setTimeout> | null = null;

    const settle = (): void => {
      if (killTimer) { clearTimeout(killTimer); killTimer = null; }
      // Only forget the process we were asked to stop, never a newer one.
      if (legacyRecProcess === proc) legacyRecProcess = null;
      resolve();
    };

    proc.once("close", settle);
    proc.kill("SIGTERM");

    killTimer = setTimeout(() => {
      killTimer = null;
      try { proc.kill("SIGKILL"); } catch {}
      settle();
    }, 2000);
  });
}
220
227
 
221
- // ─── Transcription (daemon or fallback) ──────────────────────────────────────
222
-
223
- async function transcribeAudio(
228
+ async function transcribeAudioFile(
224
229
  audioPath: string,
225
230
  config: VoiceConfig,
226
231
  ): Promise<{ text: string; duration: number; error?: string }> {
227
- // Try daemon first
228
232
  if (await isDaemonRunning()) {
229
233
  const resp = await daemonSend({
230
234
  cmd: "transcribe",
@@ -238,13 +242,10 @@ async function transcribeAudio(
238
242
  return resp as { text: string; duration: number };
239
243
  }
240
244
  }
241
-
242
- // Fallback: direct subprocess
243
245
  return new Promise((resolve) => {
244
246
  const args = [TRANSCRIBE_SCRIPT, "--language", config.language, audioPath];
245
247
  if (config.backend !== "auto") args.splice(1, 0, "--backend", config.backend);
246
248
  if (config.model) args.splice(1, 0, "--model", config.model);
247
-
248
249
  const proc = spawn("python3", args, { stdio: ["pipe", "pipe", "pipe"] });
249
250
  let stdout = "";
250
251
  let stderr = "";
@@ -258,6 +259,250 @@ async function transcribeAudio(
258
259
  });
259
260
  }
260
261
 
262
+ // ─── Deepgram WebSocket Streaming ────────────────────────────────────────────
263
+
264
/** Mutable state shared by the functions that drive one streaming capture. */
interface StreamingSession {
  /** WebSocket the captured audio is sent over. */
  ws: WebSocket;
  /** Child process that captures the microphone audio. */
  recProcess: ChildProcess;
  /** Current interim (partial) transcript. */
  interimText: string;
  /** All finalized transcript segments. */
  finalizedParts: string[];
  /** Handle of the periodic keep-alive timer, or null when none is running. */
  keepAliveTimer: ReturnType<typeof setInterval> | null;
  /** True once the session has been finalized. */
  closed: boolean;
  /** Receives the current interim text and the finalized segments. */
  onTranscript: (interim: string, finals: string[]) => void;
  /** Receives the full transcript when the session is finalized. */
  onDone: (fullText: string) => void;
  /** Receives a human-readable error description. */
  onError: (err: string) => void;
}
275
+
276
/**
 * Read the Deepgram API key from the process environment.
 *
 * Only DEEPGRAM_API_KEY is consulted here; no settings file is read.
 *
 * @returns the key, or null when the variable is unset or empty.
 */
function getDeepgramApiKey(): string | null {
  const fromEnv = process.env.DEEPGRAM_API_KEY;
  return fromEnv ? fromEnv : null;
}
280
+
281
/**
 * Look up the Deepgram API key, preferring the environment over saved settings:
 *   1. process.env.DEEPGRAM_API_KEY
 *   2. config.deepgramApiKey
 *
 * An empty string in either place counts as "not set".
 *
 * @returns the first key found, or null when neither source provides one.
 */
function resolveDeepgramApiKey(config: VoiceConfig): string | null {
  const envKey = process.env.DEEPGRAM_API_KEY;
  if (envKey) return envKey;

  const storedKey = config.deepgramApiKey;
  return storedKey ? storedKey : null;
}
289
+
290
/**
 * Decide whether voice input should use the Deepgram streaming path.
 *
 * Streaming is used when the backend is "deepgram" or "auto" and a Deepgram
 * API key can be resolved for this config; every other combination is false.
 */
function isDeepgramStreaming(config: VoiceConfig): boolean {
  const backendAllowsStreaming = config.backend === "deepgram" || config.backend === "auto";
  if (!backendAllowsStreaming) return false;
  return Boolean(resolveDeepgramApiKey(config));
}
296
+
297
/**
 * Build the Deepgram live-transcription WebSocket URL for this config.
 *
 * The audio format parameters (encoding, sample rate, channels) come from the
 * module constants and must match what the recorder actually produces.
 *
 * Model selection: `config.model` is only forwarded when the backend is
 * explicitly "deepgram". In "auto" mode the configured model belongs to the
 * local backends (the default config uses "small"), which is not a Deepgram
 * model name, so "nova-3" is used instead.
 *
 * @returns the full `wss://` URL including the query string.
 */
function buildDeepgramWsUrl(config: VoiceConfig): string {
  const model = config.backend === "deepgram" && config.model ? config.model : "nova-3";

  const params = new URLSearchParams({
    encoding: ENCODING,
    sample_rate: String(SAMPLE_RATE),
    channels: String(CHANNELS),
    endpointing: "300", // ms of silence before phrase boundary
    utterance_end_ms: "1000", // ms of silence before utterance is complete
    language: config.language || "en",
    model,
    smart_format: "true",
    interim_results: "true",
  });
  return `${DEEPGRAM_WS_URL}?${params.toString()}`;
}
311
+
312
/**
 * Start capturing microphone audio with SoX `rec` and stream it to Deepgram.
 *
 * `rec` is spawned writing raw 16-bit signed PCM to stdout; once the WebSocket
 * is open each stdout chunk is forwarded as one frame and a periodic KeepAlive
 * message is sent. Incoming "Results" messages update the session's interim /
 * finalized text and are reported through `onTranscript`.
 *
 * Failure handling: a WebSocket error, a Deepgram "Error" message or a SoX
 * process error tears the session down immediately (recorder killed, socket
 * closed, keep-alive stopped) and reports `onError` exactly once. Any segments
 * that were already finalized are then still delivered through `onDone`, so
 * recognized speech is not lost. Previously only `onError` was called for the
 * Deepgram/SoX cases, leaving the microphone process and socket running.
 *
 * @param config    Voice configuration (API key, language, model).
 * @param callbacks Transcript / completion / error handlers.
 * @returns the live session, or null (after calling `onError`) when no API key
 *          is available or `rec` is not installed.
 */
function startStreamingSession(
  config: VoiceConfig,
  callbacks: {
    onTranscript: (interim: string, finals: string[]) => void;
    onDone: (fullText: string) => void;
    onError: (err: string) => void;
  },
): StreamingSession | null {
  const apiKey = resolveDeepgramApiKey(config);
  if (!apiKey) {
    callbacks.onError("DEEPGRAM_API_KEY not set");
    return null;
  }

  if (!commandExists("rec")) {
    callbacks.onError("Voice requires SoX. Install: brew install sox");
    return null;
  }

  // Start SoX streaming raw PCM to stdout (no file)
  const recProc = spawn("rec", [
    "-q",
    "-r", String(SAMPLE_RATE),
    "-c", String(CHANNELS),
    "-b", "16",
    "-e", "signed-integer",
    "-t", "raw",
    "-", // output to stdout
  ], { stdio: ["pipe", "pipe", "pipe"] });

  recProc.stderr?.on("data", () => {}); // suppress SoX warnings

  // The options object with `headers` is not part of the standard WebSocket
  // constructor typings, hence the cast.
  // NOTE(review): relies on the runtime's WebSocket accepting it — confirm.
  const ws = new WebSocket(buildDeepgramWsUrl(config), {
    headers: {
      "Authorization": `Token ${apiKey}`,
    },
  } as any);

  const session: StreamingSession = {
    ws,
    recProcess: recProc,
    interimText: "",
    finalizedParts: [],
    keepAliveTimer: null,
    closed: false,
    onTranscript: callbacks.onTranscript,
    onDone: callbacks.onDone,
    onError: callbacks.onError,
  };

  // Best-effort control message; silently skipped when the socket is not open.
  const sendControl = (type: "KeepAlive" | "CloseStream"): void => {
    if (ws.readyState !== WebSocket.OPEN) return;
    try { ws.send(JSON.stringify({ type })); } catch {}
  };

  // Tear everything down once and report the failure.
  const abort = (message: string): void => {
    if (session.closed) return;
    session.closed = true;

    if (session.keepAliveTimer) {
      clearInterval(session.keepAliveTimer);
      session.keepAliveTimer = null;
    }
    try { ws.close(); } catch {}
    try { recProc.kill("SIGKILL"); } catch {}

    session.onError(message);

    // Do not throw away speech that was already recognized before the failure.
    const salvaged = session.finalizedParts.join(" ").trim();
    if (salvaged) session.onDone(salvaged);
  };

  ws.onopen = () => {
    sendControl("KeepAlive");
    session.keepAliveTimer = setInterval(() => sendControl("KeepAlive"), KEEPALIVE_INTERVAL_MS);

    // Pipe SoX stdout → WebSocket as binary frames
    recProc.stdout?.on("data", (chunk: Buffer) => {
      if (ws.readyState !== WebSocket.OPEN) return;
      try { ws.send(chunk); } catch {}
    });
  };

  ws.onmessage = (event: MessageEvent) => {
    // The whole handler is best-effort: malformed payloads and exceptions
    // thrown by the callbacks are swallowed rather than crashing the host.
    try {
      const msg = typeof event.data === "string" ? JSON.parse(event.data) : null;
      if (!msg) return;

      if (msg.type === "Results") {
        const transcript: string = msg.channel?.alternatives?.[0]?.transcript || "";

        if (msg.is_final) {
          // Final result for this audio segment replaces the interim text.
          const finalText = transcript.trim();
          if (finalText) session.finalizedParts.push(finalText);
          session.interimText = "";
        } else {
          // Interim result — live update
          session.interimText = transcript;
        }

        session.onTranscript(session.interimText, session.finalizedParts);
      } else if (msg.type === "Error" || msg.type === "error") {
        abort(msg.message || msg.description || "Deepgram error");
      }
      // "Metadata" and "UtteranceEnd" need no handling: finalization is
      // driven entirely by is_final above.
    } catch {
      // ignored on purpose (see above)
    }
  };

  ws.onerror = () => {
    abort("WebSocket connection error");
  };

  ws.onclose = () => {
    if (!session.closed) {
      finalizeSession(session);
    }
  };

  recProc.on("error", (err) => {
    abort(`SoX error: ${err.message}`);
  });

  // SoX stopped — tell Deepgram no more audio is coming.
  recProc.on("close", () => {
    sendControl("CloseStream");
  });

  return session;
}
448
+
449
/**
 * Ask a running streaming session to wind down and deliver its transcript.
 *
 * Steps:
 *  1. Stop the recorder with SIGTERM.
 *  2. Send "CloseStream" once the recorder has closed, so audio still
 *     buffered in its stdout pipe is forwarded first. Sending it straight
 *     away (as before) could cut off the last words.
 *  3. Finalize when no message has arrived for FINALIZE_NO_DATA_TIMEOUT_MS, or
 *     unconditionally after FINALIZE_SAFETY_TIMEOUT_MS.
 *
 * A single polling watchdog implements step 3 and cancels itself as soon as the
 * session is closed by any path, so no timer outlives the session.
 * Does nothing when the session is already closed.
 */
function stopStreamingSession(session: StreamingSession): void {
  if (session.closed) return;

  const { ws, recProcess } = session;

  const requestFlush = (): void => {
    if (ws.readyState !== WebSocket.OPEN) return;
    try { ws.send(JSON.stringify({ type: "CloseStream" })); } catch {}
  };

  const recorderExited = recProcess.exitCode !== null || recProcess.signalCode !== null;
  if (recorderExited) {
    requestFlush();
  } else {
    // "close" fires after the process has exited and its stdio has ended,
    // i.e. after the final audio chunks were handed to the socket.
    recProcess.once("close", requestFlush);
    try { recProcess.kill("SIGTERM"); } catch {}
  }

  // Track message arrival without disturbing the existing handler.
  const stopRequestedAt = Date.now();
  let lastMessageAt = stopRequestedAt;
  const previousHandler = ws.onmessage;
  ws.onmessage = (event: MessageEvent) => {
    lastMessageAt = Date.now();
    if (previousHandler) previousHandler.call(ws, event);
  };

  const watchdog = setInterval(() => {
    if (session.closed) {
      clearInterval(watchdog);
      return;
    }
    const now = Date.now();
    const wentQuiet = now - lastMessageAt > FINALIZE_NO_DATA_TIMEOUT_MS;
    const overdue = now - stopRequestedAt >= FINALIZE_SAFETY_TIMEOUT_MS;
    if (wentQuiet || overdue) {
      clearInterval(watchdog);
      finalizeSession(session);
    }
  }, 500);
}
484
+
485
/**
 * Close a streaming session and deliver its transcript through `onDone`.
 *
 * Idempotent: only the first call has any effect. Stops the keep-alive timer,
 * closes the WebSocket and kills the recorder (SIGKILL) before invoking
 * `onDone`.
 *
 * The delivered text is all finalized segments joined by spaces. If an interim
 * segment is still pending (no final result arrived for it before the session
 * was closed) it is appended as a fallback instead of being dropped. Interim
 * text is cleared whenever a final result arrives, so this never duplicates a
 * segment.
 */
function finalizeSession(session: StreamingSession): void {
  if (session.closed) return;
  session.closed = true;

  // Clean up keepalive
  if (session.keepAliveTimer) {
    clearInterval(session.keepAliveTimer);
    session.keepAliveTimer = null;
  }

  // Close WebSocket
  try { session.ws.close(); } catch {}

  // Kill SoX if still running
  try { session.recProcess.kill("SIGKILL"); } catch {}

  // Deliver final transcript (finalized segments + any unfinalized tail)
  const parts = [...session.finalizedParts];
  const pendingInterim = session.interimText.trim();
  if (pendingInterim) parts.push(pendingInterim);

  session.onDone(parts.join(" ").trim());
}
505
+
261
506
  // ─── Extension ───────────────────────────────────────────────────────────────
262
507
 
263
508
  export default function (pi: ExtensionAPI) {
@@ -272,6 +517,10 @@ export default function (pi: ExtensionAPI) {
272
517
  let terminalInputUnsub: (() => void) | null = null;
273
518
  let isHolding = false;
274
519
 
520
+ // Streaming session state
521
+ let activeSession: StreamingSession | null = null;
522
+ let currentTarget: "editor" | "btw" = "editor";
523
+
275
524
  // ─── BTW State ───────────────────────────────────────────────────────────
276
525
 
277
526
  let btwThread: BtwExchange[] = [];
@@ -289,17 +538,19 @@ export default function (pi: ExtensionAPI) {
289
538
  }
290
539
  const modeTag = !config.onboarding.completed
291
540
  ? "SETUP"
292
- : config.mode === "api"
293
- ? "API"
294
- : config.mode === "local"
295
- ? "LOCAL"
296
- : "AUTO";
541
+ : isDeepgramStreaming(config)
542
+ ? "STREAM"
543
+ : config.mode === "api"
544
+ ? "API"
545
+ : config.mode === "local"
546
+ ? "LOCAL"
547
+ : "AUTO";
297
548
  ctx.ui.setStatus("voice", `MIC ${modeTag}`);
298
549
  break;
299
550
  }
300
551
  case "recording": {
301
552
  const secs = Math.round((Date.now() - recordingStart) / 1000);
302
- ctx.ui.setStatus("voice", `REC ${secs}s`);
553
+ ctx.ui.setStatus("voice", `🔴 REC ${secs}s`);
303
554
  break;
304
555
  }
305
556
  case "transcribing":
@@ -315,7 +566,11 @@ export default function (pi: ExtensionAPI) {
315
566
 
316
567
  function voiceCleanup() {
317
568
  if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
318
- if (recProcess) { recProcess.kill("SIGTERM"); recProcess = null; }
569
+ if (activeSession) {
570
+ finalizeSession(activeSession);
571
+ activeSession = null;
572
+ }
573
+ if (legacyRecProcess) { legacyRecProcess.kill("SIGTERM"); legacyRecProcess = null; }
319
574
  if (tempFile) { try { fs.unlinkSync(tempFile); } catch {} tempFile = null; }
320
575
  isHolding = false;
321
576
  setVoiceState("idle");
@@ -332,7 +587,7 @@ export default function (pi: ExtensionAPI) {
332
587
  const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
333
588
  const provisioningPlan = buildProvisioningPlan(nextConfig, diagnostics);
334
589
  let validated = provisioningPlan.ready;
335
- if (validated && nextConfig.enabled) {
590
+ if (validated && nextConfig.enabled && !isDeepgramStreaming(nextConfig)) {
336
591
  validated = await ensureDaemon(nextConfig);
337
592
  }
338
593
 
@@ -349,53 +604,173 @@ export default function (pi: ExtensionAPI) {
349
604
  ].join("\n"), validated ? "info" : "warning");
350
605
  }
351
606
 
352
- // ─── Voice: Start / Stop / Transcribe ────────────────────────────────────
607
+ // ─── Live Transcript Widget ──────────────────────────────────────────────
353
608
 
354
- const MAX_RECORDING_SECS = 30; // Safety cap: auto-stop after 30s
609
/**
 * Render the live transcript in the "voice-recording" widget above the editor.
 *
 * Finalized segments and the current interim text are joined into one string,
 * word-wrapped at 70 characters, and only the last 4 rows are shown. While
 * interim text is present the last row ends with a cursor glyph. With nothing
 * to show yet, a "Listening..." placeholder is displayed. No-op without a UI.
 */
function updateLiveTranscriptWidget(interim: string, finals: string[]) {
  if (!ctx?.hasUI) return;
  const ui = ctx.ui;

  const WRAP_AT = 70;
  const MAX_ROWS = 4;

  // Empty pieces are skipped so no stray separator is produced.
  const displayText = [finals.join(" "), interim].filter((piece) => piece).join(" ");

  if (!displayText.trim()) {
    ui.setWidget("voice-recording", [
      " 🎙 Listening... (speak now)",
    ], { placement: "aboveEditor" });
    return;
  }

  // Greedy word wrap: the first row carries the mic prefix, later rows a space.
  const rows: string[] = [];
  let row = " 🎙 ";
  for (const token of displayText.split(" ")) {
    const candidate = row + token;
    if (candidate.length <= WRAP_AT) {
      row = candidate + " ";
      continue;
    }
    rows.push(row);
    row = " " + token + " ";
  }
  if (row.trim()) rows.push(row);

  // Keep only the last rows to avoid widget overflow.
  const visibleRows = rows.slice(-MAX_ROWS);
  if (interim) {
    // Mark the still-changing tail with a cursor.
    const tail = visibleRows.length - 1;
    visibleRows[tail] = visibleRows[tail].trimEnd() + "▍";
  }

  ui.setWidget("voice-recording", visibleRows, { placement: "aboveEditor" });
}
648
+
649
+ // ─── Voice: Start / Stop (Streaming or Legacy) ───────────────────────────
650
+
651
/**
 * Start a voice recording, using Deepgram streaming when available and the
 * file-based legacy recorder otherwise.
 *
 * Streaming path: opens a streaming session whose callbacks update the live
 * transcript widget, and on completion either hand the text to BTW or append
 * it to the editor. Legacy path: records to a temp .wav file that
 * stopVoiceRecording() later transcribes.
 *
 * Both paths run a 1-second status timer that refreshes the status line and
 * auto-stops the recording after MAX_RECORDING_SECS. The timer is now also
 * cleared when a streaming session ends on its own (onDone / onError), and
 * before a new one is created — previously such a session left the interval
 * running and the next recording overwrote the handle, leaking it.
 *
 * @param target Where the transcript goes: the editor or a BTW message.
 * @returns true when recording started; false when not idle, no context, or
 *          the recorder/session could not be started.
 */
async function startVoiceRecording(target: "editor" | "btw" = "editor"): Promise<boolean> {
  if (voiceState !== "idle" || !ctx) return false;

  currentTarget = target;
  recordingStart = Date.now();

  const clearStatusTimer = (): void => {
    if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
  };

  // Shared by both paths: refresh elapsed time and enforce the safety cap.
  const startStatusTimer = (): void => {
    clearStatusTimer();
    statusTimer = setInterval(() => {
      if (voiceState !== "recording") return;
      updateVoiceStatus();
      const elapsed = (Date.now() - recordingStart) / 1000;
      if (elapsed >= MAX_RECORDING_SECS) {
        isHolding = false;
        void stopVoiceRecording(target);
      }
    }, 1000);
  };

  if (isDeepgramStreaming(config)) {
    // === STREAMING PATH === (Deepgram WebSocket)
    setVoiceState("recording");

    const session = startStreamingSession(config, {
      onTranscript: (interim, finals) => {
        updateLiveTranscriptWidget(interim, finals);
        updateVoiceStatus();
      },
      onDone: (fullText) => {
        activeSession = null;
        clearStatusTimer();
        ctx?.ui.setWidget("voice-recording", undefined);

        if (!fullText.trim()) {
          ctx?.ui.notify("No speech detected.", "warning");
          setVoiceState("idle");
          return;
        }

        if (target === "btw") {
          // Not awaited (state returns to idle right away), but a rejection
          // must not surface as an unhandled promise rejection.
          Promise.resolve(handleBtw(fullText)).catch((err: unknown) => {
            const reason = err instanceof Error ? err.message : String(err);
            ctx?.ui.notify(`BTW failed: ${reason}`, "error");
          });
        } else if (ctx?.hasUI) {
          const existing = ctx.ui.getEditorText();
          ctx.ui.setEditorText(existing ? existing + " " + fullText : fullText);
          const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
          ctx.ui.notify(
            `STT (${elapsed}s): ${fullText.slice(0, 80)}${fullText.length > 80 ? "..." : ""}`,
            "info",
          );
        }
        setVoiceState("idle");
      },
      onError: (err) => {
        activeSession = null;
        clearStatusTimer();
        ctx?.ui.setWidget("voice-recording", undefined);
        ctx?.ui.notify(`STT error: ${err}`, "error");
        setVoiceState("idle");
      },
    });

    if (!session) {
      setVoiceState("idle");
      return false;
    }

    activeSession = session;
    startStatusTimer();

    if (ctx.hasUI) {
      ctx.ui.setWidget("voice-recording", [
        " 🎙 Listening... speak now — press SPACE again to stop",
      ], { placement: "aboveEditor" });
    }
    return true;
  }

  // === LEGACY PATH === (file-based for local backends)
  const audioPath = path.join(os.tmpdir(), `pi-voice-${Date.now()}.wav`);
  tempFile = audioPath;
  if (!startLegacyRecordingToFile(audioPath)) {
    // Nothing was recorded, so do not keep pointing at a file that never existed.
    tempFile = null;
    ctx.ui.notify("Voice requires SoX. Install: brew install sox", "error");
    return false;
  }

  setVoiceState("recording");
  startStatusTimer();

  if (ctx.hasUI) {
    ctx.ui.setWidget("voice-recording", [
      target === "btw"
        ? " 🎙 BTW Recording... Ctrl+Shift+V to stop"
        : " 🎙 Recording... Ctrl+Shift+V to stop (or release SPACE)",
    ], { placement: "aboveEditor" });
  }
  return true;
}
388
755
 
389
756
  async function stopVoiceRecording(target: "editor" | "btw" = "editor") {
390
757
  if (voiceState !== "recording" || !ctx) return;
391
758
  if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
392
759
 
760
+ if (activeSession) {
761
+ // === STREAMING PATH === Stop the stream, finalize will call onDone
762
+ setVoiceState("transcribing");
763
+ stopStreamingSession(activeSession);
764
+ return;
765
+ }
766
+
767
+ // === LEGACY PATH ===
393
768
  const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
394
- const audioFile = tempFile; // capture before cleanup can null it
769
+ const audioFile = tempFile;
395
770
  setVoiceState("transcribing");
396
771
  ctx.ui.setWidget("voice-recording", undefined);
397
772
 
398
- await stopRecording();
773
+ await stopLegacyRecording();
399
774
 
400
775
  if (!audioFile || !fs.existsSync(audioFile)) {
401
776
  ctx.ui.notify("No audio recorded.", "warning");
@@ -412,12 +787,9 @@ export default function (pi: ExtensionAPI) {
412
787
  return;
413
788
  }
414
789
 
415
- // Ensure daemon is up before transcribing — await so the warm path
416
- // is available for this request instead of falling through to the
417
- // cold subprocess fallback.
418
790
  await ensureDaemon(config).catch(() => {});
419
791
 
420
- const result = await transcribeAudio(audioFile, config);
792
+ const result = await transcribeAudioFile(audioFile, config);
421
793
  try { fs.unlinkSync(audioFile); } catch {}
422
794
  if (tempFile === audioFile) tempFile = null;
423
795
 
@@ -437,7 +809,6 @@ export default function (pi: ExtensionAPI) {
437
809
  if (target === "btw") {
438
810
  await handleBtw(transcript);
439
811
  } else {
440
- // Inject into editor
441
812
  if (ctx.hasUI) {
442
813
  const existing = ctx.ui.getEditorText();
443
814
  ctx.ui.setEditorText(existing ? existing + " " + transcript : transcript);
@@ -451,25 +822,38 @@ export default function (pi: ExtensionAPI) {
451
822
  setVoiceState("idle");
452
823
  }
453
824
 
454
- // ─── Hold-to-talk via Kitty protocol ─────────────────────────────────────
825
+ // ─── Hold-to-talk / Toggle-to-talk ──────────────────────────────────────
826
+ //
827
+ // Kitty protocol terminals (Ghostty, WezTerm, Kitty) send key-release
828
+ // events (":3u" sequences), enabling true hold-to-talk.
829
+ //
830
+ // Non-Kitty terminals (Apple Terminal, iTerm2 without config, basic xterm)
831
+ // only send key-press. We detect this and fall back to toggle:
832
+ // 1st SPACE press → start recording
833
+ // 2nd SPACE press → stop recording + transcribe
834
+ //
835
+ // We auto-detect Kitty support: if we see a key-release within the first
836
+ // recording, we know hold-to-talk works. Otherwise, we stay in toggle mode.
837
+
838
+ let kittyReleaseDetected = false; // have we ever seen a Kitty release event?
455
839
 
456
840
  function setupHoldToTalk() {
457
841
  if (!ctx?.hasUI) return;
458
842
 
459
- // Remove previous listener
460
843
  if (terminalInputUnsub) { terminalInputUnsub(); terminalInputUnsub = null; }
461
844
 
462
845
  terminalInputUnsub = ctx.ui.onTerminalInput((data: string) => {
463
846
  if (!config.enabled) return undefined;
464
847
 
465
- // Hold SPACE talk → release → transcribe to editor
848
+ // ── SPACE handling ──
466
849
  if (matchesKey(data, "space")) {
467
- // Only activate when editor is empty (avoid conflicting with typing)
468
850
  const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
469
851
  if (editorText && editorText.trim().length > 0) return undefined;
470
852
 
853
+ // Kitty key-release: stop recording
471
854
  if (isKeyRelease(data)) {
472
- if (isHolding) {
855
+ kittyReleaseDetected = true;
856
+ if (isHolding && voiceState === "recording") {
473
857
  isHolding = false;
474
858
  stopVoiceRecording("editor");
475
859
  return { consume: true };
@@ -477,12 +861,27 @@ export default function (pi: ExtensionAPI) {
477
861
  return undefined;
478
862
  }
479
863
 
864
+ // Kitty key-repeat: suppress while holding
480
865
  if (isKeyRepeat(data)) {
481
866
  if (isHolding) return { consume: true };
482
867
  return undefined;
483
868
  }
484
869
 
485
- // Key press — start recording
870
+ // === Key PRESS ===
871
+
872
+ // Currently recording? → this is the "stop" press (toggle mode)
873
+ if (voiceState === "recording") {
874
+ isHolding = false;
875
+ stopVoiceRecording("editor");
876
+ return { consume: true };
877
+ }
878
+
879
+ // Currently transcribing? → ignore, wait for it to finish
880
+ if (voiceState === "transcribing") {
881
+ return { consume: true };
882
+ }
883
+
884
+ // Idle → start recording
486
885
  if (voiceState === "idle" && !isHolding) {
487
886
  isHolding = true;
488
887
  startVoiceRecording("editor").then((ok) => {
@@ -495,10 +894,11 @@ export default function (pi: ExtensionAPI) {
495
894
  return undefined;
496
895
  }
497
896
 
498
- // Hold Ctrl+Shift+B talk release → auto-btw
897
+ // ── Ctrl+Shift+B handling (BTW voice) ──
499
898
  if (matchesKey(data, "ctrl+shift+b")) {
500
899
  if (isKeyRelease(data)) {
501
- if (isHolding) {
900
+ kittyReleaseDetected = true;
901
+ if (isHolding && voiceState === "recording") {
502
902
  isHolding = false;
503
903
  stopVoiceRecording("btw");
504
904
  return { consume: true };
@@ -511,6 +911,13 @@ export default function (pi: ExtensionAPI) {
511
911
  return undefined;
512
912
  }
513
913
 
914
+ // Toggle: stop if recording
915
+ if (voiceState === "recording") {
916
+ isHolding = false;
917
+ stopVoiceRecording("btw");
918
+ return { consume: true };
919
+ }
920
+
514
921
  if (voiceState === "idle" && !isHolding) {
515
922
  isHolding = true;
516
923
  startVoiceRecording("btw").then((ok) => {
@@ -523,12 +930,6 @@ export default function (pi: ExtensionAPI) {
523
930
  return undefined;
524
931
  }
525
932
 
526
- // Any other key while holding = cancel
527
- if (isHolding && voiceState === "recording") {
528
- // Don't cancel on modifier-only events
529
- return undefined;
530
- }
531
-
532
933
  return undefined;
533
934
  });
534
935
  }
@@ -536,7 +937,6 @@ export default function (pi: ExtensionAPI) {
536
937
  // ─── BTW: Side Conversations ─────────────────────────────────────────────
537
938
 
538
939
  function buildBtwContext(): string {
539
- // Build context from main session + btw thread
540
940
  const systemPrompt = ctx?.getSystemPrompt() ?? "";
541
941
  let btwContext = "You are a helpful side-channel assistant. ";
542
942
  btwContext += "The user is having a parallel conversation while their main Pi agent works. ";
@@ -570,7 +970,6 @@ export default function (pi: ExtensionAPI) {
570
970
  "",
571
971
  ];
572
972
 
573
- // Show last exchange
574
973
  lines.push(` Q: ${last.question.slice(0, 100)}${last.question.length > 100 ? "..." : ""}`);
575
974
  const answerLines = last.answer.split("\n");
576
975
  for (const line of answerLines.slice(0, 8)) {
@@ -589,7 +988,6 @@ export default function (pi: ExtensionAPI) {
589
988
 
590
989
  btwWidgetVisible = true;
591
990
 
592
- // Show thinking state
593
991
  ctx.ui.setWidget("btw", [
594
992
  " BTW",
595
993
  "",
@@ -598,10 +996,8 @@ export default function (pi: ExtensionAPI) {
598
996
  " Thinking...",
599
997
  ], { placement: "aboveEditor" });
600
998
 
601
- // Build context for LLM
602
999
  const btwContext = buildBtwContext();
603
1000
 
604
- // Use the model registry to get current model
605
1001
  const model = ctx.model;
606
1002
  if (!model) {
607
1003
  const exchange: BtwExchange = {
@@ -616,7 +1012,6 @@ export default function (pi: ExtensionAPI) {
616
1012
  }
617
1013
 
618
1014
  try {
619
- // Stream the response
620
1015
  let answer = "";
621
1016
  const eventStream = streamSimple(model, {
622
1017
  systemPrompt: btwContext,
@@ -633,7 +1028,6 @@ export default function (pi: ExtensionAPI) {
633
1028
  break;
634
1029
  }
635
1030
 
636
- // Update widget with streaming response
637
1031
  const displayLines: string[] = [
638
1032
  ` BTW`,
639
1033
  "",
@@ -657,7 +1051,6 @@ export default function (pi: ExtensionAPI) {
657
1051
  pi.appendEntry("btw", exchange);
658
1052
  updateBtwWidget();
659
1053
  } catch (err: any) {
660
- // Fallback: send as a follow-up message to the main agent
661
1054
  const exchange: BtwExchange = {
662
1055
  question: message,
663
1056
  answer: `(BTW streaming failed: ${err.message}. Falling back to sendUserMessage.)`,
@@ -667,7 +1060,6 @@ export default function (pi: ExtensionAPI) {
667
1060
  pi.appendEntry("btw", exchange);
668
1061
  updateBtwWidget();
669
1062
 
670
- // Use sendUserMessage as alternative
671
1063
  pi.sendUserMessage(
672
1064
  `[BTW question]: ${message}`,
673
1065
  { deliverAs: "followUp" },
@@ -677,7 +1069,6 @@ export default function (pi: ExtensionAPI) {
677
1069
 
678
1070
  // ─── Shortcuts ───────────────────────────────────────────────────────────
679
1071
 
680
- // Ctrl+Shift+V = toggle voice (fallback for non-Kitty terminals)
681
1072
  pi.registerShortcut("ctrl+shift+v", {
682
1073
  description: "Toggle voice recording (start/stop)",
683
1074
  handler: async (handlerCtx) => {
@@ -705,12 +1096,42 @@ export default function (pi: ExtensionAPI) {
705
1096
  configSource = loaded.source;
706
1097
  updateSocketPath(config, currentCwd);
707
1098
 
708
- // No auto-popup on startup. Users run `/voice setup` to configure.
709
- // Only activate voice features if setup has been completed previously.
1099
+ // Auto-capture DEEPGRAM_API_KEY from env into config if not already stored.
1100
+ // This ensures streaming works even when Pi is launched from a context
1101
+ // that doesn't source .zshrc (GUI app, tmux, etc.)
1102
+ if (process.env.DEEPGRAM_API_KEY && !config.deepgramApiKey) {
1103
+ config.deepgramApiKey = process.env.DEEPGRAM_API_KEY;
1104
+ if (configSource !== "default") {
1105
+ saveConfig(config, config.scope, currentCwd);
1106
+ }
1107
+ }
1108
+
1109
+ // Also try to load DEEPGRAM_API_KEY from shell if not in process.env and not in config
1110
+ if (!resolveDeepgramApiKey(config) && config.backend === "deepgram") {
1111
+ try {
1112
+ const result = spawnSync("zsh", ["-ic", "echo $DEEPGRAM_API_KEY"], {
1113
+ stdio: ["pipe", "pipe", "pipe"],
1114
+ timeout: 3000,
1115
+ env: { ...process.env, HOME: os.homedir() },
1116
+ });
1117
+ const shellKey = result.stdout?.toString().trim();
1118
+ if (shellKey && shellKey.length > 5) {
1119
+ config.deepgramApiKey = shellKey;
1120
+ process.env.DEEPGRAM_API_KEY = shellKey; // Also set for child processes
1121
+ if (configSource !== "default") {
1122
+ saveConfig(config, config.scope, currentCwd);
1123
+ }
1124
+ }
1125
+ } catch {}
1126
+ }
1127
+
710
1128
  if (config.enabled && config.onboarding.completed) {
711
1129
  updateVoiceStatus();
712
1130
  setupHoldToTalk();
713
- ensureDaemon(config).catch(() => {});
1131
+ // Only start daemon for non-streaming backends
1132
+ if (!isDeepgramStreaming(config)) {
1133
+ ensureDaemon(config).catch(() => {});
1134
+ }
714
1135
  }
715
1136
  });
716
1137
 
@@ -764,8 +1185,11 @@ export default function (pi: ExtensionAPI) {
764
1185
  config.enabled = true;
765
1186
  updateVoiceStatus();
766
1187
  setupHoldToTalk();
767
- ensureDaemon(config).catch(() => {});
768
- cmdCtx.ui.notify("Voice enabled.\n Hold SPACE (empty editor) release to transcribe\n Ctrl+Shift+V → toggle recording on/off\n Auto-stops after 30s", "info");
1188
+ if (!isDeepgramStreaming(config)) {
1189
+ ensureDaemon(config).catch(() => {});
1190
+ }
1191
+ const mode = isDeepgramStreaming(config) ? "Deepgram streaming" : config.backend;
1192
+ cmdCtx.ui.notify(`Voice enabled (${mode}).\n Hold SPACE (empty editor) → release to transcribe\n Ctrl+Shift+V → toggle recording on/off\n Live transcription shown while speaking`, "info");
769
1193
  return;
770
1194
  }
771
1195
 
@@ -779,7 +1203,6 @@ export default function (pi: ExtensionAPI) {
779
1203
  }
780
1204
 
781
1205
  if (sub === "stop") {
782
- // Emergency stop — cancel any active recording
783
1206
  if (voiceState === "recording") {
784
1207
  isHolding = false;
785
1208
  await stopVoiceRecording("editor");
@@ -793,6 +1216,8 @@ export default function (pi: ExtensionAPI) {
793
1216
  if (sub === "test") {
794
1217
  cmdCtx.ui.notify("Testing voice setup...", "info");
795
1218
  const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
1219
+ const dgKey = resolveDeepgramApiKey(config);
1220
+ const streaming = isDeepgramStreaming(config);
796
1221
  const daemonUp = await isDaemonRunning();
797
1222
  const provisioningPlan = buildProvisioningPlan(config, diagnostics);
798
1223
  const selectedBackend = diagnostics.backends.find((backend) => backend.name === config.backend);
@@ -805,6 +1230,8 @@ export default function (pi: ExtensionAPI) {
805
1230
  ` model: ${config.model}`,
806
1231
  ` model status: ${modelReadiness}`,
807
1232
  ` language: ${config.language}`,
1233
+ ` streaming: ${streaming ? "YES (Deepgram WS)" : "NO (batch)"}`,
1234
+ ` DEEPGRAM_API_KEY: ${dgKey ? "set (" + dgKey.slice(0, 8) + "...)" : "NOT SET"}`,
808
1235
  ` onboarding: ${config.onboarding.completed ? "complete" : "incomplete"}`,
809
1236
  ` python3: ${diagnostics.hasPython ? "OK" : "missing"}`,
810
1237
  ` sox/rec: ${diagnostics.hasSox ? "OK" : "missing"}`,
@@ -826,11 +1253,10 @@ export default function (pi: ExtensionAPI) {
826
1253
  }
827
1254
  }
828
1255
 
829
- lines.push("", "Suggested commands:");
830
- lines.push(...(provisioningPlan.commands.length > 0 ? provisioningPlan.commands.map((command) => ` - ${command}`) : [" - none"]));
831
- if (provisioningPlan.manualSteps.length > 0) {
832
- lines.push("", "Manual steps:");
833
- lines.push(...provisioningPlan.manualSteps.map((step) => ` - ${step}`));
1256
+ if (!dgKey && config.backend === "deepgram") {
1257
+ lines.push("");
1258
+ lines.push("⚠️ DEEPGRAM_API_KEY not set! Add to ~/.zshrc or ~/.env.secrets");
1259
+ lines.push(" export DEEPGRAM_API_KEY=your_key_here");
834
1260
  }
835
1261
 
836
1262
  cmdCtx.ui.notify(lines.join("\n"), provisioningPlan.ready ? "info" : "warning");
@@ -847,22 +1273,24 @@ export default function (pi: ExtensionAPI) {
847
1273
  const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
848
1274
  const selectedBackend = diagnostics.backends.find((backend) => backend.name === config.backend);
849
1275
  const modelReadiness = getModelReadiness(selectedBackend, config.model);
1276
+ const streaming = isDeepgramStreaming(config);
850
1277
 
851
1278
  cmdCtx.ui.notify([
852
1279
  `Voice config:`,
853
- ` enabled: ${config.enabled}`,
854
- ` mode: ${config.mode}`,
855
- ` scope: ${config.scope}`,
856
- ` backend: ${config.backend}`,
857
- ` model: ${config.model}`,
858
- ` model status: ${modelReadiness}`,
859
- ` language: ${config.language}`,
860
- ` state: ${voiceState}`,
861
- ` setup: ${config.onboarding.completed ? `complete (${config.onboarding.source ?? "unknown"})` : "incomplete"}`,
862
- ` socket: ${activeSocketPath}`,
863
- ` daemon: ${daemonUp ? "running" : "stopped"}${daemonInfo}`,
864
- ` hold-key: SPACE (editor empty) or Ctrl+Shift+V (toggle)`,
865
- ` btw-key: Ctrl+Shift+B (hold to record → auto-btw)`,
1280
+ ` enabled: ${config.enabled}`,
1281
+ ` mode: ${config.mode}`,
1282
+ ` scope: ${config.scope}`,
1283
+ ` backend: ${config.backend}`,
1284
+ ` model: ${config.model}`,
1285
+ ` model stat: ${modelReadiness}`,
1286
+ ` language: ${config.language}`,
1287
+ ` streaming: ${streaming ? "YES (Deepgram WebSocket)" : "NO (batch)"}`,
1288
+ ` state: ${voiceState}`,
1289
+ ` setup: ${config.onboarding.completed ? `complete (${config.onboarding.source ?? "unknown"})` : "incomplete"}`,
1290
+ ` socket: ${activeSocketPath}`,
1291
+ ` daemon: ${daemonUp ? "running" : "stopped"}${daemonInfo}`,
1292
+ ` hold-key: SPACE (editor empty) or Ctrl+Shift+V (toggle)`,
1293
+ ` btw-key: Ctrl+Shift+B (hold to record → auto-btw)`,
866
1294
  ].join("\n"), "info");
867
1295
  return;
868
1296
  }
@@ -905,7 +1333,6 @@ export default function (pi: ExtensionAPI) {
905
1333
  cmdCtx.ui.notify("Voice setup cancelled.", "warning");
906
1334
  return;
907
1335
  }
908
-
909
1336
  await finalizeAndSaveSetup(cmdCtx, result.config, result.selectedScope, result.summaryLines, "setup-command");
910
1337
  return;
911
1338
  }
@@ -1013,7 +1440,7 @@ export default function (pi: ExtensionAPI) {
1013
1440
  },
1014
1441
  });
1015
1442
 
1016
- // ─── Dedicated setup command (discoverable in /command list) ──────────────
1443
+ // ─── Dedicated setup command ─────────────────────────────────────────────
1017
1444
 
1018
1445
  pi.registerCommand("voice-setup", {
1019
1446
  description: "Configure voice input — select backend, model, and language",
@@ -1081,7 +1508,6 @@ export default function (pi: ExtensionAPI) {
1081
1508
 
1082
1509
  pi.sendUserMessage(content, { deliverAs: "followUp" });
1083
1510
 
1084
- // Clear after injection
1085
1511
  btwThread = [];
1086
1512
  btwWidgetVisible = false;
1087
1513
  cmdCtx.ui.setWidget("btw", undefined);
@@ -1106,7 +1532,6 @@ export default function (pi: ExtensionAPI) {
1106
1532
  threadText += `Q: ${ex.question}\nA: ${ex.answer}\n\n`;
1107
1533
  }
1108
1534
 
1109
- // Ask the model to summarize
1110
1535
  const model = ctx.model;
1111
1536
  if (!model) {
1112
1537
  cmdCtx.ui.notify("No model available for summarization.", "error");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codexstar/pi-listen",
3
- "version": "1.0.12",
3
+ "version": "1.0.13",
4
4
  "description": "Voice input, first-run onboarding, and side-channel BTW conversations for Pi",
5
5
  "type": "module",
6
6
  "keywords": [