@codexstar/pi-listen 1.0.11 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,8 @@ export interface VoiceConfig {
31
31
  scope: VoiceSettingsScope;
32
32
  btwEnabled: boolean;
33
33
  onboarding: VoiceOnboardingState;
34
+ /** Deepgram API key — stored in config so it's available even when env var isn't set */
35
+ deepgramApiKey?: string;
34
36
  }
35
37
 
36
38
  export interface LoadedVoiceConfig {
@@ -60,6 +62,7 @@ export const DEFAULT_CONFIG: VoiceConfig = {
60
62
  model: "small",
61
63
  scope: "global",
62
64
  btwEnabled: true,
65
+ deepgramApiKey: undefined,
63
66
  onboarding: {
64
67
  completed: false,
65
68
  schemaVersion: VOICE_CONFIG_VERSION,
@@ -121,6 +124,7 @@ function migrateConfig(rawVoice: any, source: VoiceConfigSource): VoiceConfig {
121
124
  model: typeof rawVoice.model === "string" ? rawVoice.model : DEFAULT_CONFIG.model,
122
125
  scope: (rawVoice.scope as VoiceSettingsScope | undefined) ?? (source === "project" ? "project" : "global"),
123
126
  btwEnabled: typeof rawVoice.btwEnabled === "boolean" ? rawVoice.btwEnabled : DEFAULT_CONFIG.btwEnabled,
127
+ deepgramApiKey: typeof rawVoice.deepgramApiKey === "string" ? rawVoice.deepgramApiKey : undefined,
124
128
  onboarding: normalizeOnboarding(rawVoice.onboarding, fallbackCompleted),
125
129
  };
126
130
  }
@@ -1,22 +1,27 @@
1
1
  /**
2
- * pi-voice — Voice input + BTW side conversations for Pi CLI.
2
+ * pi-voice — Deepgram WebSocket streaming STT for Pi CLI.
3
3
  *
4
- * Features:
5
- * 1. Hold-spacebar to talk (Kitty protocol key release detection)
6
- * Fallback: Ctrl+Shift+V toggle for non-Kitty terminals
7
- * 2. BTW side conversations (/btw <msg>, /btw:new, /btw:clear, /btw:inject, /btw:summarize)
8
- * 3. Voice BTW glue: Ctrl+Shift+B = hold to record → auto-send as /btw
4
+ * Architecture (modeled after Claude Code's voice pipeline):
5
+ * 1. SoX `rec` captures mic audio as raw PCM (16kHz, mono, 16-bit)
6
+ * and pipes it to stdout (no file).
7
+ * 2. Raw PCM chunks are streamed over a WebSocket to Deepgram Nova 3.
8
+ * 3. Deepgram returns interim + final transcripts in real-time.
9
+ * 4. Interim transcripts update a live widget above the editor.
10
+ * 5. On key-release (or toggle stop), a CloseStream message is sent;
11
+ * final transcript is injected into the editor.
9
12
  *
10
- * Records audio via SoX, transcribes via persistent daemon (daemon.py) or fallback subprocess.
11
- * STT backends: faster-whisper, moonshine, whisper.cpp, deepgram, parakeet.
13
+ * Activation:
14
+ * - Hold SPACE (empty editor) → release to finalize
15
+ * - Ctrl+Shift+V → toggle start/stop (fallback for non-Kitty terminals)
16
+ * - Ctrl+Shift+B → hold to record → auto-send as /btw
12
17
  *
13
- * Config in ~/.pi/agent/settings.json or <project>/.pi/settings.json:
18
+ * Config in ~/.pi/agent/settings.json:
14
19
  * {
15
20
  * "voice": {
16
21
  * "enabled": true,
17
22
  * "language": "en",
18
- * "backend": "faster-whisper",
19
- * "model": "small"
23
+ * "backend": "deepgram",
24
+ * "model": "nova-3"
20
25
  * }
21
26
  * }
22
27
  */
@@ -65,6 +70,14 @@ interface BtwExchange {
65
70
  // ─── Constants ───────────────────────────────────────────────────────────────
66
71
 
67
72
  const SAMPLE_RATE = 16000;
73
+ const CHANNELS = 1;
74
+ const ENCODING = "linear16";
75
+ const DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
76
+ const KEEPALIVE_INTERVAL_MS = 8000;
77
+ const FINALIZE_SAFETY_TIMEOUT_MS = 5000;
78
+ const FINALIZE_NO_DATA_TIMEOUT_MS = 1500;
79
+ const MAX_RECORDING_SECS = 120; // 2 minutes safety cap (streaming is efficient)
80
+
68
81
  const EXT_DIR = path.dirname(new URL(import.meta.url).pathname);
69
82
  const PROJECT_ROOT = path.join(EXT_DIR, "..");
70
83
  const DAEMON_SCRIPT = path.join(PROJECT_ROOT, "daemon.py");
@@ -74,7 +87,7 @@ function commandExists(cmd: string): boolean {
74
87
  return spawnSync("which", [cmd], { stdio: "pipe", timeout: 3000 }).status === 0;
75
88
  }
76
89
 
77
- // ─── Daemon Communication ────────────────────────────────────────────────────
90
+ // ─── Daemon Communication (kept for non-deepgram local backends) ─────────────
78
91
 
79
92
  let activeSocketPath = getSocketPath({
80
93
  scope: DEFAULT_CONFIG.scope,
@@ -135,8 +148,6 @@ async function isDaemonRunning(socketPath = activeSocketPath): Promise<boolean>
135
148
  async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
136
149
  if (await isDaemonRunning(activeSocketPath)) {
137
150
  const status = await daemonSend({ cmd: "status" }, 3000, activeSocketPath);
138
- // When backend is 'auto', accept any loaded backend — the daemon already
139
- // resolved 'auto' to a concrete backend, so we don't need to reload.
140
151
  if (config.backend === "auto" || (status.backend === config.backend && status.model === config.model)) return true;
141
152
  const reloaded = await daemonSend({
142
153
  cmd: "load",
@@ -175,7 +186,6 @@ async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
175
186
 
176
187
  proc.on("error", () => resolve(false));
177
188
 
178
- // Timeout: if daemon doesn't start in 10s, kill orphan and fall back
179
189
  setTimeout(() => {
180
190
  if (!started) {
181
191
  try { proc.kill(); } catch {}
@@ -185,46 +195,40 @@ async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
185
195
  });
186
196
  }
187
197
 
188
- // ─── Audio Recording ─────────────────────────────────────────────────────────
198
+ // ─── Legacy file-based transcription (for non-deepgram backends) ─────────────
189
199
 
190
- let recProcess: ChildProcess | null = null;
200
+ let legacyRecProcess: ChildProcess | null = null;
191
201
 
192
- function startRecordingToFile(outPath: string): boolean {
193
- if (recProcess) {
194
- recProcess.kill("SIGTERM");
195
- recProcess = null;
202
+ function startLegacyRecordingToFile(outPath: string): boolean {
203
+ if (legacyRecProcess) {
204
+ legacyRecProcess.kill("SIGTERM");
205
+ legacyRecProcess = null;
196
206
  }
197
-
198
207
  if (!commandExists("rec")) return false;
199
-
200
- recProcess = spawn("rec", [
208
+ legacyRecProcess = spawn("rec", [
201
209
  "-q", "-r", String(SAMPLE_RATE), "-c", "1", "-b", "16", outPath,
202
210
  ], { stdio: ["pipe", "pipe", "pipe"] });
203
-
204
- recProcess.stderr?.on("data", () => {});
205
- recProcess.on("error", () => { recProcess = null; });
211
+ legacyRecProcess.stderr?.on("data", () => {});
212
+ legacyRecProcess.on("error", () => { legacyRecProcess = null; });
206
213
  return true;
207
214
  }
208
215
 
209
- function stopRecording(): Promise<void> {
216
+ function stopLegacyRecording(): Promise<void> {
210
217
  return new Promise((resolve) => {
211
- if (!recProcess) { resolve(); return; }
212
- recProcess.on("close", () => { recProcess = null; resolve(); });
213
- recProcess.kill("SIGTERM");
218
+ if (!legacyRecProcess) { resolve(); return; }
219
+ legacyRecProcess.on("close", () => { legacyRecProcess = null; resolve(); });
220
+ legacyRecProcess.kill("SIGTERM");
214
221
  setTimeout(() => {
215
- if (recProcess) { recProcess.kill("SIGKILL"); recProcess = null; }
222
+ if (legacyRecProcess) { legacyRecProcess.kill("SIGKILL"); legacyRecProcess = null; }
216
223
  resolve();
217
224
  }, 2000);
218
225
  });
219
226
  }
220
227
 
221
- // ─── Transcription (daemon or fallback) ──────────────────────────────────────
222
-
223
- async function transcribeAudio(
228
+ async function transcribeAudioFile(
224
229
  audioPath: string,
225
230
  config: VoiceConfig,
226
231
  ): Promise<{ text: string; duration: number; error?: string }> {
227
- // Try daemon first
228
232
  if (await isDaemonRunning()) {
229
233
  const resp = await daemonSend({
230
234
  cmd: "transcribe",
@@ -238,13 +242,10 @@ async function transcribeAudio(
238
242
  return resp as { text: string; duration: number };
239
243
  }
240
244
  }
241
-
242
- // Fallback: direct subprocess
243
245
  return new Promise((resolve) => {
244
246
  const args = [TRANSCRIBE_SCRIPT, "--language", config.language, audioPath];
245
247
  if (config.backend !== "auto") args.splice(1, 0, "--backend", config.backend);
246
248
  if (config.model) args.splice(1, 0, "--model", config.model);
247
-
248
249
  const proc = spawn("python3", args, { stdio: ["pipe", "pipe", "pipe"] });
249
250
  let stdout = "";
250
251
  let stderr = "";
@@ -258,6 +259,250 @@ async function transcribeAudio(
258
259
  });
259
260
  }
260
261
 
262
+ // ─── Deepgram WebSocket Streaming ────────────────────────────────────────────
263
+
264
+ interface StreamingSession {
265
+ ws: WebSocket;
266
+ recProcess: ChildProcess;
267
+ interimText: string; // Current interim (partial) transcript
268
+ finalizedParts: string[]; // All finalized transcript segments
269
+ keepAliveTimer: ReturnType<typeof setInterval> | null;
270
+ closed: boolean;
271
+ onTranscript: (interim: string, finals: string[]) => void;
272
+ onDone: (fullText: string) => void;
273
+ onError: (err: string) => void;
274
+ }
275
+
276
+ function getDeepgramApiKey(): string | null {
277
+ // Env var only — resolveDeepgramApiKey(config) additionally falls back to config.deepgramApiKey
278
+ return process.env.DEEPGRAM_API_KEY || null;
279
+ }
280
+
281
+ /**
282
+ * Resolve the Deepgram API key from all sources:
283
+ * 1. process.env.DEEPGRAM_API_KEY (shell)
284
+ * 2. config.deepgramApiKey (settings.json, persisted at setup time)
285
+ */
286
+ function resolveDeepgramApiKey(config: VoiceConfig): string | null {
287
+ return process.env.DEEPGRAM_API_KEY || config.deepgramApiKey || null;
288
+ }
289
+
290
+ function isDeepgramStreaming(config: VoiceConfig): boolean {
291
+ const key = resolveDeepgramApiKey(config);
292
+ if (!key) return false;
293
+ // Use streaming for deepgram backend, or auto mode when deepgram key is available
294
+ return config.backend === "deepgram" || (config.backend === "auto" && !!key);
295
+ }
296
+
297
+ function buildDeepgramWsUrl(config: VoiceConfig): string {
298
+ const params = new URLSearchParams({
299
+ encoding: ENCODING,
300
+ sample_rate: String(SAMPLE_RATE),
301
+ channels: String(CHANNELS),
302
+ endpointing: "300", // ms of silence before phrase boundary
303
+ utterance_end_ms: "1000", // ms of silence before utterance is complete
304
+ language: config.language || "en",
305
+ model: config.model || "nova-3",
306
+ smart_format: "true",
307
+ interim_results: "true",
308
+ });
309
+ return `${DEEPGRAM_WS_URL}?${params.toString()}`;
310
+ }
311
+
312
+ function startStreamingSession(
313
+ config: VoiceConfig,
314
+ callbacks: {
315
+ onTranscript: (interim: string, finals: string[]) => void;
316
+ onDone: (fullText: string) => void;
317
+ onError: (err: string) => void;
318
+ },
319
+ ): StreamingSession | null {
320
+ const apiKey = resolveDeepgramApiKey(config);
321
+ if (!apiKey) {
322
+ callbacks.onError("DEEPGRAM_API_KEY not set");
323
+ return null;
324
+ }
325
+
326
+ if (!commandExists("rec")) {
327
+ callbacks.onError("Voice requires SoX. Install: brew install sox");
328
+ return null;
329
+ }
330
+
331
+ // Start SoX streaming raw PCM to stdout (no file)
332
+ const recProc = spawn("rec", [
333
+ "-q",
334
+ "-r", String(SAMPLE_RATE),
335
+ "-c", String(CHANNELS),
336
+ "-b", "16",
337
+ "-e", "signed-integer",
338
+ "-t", "raw",
339
+ "-", // output to stdout
340
+ ], { stdio: ["pipe", "pipe", "pipe"] });
341
+
342
+ recProc.stderr?.on("data", () => {}); // suppress SoX warnings
343
+
344
+ // Connect WebSocket to Deepgram
345
+ const wsUrl = buildDeepgramWsUrl(config);
346
+ const ws = new WebSocket(wsUrl, {
347
+ headers: {
348
+ "Authorization": `Token ${apiKey}`,
349
+ },
350
+ } as any);
351
+
352
+ const session: StreamingSession = {
353
+ ws,
354
+ recProcess: recProc,
355
+ interimText: "",
356
+ finalizedParts: [],
357
+ keepAliveTimer: null,
358
+ closed: false,
359
+ onTranscript: callbacks.onTranscript,
360
+ onDone: callbacks.onDone,
361
+ onError: callbacks.onError,
362
+ };
363
+
364
+ ws.onopen = () => {
365
+ // Send initial KeepAlive
366
+ try { ws.send(JSON.stringify({ type: "KeepAlive" })); } catch {}
367
+
368
+ // Start keepalive timer
369
+ session.keepAliveTimer = setInterval(() => {
370
+ if (ws.readyState === WebSocket.OPEN) {
371
+ try { ws.send(JSON.stringify({ type: "KeepAlive" })); } catch {}
372
+ }
373
+ }, KEEPALIVE_INTERVAL_MS);
374
+
375
+ // Pipe SoX stdout → WebSocket as binary frames
376
+ recProc.stdout?.on("data", (chunk: Buffer) => {
377
+ if (ws.readyState === WebSocket.OPEN) {
378
+ try { ws.send(chunk); } catch {}
379
+ }
380
+ });
381
+ };
382
+
383
+ ws.onmessage = (event: MessageEvent) => {
384
+ try {
385
+ const msg = typeof event.data === "string" ? JSON.parse(event.data) : null;
386
+ if (!msg) return;
387
+
388
+ if (msg.type === "Results") {
389
+ const alt = msg.channel?.alternatives?.[0];
390
+ const transcript = alt?.transcript || "";
391
+
392
+ if (msg.is_final) {
393
+ // Final result for this audio segment
394
+ if (transcript.trim()) {
395
+ session.finalizedParts.push(transcript.trim());
396
+ }
397
+ session.interimText = "";
398
+ } else {
399
+ // Interim result — live update
400
+ session.interimText = transcript;
401
+ }
402
+
403
+ session.onTranscript(session.interimText, session.finalizedParts);
404
+
405
+ // If speech_final is true, it's the end of an utterance
406
+ // (similar to TranscriptEndpoint in Claude Code's protocol)
407
+ if (msg.speech_final && transcript.trim()) {
408
+ // Already added to finalizedParts above when is_final was true
409
+ }
410
+ } else if (msg.type === "Metadata") {
411
+ // Connection metadata — ignore
412
+ } else if (msg.type === "UtteranceEnd") {
413
+ // Utterance boundary — Deepgram detected end of speech
414
+ // Nothing extra needed, is_final already handles finalization
415
+ } else if (msg.type === "Error" || msg.type === "error") {
416
+ session.onError(msg.message || msg.description || "Deepgram error");
417
+ }
418
+ } catch (e: any) {
419
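+ // Ignore malformed JSON in text frames (binary frames are already skipped above); NOTE: this also swallows errors thrown by the callbacks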
420
+ }
421
+ };
422
+
423
+ ws.onerror = (event: Event) => {
424
+ if (!session.closed) {
425
+ session.onError("WebSocket connection error");
426
+ }
427
+ };
428
+
429
+ ws.onclose = () => {
430
+ if (!session.closed) {
431
+ finalizeSession(session);
432
+ }
433
+ };
434
+
435
+ recProc.on("error", (err) => {
436
+ session.onError(`SoX error: ${err.message}`);
437
+ });
438
+
439
+ recProc.on("close", () => {
440
+ // SoX stopped — send CloseStream to Deepgram
441
+ if (ws.readyState === WebSocket.OPEN) {
442
+ try { ws.send(JSON.stringify({ type: "CloseStream" })); } catch {}
443
+ }
444
+ });
445
+
446
+ return session;
447
+ }
448
+
449
+ function stopStreamingSession(session: StreamingSession): void {
450
+ if (session.closed) return;
451
+
452
+ // Stop the microphone
453
+ try { session.recProcess.kill("SIGTERM"); } catch {}
454
+
455
+ // CloseStream tells Deepgram to flush remaining audio
456
+ if (session.ws.readyState === WebSocket.OPEN) {
457
+ try { session.ws.send(JSON.stringify({ type: "CloseStream" })); } catch {}
458
+ }
459
+
460
+ // Safety: finalize after timeout even if Deepgram doesn't respond
461
+ setTimeout(() => {
462
+ if (!session.closed) {
463
+ finalizeSession(session);
464
+ }
465
+ }, FINALIZE_SAFETY_TIMEOUT_MS);
466
+
467
+ // Shorter timeout: if no new data arrives for 1.5s, assume done
468
+ let lastDataTime = Date.now();
469
+ const origOnMessage = session.ws.onmessage;
470
+ session.ws.onmessage = (event: MessageEvent) => {
471
+ lastDataTime = Date.now();
472
+ if (origOnMessage) origOnMessage.call(session.ws, event);
473
+ };
474
+
475
+ const noDataCheck = setInterval(() => {
476
+ if (Date.now() - lastDataTime > FINALIZE_NO_DATA_TIMEOUT_MS) {
477
+ clearInterval(noDataCheck);
478
+ if (!session.closed) {
479
+ finalizeSession(session);
480
+ }
481
+ }
482
+ }, 500);
483
+ }
484
+
485
+ function finalizeSession(session: StreamingSession): void {
486
+ if (session.closed) return;
487
+ session.closed = true;
488
+
489
+ // Clean up keepalive
490
+ if (session.keepAliveTimer) {
491
+ clearInterval(session.keepAliveTimer);
492
+ session.keepAliveTimer = null;
493
+ }
494
+
495
+ // Close WebSocket
496
+ try { session.ws.close(); } catch {}
497
+
498
+ // Kill SoX if still running
499
+ try { session.recProcess.kill("SIGKILL"); } catch {}
500
+
501
+ // Deliver final transcript
502
+ const fullText = session.finalizedParts.join(" ").trim();
503
+ session.onDone(fullText);
504
+ }
505
+
261
506
  // ─── Extension ───────────────────────────────────────────────────────────────
262
507
 
263
508
  export default function (pi: ExtensionAPI) {
@@ -272,6 +517,10 @@ export default function (pi: ExtensionAPI) {
272
517
  let terminalInputUnsub: (() => void) | null = null;
273
518
  let isHolding = false;
274
519
 
520
+ // Streaming session state
521
+ let activeSession: StreamingSession | null = null;
522
+ let currentTarget: "editor" | "btw" = "editor";
523
+
275
524
  // ─── BTW State ───────────────────────────────────────────────────────────
276
525
 
277
526
  let btwThread: BtwExchange[] = [];
@@ -289,17 +538,19 @@ export default function (pi: ExtensionAPI) {
289
538
  }
290
539
  const modeTag = !config.onboarding.completed
291
540
  ? "SETUP"
292
- : config.mode === "api"
293
- ? "API"
294
- : config.mode === "local"
295
- ? "LOCAL"
296
- : "AUTO";
541
+ : isDeepgramStreaming(config)
542
+ ? "STREAM"
543
+ : config.mode === "api"
544
+ ? "API"
545
+ : config.mode === "local"
546
+ ? "LOCAL"
547
+ : "AUTO";
297
548
  ctx.ui.setStatus("voice", `MIC ${modeTag}`);
298
549
  break;
299
550
  }
300
551
  case "recording": {
301
552
  const secs = Math.round((Date.now() - recordingStart) / 1000);
302
- ctx.ui.setStatus("voice", `REC ${secs}s`);
553
+ ctx.ui.setStatus("voice", `🔴 REC ${secs}s`);
303
554
  break;
304
555
  }
305
556
  case "transcribing":
@@ -315,7 +566,11 @@ export default function (pi: ExtensionAPI) {
315
566
 
316
567
  function voiceCleanup() {
317
568
  if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
318
- if (recProcess) { recProcess.kill("SIGTERM"); recProcess = null; }
569
+ if (activeSession) {
570
+ finalizeSession(activeSession);
571
+ activeSession = null;
572
+ }
573
+ if (legacyRecProcess) { legacyRecProcess.kill("SIGTERM"); legacyRecProcess = null; }
319
574
  if (tempFile) { try { fs.unlinkSync(tempFile); } catch {} tempFile = null; }
320
575
  isHolding = false;
321
576
  setVoiceState("idle");
@@ -332,7 +587,7 @@ export default function (pi: ExtensionAPI) {
332
587
  const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
333
588
  const provisioningPlan = buildProvisioningPlan(nextConfig, diagnostics);
334
589
  let validated = provisioningPlan.ready;
335
- if (validated && nextConfig.enabled) {
590
+ if (validated && nextConfig.enabled && !isDeepgramStreaming(nextConfig)) {
336
591
  validated = await ensureDaemon(nextConfig);
337
592
  }
338
593
 
@@ -349,43 +604,173 @@ export default function (pi: ExtensionAPI) {
349
604
  ].join("\n"), validated ? "info" : "warning");
350
605
  }
351
606
 
352
- // ─── Voice: Start / Stop / Transcribe ────────────────────────────────────
607
+ // ─── Live Transcript Widget ──────────────────────────────────────────────
353
608
 
354
- async function startVoiceRecording(target: "editor" | "btw" = "editor"): Promise<boolean> {
355
- if (voiceState !== "idle" || !ctx) return false;
609
+ function updateLiveTranscriptWidget(interim: string, finals: string[]) {
610
+ if (!ctx?.hasUI) return;
611
+
612
+ const finalized = finals.join(" ");
613
+ const displayText = finalized + (interim ? (finalized ? " " : "") + interim : "");
356
614
 
357
- tempFile = path.join(os.tmpdir(), `pi-voice-${Date.now()}.wav`);
358
- if (!startRecordingToFile(tempFile)) {
359
- ctx.ui.notify("Voice requires SoX. Install: brew install sox", "error");
360
- return false;
615
+ if (!displayText.trim()) {
616
+ ctx.ui.setWidget("voice-recording", [
617
+ " 🎙 Listening... (speak now)",
618
+ ], { placement: "aboveEditor" });
619
+ return;
620
+ }
621
+
622
+ // Word-wrap the live transcript at ~70 columns; only the last 4 lines are displayed
623
+ const words = displayText.split(" ");
624
+ const lines: string[] = [];
625
+ let currentLine = " 🎙 ";
626
+ const maxLineLen = 70;
627
+
628
+ for (const word of words) {
629
+ if ((currentLine + word).length > maxLineLen) {
630
+ lines.push(currentLine);
631
+ currentLine = " " + word + " ";
632
+ } else {
633
+ currentLine += word + " ";
634
+ }
635
+ }
636
+ if (currentLine.trim()) lines.push(currentLine);
637
+
638
+ // Keep only last 4 lines to avoid widget overflow
639
+ const visibleLines = lines.slice(-4);
640
+ if (interim) {
641
+ // Append a static cursor glyph to mark text that is still interim (not yet final)
642
+ const lastIdx = visibleLines.length - 1;
643
+ visibleLines[lastIdx] = visibleLines[lastIdx].trimEnd() + "▍";
361
644
  }
362
645
 
646
+ ctx.ui.setWidget("voice-recording", visibleLines, { placement: "aboveEditor" });
647
+ }
648
+
649
+ // ─── Voice: Start / Stop (Streaming or Legacy) ───────────────────────────
650
+
651
+ async function startVoiceRecording(target: "editor" | "btw" = "editor"): Promise<boolean> {
652
+ if (voiceState !== "idle" || !ctx) return false;
653
+
654
+ currentTarget = target;
363
655
  recordingStart = Date.now();
364
- setVoiceState("recording");
365
- statusTimer = setInterval(() => {
366
- if (voiceState === "recording") updateVoiceStatus();
367
- }, 1000);
368
656
 
369
- if (ctx.hasUI) {
370
- ctx.ui.setWidget("voice-recording", [
371
- target === "btw"
372
- ? " BTW Recording... release to send"
373
- : " Recording... release to transcribe",
374
- ], { placement: "aboveEditor" });
657
+ if (isDeepgramStreaming(config)) {
658
+ // === STREAMING PATH === (Deepgram WebSocket)
659
+ setVoiceState("recording");
660
+
661
+ const session = startStreamingSession(config, {
662
+ onTranscript: (interim, finals) => {
663
+ updateLiveTranscriptWidget(interim, finals);
664
+ updateVoiceStatus();
665
+ },
666
+ onDone: (fullText) => {
667
+ activeSession = null;
668
+ ctx?.ui.setWidget("voice-recording", undefined);
669
+
670
+ if (!fullText.trim()) {
671
+ ctx?.ui.notify("No speech detected.", "warning");
672
+ setVoiceState("idle");
673
+ return;
674
+ }
675
+
676
+ if (target === "btw") {
677
+ handleBtw(fullText);
678
+ } else {
679
+ if (ctx?.hasUI) {
680
+ const existing = ctx.ui.getEditorText();
681
+ ctx.ui.setEditorText(existing ? existing + " " + fullText : fullText);
682
+ const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
683
+ ctx.ui.notify(
684
+ `STT (${elapsed}s): ${fullText.slice(0, 80)}${fullText.length > 80 ? "..." : ""}`,
685
+ "info",
686
+ );
687
+ }
688
+ }
689
+ setVoiceState("idle");
690
+ },
691
+ onError: (err) => {
692
+ activeSession = null;
693
+ ctx?.ui.setWidget("voice-recording", undefined);
694
+ ctx?.ui.notify(`STT error: ${err}`, "error");
695
+ setVoiceState("idle");
696
+ },
697
+ });
698
+
699
+ if (!session) {
700
+ setVoiceState("idle");
701
+ return false;
702
+ }
703
+
704
+ activeSession = session;
705
+
706
+ // Status timer: refreshes the elapsed-time display and enforces the MAX_RECORDING_SECS cap
707
+ statusTimer = setInterval(() => {
708
+ if (voiceState === "recording") {
709
+ updateVoiceStatus();
710
+ const elapsed = (Date.now() - recordingStart) / 1000;
711
+ if (elapsed >= MAX_RECORDING_SECS) {
712
+ isHolding = false;
713
+ stopVoiceRecording(target);
714
+ }
715
+ }
716
+ }, 1000);
717
+
718
+ if (ctx.hasUI) {
719
+ ctx.ui.setWidget("voice-recording", [
720
+ " 🎙 Listening... speak now — press SPACE again to stop",
721
+ ], { placement: "aboveEditor" });
722
+ }
723
+ return true;
724
+
725
+ } else {
726
+ // === LEGACY PATH === (file-based for local backends)
727
+ tempFile = path.join(os.tmpdir(), `pi-voice-${Date.now()}.wav`);
728
+ if (!startLegacyRecordingToFile(tempFile)) {
729
+ ctx.ui.notify("Voice requires SoX. Install: brew install sox", "error");
730
+ return false;
731
+ }
732
+
733
+ setVoiceState("recording");
734
+ statusTimer = setInterval(() => {
735
+ if (voiceState === "recording") {
736
+ updateVoiceStatus();
737
+ const elapsed = (Date.now() - recordingStart) / 1000;
738
+ if (elapsed >= MAX_RECORDING_SECS) {
739
+ isHolding = false;
740
+ stopVoiceRecording(target);
741
+ }
742
+ }
743
+ }, 1000);
744
+
745
+ if (ctx.hasUI) {
746
+ ctx.ui.setWidget("voice-recording", [
747
+ target === "btw"
748
+ ? " 🎙 BTW Recording... Ctrl+Shift+V to stop"
749
+ : " 🎙 Recording... Ctrl+Shift+V to stop (or release SPACE)",
750
+ ], { placement: "aboveEditor" });
751
+ }
752
+ return true;
375
753
  }
376
- return true;
377
754
  }
378
755
 
379
756
  async function stopVoiceRecording(target: "editor" | "btw" = "editor") {
380
757
  if (voiceState !== "recording" || !ctx) return;
381
758
  if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
382
759
 
760
+ if (activeSession) {
761
+ // === STREAMING PATH === Stop the stream, finalize will call onDone
762
+ setVoiceState("transcribing");
763
+ stopStreamingSession(activeSession);
764
+ return;
765
+ }
766
+
767
+ // === LEGACY PATH ===
383
768
  const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
384
- const audioFile = tempFile; // capture before cleanup can null it
769
+ const audioFile = tempFile;
385
770
  setVoiceState("transcribing");
386
771
  ctx.ui.setWidget("voice-recording", undefined);
387
772
 
388
- await stopRecording();
773
+ await stopLegacyRecording();
389
774
 
390
775
  if (!audioFile || !fs.existsSync(audioFile)) {
391
776
  ctx.ui.notify("No audio recorded.", "warning");
@@ -402,12 +787,9 @@ export default function (pi: ExtensionAPI) {
402
787
  return;
403
788
  }
404
789
 
405
- // Ensure daemon is up before transcribing — await so the warm path
406
- // is available for this request instead of falling through to the
407
- // cold subprocess fallback.
408
790
  await ensureDaemon(config).catch(() => {});
409
791
 
410
- const result = await transcribeAudio(audioFile, config);
792
+ const result = await transcribeAudioFile(audioFile, config);
411
793
  try { fs.unlinkSync(audioFile); } catch {}
412
794
  if (tempFile === audioFile) tempFile = null;
413
795
 
@@ -427,7 +809,6 @@ export default function (pi: ExtensionAPI) {
427
809
  if (target === "btw") {
428
810
  await handleBtw(transcript);
429
811
  } else {
430
- // Inject into editor
431
812
  if (ctx.hasUI) {
432
813
  const existing = ctx.ui.getEditorText();
433
814
  ctx.ui.setEditorText(existing ? existing + " " + transcript : transcript);
@@ -441,25 +822,38 @@ export default function (pi: ExtensionAPI) {
441
822
  setVoiceState("idle");
442
823
  }
443
824
 
444
- // ─── Hold-to-talk via Kitty protocol ─────────────────────────────────────
825
+ // ─── Hold-to-talk / Toggle-to-talk ──────────────────────────────────────
826
+ //
827
+ // Kitty protocol terminals (Ghostty, WezTerm, Kitty) send key-release
828
+ // events (":3u" sequences), enabling true hold-to-talk.
829
+ //
830
+ // Non-Kitty terminals (Apple Terminal, iTerm2 without config, basic xterm)
831
+ // only send key-press. We detect this and fall back to toggle:
832
+ // 1st SPACE press → start recording
833
+ // 2nd SPACE press → stop recording + transcribe
834
+ //
835
+ // We auto-detect Kitty support: if we see a key-release within the first
836
+ // recording, we know hold-to-talk works. Otherwise, we stay in toggle mode.
837
+
838
+ let kittyReleaseDetected = false; // have we ever seen a Kitty release event?
445
839
 
446
840
  function setupHoldToTalk() {
447
841
  if (!ctx?.hasUI) return;
448
842
 
449
- // Remove previous listener
450
843
  if (terminalInputUnsub) { terminalInputUnsub(); terminalInputUnsub = null; }
451
844
 
452
845
  terminalInputUnsub = ctx.ui.onTerminalInput((data: string) => {
453
846
  if (!config.enabled) return undefined;
454
847
 
455
- // Hold SPACE talk → release → transcribe to editor
848
+ // ── SPACE handling ──
456
849
  if (matchesKey(data, "space")) {
457
- // Only activate when editor is empty (avoid conflicting with typing)
458
850
  const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
459
851
  if (editorText && editorText.trim().length > 0) return undefined;
460
852
 
853
+ // Kitty key-release: stop recording
461
854
  if (isKeyRelease(data)) {
462
- if (isHolding) {
855
+ kittyReleaseDetected = true;
856
+ if (isHolding && voiceState === "recording") {
463
857
  isHolding = false;
464
858
  stopVoiceRecording("editor");
465
859
  return { consume: true };
@@ -467,12 +861,27 @@ export default function (pi: ExtensionAPI) {
467
861
  return undefined;
468
862
  }
469
863
 
864
+ // Kitty key-repeat: suppress while holding
470
865
  if (isKeyRepeat(data)) {
471
866
  if (isHolding) return { consume: true };
472
867
  return undefined;
473
868
  }
474
869
 
475
- // Key press — start recording
870
+ // === Key PRESS ===
871
+
872
+ // Currently recording? → this is the "stop" press (toggle mode)
873
+ if (voiceState === "recording") {
874
+ isHolding = false;
875
+ stopVoiceRecording("editor");
876
+ return { consume: true };
877
+ }
878
+
879
+ // Currently transcribing? → ignore, wait for it to finish
880
+ if (voiceState === "transcribing") {
881
+ return { consume: true };
882
+ }
883
+
884
+ // Idle → start recording
476
885
  if (voiceState === "idle" && !isHolding) {
477
886
  isHolding = true;
478
887
  startVoiceRecording("editor").then((ok) => {
@@ -485,10 +894,11 @@ export default function (pi: ExtensionAPI) {
485
894
  return undefined;
486
895
  }
487
896
 
488
- // Hold Ctrl+Shift+B talk release → auto-btw
897
+ // ── Ctrl+Shift+B handling (BTW voice) ──
489
898
  if (matchesKey(data, "ctrl+shift+b")) {
490
899
  if (isKeyRelease(data)) {
491
- if (isHolding) {
900
+ kittyReleaseDetected = true;
901
+ if (isHolding && voiceState === "recording") {
492
902
  isHolding = false;
493
903
  stopVoiceRecording("btw");
494
904
  return { consume: true };
@@ -501,6 +911,13 @@ export default function (pi: ExtensionAPI) {
501
911
  return undefined;
502
912
  }
503
913
 
914
+ // Toggle: stop if recording
915
+ if (voiceState === "recording") {
916
+ isHolding = false;
917
+ stopVoiceRecording("btw");
918
+ return { consume: true };
919
+ }
920
+
504
921
  if (voiceState === "idle" && !isHolding) {
505
922
  isHolding = true;
506
923
  startVoiceRecording("btw").then((ok) => {
@@ -513,12 +930,6 @@ export default function (pi: ExtensionAPI) {
513
930
  return undefined;
514
931
  }
515
932
 
516
- // Any other key while holding = cancel
517
- if (isHolding && voiceState === "recording") {
518
- // Don't cancel on modifier-only events
519
- return undefined;
520
- }
521
-
522
933
  return undefined;
523
934
  });
524
935
  }
@@ -526,7 +937,6 @@ export default function (pi: ExtensionAPI) {
526
937
  // ─── BTW: Side Conversations ─────────────────────────────────────────────
527
938
 
528
939
  function buildBtwContext(): string {
529
- // Build context from main session + btw thread
530
940
  const systemPrompt = ctx?.getSystemPrompt() ?? "";
531
941
  let btwContext = "You are a helpful side-channel assistant. ";
532
942
  btwContext += "The user is having a parallel conversation while their main Pi agent works. ";
@@ -560,7 +970,6 @@ export default function (pi: ExtensionAPI) {
560
970
  "",
561
971
  ];
562
972
 
563
- // Show last exchange
564
973
  lines.push(` Q: ${last.question.slice(0, 100)}${last.question.length > 100 ? "..." : ""}`);
565
974
  const answerLines = last.answer.split("\n");
566
975
  for (const line of answerLines.slice(0, 8)) {
@@ -579,7 +988,6 @@ export default function (pi: ExtensionAPI) {
579
988
 
580
989
  btwWidgetVisible = true;
581
990
 
582
- // Show thinking state
583
991
  ctx.ui.setWidget("btw", [
584
992
  " BTW",
585
993
  "",
@@ -588,10 +996,8 @@ export default function (pi: ExtensionAPI) {
588
996
  " Thinking...",
589
997
  ], { placement: "aboveEditor" });
590
998
 
591
- // Build context for LLM
592
999
  const btwContext = buildBtwContext();
593
1000
 
594
- // Use the model registry to get current model
595
1001
  const model = ctx.model;
596
1002
  if (!model) {
597
1003
  const exchange: BtwExchange = {
@@ -606,7 +1012,6 @@ export default function (pi: ExtensionAPI) {
606
1012
  }
607
1013
 
608
1014
  try {
609
- // Stream the response
610
1015
  let answer = "";
611
1016
  const eventStream = streamSimple(model, {
612
1017
  systemPrompt: btwContext,
@@ -623,7 +1028,6 @@ export default function (pi: ExtensionAPI) {
623
1028
  break;
624
1029
  }
625
1030
 
626
- // Update widget with streaming response
627
1031
  const displayLines: string[] = [
628
1032
  ` BTW`,
629
1033
  "",
@@ -647,7 +1051,6 @@ export default function (pi: ExtensionAPI) {
647
1051
  pi.appendEntry("btw", exchange);
648
1052
  updateBtwWidget();
649
1053
  } catch (err: any) {
650
- // Fallback: send as a follow-up message to the main agent
651
1054
  const exchange: BtwExchange = {
652
1055
  question: message,
653
1056
  answer: `(BTW streaming failed: ${err.message}. Falling back to sendUserMessage.)`,
@@ -657,7 +1060,6 @@ export default function (pi: ExtensionAPI) {
657
1060
  pi.appendEntry("btw", exchange);
658
1061
  updateBtwWidget();
659
1062
 
660
- // Use sendUserMessage as alternative
661
1063
  pi.sendUserMessage(
662
1064
  `[BTW question]: ${message}`,
663
1065
  { deliverAs: "followUp" },
@@ -667,7 +1069,6 @@ export default function (pi: ExtensionAPI) {
667
1069
 
668
1070
  // ─── Shortcuts ───────────────────────────────────────────────────────────
669
1071
 
670
- // Ctrl+Shift+V = toggle voice (fallback for non-Kitty terminals)
671
1072
  pi.registerShortcut("ctrl+shift+v", {
672
1073
  description: "Toggle voice recording (start/stop)",
673
1074
  handler: async (handlerCtx) => {
@@ -679,6 +1080,7 @@ export default function (pi: ExtensionAPI) {
679
1080
  if (voiceState === "idle") {
680
1081
  await startVoiceRecording("editor");
681
1082
  } else if (voiceState === "recording") {
1083
+ isHolding = false;
682
1084
  await stopVoiceRecording("editor");
683
1085
  }
684
1086
  },
@@ -694,12 +1096,42 @@ export default function (pi: ExtensionAPI) {
694
1096
  configSource = loaded.source;
695
1097
  updateSocketPath(config, currentCwd);
696
1098
 
697
- // No auto-popup on startup. Users run `/voice setup` to configure.
698
- // Only activate voice features if setup has been completed previously.
1099
+ // Auto-capture DEEPGRAM_API_KEY from env into config if not already stored.
1100
+ // This ensures streaming works even when Pi is launched from a context
1101
+ // that doesn't source .zshrc (GUI app, tmux, etc.)
1102
+ if (process.env.DEEPGRAM_API_KEY && !config.deepgramApiKey) {
1103
+ config.deepgramApiKey = process.env.DEEPGRAM_API_KEY;
1104
+ if (configSource !== "default") {
1105
+ saveConfig(config, config.scope, currentCwd);
1106
+ }
1107
+ }
1108
+
1109
+ // Also try to load DEEPGRAM_API_KEY from shell if not in process.env and not in config
1110
+ if (!resolveDeepgramApiKey(config) && config.backend === "deepgram") {
1111
+ try {
1112
+ const result = spawnSync("zsh", ["-ic", "echo $DEEPGRAM_API_KEY"], {
1113
+ stdio: ["pipe", "pipe", "pipe"],
1114
+ timeout: 3000,
1115
+ env: { ...process.env, HOME: os.homedir() },
1116
+ });
1117
+ const shellKey = result.stdout?.toString().trim();
1118
+ if (shellKey && shellKey.length > 5) {
1119
+ config.deepgramApiKey = shellKey;
1120
+ process.env.DEEPGRAM_API_KEY = shellKey; // Also set for child processes
1121
+ if (configSource !== "default") {
1122
+ saveConfig(config, config.scope, currentCwd);
1123
+ }
1124
+ }
1125
+ } catch {}
1126
+ }
1127
+
699
1128
  if (config.enabled && config.onboarding.completed) {
700
1129
  updateVoiceStatus();
701
1130
  setupHoldToTalk();
702
- ensureDaemon(config).catch(() => {});
1131
+ // Only start daemon for non-streaming backends
1132
+ if (!isDeepgramStreaming(config)) {
1133
+ ensureDaemon(config).catch(() => {});
1134
+ }
703
1135
  }
704
1136
  });
705
1137
 
@@ -744,7 +1176,7 @@ export default function (pi: ExtensionAPI) {
744
1176
  // ─── /voice command ──────────────────────────────────────────────────────
745
1177
 
746
1178
  pi.registerCommand("voice", {
747
- description: "Voice input: /voice [on|off|test|info|setup|reconfigure|doctor|backends|daemon]",
1179
+ description: "Voice input: /voice [on|off|stop|test|info|setup|reconfigure|doctor|backends|daemon]",
748
1180
  handler: async (args, cmdCtx) => {
749
1181
  ctx = cmdCtx;
750
1182
  const sub = (args || "").trim().toLowerCase();
@@ -753,8 +1185,11 @@ export default function (pi: ExtensionAPI) {
753
1185
  config.enabled = true;
754
1186
  updateVoiceStatus();
755
1187
  setupHoldToTalk();
756
- ensureDaemon(config).catch(() => {});
757
- cmdCtx.ui.notify("Voice enabled. Hold SPACE (empty editor) to record.", "info");
1188
+ if (!isDeepgramStreaming(config)) {
1189
+ ensureDaemon(config).catch(() => {});
1190
+ }
1191
+ const mode = isDeepgramStreaming(config) ? "Deepgram streaming" : config.backend;
1192
+ cmdCtx.ui.notify(`Voice enabled (${mode}).\n Hold SPACE (empty editor) → release to transcribe\n Ctrl+Shift+V → toggle recording on/off\n Live transcription shown while speaking`, "info");
758
1193
  return;
759
1194
  }
760
1195
 
@@ -767,9 +1202,22 @@ export default function (pi: ExtensionAPI) {
767
1202
  return;
768
1203
  }
769
1204
 
1205
+ if (sub === "stop") {
1206
+ if (voiceState === "recording") {
1207
+ isHolding = false;
1208
+ await stopVoiceRecording("editor");
1209
+ cmdCtx.ui.notify("Recording stopped and transcribed.", "info");
1210
+ } else {
1211
+ cmdCtx.ui.notify("No recording in progress.", "info");
1212
+ }
1213
+ return;
1214
+ }
1215
+
770
1216
  if (sub === "test") {
771
1217
  cmdCtx.ui.notify("Testing voice setup...", "info");
772
1218
  const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
1219
+ const dgKey = resolveDeepgramApiKey(config);
1220
+ const streaming = isDeepgramStreaming(config);
773
1221
  const daemonUp = await isDaemonRunning();
774
1222
  const provisioningPlan = buildProvisioningPlan(config, diagnostics);
775
1223
  const selectedBackend = diagnostics.backends.find((backend) => backend.name === config.backend);
@@ -782,6 +1230,8 @@ export default function (pi: ExtensionAPI) {
782
1230
  ` model: ${config.model}`,
783
1231
  ` model status: ${modelReadiness}`,
784
1232
  ` language: ${config.language}`,
1233
+ ` streaming: ${streaming ? "YES (Deepgram WS)" : "NO (batch)"}`,
1234
+ ` DEEPGRAM_API_KEY: ${dgKey ? "set (" + dgKey.slice(0, 8) + "...)" : "NOT SET"}`,
785
1235
  ` onboarding: ${config.onboarding.completed ? "complete" : "incomplete"}`,
786
1236
  ` python3: ${diagnostics.hasPython ? "OK" : "missing"}`,
787
1237
  ` sox/rec: ${diagnostics.hasSox ? "OK" : "missing"}`,
@@ -803,11 +1253,10 @@ export default function (pi: ExtensionAPI) {
803
1253
  }
804
1254
  }
805
1255
 
806
- lines.push("", "Suggested commands:");
807
- lines.push(...(provisioningPlan.commands.length > 0 ? provisioningPlan.commands.map((command) => ` - ${command}`) : [" - none"]));
808
- if (provisioningPlan.manualSteps.length > 0) {
809
- lines.push("", "Manual steps:");
810
- lines.push(...provisioningPlan.manualSteps.map((step) => ` - ${step}`));
1256
+ if (!dgKey && config.backend === "deepgram") {
1257
+ lines.push("");
1258
+ lines.push("⚠️ DEEPGRAM_API_KEY not set! Add to ~/.zshrc or ~/.env.secrets");
1259
+ lines.push(" export DEEPGRAM_API_KEY=your_key_here");
811
1260
  }
812
1261
 
813
1262
  cmdCtx.ui.notify(lines.join("\n"), provisioningPlan.ready ? "info" : "warning");
@@ -824,22 +1273,24 @@ export default function (pi: ExtensionAPI) {
824
1273
  const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
825
1274
  const selectedBackend = diagnostics.backends.find((backend) => backend.name === config.backend);
826
1275
  const modelReadiness = getModelReadiness(selectedBackend, config.model);
1276
+ const streaming = isDeepgramStreaming(config);
827
1277
 
828
1278
  cmdCtx.ui.notify([
829
1279
  `Voice config:`,
830
- ` enabled: ${config.enabled}`,
831
- ` mode: ${config.mode}`,
832
- ` scope: ${config.scope}`,
833
- ` backend: ${config.backend}`,
834
- ` model: ${config.model}`,
835
- ` model status: ${modelReadiness}`,
836
- ` language: ${config.language}`,
837
- ` state: ${voiceState}`,
838
- ` setup: ${config.onboarding.completed ? `complete (${config.onboarding.source ?? "unknown"})` : "incomplete"}`,
839
- ` socket: ${activeSocketPath}`,
840
- ` daemon: ${daemonUp ? "running" : "stopped"}${daemonInfo}`,
841
- ` hold-key: SPACE (editor empty) or Ctrl+Shift+V (toggle)`,
842
- ` btw-key: Ctrl+Shift+B (hold to record → auto-btw)`,
1280
+ ` enabled: ${config.enabled}`,
1281
+ ` mode: ${config.mode}`,
1282
+ ` scope: ${config.scope}`,
1283
+ ` backend: ${config.backend}`,
1284
+ ` model: ${config.model}`,
1285
+ ` model stat: ${modelReadiness}`,
1286
+ ` language: ${config.language}`,
1287
+ ` streaming: ${streaming ? "YES (Deepgram WebSocket)" : "NO (batch)"}`,
1288
+ ` state: ${voiceState}`,
1289
+ ` setup: ${config.onboarding.completed ? `complete (${config.onboarding.source ?? "unknown"})` : "incomplete"}`,
1290
+ ` socket: ${activeSocketPath}`,
1291
+ ` daemon: ${daemonUp ? "running" : "stopped"}${daemonInfo}`,
1292
+ ` hold-key: SPACE (editor empty) or Ctrl+Shift+V (toggle)`,
1293
+ ` btw-key: Ctrl+Shift+B (hold to record → auto-btw)`,
843
1294
  ].join("\n"), "info");
844
1295
  return;
845
1296
  }
@@ -882,7 +1333,6 @@ export default function (pi: ExtensionAPI) {
882
1333
  cmdCtx.ui.notify("Voice setup cancelled.", "warning");
883
1334
  return;
884
1335
  }
885
-
886
1336
  await finalizeAndSaveSetup(cmdCtx, result.config, result.selectedScope, result.summaryLines, "setup-command");
887
1337
  return;
888
1338
  }
@@ -990,7 +1440,7 @@ export default function (pi: ExtensionAPI) {
990
1440
  },
991
1441
  });
992
1442
 
993
- // ─── Dedicated setup command (discoverable in /command list) ──────────────
1443
+ // ─── Dedicated setup command ─────────────────────────────────────────────
994
1444
 
995
1445
  pi.registerCommand("voice-setup", {
996
1446
  description: "Configure voice input — select backend, model, and language",
@@ -1058,7 +1508,6 @@ export default function (pi: ExtensionAPI) {
1058
1508
 
1059
1509
  pi.sendUserMessage(content, { deliverAs: "followUp" });
1060
1510
 
1061
- // Clear after injection
1062
1511
  btwThread = [];
1063
1512
  btwWidgetVisible = false;
1064
1513
  cmdCtx.ui.setWidget("btw", undefined);
@@ -1083,7 +1532,6 @@ export default function (pi: ExtensionAPI) {
1083
1532
  threadText += `Q: ${ex.question}\nA: ${ex.answer}\n\n`;
1084
1533
  }
1085
1534
 
1086
- // Ask the model to summarize
1087
1535
  const model = ctx.model;
1088
1536
  if (!model) {
1089
1537
  cmdCtx.ui.notify("No model available for summarization.", "error");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codexstar/pi-listen",
3
- "version": "1.0.11",
3
+ "version": "1.0.13",
4
4
  "description": "Voice input, first-run onboarding, and side-channel BTW conversations for Pi",
5
5
  "type": "module",
6
6
  "keywords": [