npm - @codexstar/pi-listen - Versions diffs - 1.0.12 → 1.0.14 - Mend

@codexstar/pi-listen 1.0.12 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/extensions/voice/config.ts +4 -0
package/extensions/voice.ts +814 -141
package/package.json +1 -1

package/extensions/voice/config.ts CHANGED Viewed

@@ -31,6 +31,8 @@ export interface VoiceConfig {
 	scope: VoiceSettingsScope;
 	btwEnabled: boolean;
 	onboarding: VoiceOnboardingState;
+	/** Deepgram API key — stored in config so it's available even when env var isn't set */
+	deepgramApiKey?: string;
 }
 export interface LoadedVoiceConfig {
@@ -60,6 +62,7 @@ export const DEFAULT_CONFIG: VoiceConfig = {
 	model: "small",
 	scope: "global",
 	btwEnabled: true,
+	deepgramApiKey: undefined,
 	onboarding: {
 		completed: false,
 		schemaVersion: VOICE_CONFIG_VERSION,
@@ -121,6 +124,7 @@ function migrateConfig(rawVoice: any, source: VoiceConfigSource): VoiceConfig {
 		model: typeof rawVoice.model === "string" ? rawVoice.model : DEFAULT_CONFIG.model,
 		scope: (rawVoice.scope as VoiceSettingsScope | undefined) ?? (source === "project" ? "project" : "global"),
 		btwEnabled: typeof rawVoice.btwEnabled === "boolean" ? rawVoice.btwEnabled : DEFAULT_CONFIG.btwEnabled,
+		deepgramApiKey: typeof rawVoice.deepgramApiKey === "string" ? rawVoice.deepgramApiKey : undefined,
 		onboarding: normalizeOnboarding(rawVoice.onboarding, fallbackCompleted),
 	};
 }

package/extensions/voice.ts CHANGED Viewed

@@ -1,22 +1,27 @@
 /**
- * pi-voice — Voice input + BTW side conversations for Pi CLI.
+ * pi-voice — Deepgram WebSocket streaming STT for Pi CLI.
  *
- * Features:
- *   1. Hold-spacebar to talk (Kitty protocol key release detection)
- *      Fallback: Ctrl+Shift+V toggle for non-Kitty terminals
- *   2. BTW side conversations (/btw <msg>, /btw:new, /btw:clear, /btw:inject, /btw:summarize)
- *   3. Voice → BTW glue: Ctrl+Shift+B = hold to record → auto-send as /btw
+ * Architecture (modeled after Claude Code's voice pipeline):
+ *   1. SoX `rec` captures mic audio as raw PCM (16kHz, mono, 16-bit)
+ *      and pipes it to stdout (no file).
+ *   2. Raw PCM chunks are streamed over a WebSocket to Deepgram Nova 3.
+ *   3. Deepgram returns interim + final transcripts in real-time.
+ *   4. Interim transcripts update a live widget above the editor.
+ *   5. On key-release (or toggle stop), a CloseStream message is sent;
+ *      final transcript is injected into the editor.
  *
- * Records audio via SoX, transcribes via persistent daemon (daemon.py) or fallback subprocess.
- * STT backends: faster-whisper, moonshine, whisper.cpp, deepgram, parakeet.
+ * Activation:
+ *   - Hold SPACE (empty editor) → release to finalize
+ *   - Ctrl+Shift+V → toggle start/stop (fallback for non-Kitty terminals)
+ *   - Ctrl+Shift+B → hold to record → auto-send as /btw
  *
- * Config in ~/.pi/agent/settings.json or <project>/.pi/settings.json:
+ * Config in ~/.pi/agent/settings.json:
  * {
  *   "voice": {
  *     "enabled": true,
  *     "language": "en",
- *     "backend": "faster-whisper",
- *     "model": "small"
+ *     "backend": "deepgram",
+ *     "model": "nova-3"
  *   }
  * }
  */
@@ -65,6 +70,14 @@ interface BtwExchange {
 // ─── Constants ───────────────────────────────────────────────────────────────
 const SAMPLE_RATE = 16000;
+const CHANNELS = 1;
+const ENCODING = "linear16";
+const DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
+const KEEPALIVE_INTERVAL_MS = 8000;
+const FINALIZE_SAFETY_TIMEOUT_MS = 5000;
+const FINALIZE_NO_DATA_TIMEOUT_MS = 1500;
+const MAX_RECORDING_SECS = 120; // 2 minutes safety cap (streaming is efficient)
 const EXT_DIR = path.dirname(new URL(import.meta.url).pathname);
 const PROJECT_ROOT = path.join(EXT_DIR, "..");
 const DAEMON_SCRIPT = path.join(PROJECT_ROOT, "daemon.py");
@@ -74,7 +87,7 @@ function commandExists(cmd: string): boolean {
 	return spawnSync("which", [cmd], { stdio: "pipe", timeout: 3000 }).status === 0;
 }
-// ─── Daemon Communication ────────────────────────────────────────────────────
+// ─── Daemon Communication (kept for non-deepgram local backends) ─────────────
 let activeSocketPath = getSocketPath({
 	scope: DEFAULT_CONFIG.scope,
@@ -135,8 +148,6 @@ async function isDaemonRunning(socketPath = activeSocketPath): Promise<boolean>
 async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
 	if (await isDaemonRunning(activeSocketPath)) {
 		const status = await daemonSend({ cmd: "status" }, 3000, activeSocketPath);
-		// When backend is 'auto', accept any loaded backend — the daemon already
-		// resolved 'auto' to a concrete backend, so we don't need to reload.
 		if (config.backend === "auto" || (status.backend === config.backend && status.model === config.model)) return true;
 		const reloaded = await daemonSend({
 			cmd: "load",
@@ -175,7 +186,6 @@ async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
 		proc.on("error", () => resolve(false));
-		// Timeout: if daemon doesn't start in 10s, kill orphan and fall back
 		setTimeout(() => {
 			if (!started) {
 				try { proc.kill(); } catch {}
@@ -185,46 +195,40 @@ async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
 	});
 }
-// ─── Audio Recording ─────────────────────────────────────────────────────────
+// ─── Legacy file-based transcription (for non-deepgram backends) ─────────────
-let recProcess: ChildProcess | null = null;
+let legacyRecProcess: ChildProcess | null = null;
-function startRecordingToFile(outPath: string): boolean {
-	if (recProcess) {
-		recProcess.kill("SIGTERM");
-		recProcess = null;
+function startLegacyRecordingToFile(outPath: string): boolean {
+	if (legacyRecProcess) {
+		legacyRecProcess.kill("SIGTERM");
+		legacyRecProcess = null;
 	}
 	if (!commandExists("rec")) return false;
-	recProcess = spawn("rec", [
+	legacyRecProcess = spawn("rec", [
 		"-q", "-r", String(SAMPLE_RATE), "-c", "1", "-b", "16", outPath,
 	], { stdio: ["pipe", "pipe", "pipe"] });
-	recProcess.stderr?.on("data", () => {});
-	recProcess.on("error", () => { recProcess = null; });
+	legacyRecProcess.stderr?.on("data", () => {});
+	legacyRecProcess.on("error", () => { legacyRecProcess = null; });
 	return true;
 }
-function stopRecording(): Promise<void> {
+function stopLegacyRecording(): Promise<void> {
 	return new Promise((resolve) => {
-		if (!recProcess) { resolve(); return; }
-		recProcess.on("close", () => { recProcess = null; resolve(); });
-		recProcess.kill("SIGTERM");
+		if (!legacyRecProcess) { resolve(); return; }
+		legacyRecProcess.on("close", () => { legacyRecProcess = null; resolve(); });
+		legacyRecProcess.kill("SIGTERM");
 		setTimeout(() => {
-			if (recProcess) { recProcess.kill("SIGKILL"); recProcess = null; }
+			if (legacyRecProcess) { legacyRecProcess.kill("SIGKILL"); legacyRecProcess = null; }
 			resolve();
 		}, 2000);
 	});
 }
-// ─── Transcription (daemon or fallback) ──────────────────────────────────────
-async function transcribeAudio(
+async function transcribeAudioFile(
 	audioPath: string,
 	config: VoiceConfig,
 ): Promise<{ text: string; duration: number; error?: string }> {
-	// Try daemon first
 	if (await isDaemonRunning()) {
 		const resp = await daemonSend({
 			cmd: "transcribe",
@@ -238,13 +242,10 @@ async function transcribeAudio(
 			return resp as { text: string; duration: number };
 		}
 	}
-	// Fallback: direct subprocess
 	return new Promise((resolve) => {
 		const args = [TRANSCRIBE_SCRIPT, "--language", config.language, audioPath];
 		if (config.backend !== "auto") args.splice(1, 0, "--backend", config.backend);
 		if (config.model) args.splice(1, 0, "--model", config.model);
 		const proc = spawn("python3", args, { stdio: ["pipe", "pipe", "pipe"] });
 		let stdout = "";
 		let stderr = "";
@@ -258,6 +259,250 @@ async function transcribeAudio(
 	});
 }
+// ─── Deepgram WebSocket Streaming ────────────────────────────────────────────
+interface StreamingSession { // State for one live microphone → Deepgram transcription run
+	ws: WebSocket; // Socket the audio is sent over and transcripts arrive on
+	recProcess: ChildProcess; // SoX `rec` process whose stdout is the audio source
+	interimText: string;      // Current interim (partial) transcript
+	finalizedParts: string[]; // All finalized transcript segments
+	keepAliveTimer: ReturnType<typeof setInterval> | null; // KeepAlive sender; set when the socket opens, cleared on finalize
+	closed: boolean; // True once the session has been finalized; guards against double teardown
+	onTranscript: (interim: string, finals: string[]) => void; // Invoked after each "Results" message
+	onDone: (fullText: string) => void; // Invoked on finalize with the finalized segments joined by spaces
+	onError: (err: string) => void; // Invoked with a human-readable failure message
+}
+function getDeepgramApiKey(): string | null {
+	// Env var only (null when unset or empty); the settings.json fallback is in resolveDeepgramApiKey(config).
+	return process.env.DEEPGRAM_API_KEY || null;
+}
+/**
+ * Resolve the Deepgram API key from all sources:
+ * 1. process.env.DEEPGRAM_API_KEY (shell)
+ * 2. config.deepgramApiKey (settings.json, persisted at setup time)
+ *
+ * The sources are chained with `||`, so an empty-string value counts as
+ * missing and the next source is tried.
+ *
+ * @param config Voice settings whose `deepgramApiKey` acts as the fallback.
+ * @returns The first non-empty key, or `null` when neither source has one.
+ */
+function resolveDeepgramApiKey(config: VoiceConfig): string | null {
+	return process.env.DEEPGRAM_API_KEY || config.deepgramApiKey || null;
+}
+/**
+ * Decide whether voice input should use the Deepgram WebSocket streaming path.
+ *
+ * @param config Voice settings (backend selection and optional stored key).
+ * @returns `true` only when an API key can be resolved AND the backend is
+ *   "deepgram" or "auto"; `false` for every other combination.
+ */
+function isDeepgramStreaming(config: VoiceConfig): boolean {
+	// Without a key streaming is impossible, whatever the backend setting says.
+	if (!resolveDeepgramApiKey(config)) return false;
+	// The key is known to exist here, so "auto" needs no further key check.
+	return config.backend === "deepgram" || config.backend === "auto";
+}
+function buildDeepgramWsUrl(config: VoiceConfig): string { // Returns DEEPGRAM_WS_URL plus the stream settings encoded as a query string
+	const params = new URLSearchParams({
+		encoding: ENCODING,
+		sample_rate: String(SAMPLE_RATE),
+		channels: String(CHANNELS),
+		endpointing: "300",       // ms of silence before phrase boundary
+		utterance_end_ms: "1000", // ms of silence before utterance is complete
+		language: config.language || "en",
+		model: config.model || "nova-3", // NOTE(review): with backend "auto", config.model may still be the local default ("small" in DEFAULT_CONFIG) — confirm Deepgram accepts that value
+		smart_format: "true",
+		interim_results: "true",
+	});
+	return `${DEEPGRAM_WS_URL}?${params.toString()}`;
+}
+/**
+ * Start a live transcription session: spawn SoX `rec` to capture raw PCM from
+ * the microphone and forward it to Deepgram over a WebSocket.
+ *
+ * @param config Voice settings; supplies the API key (resolveDeepgramApiKey)
+ *   and the connection parameters (buildDeepgramWsUrl).
+ * @param callbacks
+ *   - `onTranscript`: called after every "Results" message with the current
+ *     interim text and the finalized segments collected so far.
+ *   - `onDone`: called with the joined finalized transcript when the session ends.
+ *   - `onError`: called with a human-readable message on failure.
+ * @returns The session handle, or `null` (after calling `onError`) when no API
+ *   key is available or `rec` is not installed.
+ *
+ * Failures after start-up (socket error, Deepgram "Error" message, SoX spawn
+ * error) tear the session down immediately, so the mic process, the socket and
+ * the keepalive timer cannot outlive a session the caller has already dropped.
+ */
+function startStreamingSession(
+	config: VoiceConfig,
+	callbacks: {
+		onTranscript: (interim: string, finals: string[]) => void;
+		onDone: (fullText: string) => void;
+		onError: (err: string) => void;
+	},
+): StreamingSession | null {
+	const apiKey = resolveDeepgramApiKey(config);
+	if (!apiKey) {
+		callbacks.onError("DEEPGRAM_API_KEY not set");
+		return null;
+	}
+	if (!commandExists("rec")) {
+		callbacks.onError("Voice requires SoX. Install: brew install sox");
+		return null;
+	}
+	// Start SoX streaming raw PCM to stdout (no file)
+	const recProc = spawn("rec", [
+		"-q",
+		"-r", String(SAMPLE_RATE),
+		"-c", String(CHANNELS),
+		"-b", "16",
+		"-e", "signed-integer",
+		"-t", "raw",
+		"-",  // output to stdout
+	], { stdio: ["pipe", "pipe", "pipe"] });
+	recProc.stderr?.on("data", () => {}); // suppress SoX warnings
+	// Connect WebSocket to Deepgram
+	const wsUrl = buildDeepgramWsUrl(config);
+	const ws = new WebSocket(wsUrl, {
+		headers: {
+			"Authorization": `Token ${apiKey}`,
+		},
+	} as any);
+	const session: StreamingSession = {
+		ws,
+		recProcess: recProc,
+		interimText: "",
+		finalizedParts: [],
+		keepAliveTimer: null,
+		closed: false,
+		onTranscript: callbacks.onTranscript,
+		onDone: callbacks.onDone,
+		onError: callbacks.onError,
+	};
+	// Terminal failure path: release every resource, then report the error once.
+	const fail = (message: string): void => {
+		if (session.closed) return;
+		session.closed = true;
+		if (session.keepAliveTimer) {
+			clearInterval(session.keepAliveTimer);
+			session.keepAliveTimer = null;
+		}
+		try { ws.close(); } catch {}
+		try { recProc.kill("SIGKILL"); } catch {}
+		session.onError(message);
+		// Hand over whatever was already finalized instead of discarding it;
+		// an empty transcript is not reported, so no second notification follows the error.
+		const partial = session.finalizedParts.join(" ").trim();
+		if (partial) session.onDone(partial);
+	};
+	ws.onopen = () => {
+		// Send initial KeepAlive
+		try { ws.send(JSON.stringify({ type: "KeepAlive" })); } catch {}
+		// Start keepalive timer
+		session.keepAliveTimer = setInterval(() => {
+			if (ws.readyState === WebSocket.OPEN) {
+				try { ws.send(JSON.stringify({ type: "KeepAlive" })); } catch {}
+			}
+		}, KEEPALIVE_INTERVAL_MS);
+		// Pipe SoX stdout → WebSocket as binary frames
+		recProc.stdout?.on("data", (chunk: Buffer) => {
+			if (ws.readyState === WebSocket.OPEN) {
+				try { ws.send(chunk); } catch {}
+			}
+		});
+	};
+	ws.onmessage = (event: MessageEvent) => {
+		// Messages that arrive after teardown must not touch the UI again.
+		if (session.closed) return;
+		try {
+			const msg = typeof event.data === "string" ? JSON.parse(event.data) : null;
+			if (!msg) return;
+			if (msg.type === "Results") {
+				const alt = msg.channel?.alternatives?.[0];
+				const transcript = alt?.transcript || "";
+				if (msg.is_final) {
+					// Final result for this audio segment
+					if (transcript.trim()) {
+						session.finalizedParts.push(transcript.trim());
+					}
+					session.interimText = "";
+				} else {
+					// Interim result — live update
+					session.interimText = transcript;
+				}
+				session.onTranscript(session.interimText, session.finalizedParts);
+			} else if (msg.type === "Error" || msg.type === "error") {
+				fail(msg.message || msg.description || "Deepgram error");
+			}
+			// "Metadata" and "UtteranceEnd" need no handling: finalization is
+			// driven entirely by is_final on "Results" messages.
+		} catch {
+			// Ignore payloads that are not valid JSON
+		}
+	};
+	ws.onerror = () => {
+		fail("WebSocket connection error");
+	};
+	ws.onclose = () => {
+		if (!session.closed) {
+			finalizeSession(session);
+		}
+	};
+	recProc.on("error", (err) => {
+		fail(`SoX error: ${err.message}`);
+	});
+	recProc.on("close", () => {
+		// SoX stopped — send CloseStream to Deepgram
+		if (ws.readyState === WebSocket.OPEN) {
+			try { ws.send(JSON.stringify({ type: "CloseStream" })); } catch {}
+		}
+	});
+	return session;
+}
+/**
+ * Ask a running session to wind down: stop the microphone, tell Deepgram to
+ * flush, then finalize as soon as the transcript stream goes quiet.
+ *
+ * Finalization happens on whichever comes first:
+ *   - no message received for FINALIZE_NO_DATA_TIMEOUT_MS (polled every 500 ms),
+ *   - FINALIZE_SAFETY_TIMEOUT_MS elapsed since this call,
+ *   - the session being closed by another path (e.g. the socket closing).
+ * Both timers are cancelled as soon as any of these happens.
+ *
+ * @param session Session returned by startStreamingSession; a session that is
+ *   already closed is left untouched.
+ */
+function stopStreamingSession(session: StreamingSession): void {
+	if (session.closed) return;
+	// Stop the microphone
+	try { session.recProcess.kill("SIGTERM"); } catch {}
+	// CloseStream tells Deepgram to flush remaining audio
+	if (session.ws.readyState === WebSocket.OPEN) {
+		try { session.ws.send(JSON.stringify({ type: "CloseStream" })); } catch {}
+	}
+	// Track the arrival time of trailing messages while still forwarding
+	// them to the session's regular handler.
+	let lastDataTime = Date.now();
+	const origOnMessage = session.ws.onmessage;
+	session.ws.onmessage = (event: MessageEvent) => {
+		lastDataTime = Date.now();
+		if (origOnMessage) origOnMessage.call(session.ws, event);
+	};
+	// Single exit point: cancel both timers, then finalize if nobody else has.
+	const finish = (): void => {
+		clearTimeout(safetyTimer);
+		clearInterval(noDataCheck);
+		if (!session.closed) {
+			finalizeSession(session);
+		}
+	};
+	// Safety: finalize after timeout even if Deepgram doesn't respond
+	const safetyTimer = setTimeout(finish, FINALIZE_SAFETY_TIMEOUT_MS);
+	// Shorter timeout: if no new data arrives for 1.5s, assume done
+	const noDataCheck = setInterval(() => {
+		if (session.closed || Date.now() - lastDataTime > FINALIZE_NO_DATA_TIMEOUT_MS) {
+			finish();
+		}
+	}, 500);
+}
+function finalizeSession(session: StreamingSession): void { // Idempotent teardown: releases the timer, socket and SoX process, then reports the transcript via onDone
+	if (session.closed) return;
+	session.closed = true;
+	// Clean up keepalive
+	if (session.keepAliveTimer) {
+		clearInterval(session.keepAliveTimer);
+		session.keepAliveTimer = null;
+	}
+	// Close WebSocket
+	try { session.ws.close(); } catch {}
+	// Kill SoX if still running
+	try { session.recProcess.kill("SIGKILL"); } catch {}
+	// Deliver final transcript (finalized segments only; pending interim text is not included)
+	const fullText = session.finalizedParts.join(" ").trim();
+	session.onDone(fullText);
+}
 // ─── Extension ───────────────────────────────────────────────────────────────
 export default function (pi: ExtensionAPI) {
@@ -272,6 +517,10 @@ export default function (pi: ExtensionAPI) {
 	let terminalInputUnsub: (() => void) | null = null;
 	let isHolding = false;
+	// Streaming session state
+	let activeSession: StreamingSession | null = null;
+	let currentTarget: "editor" | "btw" = "editor";
 	// ─── BTW State ───────────────────────────────────────────────────────────
 	let btwThread: BtwExchange[] = [];
@@ -289,17 +538,19 @@ export default function (pi: ExtensionAPI) {
 				}
 				const modeTag = !config.onboarding.completed
 					? "SETUP"
-					: config.mode === "api"
-						? "API"
-						: config.mode === "local"
-							? "LOCAL"
-							: "AUTO";
+					: isDeepgramStreaming(config)
+						? "STREAM"
+						: config.mode === "api"
+							? "API"
+							: config.mode === "local"
+								? "LOCAL"
+								: "AUTO";
 				ctx.ui.setStatus("voice", `MIC ${modeTag}`);
 				break;
 			}
 			case "recording": {
 				const secs = Math.round((Date.now() - recordingStart) / 1000);
-				ctx.ui.setStatus("voice", `REC ${secs}s`);
+				ctx.ui.setStatus("voice", `🔴 REC ${secs}s`);
 				break;
 			}
 			case "transcribing":
@@ -315,9 +566,17 @@ export default function (pi: ExtensionAPI) {
 	function voiceCleanup() {
 		if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
-		if (recProcess) { recProcess.kill("SIGTERM"); recProcess = null; }
+		clearHoldTimer();
+		stopRecordingWidgetAnimation();
+		if (activeSession) {
+			finalizeSession(activeSession);
+			activeSession = null;
+		}
+		if (legacyRecProcess) { legacyRecProcess.kill("SIGTERM"); legacyRecProcess = null; }
 		if (tempFile) { try { fs.unlinkSync(tempFile); } catch {} tempFile = null; }
 		isHolding = false;
+		spaceConsumed = false;
+		spaceDownTime = null;
 		setVoiceState("idle");
 	}
@@ -332,7 +591,7 @@ export default function (pi: ExtensionAPI) {
 		const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
 		const provisioningPlan = buildProvisioningPlan(nextConfig, diagnostics);
 		let validated = provisioningPlan.ready;
-		if (validated && nextConfig.enabled) {
+		if (validated && nextConfig.enabled && !isDeepgramStreaming(nextConfig)) {
 			validated = await ensureDaemon(nextConfig);
 		}
@@ -349,53 +608,337 @@ export default function (pi: ExtensionAPI) {
 		].join("\n"), validated ? "info" : "warning");
 	}
-	// ─── Voice: Start / Stop / Transcribe ────────────────────────────────────
+	// ─── Live Transcript Widget (Component-based, themed) ───────────────────
-	const MAX_RECORDING_SECS = 30; // Safety cap: auto-stop after 30s
+	/**
+	 * Subtle hint shown during the hold threshold wait.
+	 * Does nothing when no UI is attached.
+	 */
+	function showHoldHintWidget() {
+		if (!ctx?.hasUI) return;
+		ctx.ui.setWidget("voice-recording", (tui, theme) => {
+			return {
+				invalidate() {},
+				render(width: number): string[] {
+					// Clamp at 0: String.prototype.repeat throws a RangeError for a
+					// negative count, which a width below 2 would otherwise produce.
+					const barLen = Math.max(0, Math.min(width - 2, 60));
+					const bar = theme.fg("muted", "─".repeat(barLen));
+					return [
+						bar,
+						theme.fg("dim", "  Hold " + theme.bold("SPACE") + " for voice input..."),
+						bar,
+					];
+				},
+			};
+		}, { placement: "aboveEditor" });
+	}
-	async function startVoiceRecording(target: "editor" | "btw" = "editor"): Promise<boolean> {
-		if (voiceState !== "idle" || !ctx) return false;
+	function hideHoldHintWidget() { // Removes the "voice-recording" widget; no-op without a UI
+		if (!ctx?.hasUI) return;
+		ctx.ui.setWidget("voice-recording", undefined);
+	}
+	/** Animated recording indicator with live waveform */
+	function showRecordingWidget(target: "editor" | "btw") {
+		if (!ctx?.hasUI) return;
+		let frame = 0;
+		const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];
-		tempFile = path.join(os.tmpdir(), `pi-voice-${Date.now()}.wav`);
-		if (!startRecordingToFile(tempFile)) {
-			ctx.ui.notify("Voice requires SoX. Install: brew install sox", "error");
-			return false;
+		// Animate the widget every 200ms
+		const animTimer = setInterval(() => {
+			frame++;
+			if (ctx?.hasUI) ctx.ui.setWidget("voice-recording", undefined); // force re-render
+			showRecordingWidgetFrame(target, frame, waveChars);
+		}, 200);
+		// Store the timer so we can clean it up
+		(showRecordingWidget as any)._animTimer = animTimer;
+		showRecordingWidgetFrame(target, frame, waveChars);
+	}
+	/**
+	 * Render one frame of the recording indicator (status dot, label, scrolling
+	 * waveform, elapsed time and a stop hint) into the "voice-recording" widget.
+	 *
+	 * @param target Where the transcript will go; selects the label and the hint.
+	 * @param frame Animation counter; shifts the waveform by one glyph per frame.
+	 * @param waveChars Glyph cycle the waveform is drawn from.
+	 */
+	function showRecordingWidgetFrame(target: "editor" | "btw", frame: number, waveChars: string[]) {
+		if (!ctx?.hasUI) return;
+		ctx.ui.setWidget("voice-recording", (tui, theme) => {
+			return {
+				invalidate() {},
+				render(width: number): string[] {
+					// Clamp at 0: "─".repeat() throws a RangeError for a negative
+					// count, which a width below 2 would otherwise produce.
+					const maxW = Math.max(0, Math.min(width - 2, 72));
+					const elapsed = Math.round((Date.now() - recordingStart) / 1000);
+					const mins = Math.floor(elapsed / 60);
+					const secs = elapsed % 60;
+					const timeStr = mins > 0 ? `${mins}:${String(secs).padStart(2, "0")}` : `${secs}s`;
+					// Animated waveform
+					const waveLen = 12;
+					let wave = "";
+					for (let i = 0; i < waveLen; i++) {
+						wave += waveChars[(frame + i) % waveChars.length];
+					}
+					const topBorder = theme.fg("borderAccent", "╭" + "─".repeat(maxW) + "╮");
+					const botBorder = theme.fg("borderAccent", "╰" + "─".repeat(maxW) + "╯");
+					// Pad to the box width, measuring the text without its ANSI colour codes.
+					const pad = (s: string, w: number) => {
+						const visible = s.replace(/\x1b\[[^m]*m/g, "").length;
+						return s + " ".repeat(Math.max(0, w - visible));
+					};
+					const dot = theme.fg("error", "●");
+					const label = target === "btw"
+						? theme.bold(theme.fg("accent", " BTW "))
+						: theme.bold(theme.fg("accent", " VOICE "));
+					const waveStyled = theme.fg("accent", wave);
+					const timeStyled = theme.fg("muted", timeStr);
+					const titleLine = `  ${dot} ${label} ${waveStyled}  ${timeStyled}`;
+					const hint = target === "btw"
+						? theme.fg("dim", "  Press Ctrl+Shift+B to stop")
+						: kittyReleaseDetected
+							? theme.fg("dim", "  Release SPACE to finalize")
+							: theme.fg("dim", "  Press SPACE again to stop");
+					const lines = [
+						topBorder,
+						theme.fg("borderAccent", "│") + pad(titleLine, maxW) + theme.fg("borderAccent", "│"),
+						theme.fg("borderAccent", "│") + pad(hint, maxW) + theme.fg("borderAccent", "│"),
+						botBorder,
+					];
+					return lines;
+				},
+			};
+		}, { placement: "aboveEditor" });
+	}
+	function stopRecordingWidgetAnimation() {
+		const timer = (showRecordingWidget as any)?._animTimer;
+		if (timer) {
+			clearInterval(timer);
+			(showRecordingWidget as any)._animTimer = null;
 		}
+	}
+	/**
+	 * Show the live transcript inside a themed box.
+	 *
+	 * @param interim Current partial transcript (may be empty).
+	 * @param finals Finalized segments so far; joined with spaces for display.
+	 *
+	 * Only the last three wrapped lines are shown. The label and the stop hint
+	 * follow the active recording target, matching the recording indicator.
+	 */
+	function updateLiveTranscriptWidget(interim: string, finals: string[]) {
+		if (!ctx?.hasUI) return;
+		const finalized = finals.join(" ");
+		const displayText = finalized + (interim ? (finalized ? " " : "") + interim : "");
+		// Capture the target now so a later recording cannot relabel this widget.
+		const target = currentTarget;
+		ctx.ui.setWidget("voice-recording", (tui, theme) => {
+			return {
+				invalidate() {},
+				render(width: number): string[] {
+					// Clamp at 0: "─".repeat() throws a RangeError for a negative
+					// count, which a width below 2 would otherwise produce.
+					const maxW = Math.max(0, Math.min(width - 2, 72));
+					const elapsed = Math.round((Date.now() - recordingStart) / 1000);
+					const mins = Math.floor(elapsed / 60);
+					const secs = elapsed % 60;
+					const timeStr = mins > 0 ? `${mins}:${String(secs).padStart(2, "0")}` : `${secs}s`;
+					const topBorder = theme.fg("borderAccent", "╭" + "─".repeat(maxW) + "╮");
+					const botBorder = theme.fg("borderAccent", "╰" + "─".repeat(maxW) + "╯");
+					const sep = theme.fg("borderAccent", "│") + theme.fg("borderAccent", "─".repeat(maxW)) + theme.fg("borderAccent", "│");
+					// Wrap content in side borders, padding by its width without ANSI colour codes.
+					const side = (content: string) => {
+						const stripped = content.replace(/\x1b\[[^m]*m/g, "");
+						const padding = Math.max(0, maxW - stripped.length);
+						return theme.fg("borderAccent", "│") + content + " ".repeat(padding) + theme.fg("borderAccent", "│");
+					};
+					const dot = theme.fg("error", "●");
+					const label = target === "btw"
+						? theme.bold(theme.fg("accent", " BTW "))
+						: theme.bold(theme.fg("accent", " VOICE "));
+					const timeStyled = theme.fg("muted", timeStr);
+					const titleLine = `  ${dot} ${label}  ${timeStyled}`;
+					const hint = target === "btw"
+						? theme.fg("dim", "  Press Ctrl+Shift+B to stop")
+						: kittyReleaseDetected
+							? theme.fg("dim", "  Release SPACE to finalize")
+							: theme.fg("dim", "  Press SPACE again to stop");
+					const lines = [topBorder, side(titleLine)];
+					if (!displayText.trim()) {
+						lines.push(side(theme.fg("dim", "  Listening... speak now")));
+					} else {
+						lines.push(sep);
+						// Word-wrap the transcript text
+						const innerMax = Math.max(1, maxW - 4); // padding inside box, never below one column
+						const words = displayText.split(" ");
+						const wrappedLines: string[] = [];
+						let currentLine = "";
+						for (const word of words) {
+							if ((currentLine + " " + word).trim().length > innerMax && currentLine) {
+								wrappedLines.push(currentLine);
+								currentLine = word;
+							} else {
+								currentLine = currentLine ? currentLine + " " + word : word;
+							}
+						}
+						if (currentLine) wrappedLines.push(currentLine);
+						// Show last 3 lines of transcript
+						const visible = wrappedLines.slice(-3);
+						for (let i = 0; i < visible.length; i++) {
+							let line = visible[i];
+							// While interim text is pending, mark the last line with a cursor glyph
+							if (i === visible.length - 1 && interim) {
+								line = theme.fg("text", line) + theme.fg("accent", "▍");
+							} else {
+								line = theme.fg("text", line);
+							}
+							lines.push(side("  " + line));
+						}
+					}
+					lines.push(side(hint));
+					lines.push(botBorder);
+					return lines;
+				},
+			};
+		}, { placement: "aboveEditor" });
+	}
+	/**
+	 * Transcribing state — show a processing indicator.
+	 * The spinner glyph is chosen from the clock at render time, so it only
+	 * advances when the widget is re-rendered.
+	 */
+	function showTranscribingWidget() {
+		if (!ctx?.hasUI) return;
+		ctx.ui.setWidget("voice-recording", (tui, theme) => {
+			return {
+				invalidate() {},
+				render(width: number): string[] {
+					// Clamp at 0: "─".repeat() throws a RangeError for a negative
+					// count, which a width below 2 would otherwise produce.
+					const maxW = Math.max(0, Math.min(width - 2, 72));
+					const topBorder = theme.fg("border", "╭" + "─".repeat(maxW) + "╮");
+					const botBorder = theme.fg("border", "╰" + "─".repeat(maxW) + "╯");
+					// Wrap content in side borders, padding by its width without ANSI colour codes.
+					const side = (content: string) => {
+						const stripped = content.replace(/\x1b\[[^m]*m/g, "");
+						const padding = Math.max(0, maxW - stripped.length);
+						return theme.fg("border", "│") + content + " ".repeat(padding) + theme.fg("border", "│");
+					};
+					const spinner = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
+					const idx = Math.floor(Date.now() / 100) % spinner.length;
+					const line = `  ${theme.fg("accent", spinner[idx])} ${theme.fg("dim", "Finalizing transcription...")}`;
+					return [topBorder, side(line), botBorder];
+				},
+			};
+		}, { placement: "aboveEditor" });
+	}
+	// ─── Voice: Start / Stop (Streaming or Legacy) ───────────────────────────
+	async function startVoiceRecording(target: "editor" | "btw" = "editor"): Promise<boolean> {
+		if (voiceState !== "idle" || !ctx) return false;
+		currentTarget = target;
 		recordingStart = Date.now();
-		setVoiceState("recording");
-		statusTimer = setInterval(() => {
-			if (voiceState === "recording") {
-				updateVoiceStatus();
-				// Safety: auto-stop after MAX_RECORDING_SECS
-				const elapsed = (Date.now() - recordingStart) / 1000;
-				if (elapsed >= MAX_RECORDING_SECS) {
-					isHolding = false;
-					stopVoiceRecording(target);
+		if (isDeepgramStreaming(config)) {
+			// === STREAMING PATH === (Deepgram WebSocket)
+			setVoiceState("recording");
+			const session = startStreamingSession(config, {
+				onTranscript: (interim, finals) => {
+					updateLiveTranscriptWidget(interim, finals);
+					updateVoiceStatus();
+				},
+				onDone: (fullText) => {
+					activeSession = null;
+					stopRecordingWidgetAnimation();
+					ctx?.ui.setWidget("voice-recording", undefined);
+					if (!fullText.trim()) {
+						ctx?.ui.notify("No speech detected.", "warning");
+						setVoiceState("idle");
+						return;
+					}
+					if (target === "btw") {
+						handleBtw(fullText);
+					} else {
+						if (ctx?.hasUI) {
+							const existing = ctx.ui.getEditorText();
+							ctx.ui.setEditorText(existing ? existing + " " + fullText : fullText);
+							const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
+							ctx.ui.notify(
+								`STT (${elapsed}s): ${fullText.slice(0, 80)}${fullText.length > 80 ? "..." : ""}`,
+								"info",
+							);
+						}
+					}
+					setVoiceState("idle");
+				},
+				onError: (err) => {
+					activeSession = null;
+					stopRecordingWidgetAnimation();
+					ctx?.ui.setWidget("voice-recording", undefined);
+					ctx?.ui.notify(`STT error: ${err}`, "error");
+					setVoiceState("idle");
+				},
+			});
+			if (!session) {
+				setVoiceState("idle");
+				return false;
+			}
+			activeSession = session;
+			// Status timer for elapsed time
+			statusTimer = setInterval(() => {
+				if (voiceState === "recording") {
+					updateVoiceStatus();
+					const elapsed = (Date.now() - recordingStart) / 1000;
+					if (elapsed >= MAX_RECORDING_SECS) {
+						isHolding = false;
+						stopVoiceRecording(target);
+					}
+				}
+			}, 1000);
+			// Show the themed recording widget
+			showRecordingWidget(target);
+			return true;
+		} else {
+			// === LEGACY PATH === (file-based for local backends)
+			tempFile = path.join(os.tmpdir(), `pi-voice-${Date.now()}.wav`);
+			if (!startLegacyRecordingToFile(tempFile)) {
+				ctx.ui.notify("Voice requires SoX. Install: brew install sox", "error");
+				return false;
+			}
+			setVoiceState("recording");
+			statusTimer = setInterval(() => {
+				if (voiceState === "recording") {
+					updateVoiceStatus();
+					const elapsed = (Date.now() - recordingStart) / 1000;
+					if (elapsed >= MAX_RECORDING_SECS) {
+						isHolding = false;
+						stopVoiceRecording(target);
+					}
 				}
+			}, 1000);
+			if (ctx.hasUI) {
+				// Show themed recording widget for legacy path
+				showRecordingWidget(target);
 			}
-		}, 1000);
-		if (ctx.hasUI) {
-			ctx.ui.setWidget("voice-recording", [
-				target === "btw"
-					? " 🎙 BTW Recording... Ctrl+Shift+V to stop"
-					: " 🎙 Recording... Ctrl+Shift+V to stop (or release SPACE)",
-			], { placement: "aboveEditor" });
+			return true;
 		}
-		return true;
 	}
 	async function stopVoiceRecording(target: "editor" | "btw" = "editor") {
 		if (voiceState !== "recording" || !ctx) return;
 		if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
+		if (activeSession) {
+			// === STREAMING PATH === Stop the stream, finalize will call onDone
+			setVoiceState("transcribing");
+			stopRecordingWidgetAnimation();
+			showTranscribingWidget();
+			stopStreamingSession(activeSession);
+			return;
+		}
+		// === LEGACY PATH ===
 		const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
-		const audioFile = tempFile; // capture before cleanup can null it
+		const audioFile = tempFile;
 		setVoiceState("transcribing");
-		ctx.ui.setWidget("voice-recording", undefined);
+		stopRecordingWidgetAnimation();
+		showTranscribingWidget();
-		await stopRecording();
+		await stopLegacyRecording();
 		if (!audioFile || !fs.existsSync(audioFile)) {
 			ctx.ui.notify("No audio recorded.", "warning");
@@ -412,12 +955,9 @@ export default function (pi: ExtensionAPI) {
 			return;
 		}
-		// Ensure daemon is up before transcribing — await so the warm path
-		// is available for this request instead of falling through to the
-		// cold subprocess fallback.
 		await ensureDaemon(config).catch(() => {});
-		const result = await transcribeAudio(audioFile, config);
+		const result = await transcribeAudioFile(audioFile, config);
 		try { fs.unlinkSync(audioFile); } catch {}
 		if (tempFile === audioFile) tempFile = null;
@@ -437,7 +977,6 @@ export default function (pi: ExtensionAPI) {
 		if (target === "btw") {
 			await handleBtw(transcript);
 		} else {
-			// Inject into editor
 			if (ctx.hasUI) {
 				const existing = ctx.ui.getEditorText();
 				ctx.ui.setEditorText(existing ? existing + " " + transcript : transcript);
@@ -451,54 +990,163 @@ export default function (pi: ExtensionAPI) {
 		setVoiceState("idle");
 	}
-	// ─── Hold-to-talk via Kitty protocol ─────────────────────────────────────
+	// ─── Hold-to-talk with Duration Threshold ──────────────────────────────
+	//
+	// SPACE activates voice ONLY when:
+	//   1. The editor is empty (no text typed yet)
+	//   2. SPACE is held for ≥ HOLD_THRESHOLD_MS (500ms)
+	//
+	// If SPACE is released before the threshold, a regular space character
+	// is typed into the editor (normal typing behavior).
+	//
+	// This prevents accidental voice activation when typing and matches
+	// Claude Code's hold-to-talk UX pattern.
+	//
+	// For Kitty protocol terminals: hold → wait threshold → activate →
+	//   release → stop recording. True hold-to-talk.
+	// For non-Kitty terminals: hold → wait threshold → activate →
+	//   press SPACE again → stop recording. Toggle after activation.
+	const HOLD_THRESHOLD_MS = 500; // minimum hold time before voice activates
+	let kittyReleaseDetected = false;
+	let spaceDownTime: number | null = null; // timestamp when SPACE was first pressed
+	let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
+	let spaceConsumed = false; // whether we've committed to voice (past threshold)
+	function clearHoldTimer() {
+		if (holdActivationTimer) {
+			clearTimeout(holdActivationTimer);
+			holdActivationTimer = null;
+		}
+	}
 	function setupHoldToTalk() {
 		if (!ctx?.hasUI) return;
-		// Remove previous listener
 		if (terminalInputUnsub) { terminalInputUnsub(); terminalInputUnsub = null; }
 		terminalInputUnsub = ctx.ui.onTerminalInput((data: string) => {
 			if (!config.enabled) return undefined;
-			// Hold SPACE → talk → release → transcribe to editor
+			// ── SPACE handling ──
 			if (matchesKey(data, "space")) {
-				// Only activate when editor is empty (avoid conflicting with typing)
+				// RULE: If editor has content, SPACE always types a space — never voice
 				const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
-				if (editorText && editorText.trim().length > 0) return undefined;
+				if (editorText && editorText.trim().length > 0) {
+					clearHoldTimer();
+					spaceDownTime = null;
+					spaceConsumed = false;
+					return undefined; // let the default space character through
+				}
+				// ── Kitty key-release ──
 				if (isKeyRelease(data)) {
-					if (isHolding) {
+					kittyReleaseDetected = true;
+					// Released before threshold → type a space character
+					if (spaceDownTime && !spaceConsumed) {
+						clearHoldTimer();
+						spaceDownTime = null;
+						spaceConsumed = false;
+						// Insert a space into editor
+						if (ctx?.hasUI) ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
+						return { consume: true };
+					}
+					// Released after threshold → stop recording (true hold-to-talk)
+					if (spaceConsumed && isHolding && voiceState === "recording") {
 						isHolding = false;
+						spaceConsumed = false;
+						spaceDownTime = null;
 						stopVoiceRecording("editor");
 						return { consume: true };
 					}
+					spaceDownTime = null;
+					spaceConsumed = false;
 					return undefined;
 				}
+				// ── Kitty key-repeat: suppress while holding past threshold ──
 				if (isKeyRepeat(data)) {
-					if (isHolding) return { consume: true };
+					if (spaceConsumed || isHolding) return { consume: true };
 					return undefined;
 				}
-				// Key press — start recording
-				if (voiceState === "idle" && !isHolding) {
-					isHolding = true;
-					startVoiceRecording("editor").then((ok) => {
-						if (!ok) isHolding = false;
-					});
+				// === Key PRESS ===
+				// If already recording (toggle mode for non-Kitty) → stop
+				if (voiceState === "recording" && spaceConsumed) {
+					isHolding = false;
+					spaceConsumed = false;
+					spaceDownTime = null;
+					clearHoldTimer();
+					stopVoiceRecording("editor");
 					return { consume: true };
 				}
-				if (isHolding) return { consume: true };
+				// If transcribing → ignore
+				if (voiceState === "transcribing") {
+					return { consume: true };
+				}
+				// Idle → start the hold timer
+				if (voiceState === "idle" && !spaceDownTime) {
+					spaceDownTime = Date.now();
+					spaceConsumed = false;
+					// Show a subtle "preparing" indicator
+					if (ctx?.hasUI) {
+						showHoldHintWidget();
+					}
+					// After threshold: activate voice recording
+					holdActivationTimer = setTimeout(() => {
+						holdActivationTimer = null;
+						// Double-check: still idle, still holding, editor still empty
+						const currentText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
+						if (voiceState === "idle" && spaceDownTime && !(currentText && currentText.trim().length > 0)) {
+							spaceConsumed = true;
+							isHolding = true;
+							startVoiceRecording("editor").then((ok) => {
+								if (!ok) {
+									isHolding = false;
+									spaceConsumed = false;
+									spaceDownTime = null;
+								}
+							});
+						} else {
+							spaceDownTime = null;
+							spaceConsumed = false;
+						}
+					}, HOLD_THRESHOLD_MS);
+					return { consume: true }; // consume now — we'll insert space on early release
+				}
+				if (isHolding || spaceConsumed) return { consume: true };
+				return undefined;
+			}
+			// ── Any other key while holding space (pre-threshold) → cancel hold, insert space ──
+			if (spaceDownTime && !spaceConsumed && !matchesKey(data, "space")) {
+				clearHoldTimer();
+				// Insert the space that was consumed during hold detection
+				if (ctx?.hasUI) {
+					ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
+					hideHoldHintWidget();
+				}
+				spaceDownTime = null;
+				spaceConsumed = false;
+				// Don't consume this key — let it through
 				return undefined;
 			}
-			// Hold Ctrl+Shift+B → talk → release → auto-btw
+			// ── Ctrl+Shift+B handling (BTW voice) — direct toggle, no hold threshold ──
 			if (matchesKey(data, "ctrl+shift+b")) {
 				if (isKeyRelease(data)) {
-					if (isHolding) {
+					kittyReleaseDetected = true;
+					if (isHolding && voiceState === "recording") {
 						isHolding = false;
 						stopVoiceRecording("btw");
 						return { consume: true };
@@ -511,6 +1159,13 @@ export default function (pi: ExtensionAPI) {
 					return undefined;
 				}
+				// Toggle: stop if recording
+				if (voiceState === "recording") {
+					isHolding = false;
+					stopVoiceRecording("btw");
+					return { consume: true };
+				}
 				if (voiceState === "idle" && !isHolding) {
 					isHolding = true;
 					startVoiceRecording("btw").then((ok) => {
@@ -523,12 +1178,6 @@ export default function (pi: ExtensionAPI) {
 				return undefined;
 			}
-			// Any other key while holding = cancel
-			if (isHolding && voiceState === "recording") {
-				// Don't cancel on modifier-only events
-				return undefined;
-			}
 			return undefined;
 		});
 	}
@@ -536,7 +1185,6 @@ export default function (pi: ExtensionAPI) {
 	// ─── BTW: Side Conversations ─────────────────────────────────────────────
 	function buildBtwContext(): string {
-		// Build context from main session + btw thread
 		const systemPrompt = ctx?.getSystemPrompt() ?? "";
 		let btwContext = "You are a helpful side-channel assistant. ";
 		btwContext += "The user is having a parallel conversation while their main Pi agent works. ";
@@ -570,7 +1218,6 @@ export default function (pi: ExtensionAPI) {
 			"",
 		];
-		// Show last exchange
 		lines.push(`  Q: ${last.question.slice(0, 100)}${last.question.length > 100 ? "..." : ""}`);
 		const answerLines = last.answer.split("\n");
 		for (const line of answerLines.slice(0, 8)) {
@@ -589,7 +1236,6 @@ export default function (pi: ExtensionAPI) {
 		btwWidgetVisible = true;
-		// Show thinking state
 		ctx.ui.setWidget("btw", [
 			" BTW",
 			"",
@@ -598,10 +1244,8 @@ export default function (pi: ExtensionAPI) {
 			"  Thinking...",
 		], { placement: "aboveEditor" });
-		// Build context for LLM
 		const btwContext = buildBtwContext();
-		// Use the model registry to get current model
 		const model = ctx.model;
 		if (!model) {
 			const exchange: BtwExchange = {
@@ -616,7 +1260,6 @@ export default function (pi: ExtensionAPI) {
 		}
 		try {
-			// Stream the response
 			let answer = "";
 			const eventStream = streamSimple(model, {
 				systemPrompt: btwContext,
@@ -633,7 +1276,6 @@ export default function (pi: ExtensionAPI) {
 					break;
 				}
-				// Update widget with streaming response
 				const displayLines: string[] = [
 					` BTW`,
 					"",
@@ -657,7 +1299,6 @@ export default function (pi: ExtensionAPI) {
 			pi.appendEntry("btw", exchange);
 			updateBtwWidget();
 		} catch (err: any) {
-			// Fallback: send as a follow-up message to the main agent
 			const exchange: BtwExchange = {
 				question: message,
 				answer: `(BTW streaming failed: ${err.message}. Falling back to sendUserMessage.)`,
@@ -667,7 +1308,6 @@ export default function (pi: ExtensionAPI) {
 			pi.appendEntry("btw", exchange);
 			updateBtwWidget();
-			// Use sendUserMessage as alternative
 			pi.sendUserMessage(
 				`[BTW question]: ${message}`,
 				{ deliverAs: "followUp" },
@@ -677,7 +1317,6 @@ export default function (pi: ExtensionAPI) {
 	// ─── Shortcuts ───────────────────────────────────────────────────────────
-	// Ctrl+Shift+V = toggle voice (fallback for non-Kitty terminals)
 	pi.registerShortcut("ctrl+shift+v", {
 		description: "Toggle voice recording (start/stop)",
 		handler: async (handlerCtx) => {
@@ -705,12 +1344,42 @@ export default function (pi: ExtensionAPI) {
 		configSource = loaded.source;
 		updateSocketPath(config, currentCwd);
-		// No auto-popup on startup. Users run `/voice setup` to configure.
-		// Only activate voice features if setup has been completed previously.
+		// Auto-capture DEEPGRAM_API_KEY from env into config if not already stored.
+		// This ensures streaming works even when Pi is launched from a context
+		// that doesn't source .zshrc (GUI app, tmux, etc.)
+		if (process.env.DEEPGRAM_API_KEY && !config.deepgramApiKey) {
+			config.deepgramApiKey = process.env.DEEPGRAM_API_KEY;
+			if (configSource !== "default") {
+				saveConfig(config, config.scope, currentCwd);
+			}
+		}
+		// Also try to load DEEPGRAM_API_KEY from shell if not in process.env and not in config
+		if (!resolveDeepgramApiKey(config) && config.backend === "deepgram") {
+			try {
+				const result = spawnSync("zsh", ["-ic", "echo $DEEPGRAM_API_KEY"], {
+					stdio: ["pipe", "pipe", "pipe"],
+					timeout: 3000,
+					env: { ...process.env, HOME: os.homedir() },
+				});
+				const shellKey = result.stdout?.toString().trim();
+				if (shellKey && shellKey.length > 5) {
+					config.deepgramApiKey = shellKey;
+					process.env.DEEPGRAM_API_KEY = shellKey; // Also set for child processes
+					if (configSource !== "default") {
+						saveConfig(config, config.scope, currentCwd);
+					}
+				}
+			} catch {}
+		}
 		if (config.enabled && config.onboarding.completed) {
 			updateVoiceStatus();
 			setupHoldToTalk();
-			ensureDaemon(config).catch(() => {});
+			// Only start daemon for non-streaming backends
+			if (!isDeepgramStreaming(config)) {
+				ensureDaemon(config).catch(() => {});
+			}
 		}
 	});
@@ -764,8 +1433,11 @@ export default function (pi: ExtensionAPI) {
 				config.enabled = true;
 				updateVoiceStatus();
 				setupHoldToTalk();
-				ensureDaemon(config).catch(() => {});
-				cmdCtx.ui.notify("Voice enabled.\n  Hold SPACE (empty editor) → release to transcribe\n  Ctrl+Shift+V → toggle recording on/off\n  Auto-stops after 30s", "info");
+				if (!isDeepgramStreaming(config)) {
+					ensureDaemon(config).catch(() => {});
+				}
+				const mode = isDeepgramStreaming(config) ? "Deepgram streaming" : config.backend;
+				cmdCtx.ui.notify(`Voice enabled (${mode}).\n  Hold SPACE (empty editor) → release to transcribe\n  Ctrl+Shift+V → toggle recording on/off\n  Live transcription shown while speaking`, "info");
 				return;
 			}
@@ -779,7 +1451,6 @@ export default function (pi: ExtensionAPI) {
 			}
 			if (sub === "stop") {
-				// Emergency stop — cancel any active recording
 				if (voiceState === "recording") {
 					isHolding = false;
 					await stopVoiceRecording("editor");
@@ -793,6 +1464,8 @@ export default function (pi: ExtensionAPI) {
 			if (sub === "test") {
 				cmdCtx.ui.notify("Testing voice setup...", "info");
 				const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
+				const dgKey = resolveDeepgramApiKey(config);
+				const streaming = isDeepgramStreaming(config);
 				const daemonUp = await isDaemonRunning();
 				const provisioningPlan = buildProvisioningPlan(config, diagnostics);
 				const selectedBackend = diagnostics.backends.find((backend) => backend.name === config.backend);
@@ -805,6 +1478,8 @@ export default function (pi: ExtensionAPI) {
 					`  model: ${config.model}`,
 					`  model status: ${modelReadiness}`,
 					`  language: ${config.language}`,
+					`  streaming: ${streaming ? "YES (Deepgram WS)" : "NO (batch)"}`,
+					`  DEEPGRAM_API_KEY: ${dgKey ? "set (" + dgKey.slice(0, 8) + "...)" : "NOT SET"}`,
 					`  onboarding: ${config.onboarding.completed ? "complete" : "incomplete"}`,
 					`  python3: ${diagnostics.hasPython ? "OK" : "missing"}`,
 					`  sox/rec: ${diagnostics.hasSox ? "OK" : "missing"}`,
@@ -826,11 +1501,10 @@ export default function (pi: ExtensionAPI) {
 					}
 				}
-				lines.push("", "Suggested commands:");
-				lines.push(...(provisioningPlan.commands.length > 0 ? provisioningPlan.commands.map((command) => `  - ${command}`) : ["  - none"]));
-				if (provisioningPlan.manualSteps.length > 0) {
-					lines.push("", "Manual steps:");
-					lines.push(...provisioningPlan.manualSteps.map((step) => `  - ${step}`));
+				if (!dgKey && config.backend === "deepgram") {
+					lines.push("");
+					lines.push("⚠️  DEEPGRAM_API_KEY not set! Add to ~/.zshrc or ~/.env.secrets");
+					lines.push("   export DEEPGRAM_API_KEY=your_key_here");
 				}
 				cmdCtx.ui.notify(lines.join("\n"), provisioningPlan.ready ? "info" : "warning");
@@ -847,22 +1521,24 @@ export default function (pi: ExtensionAPI) {
 				const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
 				const selectedBackend = diagnostics.backends.find((backend) => backend.name === config.backend);
 				const modelReadiness = getModelReadiness(selectedBackend, config.model);
+				const streaming = isDeepgramStreaming(config);
 				cmdCtx.ui.notify([
 					`Voice config:`,
-					`  enabled:  ${config.enabled}`,
-					`  mode:     ${config.mode}`,
-					`  scope:    ${config.scope}`,
-					`  backend:  ${config.backend}`,
-					`  model:    ${config.model}`,
-					`  model status: ${modelReadiness}`,
-					`  language: ${config.language}`,
-					`  state:    ${voiceState}`,
-					`  setup:    ${config.onboarding.completed ? `complete (${config.onboarding.source ?? "unknown"})` : "incomplete"}`,
-					`  socket:   ${activeSocketPath}`,
-					`  daemon:   ${daemonUp ? "running" : "stopped"}${daemonInfo}`,
-					`  hold-key: SPACE (editor empty) or Ctrl+Shift+V (toggle)`,
-					`  btw-key:  Ctrl+Shift+B (hold to record → auto-btw)`,
+					`  enabled:    ${config.enabled}`,
+					`  mode:       ${config.mode}`,
+					`  scope:      ${config.scope}`,
+					`  backend:    ${config.backend}`,
+					`  model:      ${config.model}`,
+					`  model stat: ${modelReadiness}`,
+					`  language:   ${config.language}`,
+					`  streaming:  ${streaming ? "YES (Deepgram WebSocket)" : "NO (batch)"}`,
+					`  state:      ${voiceState}`,
+					`  setup:      ${config.onboarding.completed ? `complete (${config.onboarding.source ?? "unknown"})` : "incomplete"}`,
+					`  socket:     ${activeSocketPath}`,
+					`  daemon:     ${daemonUp ? "running" : "stopped"}${daemonInfo}`,
+					`  hold-key:   SPACE (editor empty) or Ctrl+Shift+V (toggle)`,
+					`  btw-key:    Ctrl+Shift+B (hold to record → auto-btw)`,
 				].join("\n"), "info");
 				return;
 			}
@@ -905,7 +1581,6 @@ export default function (pi: ExtensionAPI) {
 					cmdCtx.ui.notify("Voice setup cancelled.", "warning");
 					return;
 				}
 				await finalizeAndSaveSetup(cmdCtx, result.config, result.selectedScope, result.summaryLines, "setup-command");
 				return;
 			}
@@ -1013,7 +1688,7 @@ export default function (pi: ExtensionAPI) {
 		},
 	});
-	// ─── Dedicated setup command (discoverable in /command list) ──────────────
+	// ─── Dedicated setup command ─────────────────────────────────────────────
 	pi.registerCommand("voice-setup", {
 		description: "Configure voice input — select backend, model, and language",
@@ -1081,7 +1756,6 @@ export default function (pi: ExtensionAPI) {
 			pi.sendUserMessage(content, { deliverAs: "followUp" });
-			// Clear after injection
 			btwThread = [];
 			btwWidgetVisible = false;
 			cmdCtx.ui.setWidget("btw", undefined);
@@ -1106,7 +1780,6 @@ export default function (pi: ExtensionAPI) {
 				threadText += `Q: ${ex.question}\nA: ${ex.answer}\n\n`;
 			}
-			// Ask the model to summarize
 			const model = ctx.model;
 			if (!model) {
 				cmdCtx.ui.notify("No model available for summarization.", "error");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@codexstar/pi-listen",
-  "version": "1.0.12",
+  "version": "1.0.14",
   "description": "Voice input, first-run onboarding, and side-channel BTW conversations for Pi",
   "type": "module",
   "keywords": [