npm - @codexstar/pi-listen - Versions diffs - 1.0.13 → 1.0.14 - Mend

@codexstar/pi-listen 1.0.13 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/extensions/voice.ts +318 -70
package/package.json +1 -1

package/extensions/voice.ts CHANGED Viewed

@@ -566,6 +566,8 @@ export default function (pi: ExtensionAPI) {
 	function voiceCleanup() {
 		if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
+		clearHoldTimer();
+		stopRecordingWidgetAnimation();
 		if (activeSession) {
 			finalizeSession(activeSession);
 			activeSession = null;
@@ -573,6 +575,8 @@ export default function (pi: ExtensionAPI) {
 		if (legacyRecProcess) { legacyRecProcess.kill("SIGTERM"); legacyRecProcess = null; }
 		if (tempFile) { try { fs.unlinkSync(tempFile); } catch {} tempFile = null; }
 		isHolding = false;
+		spaceConsumed = false;
+		spaceDownTime = null;
 		setVoiceState("idle");
 	}
@@ -604,46 +608,211 @@ export default function (pi: ExtensionAPI) {
 		].join("\n"), validated ? "info" : "warning");
 	}
-	// ─── Live Transcript Widget ──────────────────────────────────────────────
+	// ─── Live Transcript Widget (Component-based, themed) ───────────────────
+	/**
+	 * Show a subtle hint in the "voice-recording" widget slot, placed above the
+	 * editor, during the hold-threshold wait. Does nothing when no UI is attached.
+	 */
+	function showHoldHintWidget() {
+		if (!ctx?.hasUI) return;
+		ctx.ui.setWidget("voice-recording", (tui, theme) => {
+			return {
+				invalidate() {},
+				render(width: number): string[] {
+					// Clamp at 0: String.prototype.repeat throws a RangeError for a
+					// negative count, which would happen whenever width < 2.
+					const barWidth = Math.max(0, Math.min(width - 2, 60));
+					const bar = theme.fg("muted", "─".repeat(barWidth));
+					return [
+						bar,
+						theme.fg("dim", "  Hold " + theme.bold("SPACE") + " for voice input..."),
+						bar,
+					];
+				},
+			};
+		}, { placement: "aboveEditor" });
+	}
+	/**
+	 * Clear the "voice-recording" widget slot (the slot the hold hint is shown in).
+	 * Does nothing when no UI is attached.
+	 */
+	function hideHoldHintWidget() {
+		if (ctx?.hasUI) {
+			// Setting the slot to undefined clears whatever widget currently occupies it.
+			ctx.ui.setWidget("voice-recording", undefined);
+		}
+	}
+	/**
+	 * Start the animated recording indicator with a scrolling waveform.
+	 *
+	 * Draws the first frame immediately and then redraws every 200ms until
+	 * stopRecordingWidgetAnimation() is called. Does nothing when no UI is attached.
+	 *
+	 * @param target which recording flow the indicator is for ("editor" or "btw")
+	 */
+	function showRecordingWidget(target: "editor" | "btw") {
+		if (!ctx?.hasUI) return;
+		// Cancel any animation left over from a previous call; otherwise its
+		// interval handle is overwritten below and can never be cleared.
+		stopRecordingWidgetAnimation();
+		let frame = 0;
+		const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];
+		// Animate the widget every 200ms
+		const animTimer = setInterval(() => {
+			frame++;
+			if (ctx?.hasUI) ctx.ui.setWidget("voice-recording", undefined); // force re-render
+			showRecordingWidgetFrame(target, frame, waveChars);
+		}, 200);
+		// The handle lives on a property of this function object because that is
+		// where stopRecordingWidgetAnimation() looks for it.
+		(showRecordingWidget as any)._animTimer = animTimer;
+		showRecordingWidgetFrame(target, frame, waveChars);
+	}
+	/**
+	 * Render one frame of the recording indicator into the "voice-recording" slot.
+	 *
+	 * @param target    "btw" selects the BTW label and stop hint; "editor" selects the VOICE ones
+	 * @param frame     animation frame counter; offsets the waveform pattern
+	 * @param waveChars glyph cycle the waveform is drawn from
+	 */
+	function showRecordingWidgetFrame(target: "editor" | "btw", frame: number, waveChars: string[]) {
+		if (!ctx?.hasUI) return;
+		ctx.ui.setWidget("voice-recording", (tui, theme) => {
+			return {
+				invalidate() {},
+				render(width: number): string[] {
+					// Clamp at 0: String.prototype.repeat throws a RangeError for a
+					// negative count, which would happen whenever width < 2.
+					const maxW = Math.max(0, Math.min(width - 2, 72));
+					// Elapsed time is computed at render time from recordingStart
+					const elapsed = Math.round((Date.now() - recordingStart) / 1000);
+					const mins = Math.floor(elapsed / 60);
+					const secs = elapsed % 60;
+					const timeStr = mins > 0 ? `${mins}:${String(secs).padStart(2, "0")}` : `${secs}s`;
+					// Animated waveform: a 12-glyph window into waveChars, shifted by frame
+					const waveLen = 12;
+					let wave = "";
+					for (let i = 0; i < waveLen; i++) {
+						wave += waveChars[(frame + i) % waveChars.length];
+					}
+					const topBorder = theme.fg("borderAccent", "╭" + "─".repeat(maxW) + "╮");
+					const botBorder = theme.fg("borderAccent", "╰" + "─".repeat(maxW) + "╯");
+					// Right-pad to w characters, measuring the text with ESC[...m sequences removed
+					const pad = (s: string, w: number) => {
+						const visible = s.replace(/\x1b\[[^m]*m/g, "").length;
+						return s + " ".repeat(Math.max(0, w - visible));
+					};
+					const dot = theme.fg("error", "●");
+					const label = target === "btw"
+						? theme.bold(theme.fg("accent", " BTW "))
+						: theme.bold(theme.fg("accent", " VOICE "));
+					const waveStyled = theme.fg("accent", wave);
+					const timeStyled = theme.fg("muted", timeStr);
+					const titleLine = `  ${dot} ${label} ${waveStyled}  ${timeStyled}`;
+					// The editor hint depends on whether a key-release event has been seen
+					const hint = target === "btw"
+						? theme.fg("dim", "  Press Ctrl+Shift+B to stop")
+						: kittyReleaseDetected
+							? theme.fg("dim", "  Release SPACE to finalize")
+							: theme.fg("dim", "  Press SPACE again to stop");
+					return [
+						topBorder,
+						theme.fg("borderAccent", "│") + pad(titleLine, maxW) + theme.fg("borderAccent", "│"),
+						theme.fg("borderAccent", "│") + pad(hint, maxW) + theme.fg("borderAccent", "│"),
+						botBorder,
+					];
+				},
+			};
+		}, { placement: "aboveEditor" });
+	}
+	/**
+	 * Stop the recording-indicator animation, if one is running.
+	 * The interval handle is read from (and reset on) the `_animTimer`
+	 * property that showRecordingWidget stores on its own function object.
+	 */
+	function stopRecordingWidgetAnimation() {
+		const holder = showRecordingWidget as any;
+		const pending = holder?._animTimer;
+		if (!pending) return;
+		clearInterval(pending);
+		holder._animTimer = null;
+	}
+	/**
+	 * Show the live transcript inside a themed box in the "voice-recording" slot.
+	 *
+	 * @param interim current not-yet-final text; when non-empty a cursor glyph is
+	 *                appended to the last visible transcript line
+	 * @param finals  finalized segments, joined with single spaces ahead of the interim text
+	 */
 	function updateLiveTranscriptWidget(interim: string, finals: string[]) {
 		if (!ctx?.hasUI) return;
+		// The recording animation rewrites this same widget slot every 200ms, which
+		// would replace the transcript box almost immediately. Stop it before drawing.
+		stopRecordingWidgetAnimation();
 		const finalized = finals.join(" ");
 		const displayText = finalized + (interim ? (finalized ? " " : "") + interim : "");
-		if (!displayText.trim()) {
-			ctx.ui.setWidget("voice-recording", [
-				" 🎙 Listening... (speak now)",
-			], { placement: "aboveEditor" });
-			return;
-		}
+		ctx.ui.setWidget("voice-recording", (tui, theme) => {
+			return {
+				invalidate() {},
+				render(width: number): string[] {
+					// Clamp at 0: String.prototype.repeat throws a RangeError for a
+					// negative count, which would happen whenever width < 2.
+					const maxW = Math.max(0, Math.min(width - 2, 72));
+					// Elapsed time is computed at render time from recordingStart
+					const elapsed = Math.round((Date.now() - recordingStart) / 1000);
+					const mins = Math.floor(elapsed / 60);
+					const secs = elapsed % 60;
+					const timeStr = mins > 0 ? `${mins}:${String(secs).padStart(2, "0")}` : `${secs}s`;
+					const topBorder = theme.fg("borderAccent", "╭" + "─".repeat(maxW) + "╮");
+					const botBorder = theme.fg("borderAccent", "╰" + "─".repeat(maxW) + "╯");
+					const sep = theme.fg("borderAccent", "│") + theme.fg("borderAccent", "─".repeat(maxW)) + theme.fg("borderAccent", "│");
+					// Wrap content in side borders, padding to maxW measured with ESC[...m sequences removed
+					const side = (content: string) => {
+						const stripped = content.replace(/\x1b\[[^m]*m/g, "");
+						const padding = Math.max(0, maxW - stripped.length);
+						return theme.fg("borderAccent", "│") + content + " ".repeat(padding) + theme.fg("borderAccent", "│");
+					};
+					const dot = theme.fg("error", "●");
+					const label = theme.bold(theme.fg("accent", " VOICE "));
+					const timeStyled = theme.fg("muted", timeStr);
+					const titleLine = `  ${dot} ${label}  ${timeStyled}`;
+					const hint = kittyReleaseDetected
+						? theme.fg("dim", "  Release SPACE to finalize")
+						: theme.fg("dim", "  Press SPACE again to stop");
+					const lines = [topBorder, side(titleLine)];
+					if (!displayText.trim()) {
+						lines.push(side(theme.fg("dim", "  Listening... speak now")));
+					} else {
+						lines.push(sep);
+						// Word-wrap the transcript text
+						const innerMax = maxW - 4; // padding inside box
+						const words = displayText.split(" ");
+						const wrappedLines: string[] = [];
+						let currentLine = "";
+						for (const word of words) {
+							if ((currentLine + " " + word).trim().length > innerMax && currentLine) {
+								wrappedLines.push(currentLine);
+								currentLine = word;
+							} else {
+								currentLine = currentLine ? currentLine + " " + word : word;
+							}
+						}
+						if (currentLine) wrappedLines.push(currentLine);
+						// Show last 3 lines of transcript
+						const visible = wrappedLines.slice(-3);
+						for (let i = 0; i < visible.length; i++) {
+							let line = visible[i];
+							// All transcript text uses the "text" colour; while interim text is
+							// pending, an accent-coloured cursor is appended to the last line.
+							if (i === visible.length - 1 && interim) {
+								line = theme.fg("text", line) + theme.fg("accent", "▍");
+							} else {
+								line = theme.fg("text", line);
+							}
+							lines.push(side("  " + line));
+						}
+					}
-		// Show the live transcript — last 3 lines max
-		const words = displayText.split(" ");
-		const lines: string[] = [];
-		let currentLine = " 🎙 ";
-		const maxLineLen = 70;
-		for (const word of words) {
-			if ((currentLine + word).length > maxLineLen) {
-				lines.push(currentLine);
-				currentLine = "    " + word + " ";
-			} else {
-				currentLine += word + " ";
-			}
-		}
-		if (currentLine.trim()) lines.push(currentLine);
-		// Keep only last 4 lines to avoid widget overflow
-		const visibleLines = lines.slice(-4);
-		if (interim) {
-			// Show a blinking cursor for interim text
-			const lastIdx = visibleLines.length - 1;
-			visibleLines[lastIdx] = visibleLines[lastIdx].trimEnd() + "▍";
-		}
+					lines.push(side(hint));
+					lines.push(botBorder);
+					return lines;
+				},
+			};
+		}, { placement: "aboveEditor" });
+	}
-		ctx.ui.setWidget("voice-recording", visibleLines, { placement: "aboveEditor" });
+	/**
+	 * Transcribing state — show a "Finalizing transcription..." indicator in the
+	 * "voice-recording" slot. Does nothing when no UI is attached.
+	 */
+	function showTranscribingWidget() {
+		if (!ctx?.hasUI) return;
+		ctx.ui.setWidget("voice-recording", (tui, theme) => {
+			return {
+				invalidate() {},
+				render(width: number): string[] {
+					// Clamp at 0: String.prototype.repeat throws a RangeError for a
+					// negative count, which would happen whenever width < 2.
+					const maxW = Math.max(0, Math.min(width - 2, 72));
+					const topBorder = theme.fg("border", "╭" + "─".repeat(maxW) + "╮");
+					const botBorder = theme.fg("border", "╰" + "─".repeat(maxW) + "╯");
+					// Wrap content in side borders, padding to maxW measured with ESC[...m sequences removed
+					const side = (content: string) => {
+						const stripped = content.replace(/\x1b\[[^m]*m/g, "");
+						const padding = Math.max(0, maxW - stripped.length);
+						return theme.fg("border", "│") + content + " ".repeat(padding) + theme.fg("border", "│");
+					};
+					const spinner = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
+					// The spinner glyph is picked from the wall clock at render time, so
+					// it only changes when the widget is rendered again.
+					const idx = Math.floor(Date.now() / 100) % spinner.length;
+					const line = `  ${theme.fg("accent", spinner[idx])} ${theme.fg("dim", "Finalizing transcription...")}`;
+					return [topBorder, side(line), botBorder];
+				},
+			};
+		}, { placement: "aboveEditor" });
 	}
 	// ─── Voice: Start / Stop (Streaming or Legacy) ───────────────────────────
@@ -665,6 +834,7 @@ export default function (pi: ExtensionAPI) {
 				},
 				onDone: (fullText) => {
 					activeSession = null;
+					stopRecordingWidgetAnimation();
 					ctx?.ui.setWidget("voice-recording", undefined);
 					if (!fullText.trim()) {
@@ -690,6 +860,7 @@ export default function (pi: ExtensionAPI) {
 				},
 				onError: (err) => {
 					activeSession = null;
+					stopRecordingWidgetAnimation();
 					ctx?.ui.setWidget("voice-recording", undefined);
 					ctx?.ui.notify(`STT error: ${err}`, "error");
 					setVoiceState("idle");
@@ -715,11 +886,8 @@ export default function (pi: ExtensionAPI) {
 				}
 			}, 1000);
-			if (ctx.hasUI) {
-				ctx.ui.setWidget("voice-recording", [
-					" 🎙 Listening... speak now — press SPACE again to stop",
-				], { placement: "aboveEditor" });
-			}
+			// Show the themed recording widget
+			showRecordingWidget(target);
 			return true;
 		} else {
@@ -743,11 +911,8 @@ export default function (pi: ExtensionAPI) {
 			}, 1000);
 			if (ctx.hasUI) {
-				ctx.ui.setWidget("voice-recording", [
-					target === "btw"
-						? " 🎙 BTW Recording... Ctrl+Shift+V to stop"
-						: " 🎙 Recording... Ctrl+Shift+V to stop (or release SPACE)",
-				], { placement: "aboveEditor" });
+				// Show themed recording widget for legacy path
+				showRecordingWidget(target);
 			}
 			return true;
 		}
@@ -760,6 +925,8 @@ export default function (pi: ExtensionAPI) {
 		if (activeSession) {
 			// === STREAMING PATH === Stop the stream, finalize will call onDone
 			setVoiceState("transcribing");
+			stopRecordingWidgetAnimation();
+			showTranscribingWidget();
 			stopStreamingSession(activeSession);
 			return;
 		}
@@ -768,7 +935,8 @@ export default function (pi: ExtensionAPI) {
 		const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
 		const audioFile = tempFile;
 		setVoiceState("transcribing");
-		ctx.ui.setWidget("voice-recording", undefined);
+		stopRecordingWidgetAnimation();
+		showTranscribingWidget();
 		await stopLegacyRecording();
@@ -822,20 +990,35 @@ export default function (pi: ExtensionAPI) {
 		setVoiceState("idle");
 	}
-	// ─── Hold-to-talk / Toggle-to-talk ──────────────────────────────────────
+	// ─── Hold-to-talk with Duration Threshold ──────────────────────────────
 	//
-	// Kitty protocol terminals (Ghostty, WezTerm, Kitty) send key-release
-	// events (":3u" sequences), enabling true hold-to-talk.
+	// SPACE activates voice ONLY when:
+	//   1. The editor is empty (no text typed yet)
+	//   2. SPACE is held for ≥ HOLD_THRESHOLD_MS (500ms)
 	//
-	// Non-Kitty terminals (Apple Terminal, iTerm2 without config, basic xterm)
-	// only send key-press. We detect this and fall back to toggle:
-	//   1st SPACE press → start recording
-	//   2nd SPACE press → stop recording + transcribe
+	// If SPACE is released before the threshold, a regular space character
+	// is typed into the editor (normal typing behavior).
 	//
-	// We auto-detect Kitty support: if we see a key-release within the first
-	// recording, we know hold-to-talk works. Otherwise, we stay in toggle mode.
-	let kittyReleaseDetected = false; // have we ever seen a Kitty release event?
+	// This prevents accidental voice activation when typing and matches
+	// Claude Code's hold-to-talk UX pattern.
+	//
+	// For Kitty protocol terminals: hold → wait threshold → activate →
+	//   release → stop recording. True hold-to-talk.
+	// For non-Kitty terminals: hold → wait threshold → activate →
+	//   press SPACE again → stop recording. Toggle after activation.
+	const HOLD_THRESHOLD_MS = 500; // minimum hold time before voice activates
+	let kittyReleaseDetected = false;
+	let spaceDownTime: number | null = null; // timestamp when SPACE was first pressed
+	let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
+	let spaceConsumed = false; // whether we've committed to voice (past threshold)
+	/** Cancel the pending hold-activation timeout, if any, and reset its handle to null. */
+	function clearHoldTimer() {
+		if (!holdActivationTimer) return;
+		clearTimeout(holdActivationTimer);
+		holdActivationTimer = null;
+	}
 	function setupHoldToTalk() {
 		if (!ctx?.hasUI) return;
@@ -847,54 +1030,119 @@ export default function (pi: ExtensionAPI) {
 			// ── SPACE handling ──
 			if (matchesKey(data, "space")) {
+				// RULE: If editor has content, SPACE always types a space — never voice
 				const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
-				if (editorText && editorText.trim().length > 0) return undefined;
+				if (editorText && editorText.trim().length > 0) {
+					clearHoldTimer();
+					spaceDownTime = null;
+					spaceConsumed = false;
+					return undefined; // let the default space character through
+				}
-				// Kitty key-release: stop recording
+				// ── Kitty key-release ──
 				if (isKeyRelease(data)) {
 					kittyReleaseDetected = true;
-					if (isHolding && voiceState === "recording") {
+					// Released before threshold → type a space character
+					if (spaceDownTime && !spaceConsumed) {
+						clearHoldTimer();
+						spaceDownTime = null;
+						spaceConsumed = false;
+						// Insert a space into editor
+						if (ctx?.hasUI) ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
+						return { consume: true };
+					}
+					// Released after threshold → stop recording (true hold-to-talk)
+					if (spaceConsumed && isHolding && voiceState === "recording") {
 						isHolding = false;
+						spaceConsumed = false;
+						spaceDownTime = null;
 						stopVoiceRecording("editor");
 						return { consume: true };
 					}
+					spaceDownTime = null;
+					spaceConsumed = false;
 					return undefined;
 				}
-				// Kitty key-repeat: suppress while holding
+				// ── Kitty key-repeat: suppress while holding past threshold ──
 				if (isKeyRepeat(data)) {
-					if (isHolding) return { consume: true };
+					if (spaceConsumed || isHolding) return { consume: true };
 					return undefined;
 				}
 				// === Key PRESS ===
-				// Currently recording? → this is the "stop" press (toggle mode)
-				if (voiceState === "recording") {
+				// If already recording (toggle mode for non-Kitty) → stop
+				if (voiceState === "recording" && spaceConsumed) {
 					isHolding = false;
+					spaceConsumed = false;
+					spaceDownTime = null;
+					clearHoldTimer();
 					stopVoiceRecording("editor");
 					return { consume: true };
 				}
-				// Currently transcribing? → ignore, wait for it to finish
+				// If transcribing → ignore
 				if (voiceState === "transcribing") {
 					return { consume: true };
 				}
-				// Idle → start recording
-				if (voiceState === "idle" && !isHolding) {
-					isHolding = true;
-					startVoiceRecording("editor").then((ok) => {
-						if (!ok) isHolding = false;
-					});
-					return { consume: true };
+				// Idle → start the hold timer
+				if (voiceState === "idle" && !spaceDownTime) {
+					spaceDownTime = Date.now();
+					spaceConsumed = false;
+					// Show a subtle "preparing" indicator
+					if (ctx?.hasUI) {
+						showHoldHintWidget();
+					}
+					// After threshold: activate voice recording
+					holdActivationTimer = setTimeout(() => {
+						holdActivationTimer = null;
+						// Double-check: still idle, still holding, editor still empty
+						const currentText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
+						if (voiceState === "idle" && spaceDownTime && !(currentText && currentText.trim().length > 0)) {
+							spaceConsumed = true;
+							isHolding = true;
+							startVoiceRecording("editor").then((ok) => {
+								if (!ok) {
+									isHolding = false;
+									spaceConsumed = false;
+									spaceDownTime = null;
+								}
+							});
+						} else {
+							spaceDownTime = null;
+							spaceConsumed = false;
+						}
+					}, HOLD_THRESHOLD_MS);
+					return { consume: true }; // consume now — we'll insert space on early release
 				}
-				if (isHolding) return { consume: true };
+				if (isHolding || spaceConsumed) return { consume: true };
+				return undefined;
+			}
+			// ── Any other key while holding space (pre-threshold) → cancel hold, insert space ──
+			if (spaceDownTime && !spaceConsumed && !matchesKey(data, "space")) {
+				clearHoldTimer();
+				// Insert the space that was consumed during hold detection
+				if (ctx?.hasUI) {
+					ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
+					hideHoldHintWidget();
+				}
+				spaceDownTime = null;
+				spaceConsumed = false;
+				// Don't consume this key — let it through
 				return undefined;
 			}
-			// ── Ctrl+Shift+B handling (BTW voice) ──
+			// ── Ctrl+Shift+B handling (BTW voice) — direct toggle, no hold threshold ──
 			if (matchesKey(data, "ctrl+shift+b")) {
 				if (isKeyRelease(data)) {
 					kittyReleaseDetected = true;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@codexstar/pi-listen",
-  "version": "1.0.13",
+  "version": "1.0.14",
   "description": "Voice input, first-run onboarding, and side-channel BTW conversations for Pi",
   "type": "module",
   "keywords": [