npm - @codexstar/pi-listen - Version diff - 1.0.13 → 1.0.15 - Mend

@codexstar/pi-listen 1.0.13 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/extensions/voice.ts +360 -74
package/package.json +1 -1

package/extensions/voice.ts (changed)

@@ -566,6 +566,8 @@ export default function (pi: ExtensionAPI) {
 	function voiceCleanup() {
 		if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
+		clearHoldTimer();
+		stopRecordingWidgetAnimation();
 		if (activeSession) {
 			finalizeSession(activeSession);
 			activeSession = null;
@@ -573,6 +575,8 @@ export default function (pi: ExtensionAPI) {
 		if (legacyRecProcess) { legacyRecProcess.kill("SIGTERM"); legacyRecProcess = null; }
 		if (tempFile) { try { fs.unlinkSync(tempFile); } catch {} tempFile = null; }
 		isHolding = false;
+		spaceConsumed = false;
+		spaceDownTime = null;
 		setVoiceState("idle");
 	}
@@ -604,46 +608,223 @@ export default function (pi: ExtensionAPI) {
 		].join("\n"), validated ? "info" : "warning");
 	}
-	// ─── Live Transcript Widget ──────────────────────────────────────────────
+	// ─── Live Transcript Widget (Component-based, themed) ───────────────────
+	/** Subtle hint shown during the hold threshold wait */
+	function showHoldHintWidget() {
+		if (!ctx?.hasUI) return;
+		ctx.ui.setWidget("voice-recording", (tui, theme) => {
+			return {
+				invalidate() {},
+				render(width: number): string[] {
+					const bar = theme.fg("muted", "─".repeat(Math.min(width - 2, 60)));
+					return [
+						bar,
+						theme.fg("dim", "  Hold " + theme.bold("SPACE") + " for voice input..."),
+						bar,
+					];
+				},
+			};
+		}, { placement: "aboveEditor" });
+	}
+	function hideHoldHintWidget() {
+		if (!ctx?.hasUI) return;
+		ctx.ui.setWidget("voice-recording", undefined);
+	}
+	/** Animated recording indicator with live waveform */
+	function showRecordingWidget(target: "editor" | "btw") {
+		if (!ctx?.hasUI) return;
+		// Store initial state — once live transcription arrives,
+		// updateLiveTranscriptWidget takes over and we stop the animation.
+		(showRecordingWidget as any)._target = target;
+		(showRecordingWidget as any)._frame = 0;
+		(showRecordingWidget as any)._hasTranscript = false;
+		// Animate the widget every 300ms (only while no transcript is showing)
+		const animTimer = setInterval(() => {
+			// Stop animating once live transcript takes over
+			if ((showRecordingWidget as any)?._hasTranscript) return;
+			(showRecordingWidget as any)._frame = ((showRecordingWidget as any)._frame || 0) + 1;
+			showRecordingWidgetFrame(target, (showRecordingWidget as any)._frame);
+		}, 300);
+		// Store the timer so we can clean it up
+		(showRecordingWidget as any)._animTimer = animTimer;
+		showRecordingWidgetFrame(target, 0);
+	}
+	const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];
+	function showRecordingWidgetFrame(target: "editor" | "btw", frame: number) {
+		if (!ctx?.hasUI) return;
+		ctx.ui.setWidget("voice-recording", (tui, theme) => {
+			return {
+				invalidate() {},
+				render(width: number): string[] {
+					const maxW = Math.min(width - 2, 72);
+					const elapsed = Math.round((Date.now() - recordingStart) / 1000);
+					const mins = Math.floor(elapsed / 60);
+					const secs = elapsed % 60;
+					const timeStr = mins > 0 ? `${mins}:${String(secs).padStart(2, "0")}` : `${secs}s`;
+					// Animated waveform
+					const waveLen = 12;
+					let wave = "";
+					for (let i = 0; i < waveLen; i++) {
+						wave += waveChars[(frame + i) % waveChars.length];
+					}
+					const topBorder = theme.fg("borderAccent", "╭" + "─".repeat(maxW) + "╮");
+					const botBorder = theme.fg("borderAccent", "╰" + "─".repeat(maxW) + "╯");
+					const pad = (s: string, w: number) => {
+						const visible = s.replace(/\x1b\[[^m]*m/g, "").length;
+						return s + " ".repeat(Math.max(0, w - visible));
+					};
+					const dot = theme.fg("error", "●");
+					const label = target === "btw"
+						? theme.bold(theme.fg("accent", " BTW "))
+						: theme.bold(theme.fg("accent", " VOICE "));
+					const waveStyled = theme.fg("accent", wave);
+					const timeStyled = theme.fg("muted", timeStr);
+					const titleLine = `  ${dot} ${label} ${waveStyled}  ${timeStyled}`;
+					const hint = target === "btw"
+						? theme.fg("dim", "  Press Ctrl+Shift+B to stop")
+						: kittyReleaseDetected
+							? theme.fg("dim", "  Release SPACE to finalize")
+							: theme.fg("dim", "  Press Ctrl+Shift+V to stop");
+					const lines = [
+						topBorder,
+						theme.fg("borderAccent", "│") + pad(titleLine, maxW) + theme.fg("borderAccent", "│"),
+						theme.fg("borderAccent", "│") + pad(hint, maxW) + theme.fg("borderAccent", "│"),
+						botBorder,
+					];
+					return lines;
+				},
+			};
+		}, { placement: "aboveEditor" });
+	}
+	function stopRecordingWidgetAnimation() {
+		const timer = (showRecordingWidget as any)?._animTimer;
+		if (timer) {
+			clearInterval(timer);
+			(showRecordingWidget as any)._animTimer = null;
+		}
+	}
+	/** Show live transcript inside a themed box */
 	function updateLiveTranscriptWidget(interim: string, finals: string[]) {
 		if (!ctx?.hasUI) return;
+		// Stop the recording animation — live transcript takes over
+		(showRecordingWidget as any)._hasTranscript = true;
+		stopRecordingWidgetAnimation();
 		const finalized = finals.join(" ");
 		const displayText = finalized + (interim ? (finalized ? " " : "") + interim : "");
-		if (!displayText.trim()) {
-			ctx.ui.setWidget("voice-recording", [
-				" 🎙 Listening... (speak now)",
-			], { placement: "aboveEditor" });
-			return;
-		}
+		ctx.ui.setWidget("voice-recording", (tui, theme) => {
+			return {
+				invalidate() {},
+				render(width: number): string[] {
+					const maxW = Math.min(width - 2, 72);
+					const elapsed = Math.round((Date.now() - recordingStart) / 1000);
+					const mins = Math.floor(elapsed / 60);
+					const secs = elapsed % 60;
+					const timeStr = mins > 0 ? `${mins}:${String(secs).padStart(2, "0")}` : `${secs}s`;
+					const topBorder = theme.fg("borderAccent", "╭" + "─".repeat(maxW) + "╮");
+					const botBorder = theme.fg("borderAccent", "╰" + "─".repeat(maxW) + "╯");
+					const sep = theme.fg("borderAccent", "│") + theme.fg("borderAccent", "─".repeat(maxW)) + theme.fg("borderAccent", "│");
+					const side = (content: string) => {
+						const stripped = content.replace(/\x1b\[[^m]*m/g, "");
+						const padding = Math.max(0, maxW - stripped.length);
+						return theme.fg("borderAccent", "│") + content + " ".repeat(padding) + theme.fg("borderAccent", "│");
+					};
+					const dot = theme.fg("error", "●");
+					const label = theme.bold(theme.fg("accent", " VOICE "));
+					const timeStyled = theme.fg("muted", timeStr);
+					const titleLine = `  ${dot} ${label}  ${timeStyled}`;
+					const hint = kittyReleaseDetected
+						? theme.fg("dim", "  Release SPACE to finalize")
+						: theme.fg("dim", "  Press Ctrl+Shift+V to stop");
+					const lines = [topBorder, side(titleLine)];
+					if (!displayText.trim()) {
+						lines.push(side(theme.fg("dim", "  Listening... speak now")));
+					} else {
+						lines.push(sep);
+						// Word-wrap the transcript text
+						const innerMax = maxW - 4; // padding inside box
+						const words = displayText.split(" ");
+						const wrappedLines: string[] = [];
+						let currentLine = "";
+						for (const word of words) {
+							if ((currentLine + " " + word).trim().length > innerMax && currentLine) {
+								wrappedLines.push(currentLine);
+								currentLine = word;
+							} else {
+								currentLine = currentLine ? currentLine + " " + word : word;
+							}
+						}
+						if (currentLine) wrappedLines.push(currentLine);
+						// Show last 3 lines of transcript
+						const visible = wrappedLines.slice(-3);
+						for (let i = 0; i < visible.length; i++) {
+							let line = visible[i];
+							// Style: finalized parts in normal text, interim in accent
+							if (i === visible.length - 1 && interim) {
+								line = theme.fg("text", line) + theme.fg("accent", "▍");
+							} else {
+								line = theme.fg("text", line);
+							}
+							lines.push(side("  " + line));
+						}
+					}
-		// Show the live transcript — last 3 lines max
-		const words = displayText.split(" ");
-		const lines: string[] = [];
-		let currentLine = " 🎙 ";
-		const maxLineLen = 70;
-		for (const word of words) {
-			if ((currentLine + word).length > maxLineLen) {
-				lines.push(currentLine);
-				currentLine = "    " + word + " ";
-			} else {
-				currentLine += word + " ";
-			}
-		}
-		if (currentLine.trim()) lines.push(currentLine);
-		// Keep only last 4 lines to avoid widget overflow
-		const visibleLines = lines.slice(-4);
-		if (interim) {
-			// Show a blinking cursor for interim text
-			const lastIdx = visibleLines.length - 1;
-			visibleLines[lastIdx] = visibleLines[lastIdx].trimEnd() + "▍";
-		}
+					lines.push(side(hint));
+					lines.push(botBorder);
+					return lines;
+				},
+			};
+		}, { placement: "aboveEditor" });
+	}
-		ctx.ui.setWidget("voice-recording", visibleLines, { placement: "aboveEditor" });
+	/** Transcribing state — show a processing indicator */
+	function showTranscribingWidget() {
+		if (!ctx?.hasUI) return;
+		ctx.ui.setWidget("voice-recording", (tui, theme) => {
+			return {
+				invalidate() {},
+				render(width: number): string[] {
+					const maxW = Math.min(width - 2, 72);
+					const topBorder = theme.fg("border", "╭" + "─".repeat(maxW) + "╮");
+					const botBorder = theme.fg("border", "╰" + "─".repeat(maxW) + "╯");
+					const side = (content: string) => {
+						const stripped = content.replace(/\x1b\[[^m]*m/g, "");
+						const padding = Math.max(0, maxW - stripped.length);
+						return theme.fg("border", "│") + content + " ".repeat(padding) + theme.fg("border", "│");
+					};
+					const spinner = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
+					const idx = Math.floor(Date.now() / 100) % spinner.length;
+					const line = `  ${theme.fg("accent", spinner[idx])} ${theme.fg("dim", "Finalizing transcription...")}`;
+					return [topBorder, side(line), botBorder];
+				},
+			};
+		}, { placement: "aboveEditor" });
 	}
 	// ─── Voice: Start / Stop (Streaming or Legacy) ───────────────────────────
@@ -665,6 +846,7 @@ export default function (pi: ExtensionAPI) {
 				},
 				onDone: (fullText) => {
 					activeSession = null;
+					stopRecordingWidgetAnimation();
 					ctx?.ui.setWidget("voice-recording", undefined);
 					if (!fullText.trim()) {
@@ -690,6 +872,7 @@ export default function (pi: ExtensionAPI) {
 				},
 				onError: (err) => {
 					activeSession = null;
+					stopRecordingWidgetAnimation();
 					ctx?.ui.setWidget("voice-recording", undefined);
 					ctx?.ui.notify(`STT error: ${err}`, "error");
 					setVoiceState("idle");
@@ -715,11 +898,8 @@ export default function (pi: ExtensionAPI) {
 				}
 			}, 1000);
-			if (ctx.hasUI) {
-				ctx.ui.setWidget("voice-recording", [
-					" 🎙 Listening... speak now — press SPACE again to stop",
-				], { placement: "aboveEditor" });
-			}
+			// Show the themed recording widget
+			showRecordingWidget(target);
 			return true;
 		} else {
@@ -743,11 +923,8 @@ export default function (pi: ExtensionAPI) {
 			}, 1000);
 			if (ctx.hasUI) {
-				ctx.ui.setWidget("voice-recording", [
-					target === "btw"
-						? " 🎙 BTW Recording... Ctrl+Shift+V to stop"
-						: " 🎙 Recording... Ctrl+Shift+V to stop (or release SPACE)",
-				], { placement: "aboveEditor" });
+				// Show themed recording widget for legacy path
+				showRecordingWidget(target);
 			}
 			return true;
 		}
@@ -760,6 +937,8 @@ export default function (pi: ExtensionAPI) {
 		if (activeSession) {
 			// === STREAMING PATH === Stop the stream, finalize will call onDone
 			setVoiceState("transcribing");
+			stopRecordingWidgetAnimation();
+			showTranscribingWidget();
 			stopStreamingSession(activeSession);
 			return;
 		}
@@ -768,7 +947,8 @@ export default function (pi: ExtensionAPI) {
 		const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
 		const audioFile = tempFile;
 		setVoiceState("transcribing");
-		ctx.ui.setWidget("voice-recording", undefined);
+		stopRecordingWidgetAnimation();
+		showTranscribingWidget();
 		await stopLegacyRecording();
@@ -822,20 +1002,41 @@ export default function (pi: ExtensionAPI) {
 		setVoiceState("idle");
 	}
-	// ─── Hold-to-talk / Toggle-to-talk ──────────────────────────────────────
+	// ─── Hold-to-talk with Duration Threshold ──────────────────────────────
 	//
-	// Kitty protocol terminals (Ghostty, WezTerm, Kitty) send key-release
-	// events (":3u" sequences), enabling true hold-to-talk.
+	// SPACE activates voice ONLY when:
+	//   1. The editor is empty (no text typed yet)
+	//   2. SPACE is held for ≥ HOLD_THRESHOLD_MS (500ms)
 	//
-	// Non-Kitty terminals (Apple Terminal, iTerm2 without config, basic xterm)
-	// only send key-press. We detect this and fall back to toggle:
-	//   1st SPACE press → start recording
-	//   2nd SPACE press → stop recording + transcribe
+	// If SPACE is released before the threshold, a regular space character
+	// is typed into the editor (normal typing behavior).
 	//
-	// We auto-detect Kitty support: if we see a key-release within the first
-	// recording, we know hold-to-talk works. Otherwise, we stay in toggle mode.
-	let kittyReleaseDetected = false; // have we ever seen a Kitty release event?
+	// This prevents accidental voice activation when typing and matches
+	// Claude Code's hold-to-talk UX pattern.
+	//
+	// For Kitty protocol terminals: hold → wait threshold → activate →
+	//   release → stop recording. True hold-to-talk.
+	// For non-Kitty terminals: hold → wait threshold → activate →
+	//   Ctrl+Shift+V or /voice stop to end recording.
+	//
+	// KEY INSIGHT: In non-Kitty terminals, holding a key generates
+	// rapid press events (key-repeat). We CANNOT use "second space press
+	// = stop" because repeats arrive while holding. Instead, non-Kitty
+	// users must use Ctrl+Shift+V to stop.
+	const HOLD_THRESHOLD_MS = 500; // minimum hold time before voice activates
+	let kittyReleaseDetected = false;
+	let spaceDownTime: number | null = null; // timestamp when SPACE was first pressed
+	let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
+	let spaceConsumed = false; // whether we've committed to voice (past threshold)
+	let lastSpacePressTime = 0; // debounce rapid space presses from key-repeat
+	function clearHoldTimer() {
+		if (holdActivationTimer) {
+			clearTimeout(holdActivationTimer);
+			holdActivationTimer = null;
+		}
+	}
 	function setupHoldToTalk() {
 		if (!ctx?.hasUI) return;
@@ -847,54 +1048,129 @@ export default function (pi: ExtensionAPI) {
 			// ── SPACE handling ──
 			if (matchesKey(data, "space")) {
+				// RULE: If editor has content, SPACE always types a space — never voice
 				const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
-				if (editorText && editorText.trim().length > 0) return undefined;
+				if (editorText && editorText.trim().length > 0) {
+					clearHoldTimer();
+					spaceDownTime = null;
+					spaceConsumed = false;
+					return undefined; // let the default space character through
+				}
-				// Kitty key-release: stop recording
+				// ── Kitty key-release ──
 				if (isKeyRelease(data)) {
 					kittyReleaseDetected = true;
-					if (isHolding && voiceState === "recording") {
+					// Released before threshold → type a space character
+					if (spaceDownTime && !spaceConsumed) {
+						clearHoldTimer();
+						spaceDownTime = null;
+						spaceConsumed = false;
+						// Insert a space into editor
+						if (ctx?.hasUI) ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
+						return { consume: true };
+					}
+					// Released after threshold → stop recording (true hold-to-talk)
+					if (spaceConsumed && voiceState === "recording") {
 						isHolding = false;
+						spaceConsumed = false;
+						spaceDownTime = null;
 						stopVoiceRecording("editor");
 						return { consume: true };
 					}
+					spaceDownTime = null;
+					spaceConsumed = false;
 					return undefined;
 				}
-				// Kitty key-repeat: suppress while holding
+				// ── Kitty key-repeat: ALWAYS suppress while holding/recording ──
 				if (isKeyRepeat(data)) {
-					if (isHolding) return { consume: true };
+					if (spaceDownTime || spaceConsumed || isHolding || voiceState === "recording") {
+						return { consume: true };
+					}
 					return undefined;
 				}
-				// === Key PRESS ===
+				// === Key PRESS (initial press only) ===
-				// Currently recording? → this is the "stop" press (toggle mode)
-				if (voiceState === "recording") {
-					isHolding = false;
-					stopVoiceRecording("editor");
-					return { consume: true };
+				const now = Date.now();
+				// Debounce: ignore rapid presses within 100ms (terminal key-repeat
+				// generates press events in non-Kitty terminals since there's no
+				// key-repeat flag — they all look like fresh presses)
+				if (now - lastSpacePressTime < 100) {
+					lastSpacePressTime = now;
+					return { consume: true }; // suppress repeat
 				}
+				lastSpacePressTime = now;
-				// Currently transcribing? → ignore, wait for it to finish
+				// If transcribing → ignore
 				if (voiceState === "transcribing") {
 					return { consume: true };
 				}
-				// Idle → start recording
-				if (voiceState === "idle" && !isHolding) {
-					isHolding = true;
-					startVoiceRecording("editor").then((ok) => {
-						if (!ok) isHolding = false;
-					});
+				// If already recording: In Kitty mode, release handles stop.
+				// In non-Kitty, we can't safely detect "real second press" vs
+				// key-repeat. Use Ctrl+Shift+V instead. Just consume.
+				if (voiceState === "recording") {
 					return { consume: true };
 				}
-				if (isHolding) return { consume: true };
+				// Idle → start the hold timer
+				if (voiceState === "idle" && !spaceDownTime) {
+					spaceDownTime = now;
+					spaceConsumed = false;
+					// Show a subtle "preparing" indicator
+					if (ctx?.hasUI) {
+						showHoldHintWidget();
+					}
+					// After threshold: activate voice recording
+					holdActivationTimer = setTimeout(() => {
+						holdActivationTimer = null;
+						// Double-check: still idle, still holding, editor still empty
+						const currentText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
+						if (voiceState === "idle" && spaceDownTime && !(currentText && currentText.trim().length > 0)) {
+							spaceConsumed = true;
+							isHolding = true;
+							startVoiceRecording("editor").then((ok) => {
+								if (!ok) {
+									isHolding = false;
+									spaceConsumed = false;
+									spaceDownTime = null;
+								}
+							});
+						} else {
+							spaceDownTime = null;
+							spaceConsumed = false;
+						}
+					}, HOLD_THRESHOLD_MS);
+					return { consume: true }; // consume now — we'll insert space on early release
+				}
+				if (isHolding || spaceConsumed) return { consume: true };
+				return undefined;
+			}
+			// ── Any other key while holding space (pre-threshold) → cancel hold, insert space ──
+			if (spaceDownTime && !spaceConsumed && !matchesKey(data, "space")) {
+				clearHoldTimer();
+				// Insert the space that was consumed during hold detection
+				if (ctx?.hasUI) {
+					ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
+					hideHoldHintWidget();
+				}
+				spaceDownTime = null;
+				spaceConsumed = false;
+				// Don't consume this key — let it through
 				return undefined;
 			}
-			// ── Ctrl+Shift+B handling (BTW voice) ──
+			// ── Ctrl+Shift+B handling (BTW voice) — direct toggle, no hold threshold ──
 			if (matchesKey(data, "ctrl+shift+b")) {
 				if (isKeyRelease(data)) {
 					kittyReleaseDetected = true;
@@ -1078,9 +1354,19 @@ export default function (pi: ExtensionAPI) {
 				return;
 			}
 			if (voiceState === "idle") {
-				await startVoiceRecording("editor");
+				// Direct start — bypass hold threshold
+				spaceConsumed = true;
+				isHolding = true;
+				const ok = await startVoiceRecording("editor");
+				if (!ok) {
+					isHolding = false;
+					spaceConsumed = false;
+				}
 			} else if (voiceState === "recording") {
 				isHolding = false;
+				spaceConsumed = false;
+				spaceDownTime = null;
+				clearHoldTimer();
 				await stopVoiceRecording("editor");
 			}
 		},

package/package.json (changed)

@@ -1,6 +1,6 @@
 {
   "name": "@codexstar/pi-listen",
-  "version": "1.0.13",
+  "version": "1.0.15",
   "description": "Voice input, first-run onboarding, and side-channel BTW conversations for Pi",
   "type": "module",
   "keywords": [