@codexstar/pi-listen 1.0.13 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/extensions/voice.ts +360 -74
  2. package/package.json +1 -1
@@ -566,6 +566,8 @@ export default function (pi: ExtensionAPI) {
566
566
 
567
567
  function voiceCleanup() {
568
568
  if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
569
+ clearHoldTimer();
570
+ stopRecordingWidgetAnimation();
569
571
  if (activeSession) {
570
572
  finalizeSession(activeSession);
571
573
  activeSession = null;
@@ -573,6 +575,8 @@ export default function (pi: ExtensionAPI) {
573
575
  if (legacyRecProcess) { legacyRecProcess.kill("SIGTERM"); legacyRecProcess = null; }
574
576
  if (tempFile) { try { fs.unlinkSync(tempFile); } catch {} tempFile = null; }
575
577
  isHolding = false;
578
+ spaceConsumed = false;
579
+ spaceDownTime = null;
576
580
  setVoiceState("idle");
577
581
  }
578
582
 
@@ -604,46 +608,223 @@ export default function (pi: ExtensionAPI) {
604
608
  ].join("\n"), validated ? "info" : "warning");
605
609
  }
606
610
 
607
- // ─── Live Transcript Widget ──────────────────────────────────────────────
611
+ // ─── Live Transcript Widget (Component-based, themed) ───────────────────
608
612
 
613
+ /** Subtle hint shown during the hold threshold wait */
614
+ function showHoldHintWidget() {
615
+ if (!ctx?.hasUI) return;
616
+ ctx.ui.setWidget("voice-recording", (tui, theme) => {
617
+ return {
618
+ invalidate() {},
619
+ render(width: number): string[] {
620
+ const bar = theme.fg("muted", "─".repeat(Math.min(width - 2, 60)));
621
+ return [
622
+ bar,
623
+ theme.fg("dim", " Hold " + theme.bold("SPACE") + " for voice input..."),
624
+ bar,
625
+ ];
626
+ },
627
+ };
628
+ }, { placement: "aboveEditor" });
629
+ }
630
+
631
+ function hideHoldHintWidget() {
632
+ if (!ctx?.hasUI) return;
633
+ ctx.ui.setWidget("voice-recording", undefined);
634
+ }
635
+
636
+ /** Animated recording indicator with live waveform */
637
+ function showRecordingWidget(target: "editor" | "btw") {
638
+ if (!ctx?.hasUI) return;
639
+
640
+ // Store initial state — once live transcription arrives,
641
+ // updateLiveTranscriptWidget takes over and we stop the animation.
642
+ (showRecordingWidget as any)._target = target;
643
+ (showRecordingWidget as any)._frame = 0;
644
+ (showRecordingWidget as any)._hasTranscript = false;
645
+
646
+ // Animate the widget every 300ms (only while no transcript is showing)
647
+ const animTimer = setInterval(() => {
648
+ // Stop animating once live transcript takes over
649
+ if ((showRecordingWidget as any)?._hasTranscript) return;
650
+
651
+ (showRecordingWidget as any)._frame = ((showRecordingWidget as any)._frame || 0) + 1;
652
+ showRecordingWidgetFrame(target, (showRecordingWidget as any)._frame);
653
+ }, 300);
654
+
655
+ // Store the timer so we can clean it up
656
+ (showRecordingWidget as any)._animTimer = animTimer;
657
+
658
+ showRecordingWidgetFrame(target, 0);
659
+ }
660
+
661
+ const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];
662
+
663
+ function showRecordingWidgetFrame(target: "editor" | "btw", frame: number) {
664
+ if (!ctx?.hasUI) return;
665
+ ctx.ui.setWidget("voice-recording", (tui, theme) => {
666
+ return {
667
+ invalidate() {},
668
+ render(width: number): string[] {
669
+ const maxW = Math.min(width - 2, 72);
670
+ const elapsed = Math.round((Date.now() - recordingStart) / 1000);
671
+ const mins = Math.floor(elapsed / 60);
672
+ const secs = elapsed % 60;
673
+ const timeStr = mins > 0 ? `${mins}:${String(secs).padStart(2, "0")}` : `${secs}s`;
674
+
675
+ // Animated waveform
676
+ const waveLen = 12;
677
+ let wave = "";
678
+ for (let i = 0; i < waveLen; i++) {
679
+ wave += waveChars[(frame + i) % waveChars.length];
680
+ }
681
+
682
+ const topBorder = theme.fg("borderAccent", "╭" + "─".repeat(maxW) + "╮");
683
+ const botBorder = theme.fg("borderAccent", "╰" + "─".repeat(maxW) + "╯");
684
+ const pad = (s: string, w: number) => {
685
+ const visible = s.replace(/\x1b\[[^m]*m/g, "").length;
686
+ return s + " ".repeat(Math.max(0, w - visible));
687
+ };
688
+
689
+ const dot = theme.fg("error", "●");
690
+ const label = target === "btw"
691
+ ? theme.bold(theme.fg("accent", " BTW "))
692
+ : theme.bold(theme.fg("accent", " VOICE "));
693
+ const waveStyled = theme.fg("accent", wave);
694
+ const timeStyled = theme.fg("muted", timeStr);
695
+
696
+ const titleLine = ` ${dot} ${label} ${waveStyled} ${timeStyled}`;
697
+
698
+ const hint = target === "btw"
699
+ ? theme.fg("dim", " Press Ctrl+Shift+B to stop")
700
+ : kittyReleaseDetected
701
+ ? theme.fg("dim", " Release SPACE to finalize")
702
+ : theme.fg("dim", " Press Ctrl+Shift+V to stop");
703
+
704
+ const lines = [
705
+ topBorder,
706
+ theme.fg("borderAccent", "│") + pad(titleLine, maxW) + theme.fg("borderAccent", "│"),
707
+ theme.fg("borderAccent", "│") + pad(hint, maxW) + theme.fg("borderAccent", "│"),
708
+ botBorder,
709
+ ];
710
+ return lines;
711
+ },
712
+ };
713
+ }, { placement: "aboveEditor" });
714
+ }
715
+
716
+ function stopRecordingWidgetAnimation() {
717
+ const timer = (showRecordingWidget as any)?._animTimer;
718
+ if (timer) {
719
+ clearInterval(timer);
720
+ (showRecordingWidget as any)._animTimer = null;
721
+ }
722
+ }
723
+
724
+ /** Show live transcript inside a themed box */
609
725
  function updateLiveTranscriptWidget(interim: string, finals: string[]) {
610
726
  if (!ctx?.hasUI) return;
611
727
 
728
+ // Stop the recording animation — live transcript takes over
729
+ (showRecordingWidget as any)._hasTranscript = true;
730
+ stopRecordingWidgetAnimation();
731
+
612
732
  const finalized = finals.join(" ");
613
733
  const displayText = finalized + (interim ? (finalized ? " " : "") + interim : "");
614
734
 
615
- if (!displayText.trim()) {
616
- ctx.ui.setWidget("voice-recording", [
617
- " 🎙 Listening... (speak now)",
618
- ], { placement: "aboveEditor" });
619
- return;
620
- }
735
+ ctx.ui.setWidget("voice-recording", (tui, theme) => {
736
+ return {
737
+ invalidate() {},
738
+ render(width: number): string[] {
739
+ const maxW = Math.min(width - 2, 72);
740
+ const elapsed = Math.round((Date.now() - recordingStart) / 1000);
741
+ const mins = Math.floor(elapsed / 60);
742
+ const secs = elapsed % 60;
743
+ const timeStr = mins > 0 ? `${mins}:${String(secs).padStart(2, "0")}` : `${secs}s`;
744
+
745
+ const topBorder = theme.fg("borderAccent", "╭" + "─".repeat(maxW) + "╮");
746
+ const botBorder = theme.fg("borderAccent", "╰" + "─".repeat(maxW) + "╯");
747
+ const sep = theme.fg("borderAccent", "│") + theme.fg("borderAccent", "─".repeat(maxW)) + theme.fg("borderAccent", "│");
748
+ const side = (content: string) => {
749
+ const stripped = content.replace(/\x1b\[[^m]*m/g, "");
750
+ const padding = Math.max(0, maxW - stripped.length);
751
+ return theme.fg("borderAccent", "│") + content + " ".repeat(padding) + theme.fg("borderAccent", "│");
752
+ };
753
+
754
+ const dot = theme.fg("error", "●");
755
+ const label = theme.bold(theme.fg("accent", " VOICE "));
756
+ const timeStyled = theme.fg("muted", timeStr);
757
+ const titleLine = ` ${dot} ${label} ${timeStyled}`;
758
+ const hint = kittyReleaseDetected
759
+ ? theme.fg("dim", " Release SPACE to finalize")
760
+ : theme.fg("dim", " Press Ctrl+Shift+V to stop");
761
+
762
+ const lines = [topBorder, side(titleLine)];
763
+
764
+ if (!displayText.trim()) {
765
+ lines.push(side(theme.fg("dim", " Listening... speak now")));
766
+ } else {
767
+ lines.push(sep);
768
+ // Word-wrap the transcript text
769
+ const innerMax = maxW - 4; // padding inside box
770
+ const words = displayText.split(" ");
771
+ const wrappedLines: string[] = [];
772
+ let currentLine = "";
773
+
774
+ for (const word of words) {
775
+ if ((currentLine + " " + word).trim().length > innerMax && currentLine) {
776
+ wrappedLines.push(currentLine);
777
+ currentLine = word;
778
+ } else {
779
+ currentLine = currentLine ? currentLine + " " + word : word;
780
+ }
781
+ }
782
+ if (currentLine) wrappedLines.push(currentLine);
783
+
784
+ // Show last 3 lines of transcript
785
+ const visible = wrappedLines.slice(-3);
786
+ for (let i = 0; i < visible.length; i++) {
787
+ let line = visible[i];
788
+ // Style: finalized parts in normal text, interim in accent
789
+ if (i === visible.length - 1 && interim) {
790
+ line = theme.fg("text", line) + theme.fg("accent", "▍");
791
+ } else {
792
+ line = theme.fg("text", line);
793
+ }
794
+ lines.push(side(" " + line));
795
+ }
796
+ }
621
797
 
622
- // Show the live transcript — last 3 lines max
623
- const words = displayText.split(" ");
624
- const lines: string[] = [];
625
- let currentLine = " 🎙 ";
626
- const maxLineLen = 70;
627
-
628
- for (const word of words) {
629
- if ((currentLine + word).length > maxLineLen) {
630
- lines.push(currentLine);
631
- currentLine = " " + word + " ";
632
- } else {
633
- currentLine += word + " ";
634
- }
635
- }
636
- if (currentLine.trim()) lines.push(currentLine);
637
-
638
- // Keep only last 4 lines to avoid widget overflow
639
- const visibleLines = lines.slice(-4);
640
- if (interim) {
641
- // Show a blinking cursor for interim text
642
- const lastIdx = visibleLines.length - 1;
643
- visibleLines[lastIdx] = visibleLines[lastIdx].trimEnd() + "▍";
644
- }
798
+ lines.push(side(hint));
799
+ lines.push(botBorder);
800
+ return lines;
801
+ },
802
+ };
803
+ }, { placement: "aboveEditor" });
804
+ }
645
805
 
646
- ctx.ui.setWidget("voice-recording", visibleLines, { placement: "aboveEditor" });
806
+ /** Transcribing state — show a processing indicator */
807
+ function showTranscribingWidget() {
808
+ if (!ctx?.hasUI) return;
809
+ ctx.ui.setWidget("voice-recording", (tui, theme) => {
810
+ return {
811
+ invalidate() {},
812
+ render(width: number): string[] {
813
+ const maxW = Math.min(width - 2, 72);
814
+ const topBorder = theme.fg("border", "╭" + "─".repeat(maxW) + "╮");
815
+ const botBorder = theme.fg("border", "╰" + "─".repeat(maxW) + "╯");
816
+ const side = (content: string) => {
817
+ const stripped = content.replace(/\x1b\[[^m]*m/g, "");
818
+ const padding = Math.max(0, maxW - stripped.length);
819
+ return theme.fg("border", "│") + content + " ".repeat(padding) + theme.fg("border", "│");
820
+ };
821
+ const spinner = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
822
+ const idx = Math.floor(Date.now() / 100) % spinner.length;
823
+ const line = ` ${theme.fg("accent", spinner[idx])} ${theme.fg("dim", "Finalizing transcription...")}`;
824
+ return [topBorder, side(line), botBorder];
825
+ },
826
+ };
827
+ }, { placement: "aboveEditor" });
647
828
  }
648
829
 
649
830
  // ─── Voice: Start / Stop (Streaming or Legacy) ───────────────────────────
@@ -665,6 +846,7 @@ export default function (pi: ExtensionAPI) {
665
846
  },
666
847
  onDone: (fullText) => {
667
848
  activeSession = null;
849
+ stopRecordingWidgetAnimation();
668
850
  ctx?.ui.setWidget("voice-recording", undefined);
669
851
 
670
852
  if (!fullText.trim()) {
@@ -690,6 +872,7 @@ export default function (pi: ExtensionAPI) {
690
872
  },
691
873
  onError: (err) => {
692
874
  activeSession = null;
875
+ stopRecordingWidgetAnimation();
693
876
  ctx?.ui.setWidget("voice-recording", undefined);
694
877
  ctx?.ui.notify(`STT error: ${err}`, "error");
695
878
  setVoiceState("idle");
@@ -715,11 +898,8 @@ export default function (pi: ExtensionAPI) {
715
898
  }
716
899
  }, 1000);
717
900
 
718
- if (ctx.hasUI) {
719
- ctx.ui.setWidget("voice-recording", [
720
- " 🎙 Listening... speak now — press SPACE again to stop",
721
- ], { placement: "aboveEditor" });
722
- }
901
+ // Show the themed recording widget
902
+ showRecordingWidget(target);
723
903
  return true;
724
904
 
725
905
  } else {
@@ -743,11 +923,8 @@ export default function (pi: ExtensionAPI) {
743
923
  }, 1000);
744
924
 
745
925
  if (ctx.hasUI) {
746
- ctx.ui.setWidget("voice-recording", [
747
- target === "btw"
748
- ? " 🎙 BTW Recording... Ctrl+Shift+V to stop"
749
- : " 🎙 Recording... Ctrl+Shift+V to stop (or release SPACE)",
750
- ], { placement: "aboveEditor" });
926
+ // Show themed recording widget for legacy path
927
+ showRecordingWidget(target);
751
928
  }
752
929
  return true;
753
930
  }
@@ -760,6 +937,8 @@ export default function (pi: ExtensionAPI) {
760
937
  if (activeSession) {
761
938
  // === STREAMING PATH === Stop the stream, finalize will call onDone
762
939
  setVoiceState("transcribing");
940
+ stopRecordingWidgetAnimation();
941
+ showTranscribingWidget();
763
942
  stopStreamingSession(activeSession);
764
943
  return;
765
944
  }
@@ -768,7 +947,8 @@ export default function (pi: ExtensionAPI) {
768
947
  const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
769
948
  const audioFile = tempFile;
770
949
  setVoiceState("transcribing");
771
- ctx.ui.setWidget("voice-recording", undefined);
950
+ stopRecordingWidgetAnimation();
951
+ showTranscribingWidget();
772
952
 
773
953
  await stopLegacyRecording();
774
954
 
@@ -822,20 +1002,41 @@ export default function (pi: ExtensionAPI) {
822
1002
  setVoiceState("idle");
823
1003
  }
824
1004
 
825
- // ─── Hold-to-talk / Toggle-to-talk ──────────────────────────────────────
1005
+ // ─── Hold-to-talk with Duration Threshold ──────────────────────────────
826
1006
  //
827
- // Kitty protocol terminals (Ghostty, WezTerm, Kitty) send key-release
828
- // events (":3u" sequences), enabling true hold-to-talk.
1007
+ // SPACE activates voice ONLY when:
1008
+ // 1. The editor is empty (no text typed yet)
1009
+ // 2. SPACE is held for ≥ HOLD_THRESHOLD_MS (500ms)
829
1010
  //
830
- // Non-Kitty terminals (Apple Terminal, iTerm2 without config, basic xterm)
831
- // only send key-press. We detect this and fall back to toggle:
832
- // 1st SPACE press → start recording
833
- // 2nd SPACE press → stop recording + transcribe
1011
+ // If SPACE is released before the threshold, a regular space character
1012
+ // is typed into the editor (normal typing behavior).
834
1013
  //
835
- // We auto-detect Kitty support: if we see a key-release within the first
836
- // recording, we know hold-to-talk works. Otherwise, we stay in toggle mode.
837
-
838
- let kittyReleaseDetected = false; // have we ever seen a Kitty release event?
1014
+ // This prevents accidental voice activation when typing and matches
1015
+ // Claude Code's hold-to-talk UX pattern.
1016
+ //
1017
+ // For Kitty protocol terminals: hold → wait threshold → activate →
1018
+ // release → stop recording. True hold-to-talk.
1019
+ // For non-Kitty terminals: hold → wait threshold → activate →
1020
+ // Ctrl+Shift+V or /voice stop to end recording.
1021
+ //
1022
+ // KEY INSIGHT: In non-Kitty terminals, holding a key generates
1023
+ // rapid press events (key-repeat). We CANNOT use "second space press
1024
+ // = stop" because repeats arrive while holding. Instead, non-Kitty
1025
+ // users must use Ctrl+Shift+V to stop.
1026
+
1027
+ const HOLD_THRESHOLD_MS = 500; // minimum hold time before voice activates
1028
+ let kittyReleaseDetected = false;
1029
+ let spaceDownTime: number | null = null; // timestamp when SPACE was first pressed
1030
+ let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
1031
+ let spaceConsumed = false; // whether we've committed to voice (past threshold)
1032
+ let lastSpacePressTime = 0; // debounce rapid space presses from key-repeat
1033
+
1034
+ function clearHoldTimer() {
1035
+ if (holdActivationTimer) {
1036
+ clearTimeout(holdActivationTimer);
1037
+ holdActivationTimer = null;
1038
+ }
1039
+ }
839
1040
 
840
1041
  function setupHoldToTalk() {
841
1042
  if (!ctx?.hasUI) return;
@@ -847,54 +1048,129 @@ export default function (pi: ExtensionAPI) {
847
1048
 
848
1049
  // ── SPACE handling ──
849
1050
  if (matchesKey(data, "space")) {
1051
+ // RULE: If editor has content, SPACE always types a space — never voice
850
1052
  const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
851
- if (editorText && editorText.trim().length > 0) return undefined;
1053
+ if (editorText && editorText.trim().length > 0) {
1054
+ clearHoldTimer();
1055
+ spaceDownTime = null;
1056
+ spaceConsumed = false;
1057
+ return undefined; // let the default space character through
1058
+ }
852
1059
 
853
- // Kitty key-release: stop recording
1060
+ // ── Kitty key-release ──
854
1061
  if (isKeyRelease(data)) {
855
1062
  kittyReleaseDetected = true;
856
- if (isHolding && voiceState === "recording") {
1063
+
1064
+ // Released before threshold → type a space character
1065
+ if (spaceDownTime && !spaceConsumed) {
1066
+ clearHoldTimer();
1067
+ spaceDownTime = null;
1068
+ spaceConsumed = false;
1069
+ // Insert a space into editor
1070
+ if (ctx?.hasUI) ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
1071
+ return { consume: true };
1072
+ }
1073
+
1074
+ // Released after threshold → stop recording (true hold-to-talk)
1075
+ if (spaceConsumed && voiceState === "recording") {
857
1076
  isHolding = false;
1077
+ spaceConsumed = false;
1078
+ spaceDownTime = null;
858
1079
  stopVoiceRecording("editor");
859
1080
  return { consume: true };
860
1081
  }
1082
+
1083
+ spaceDownTime = null;
1084
+ spaceConsumed = false;
861
1085
  return undefined;
862
1086
  }
863
1087
 
864
- // Kitty key-repeat: suppress while holding
1088
+ // ── Kitty key-repeat: ALWAYS suppress while holding/recording ──
865
1089
  if (isKeyRepeat(data)) {
866
- if (isHolding) return { consume: true };
1090
+ if (spaceDownTime || spaceConsumed || isHolding || voiceState === "recording") {
1091
+ return { consume: true };
1092
+ }
867
1093
  return undefined;
868
1094
  }
869
1095
 
870
- // === Key PRESS ===
1096
+ // === Key PRESS (initial press only) ===
871
1097
 
872
- // Currently recording? → this is the "stop" press (toggle mode)
873
- if (voiceState === "recording") {
874
- isHolding = false;
875
- stopVoiceRecording("editor");
876
- return { consume: true };
1098
+ const now = Date.now();
1099
+
1100
+ // Debounce: ignore rapid presses within 100ms (terminal key-repeat
1101
+ // generates press events in non-Kitty terminals since there's no
1102
+ // key-repeat flag — they all look like fresh presses)
1103
+ if (now - lastSpacePressTime < 100) {
1104
+ lastSpacePressTime = now;
1105
+ return { consume: true }; // suppress repeat
877
1106
  }
1107
+ lastSpacePressTime = now;
878
1108
 
879
- // Currently transcribing? → ignore, wait for it to finish
1109
+ // If transcribing → ignore
880
1110
  if (voiceState === "transcribing") {
881
1111
  return { consume: true };
882
1112
  }
883
1113
 
884
- // Idle → start recording
885
- if (voiceState === "idle" && !isHolding) {
886
- isHolding = true;
887
- startVoiceRecording("editor").then((ok) => {
888
- if (!ok) isHolding = false;
889
- });
1114
+ // If already recording: In Kitty mode, release handles stop.
1115
+ // In non-Kitty, we can't safely detect "real second press" vs
1116
+ // key-repeat. Use Ctrl+Shift+V instead. Just consume.
1117
+ if (voiceState === "recording") {
890
1118
  return { consume: true };
891
1119
  }
892
1120
 
893
- if (isHolding) return { consume: true };
1121
+ // Idle → start the hold timer
1122
+ if (voiceState === "idle" && !spaceDownTime) {
1123
+ spaceDownTime = now;
1124
+ spaceConsumed = false;
1125
+
1126
+ // Show a subtle "preparing" indicator
1127
+ if (ctx?.hasUI) {
1128
+ showHoldHintWidget();
1129
+ }
1130
+
1131
+ // After threshold: activate voice recording
1132
+ holdActivationTimer = setTimeout(() => {
1133
+ holdActivationTimer = null;
1134
+ // Double-check: still idle, still holding, editor still empty
1135
+ const currentText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
1136
+ if (voiceState === "idle" && spaceDownTime && !(currentText && currentText.trim().length > 0)) {
1137
+ spaceConsumed = true;
1138
+ isHolding = true;
1139
+ startVoiceRecording("editor").then((ok) => {
1140
+ if (!ok) {
1141
+ isHolding = false;
1142
+ spaceConsumed = false;
1143
+ spaceDownTime = null;
1144
+ }
1145
+ });
1146
+ } else {
1147
+ spaceDownTime = null;
1148
+ spaceConsumed = false;
1149
+ }
1150
+ }, HOLD_THRESHOLD_MS);
1151
+
1152
+ return { consume: true }; // consume now — we'll insert space on early release
1153
+ }
1154
+
1155
+ if (isHolding || spaceConsumed) return { consume: true };
1156
+ return undefined;
1157
+ }
1158
+
1159
+ // ── Any other key while holding space (pre-threshold) → cancel hold, insert space ──
1160
+ if (spaceDownTime && !spaceConsumed && !matchesKey(data, "space")) {
1161
+ clearHoldTimer();
1162
+ // Insert the space that was consumed during hold detection
1163
+ if (ctx?.hasUI) {
1164
+ ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
1165
+ hideHoldHintWidget();
1166
+ }
1167
+ spaceDownTime = null;
1168
+ spaceConsumed = false;
1169
+ // Don't consume this key — let it through
894
1170
  return undefined;
895
1171
  }
896
1172
 
897
- // ── Ctrl+Shift+B handling (BTW voice) ──
1173
+ // ── Ctrl+Shift+B handling (BTW voice) — direct toggle, no hold threshold ──
898
1174
  if (matchesKey(data, "ctrl+shift+b")) {
899
1175
  if (isKeyRelease(data)) {
900
1176
  kittyReleaseDetected = true;
@@ -1078,9 +1354,19 @@ export default function (pi: ExtensionAPI) {
1078
1354
  return;
1079
1355
  }
1080
1356
  if (voiceState === "idle") {
1081
- await startVoiceRecording("editor");
1357
+ // Direct start — bypass hold threshold
1358
+ spaceConsumed = true;
1359
+ isHolding = true;
1360
+ const ok = await startVoiceRecording("editor");
1361
+ if (!ok) {
1362
+ isHolding = false;
1363
+ spaceConsumed = false;
1364
+ }
1082
1365
  } else if (voiceState === "recording") {
1083
1366
  isHolding = false;
1367
+ spaceConsumed = false;
1368
+ spaceDownTime = null;
1369
+ clearHoldTimer();
1084
1370
  await stopVoiceRecording("editor");
1085
1371
  }
1086
1372
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codexstar/pi-listen",
3
- "version": "1.0.13",
3
+ "version": "1.0.15",
4
4
  "description": "Voice input, first-run onboarding, and side-channel BTW conversations for Pi",
5
5
  "type": "module",
6
6
  "keywords": [