@codexstar/pi-listen 1.0.14 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/extensions/voice.ts +121 -43
  2. package/package.json +1 -1
@@ -567,6 +567,7 @@ export default function (pi: ExtensionAPI) {
567
567
  function voiceCleanup() {
568
568
  if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
569
569
  clearHoldTimer();
570
+ clearReleaseTimer();
570
571
  stopRecordingWidgetAnimation();
571
572
  if (activeSession) {
572
573
  finalizeSession(activeSession);
@@ -636,23 +637,31 @@ export default function (pi: ExtensionAPI) {
636
637
  /** Animated recording indicator with live waveform */
637
638
  function showRecordingWidget(target: "editor" | "btw") {
638
639
  if (!ctx?.hasUI) return;
639
- let frame = 0;
640
- const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];
641
640
 
642
- // Animate the widget every 200ms
641
+ // Store initial state — once live transcription arrives,
642
+ // updateLiveTranscriptWidget takes over and we stop the animation.
643
+ (showRecordingWidget as any)._target = target;
644
+ (showRecordingWidget as any)._frame = 0;
645
+ (showRecordingWidget as any)._hasTranscript = false;
646
+
647
+ // Animate the widget every 300ms (only while no transcript is showing)
643
648
  const animTimer = setInterval(() => {
644
- frame++;
645
- if (ctx?.hasUI) ctx.ui.setWidget("voice-recording", undefined); // force re-render
646
- showRecordingWidgetFrame(target, frame, waveChars);
647
- }, 200);
649
+ // Stop animating once live transcript takes over
650
+ if ((showRecordingWidget as any)?._hasTranscript) return;
651
+
652
+ (showRecordingWidget as any)._frame = ((showRecordingWidget as any)._frame || 0) + 1;
653
+ showRecordingWidgetFrame(target, (showRecordingWidget as any)._frame);
654
+ }, 300);
648
655
 
649
656
  // Store the timer so we can clean it up
650
657
  (showRecordingWidget as any)._animTimer = animTimer;
651
658
 
652
- showRecordingWidgetFrame(target, frame, waveChars);
659
+ showRecordingWidgetFrame(target, 0);
653
660
  }
654
661
 
655
- function showRecordingWidgetFrame(target: "editor" | "btw", frame: number, waveChars: string[]) {
662
+ const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];
663
+
664
+ function showRecordingWidgetFrame(target: "editor" | "btw", frame: number) {
656
665
  if (!ctx?.hasUI) return;
657
666
  ctx.ui.setWidget("voice-recording", (tui, theme) => {
658
667
  return {
@@ -691,7 +700,7 @@ export default function (pi: ExtensionAPI) {
691
700
  ? theme.fg("dim", " Press Ctrl+Shift+B to stop")
692
701
  : kittyReleaseDetected
693
702
  ? theme.fg("dim", " Release SPACE to finalize")
694
- : theme.fg("dim", " Press SPACE again to stop");
703
+ : theme.fg("dim", " Release SPACE to stop");
695
704
 
696
705
  const lines = [
697
706
  topBorder,
@@ -717,6 +726,10 @@ export default function (pi: ExtensionAPI) {
717
726
  function updateLiveTranscriptWidget(interim: string, finals: string[]) {
718
727
  if (!ctx?.hasUI) return;
719
728
 
729
+ // Stop the recording animation — live transcript takes over
730
+ (showRecordingWidget as any)._hasTranscript = true;
731
+ stopRecordingWidgetAnimation();
732
+
720
733
  const finalized = finals.join(" ");
721
734
  const displayText = finalized + (interim ? (finalized ? " " : "") + interim : "");
722
735
 
@@ -743,10 +756,7 @@ export default function (pi: ExtensionAPI) {
743
756
  const label = theme.bold(theme.fg("accent", " VOICE "));
744
757
  const timeStyled = theme.fg("muted", timeStr);
745
758
  const titleLine = ` ${dot} ${label} ${timeStyled}`;
746
- const hint = kittyReleaseDetected
747
- ? theme.fg("dim", " Release SPACE to finalize")
748
- : theme.fg("dim", " Press SPACE again to stop");
749
-
759
+ const hint = theme.fg("dim", " Release SPACE to stop");
750
760
  const lines = [topBorder, side(titleLine)];
751
761
 
752
762
  if (!displayText.trim()) {
@@ -999,19 +1009,27 @@ export default function (pi: ExtensionAPI) {
999
1009
  // If SPACE is released before the threshold, a regular space character
1000
1010
  // is typed into the editor (normal typing behavior).
1001
1011
  //
1002
- // This prevents accidental voice activation when typing and matches
1003
- // Claude Code's hold-to-talk UX pattern.
1012
+ // KEY DESIGN for non-Kitty terminals (no key-release events):
1013
+ // Holding a key generates rapid press events (~30ms apart). We detect
1014
+ // "release" by watching for the stream of space presses to STOP.
1015
+ // Once the gap exceeds RELEASE_DETECT_MS (200ms), we know the user
1016
+ // lifted their finger and we stop recording.
1004
1017
  //
1005
- // For Kitty protocol terminals: hold → wait threshold → activate →
1006
- // release → stop recording. True hold-to-talk.
1007
- // For non-Kitty terminals: hold → wait threshold → activate →
1008
- // press SPACE again → stop recording. Toggle after activation.
1018
+ // Flow:
1019
+ // Hold SPACE → rapid presses arrive → first press starts 500ms timer →
1020
+ // timer fires → recording starts → presses keep coming (consumed) →
1021
+ // user releases → presses stop → 200ms silence → auto-stop recording
1022
+ //
1023
+ // Kitty protocol terminals get true key-release events and work natively.
1009
1024
 
1010
1025
  const HOLD_THRESHOLD_MS = 500; // minimum hold time before voice activates
1026
+ const RELEASE_DETECT_MS = 200; // gap in key-repeat that means "released"
1011
1027
  let kittyReleaseDetected = false;
1012
- let spaceDownTime: number | null = null; // timestamp when SPACE was first pressed
1028
+ let spaceDownTime: number | null = null;
1013
1029
  let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
1014
- let spaceConsumed = false; // whether we've committed to voice (past threshold)
1030
+ let spaceConsumed = false;
1031
+ let lastSpacePressTime = 0;
1032
+ let releaseDetectTimer: ReturnType<typeof setTimeout> | null = null;
1015
1033
 
1016
1034
  function clearHoldTimer() {
1017
1035
  if (holdActivationTimer) {
@@ -1020,6 +1038,49 @@ export default function (pi: ExtensionAPI) {
1020
1038
  }
1021
1039
  }
1022
1040
 
1041
+ function clearReleaseTimer() {
1042
+ if (releaseDetectTimer) {
1043
+ clearTimeout(releaseDetectTimer);
1044
+ releaseDetectTimer = null;
1045
+ }
1046
+ }
1047
+
1048
+ /** Called when we detect the user has released SPACE (non-Kitty) */
1049
+ function onSpaceReleaseDetected() {
1050
+ releaseDetectTimer = null;
1051
+
1052
+ // If we're still in the threshold wait (< 500ms), user just tapped space
1053
+ if (spaceDownTime && !spaceConsumed) {
1054
+ clearHoldTimer();
1055
+ spaceDownTime = null;
1056
+ spaceConsumed = false;
1057
+ // Insert a space character
1058
+ if (ctx?.hasUI) {
1059
+ ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
1060
+ hideHoldHintWidget();
1061
+ }
1062
+ return;
1063
+ }
1064
+
1065
+ // If we're recording, stop
1066
+ if (spaceConsumed && voiceState === "recording") {
1067
+ isHolding = false;
1068
+ spaceConsumed = false;
1069
+ spaceDownTime = null;
1070
+ stopVoiceRecording("editor");
1071
+ }
1072
+ }
1073
+
1074
+ /** Reset the release detection timer — called on every space press */
1075
+ function resetReleaseDetect() {
1076
+ clearReleaseTimer();
1077
+ // If we're in a hold state (threshold pending or recording),
1078
+ // start a timer to detect release
1079
+ if (spaceDownTime || spaceConsumed || voiceState === "recording") {
1080
+ releaseDetectTimer = setTimeout(onSpaceReleaseDetected, RELEASE_DETECT_MS);
1081
+ }
1082
+ }
1083
+
1023
1084
  function setupHoldToTalk() {
1024
1085
  if (!ctx?.hasUI) return;
1025
1086
 
@@ -1034,6 +1095,7 @@ export default function (pi: ExtensionAPI) {
1034
1095
  const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
1035
1096
  if (editorText && editorText.trim().length > 0) {
1036
1097
  clearHoldTimer();
1098
+ clearReleaseTimer();
1037
1099
  spaceDownTime = null;
1038
1100
  spaceConsumed = false;
1039
1101
  return undefined; // let the default space character through
@@ -1042,19 +1104,19 @@ export default function (pi: ExtensionAPI) {
1042
1104
  // ── Kitty key-release ──
1043
1105
  if (isKeyRelease(data)) {
1044
1106
  kittyReleaseDetected = true;
1107
+ clearReleaseTimer();
1045
1108
 
1046
1109
  // Released before threshold → type a space character
1047
1110
  if (spaceDownTime && !spaceConsumed) {
1048
1111
  clearHoldTimer();
1049
1112
  spaceDownTime = null;
1050
1113
  spaceConsumed = false;
1051
- // Insert a space into editor
1052
1114
  if (ctx?.hasUI) ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
1053
1115
  return { consume: true };
1054
1116
  }
1055
1117
 
1056
1118
  // Released after threshold → stop recording (true hold-to-talk)
1057
- if (spaceConsumed && isHolding && voiceState === "recording") {
1119
+ if (spaceConsumed && voiceState === "recording") {
1058
1120
  isHolding = false;
1059
1121
  spaceConsumed = false;
1060
1122
  spaceDownTime = null;
@@ -1067,33 +1129,42 @@ export default function (pi: ExtensionAPI) {
1067
1129
  return undefined;
1068
1130
  }
1069
1131
 
1070
- // ── Kitty key-repeat: suppress while holding past threshold ──
1132
+ // ── Kitty key-repeat: ALWAYS suppress while holding/recording ──
1071
1133
  if (isKeyRepeat(data)) {
1072
- if (spaceConsumed || isHolding) return { consume: true };
1134
+ if (spaceDownTime || spaceConsumed || isHolding || voiceState === "recording") {
1135
+ resetReleaseDetect(); // keep resetting — still holding
1136
+ return { consume: true };
1137
+ }
1073
1138
  return undefined;
1074
1139
  }
1075
1140
 
1076
1141
  // === Key PRESS ===
1142
+ // In non-Kitty terminals, holding a key sends rapid press events.
1143
+ // We use these to detect "still holding" and the gap to detect "released".
1077
1144
 
1078
- // If already recording (toggle mode for non-Kitty) → stop
1079
- if (voiceState === "recording" && spaceConsumed) {
1080
- isHolding = false;
1081
- spaceConsumed = false;
1082
- spaceDownTime = null;
1083
- clearHoldTimer();
1084
- stopVoiceRecording("editor");
1085
- return { consume: true };
1086
- }
1145
+ // Reset release detection — user is still holding
1146
+ resetReleaseDetect();
1087
1147
 
1088
1148
  // If transcribing → ignore
1089
1149
  if (voiceState === "transcribing") {
1090
1150
  return { consume: true };
1091
1151
  }
1092
1152
 
1093
- // Idle → start the hold timer
1094
- if (voiceState === "idle" && !spaceDownTime) {
1153
+ // If already recording → just consume (release detect handles stop)
1154
+ if (voiceState === "recording") {
1155
+ return { consume: true };
1156
+ }
1157
+
1158
+ // If we already started the hold timer, this is a repeat → consume
1159
+ if (spaceDownTime) {
1160
+ return { consume: true };
1161
+ }
1162
+
1163
+ // Idle, first press → start the hold timer
1164
+ if (voiceState === "idle") {
1095
1165
  spaceDownTime = Date.now();
1096
1166
  spaceConsumed = false;
1167
+ lastSpacePressTime = Date.now();
1097
1168
 
1098
1169
  // Show a subtle "preparing" indicator
1099
1170
  if (ctx?.hasUI) {
@@ -1103,7 +1174,6 @@ export default function (pi: ExtensionAPI) {
1103
1174
  // After threshold: activate voice recording
1104
1175
  holdActivationTimer = setTimeout(() => {
1105
1176
  holdActivationTimer = null;
1106
- // Double-check: still idle, still holding, editor still empty
1107
1177
  const currentText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
1108
1178
  if (voiceState === "idle" && spaceDownTime && !(currentText && currentText.trim().length > 0)) {
1109
1179
  spaceConsumed = true;
@@ -1121,7 +1191,7 @@ export default function (pi: ExtensionAPI) {
1121
1191
  }
1122
1192
  }, HOLD_THRESHOLD_MS);
1123
1193
 
1124
- return { consume: true }; // consume now — we'll insert space on early release
1194
+ return { consume: true };
1125
1195
  }
1126
1196
 
1127
1197
  if (isHolding || spaceConsumed) return { consume: true };
@@ -1131,14 +1201,13 @@ export default function (pi: ExtensionAPI) {
1131
1201
  // ── Any other key while holding space (pre-threshold) → cancel hold, insert space ──
1132
1202
  if (spaceDownTime && !spaceConsumed && !matchesKey(data, "space")) {
1133
1203
  clearHoldTimer();
1134
- // Insert the space that was consumed during hold detection
1204
+ clearReleaseTimer();
1135
1205
  if (ctx?.hasUI) {
1136
1206
  ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
1137
1207
  hideHoldHintWidget();
1138
1208
  }
1139
1209
  spaceDownTime = null;
1140
1210
  spaceConsumed = false;
1141
- // Don't consume this key — let it through
1142
1211
  return undefined;
1143
1212
  }
1144
1213
 
@@ -1159,7 +1228,6 @@ export default function (pi: ExtensionAPI) {
1159
1228
  return undefined;
1160
1229
  }
1161
1230
 
1162
- // Toggle: stop if recording
1163
1231
  if (voiceState === "recording") {
1164
1232
  isHolding = false;
1165
1233
  stopVoiceRecording("btw");
@@ -1326,9 +1394,19 @@ export default function (pi: ExtensionAPI) {
1326
1394
  return;
1327
1395
  }
1328
1396
  if (voiceState === "idle") {
1329
- await startVoiceRecording("editor");
1397
+ // Direct start — bypass hold threshold
1398
+ spaceConsumed = true;
1399
+ isHolding = true;
1400
+ const ok = await startVoiceRecording("editor");
1401
+ if (!ok) {
1402
+ isHolding = false;
1403
+ spaceConsumed = false;
1404
+ }
1330
1405
  } else if (voiceState === "recording") {
1331
1406
  isHolding = false;
1407
+ spaceConsumed = false;
1408
+ spaceDownTime = null;
1409
+ clearHoldTimer();
1332
1410
  await stopVoiceRecording("editor");
1333
1411
  }
1334
1412
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codexstar/pi-listen",
3
- "version": "1.0.14",
3
+ "version": "1.0.16",
4
4
  "description": "Voice input, first-run onboarding, and side-channel BTW conversations for Pi",
5
5
  "type": "module",
6
6
  "keywords": [