@codexstar/pi-listen 1.0.15 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/extensions/voice.ts +81 -41
  2. package/package.json +1 -1
@@ -567,6 +567,7 @@ export default function (pi: ExtensionAPI) {
567
567
  function voiceCleanup() {
568
568
  if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
569
569
  clearHoldTimer();
570
+ clearReleaseTimer();
570
571
  stopRecordingWidgetAnimation();
571
572
  if (activeSession) {
572
573
  finalizeSession(activeSession);
@@ -699,7 +700,7 @@ export default function (pi: ExtensionAPI) {
699
700
  ? theme.fg("dim", " Press Ctrl+Shift+B to stop")
700
701
  : kittyReleaseDetected
701
702
  ? theme.fg("dim", " Release SPACE to finalize")
702
- : theme.fg("dim", " Press Ctrl+Shift+V to stop");
703
+ : theme.fg("dim", " Release SPACE to stop");
703
704
 
704
705
  const lines = [
705
706
  topBorder,
@@ -755,10 +756,7 @@ export default function (pi: ExtensionAPI) {
755
756
  const label = theme.bold(theme.fg("accent", " VOICE "));
756
757
  const timeStyled = theme.fg("muted", timeStr);
757
758
  const titleLine = ` ${dot} ${label} ${timeStyled}`;
758
- const hint = kittyReleaseDetected
759
- ? theme.fg("dim", " Release SPACE to finalize")
760
- : theme.fg("dim", " Press Ctrl+Shift+V to stop");
761
-
759
+ const hint = theme.fg("dim", " Release SPACE to stop");
762
760
  const lines = [topBorder, side(titleLine)];
763
761
 
764
762
  if (!displayText.trim()) {
@@ -1011,25 +1009,27 @@ export default function (pi: ExtensionAPI) {
1011
1009
  // If SPACE is released before the threshold, a regular space character
1012
1010
  // is typed into the editor (normal typing behavior).
1013
1011
  //
1014
- // This prevents accidental voice activation when typing and matches
1015
- // Claude Code's hold-to-talk UX pattern.
1012
+ // KEY DESIGN for non-Kitty terminals (no key-release events):
1013
+ // Holding a key generates rapid press events (~30ms apart). We detect
1014
+ // "release" by watching for the stream of space presses to STOP.
1015
+ // Once the gap exceeds RELEASE_DETECT_MS (200ms), we know the user
1016
+ // lifted their finger and we stop recording.
1016
1017
  //
1017
- // For Kitty protocol terminals: hold → wait threshold → activate →
1018
- // release → stop recording. True hold-to-talk.
1019
- // For non-Kitty terminals: hold → wait threshold → activate →
1020
- // Ctrl+Shift+V or /voice stop to end recording.
1018
+ // Flow:
1019
+ // Hold SPACE → rapid presses arrive → first press starts 500ms timer →
1020
+ // timer fires → recording starts → presses keep coming (consumed) →
1021
+ // user releases → presses stop → 200ms silence → auto-stop recording
1021
1022
  //
1022
- // KEY INSIGHT: In non-Kitty terminals, holding a key generates
1023
- // rapid press events (key-repeat). We CANNOT use "second space press
1024
- // = stop" because repeats arrive while holding. Instead, non-Kitty
1025
- // users must use Ctrl+Shift+V to stop.
1023
+ // Kitty protocol terminals get true key-release events and work natively.
1026
1024
 
1027
1025
  const HOLD_THRESHOLD_MS = 500; // minimum hold time before voice activates
1026
+ const RELEASE_DETECT_MS = 200; // gap in key-repeat that means "released"
1028
1027
  let kittyReleaseDetected = false;
1029
- let spaceDownTime: number | null = null; // timestamp when SPACE was first pressed
1028
+ let spaceDownTime: number | null = null;
1030
1029
  let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
1031
- let spaceConsumed = false; // whether we've committed to voice (past threshold)
1032
- let lastSpacePressTime = 0; // debounce rapid space presses from key-repeat
1030
+ let spaceConsumed = false;
1031
+ let lastSpacePressTime = 0;
1032
+ let releaseDetectTimer: ReturnType<typeof setTimeout> | null = null;
1033
1033
 
1034
1034
  function clearHoldTimer() {
1035
1035
  if (holdActivationTimer) {
@@ -1038,6 +1038,49 @@ export default function (pi: ExtensionAPI) {
1038
1038
  }
1039
1039
  }
1040
1040
 
1041
+ function clearReleaseTimer() {
1042
+ if (releaseDetectTimer) {
1043
+ clearTimeout(releaseDetectTimer);
1044
+ releaseDetectTimer = null;
1045
+ }
1046
+ }
1047
+
1048
+ /** Called when we detect the user has released SPACE (non-Kitty) */
1049
+ function onSpaceReleaseDetected() {
1050
+ releaseDetectTimer = null;
1051
+
1052
+ // If we're still in the threshold wait (< 500ms), user just tapped space
1053
+ if (spaceDownTime && !spaceConsumed) {
1054
+ clearHoldTimer();
1055
+ spaceDownTime = null;
1056
+ spaceConsumed = false;
1057
+ // Insert a space character
1058
+ if (ctx?.hasUI) {
1059
+ ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
1060
+ hideHoldHintWidget();
1061
+ }
1062
+ return;
1063
+ }
1064
+
1065
+ // If we're recording, stop
1066
+ if (spaceConsumed && voiceState === "recording") {
1067
+ isHolding = false;
1068
+ spaceConsumed = false;
1069
+ spaceDownTime = null;
1070
+ stopVoiceRecording("editor");
1071
+ }
1072
+ }
1073
+
1074
+ /** Reset the release detection timer — called on every space press */
1075
+ function resetReleaseDetect() {
1076
+ clearReleaseTimer();
1077
+ // If we're in a hold state (threshold pending or recording),
1078
+ // start a timer to detect release
1079
+ if (spaceDownTime || spaceConsumed || voiceState === "recording") {
1080
+ releaseDetectTimer = setTimeout(onSpaceReleaseDetected, RELEASE_DETECT_MS);
1081
+ }
1082
+ }
1083
+
1041
1084
  function setupHoldToTalk() {
1042
1085
  if (!ctx?.hasUI) return;
1043
1086
 
@@ -1052,6 +1095,7 @@ export default function (pi: ExtensionAPI) {
1052
1095
  const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
1053
1096
  if (editorText && editorText.trim().length > 0) {
1054
1097
  clearHoldTimer();
1098
+ clearReleaseTimer();
1055
1099
  spaceDownTime = null;
1056
1100
  spaceConsumed = false;
1057
1101
  return undefined; // let the default space character through
@@ -1060,13 +1104,13 @@ export default function (pi: ExtensionAPI) {
1060
1104
  // ── Kitty key-release ──
1061
1105
  if (isKeyRelease(data)) {
1062
1106
  kittyReleaseDetected = true;
1107
+ clearReleaseTimer();
1063
1108
 
1064
1109
  // Released before threshold → type a space character
1065
1110
  if (spaceDownTime && !spaceConsumed) {
1066
1111
  clearHoldTimer();
1067
1112
  spaceDownTime = null;
1068
1113
  spaceConsumed = false;
1069
- // Insert a space into editor
1070
1114
  if (ctx?.hasUI) ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
1071
1115
  return { consume: true };
1072
1116
  }
@@ -1088,40 +1132,39 @@ export default function (pi: ExtensionAPI) {
1088
1132
  // ── Kitty key-repeat: ALWAYS suppress while holding/recording ──
1089
1133
  if (isKeyRepeat(data)) {
1090
1134
  if (spaceDownTime || spaceConsumed || isHolding || voiceState === "recording") {
1135
+ resetReleaseDetect(); // keep resetting — still holding
1091
1136
  return { consume: true };
1092
1137
  }
1093
1138
  return undefined;
1094
1139
  }
1095
1140
 
1096
- // === Key PRESS (initial press only) ===
1097
-
1098
- const now = Date.now();
1141
+ // === Key PRESS ===
1142
+ // In non-Kitty terminals, holding a key sends rapid press events.
1143
+ // We use these to detect "still holding" and the gap to detect "released".
1099
1144
 
1100
- // Debounce: ignore rapid presses within 100ms (terminal key-repeat
1101
- // generates press events in non-Kitty terminals since there's no
1102
- // key-repeat flag — they all look like fresh presses)
1103
- if (now - lastSpacePressTime < 100) {
1104
- lastSpacePressTime = now;
1105
- return { consume: true }; // suppress repeat
1106
- }
1107
- lastSpacePressTime = now;
1145
+ // Reset release detection — user is still holding
1146
+ resetReleaseDetect();
1108
1147
 
1109
1148
  // If transcribing → ignore
1110
1149
  if (voiceState === "transcribing") {
1111
1150
  return { consume: true };
1112
1151
  }
1113
1152
 
1114
- // If already recording: In Kitty mode, release handles stop.
1115
- // In non-Kitty, we can't safely detect "real second press" vs
1116
- // key-repeat. Use Ctrl+Shift+V instead. Just consume.
1153
+ // If already recording → just consume (release detect handles stop)
1117
1154
  if (voiceState === "recording") {
1118
1155
  return { consume: true };
1119
1156
  }
1120
1157
 
1121
- // Idle → start the hold timer
1122
- if (voiceState === "idle" && !spaceDownTime) {
1123
- spaceDownTime = now;
1158
+ // If we already started the hold timer, this is a repeat → consume
1159
+ if (spaceDownTime) {
1160
+ return { consume: true };
1161
+ }
1162
+
1163
+ // Idle, first press → start the hold timer
1164
+ if (voiceState === "idle") {
1165
+ spaceDownTime = Date.now();
1124
1166
  spaceConsumed = false;
1167
+ lastSpacePressTime = Date.now();
1125
1168
 
1126
1169
  // Show a subtle "preparing" indicator
1127
1170
  if (ctx?.hasUI) {
@@ -1131,7 +1174,6 @@ export default function (pi: ExtensionAPI) {
1131
1174
  // After threshold: activate voice recording
1132
1175
  holdActivationTimer = setTimeout(() => {
1133
1176
  holdActivationTimer = null;
1134
- // Double-check: still idle, still holding, editor still empty
1135
1177
  const currentText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
1136
1178
  if (voiceState === "idle" && spaceDownTime && !(currentText && currentText.trim().length > 0)) {
1137
1179
  spaceConsumed = true;
@@ -1149,7 +1191,7 @@ export default function (pi: ExtensionAPI) {
1149
1191
  }
1150
1192
  }, HOLD_THRESHOLD_MS);
1151
1193
 
1152
- return { consume: true }; // consume now — we'll insert space on early release
1194
+ return { consume: true };
1153
1195
  }
1154
1196
 
1155
1197
  if (isHolding || spaceConsumed) return { consume: true };
@@ -1159,14 +1201,13 @@ export default function (pi: ExtensionAPI) {
1159
1201
  // ── Any other key while holding space (pre-threshold) → cancel hold, insert space ──
1160
1202
  if (spaceDownTime && !spaceConsumed && !matchesKey(data, "space")) {
1161
1203
  clearHoldTimer();
1162
- // Insert the space that was consumed during hold detection
1204
+ clearReleaseTimer();
1163
1205
  if (ctx?.hasUI) {
1164
1206
  ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
1165
1207
  hideHoldHintWidget();
1166
1208
  }
1167
1209
  spaceDownTime = null;
1168
1210
  spaceConsumed = false;
1169
- // Don't consume this key — let it through
1170
1211
  return undefined;
1171
1212
  }
1172
1213
 
@@ -1187,7 +1228,6 @@ export default function (pi: ExtensionAPI) {
1187
1228
  return undefined;
1188
1229
  }
1189
1230
 
1190
- // Toggle: stop if recording
1191
1231
  if (voiceState === "recording") {
1192
1232
  isHolding = false;
1193
1233
  stopVoiceRecording("btw");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codexstar/pi-listen",
3
- "version": "1.0.15",
3
+ "version": "1.0.16",
4
4
  "description": "Voice input, first-run onboarding, and side-channel BTW conversations for Pi",
5
5
  "type": "module",
6
6
  "keywords": [