@codexstar/pi-listen 1.0.15 → 1.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/extensions/voice.ts +85 -51
  2. package/package.json +1 -1
@@ -567,6 +567,7 @@ export default function (pi: ExtensionAPI) {
567
567
  function voiceCleanup() {
568
568
  if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
569
569
  clearHoldTimer();
570
+ clearReleaseTimer();
570
571
  stopRecordingWidgetAnimation();
571
572
  if (activeSession) {
572
573
  finalizeSession(activeSession);
@@ -699,7 +700,7 @@ export default function (pi: ExtensionAPI) {
699
700
  ? theme.fg("dim", " Press Ctrl+Shift+B to stop")
700
701
  : kittyReleaseDetected
701
702
  ? theme.fg("dim", " Release SPACE to finalize")
702
- : theme.fg("dim", " Press Ctrl+Shift+V to stop");
703
+ : theme.fg("dim", " Release SPACE to stop");
703
704
 
704
705
  const lines = [
705
706
  topBorder,
@@ -755,10 +756,7 @@ export default function (pi: ExtensionAPI) {
755
756
  const label = theme.bold(theme.fg("accent", " VOICE "));
756
757
  const timeStyled = theme.fg("muted", timeStr);
757
758
  const titleLine = ` ${dot} ${label} ${timeStyled}`;
758
- const hint = kittyReleaseDetected
759
- ? theme.fg("dim", " Release SPACE to finalize")
760
- : theme.fg("dim", " Press Ctrl+Shift+V to stop");
761
-
759
+ const hint = theme.fg("dim", " Release SPACE to stop");
762
760
  const lines = [topBorder, side(titleLine)];
763
761
 
764
762
  if (!displayText.trim()) {
@@ -1011,25 +1009,27 @@ export default function (pi: ExtensionAPI) {
1011
1009
  // If SPACE is released before the threshold, a regular space character
1012
1010
  // is typed into the editor (normal typing behavior).
1013
1011
  //
1014
- // This prevents accidental voice activation when typing and matches
1015
- // Claude Code's hold-to-talk UX pattern.
1012
+ // KEY DESIGN for non-Kitty terminals (no key-release events):
1013
+ // Holding a key generates rapid press events (~30ms apart). We detect
1014
+ // "release" by watching for the stream of space presses to STOP.
1015
+ // Once the gap exceeds RELEASE_DETECT_MS (200ms), we know the user
1016
+ // lifted their finger and we stop recording.
1016
1017
  //
1017
- // For Kitty protocol terminals: hold → wait threshold → activate →
1018
- // release → stop recording. True hold-to-talk.
1019
- // For non-Kitty terminals: hold → wait threshold → activate →
1020
- // Ctrl+Shift+V or /voice stop to end recording.
1018
+ // Flow:
1019
+ // Hold SPACE → rapid presses arrive → first press starts 500ms timer →
1020
+ // timer fires → recording starts → presses keep coming (consumed) →
1021
+ // user releases → presses stop → 200ms silence → auto-stop recording
1021
1022
  //
1022
- // KEY INSIGHT: In non-Kitty terminals, holding a key generates
1023
- // rapid press events (key-repeat). We CANNOT use "second space press
1024
- // = stop" because repeats arrive while holding. Instead, non-Kitty
1025
- // users must use Ctrl+Shift+V to stop.
1023
+ // Kitty protocol terminals get true key-release events and work natively.
1026
1024
 
1027
1025
  const HOLD_THRESHOLD_MS = 500; // minimum hold time before voice activates
1026
+ const RELEASE_DETECT_MS = 200; // gap in key-repeat that means "released"
1028
1027
  let kittyReleaseDetected = false;
1029
- let spaceDownTime: number | null = null; // timestamp when SPACE was first pressed
1028
+ let spaceDownTime: number | null = null;
1030
1029
  let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
1031
- let spaceConsumed = false; // whether we've committed to voice (past threshold)
1032
- let lastSpacePressTime = 0; // debounce rapid space presses from key-repeat
1030
+ let spaceConsumed = false;
1031
+ let lastSpacePressTime = 0;
1032
+ let releaseDetectTimer: ReturnType<typeof setTimeout> | null = null;
1033
1033
 
1034
1034
  function clearHoldTimer() {
1035
1035
  if (holdActivationTimer) {
@@ -1038,6 +1038,49 @@ export default function (pi: ExtensionAPI) {
1038
1038
  }
1039
1039
  }
1040
1040
 
1041
+ function clearReleaseTimer() {
1042
+ if (releaseDetectTimer) {
1043
+ clearTimeout(releaseDetectTimer);
1044
+ releaseDetectTimer = null;
1045
+ }
1046
+ }
1047
+
1048
+ /** Called when we detect the user has released SPACE (non-Kitty) */
1049
+ function onSpaceReleaseDetected() {
1050
+ releaseDetectTimer = null;
1051
+
1052
+ // If we're still in the threshold wait (< 500ms), user just tapped space
1053
+ if (spaceDownTime && !spaceConsumed) {
1054
+ clearHoldTimer();
1055
+ spaceDownTime = null;
1056
+ spaceConsumed = false;
1057
+ // Insert a space character
1058
+ if (ctx?.hasUI) {
1059
+ ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
1060
+ hideHoldHintWidget();
1061
+ }
1062
+ return;
1063
+ }
1064
+
1065
+ // If we're recording, stop
1066
+ if (spaceConsumed && voiceState === "recording") {
1067
+ isHolding = false;
1068
+ spaceConsumed = false;
1069
+ spaceDownTime = null;
1070
+ stopVoiceRecording("editor");
1071
+ }
1072
+ }
1073
+
1074
+ /** Reset the release detection timer — called on every space press */
1075
+ function resetReleaseDetect() {
1076
+ clearReleaseTimer();
1077
+ // If we're in a hold state (threshold pending or recording),
1078
+ // start a timer to detect release
1079
+ if (spaceDownTime || spaceConsumed || voiceState === "recording") {
1080
+ releaseDetectTimer = setTimeout(onSpaceReleaseDetected, RELEASE_DETECT_MS);
1081
+ }
1082
+ }
1083
+
1041
1084
  function setupHoldToTalk() {
1042
1085
  if (!ctx?.hasUI) return;
1043
1086
 
@@ -1048,25 +1091,20 @@ export default function (pi: ExtensionAPI) {
1048
1091
 
1049
1092
  // ── SPACE handling ──
1050
1093
  if (matchesKey(data, "space")) {
1051
- // RULE: If editor has content, SPACE always types a space — never voice
1052
- const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
1053
- if (editorText && editorText.trim().length > 0) {
1054
- clearHoldTimer();
1055
- spaceDownTime = null;
1056
- spaceConsumed = false;
1057
- return undefined; // let the default space character through
1058
- }
1094
+ // Check editor content — hold-to-talk still works even with content,
1095
+ // but a quick tap types a space as normal
1096
+ const editorHasContent = !!(ctx?.hasUI && ctx.ui.getEditorText()?.trim().length);
1059
1097
 
1060
1098
  // ── Kitty key-release ──
1061
1099
  if (isKeyRelease(data)) {
1062
1100
  kittyReleaseDetected = true;
1101
+ clearReleaseTimer();
1063
1102
 
1064
1103
  // Released before threshold → type a space character
1065
1104
  if (spaceDownTime && !spaceConsumed) {
1066
1105
  clearHoldTimer();
1067
1106
  spaceDownTime = null;
1068
1107
  spaceConsumed = false;
1069
- // Insert a space into editor
1070
1108
  if (ctx?.hasUI) ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
1071
1109
  return { consume: true };
1072
1110
  }
@@ -1088,40 +1126,39 @@ export default function (pi: ExtensionAPI) {
1088
1126
  // ── Kitty key-repeat: ALWAYS suppress while holding/recording ──
1089
1127
  if (isKeyRepeat(data)) {
1090
1128
  if (spaceDownTime || spaceConsumed || isHolding || voiceState === "recording") {
1129
+ resetReleaseDetect(); // keep resetting — still holding
1091
1130
  return { consume: true };
1092
1131
  }
1093
1132
  return undefined;
1094
1133
  }
1095
1134
 
1096
- // === Key PRESS (initial press only) ===
1135
+ // === Key PRESS ===
1136
+ // In non-Kitty terminals, holding a key sends rapid press events.
1137
+ // We use these to detect "still holding" and the gap to detect "released".
1097
1138
 
1098
- const now = Date.now();
1099
-
1100
- // Debounce: ignore rapid presses within 100ms (terminal key-repeat
1101
- // generates press events in non-Kitty terminals since there's no
1102
- // key-repeat flag — they all look like fresh presses)
1103
- if (now - lastSpacePressTime < 100) {
1104
- lastSpacePressTime = now;
1105
- return { consume: true }; // suppress repeat
1106
- }
1107
- lastSpacePressTime = now;
1139
+ // Reset release detection — user is still holding
1140
+ resetReleaseDetect();
1108
1141
 
1109
1142
  // If transcribing → ignore
1110
1143
  if (voiceState === "transcribing") {
1111
1144
  return { consume: true };
1112
1145
  }
1113
1146
 
1114
- // If already recording: In Kitty mode, release handles stop.
1115
- // In non-Kitty, we can't safely detect "real second press" vs
1116
- // key-repeat. Use Ctrl+Shift+V instead. Just consume.
1147
+ // If already recording → just consume (release detect handles stop)
1117
1148
  if (voiceState === "recording") {
1118
1149
  return { consume: true };
1119
1150
  }
1120
1151
 
1121
- // Idle → start the hold timer
1122
- if (voiceState === "idle" && !spaceDownTime) {
1123
- spaceDownTime = now;
1152
+ // If we already started the hold timer, this is a repeat → consume
1153
+ if (spaceDownTime) {
1154
+ return { consume: true };
1155
+ }
1156
+
1157
+ // Idle, first press → start the hold timer
1158
+ if (voiceState === "idle") {
1159
+ spaceDownTime = Date.now();
1124
1160
  spaceConsumed = false;
1161
+ lastSpacePressTime = Date.now();
1125
1162
 
1126
1163
  // Show a subtle "preparing" indicator
1127
1164
  if (ctx?.hasUI) {
@@ -1129,11 +1166,10 @@ export default function (pi: ExtensionAPI) {
1129
1166
  }
1130
1167
 
1131
1168
  // After threshold: activate voice recording
1169
+ // Works regardless of whether editor has content — hold always activates voice
1132
1170
  holdActivationTimer = setTimeout(() => {
1133
1171
  holdActivationTimer = null;
1134
- // Double-check: still idle, still holding, editor still empty
1135
- const currentText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
1136
- if (voiceState === "idle" && spaceDownTime && !(currentText && currentText.trim().length > 0)) {
1172
+ if (voiceState === "idle" && spaceDownTime) {
1137
1173
  spaceConsumed = true;
1138
1174
  isHolding = true;
1139
1175
  startVoiceRecording("editor").then((ok) => {
@@ -1149,7 +1185,7 @@ export default function (pi: ExtensionAPI) {
1149
1185
  }
1150
1186
  }, HOLD_THRESHOLD_MS);
1151
1187
 
1152
- return { consume: true }; // consume now — we'll insert space on early release
1188
+ return { consume: true };
1153
1189
  }
1154
1190
 
1155
1191
  if (isHolding || spaceConsumed) return { consume: true };
@@ -1159,14 +1195,13 @@ export default function (pi: ExtensionAPI) {
1159
1195
  // ── Any other key while holding space (pre-threshold) → cancel hold, insert space ──
1160
1196
  if (spaceDownTime && !spaceConsumed && !matchesKey(data, "space")) {
1161
1197
  clearHoldTimer();
1162
- // Insert the space that was consumed during hold detection
1198
+ clearReleaseTimer();
1163
1199
  if (ctx?.hasUI) {
1164
1200
  ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
1165
1201
  hideHoldHintWidget();
1166
1202
  }
1167
1203
  spaceDownTime = null;
1168
1204
  spaceConsumed = false;
1169
- // Don't consume this key — let it through
1170
1205
  return undefined;
1171
1206
  }
1172
1207
 
@@ -1187,7 +1222,6 @@ export default function (pi: ExtensionAPI) {
1187
1222
  return undefined;
1188
1223
  }
1189
1224
 
1190
- // Toggle: stop if recording
1191
1225
  if (voiceState === "recording") {
1192
1226
  isHolding = false;
1193
1227
  stopVoiceRecording("btw");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codexstar/pi-listen",
3
- "version": "1.0.15",
3
+ "version": "1.0.17",
4
4
  "description": "Voice input, first-run onboarding, and side-channel BTW conversations for Pi",
5
5
  "type": "module",
6
6
  "keywords": [