@codexstar/pi-listen 1.0.15 → 1.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extensions/voice.ts +85 -51
- package/package.json +1 -1
package/extensions/voice.ts
CHANGED
|
@@ -567,6 +567,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
567
567
|
function voiceCleanup() {
|
|
568
568
|
if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
|
|
569
569
|
clearHoldTimer();
|
|
570
|
+
clearReleaseTimer();
|
|
570
571
|
stopRecordingWidgetAnimation();
|
|
571
572
|
if (activeSession) {
|
|
572
573
|
finalizeSession(activeSession);
|
|
@@ -699,7 +700,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
699
700
|
? theme.fg("dim", " Press Ctrl+Shift+B to stop")
|
|
700
701
|
: kittyReleaseDetected
|
|
701
702
|
? theme.fg("dim", " Release SPACE to finalize")
|
|
702
|
-
: theme.fg("dim", "
|
|
703
|
+
: theme.fg("dim", " Release SPACE to stop");
|
|
703
704
|
|
|
704
705
|
const lines = [
|
|
705
706
|
topBorder,
|
|
@@ -755,10 +756,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
755
756
|
const label = theme.bold(theme.fg("accent", " VOICE "));
|
|
756
757
|
const timeStyled = theme.fg("muted", timeStr);
|
|
757
758
|
const titleLine = ` ${dot} ${label} ${timeStyled}`;
|
|
758
|
-
const hint =
|
|
759
|
-
? theme.fg("dim", " Release SPACE to finalize")
|
|
760
|
-
: theme.fg("dim", " Press Ctrl+Shift+V to stop");
|
|
761
|
-
|
|
759
|
+
const hint = theme.fg("dim", " Release SPACE to stop");
|
|
762
760
|
const lines = [topBorder, side(titleLine)];
|
|
763
761
|
|
|
764
762
|
if (!displayText.trim()) {
|
|
@@ -1011,25 +1009,27 @@ export default function (pi: ExtensionAPI) {
|
|
|
1011
1009
|
// If SPACE is released before the threshold, a regular space character
|
|
1012
1010
|
// is typed into the editor (normal typing behavior).
|
|
1013
1011
|
//
|
|
1014
|
-
//
|
|
1015
|
-
//
|
|
1012
|
+
// KEY DESIGN for non-Kitty terminals (no key-release events):
|
|
1013
|
+
// Holding a key generates rapid press events (~30ms apart). We detect
|
|
1014
|
+
// "release" by watching for the stream of space presses to STOP.
|
|
1015
|
+
// Once the gap exceeds RELEASE_DETECT_MS (200ms), we know the user
|
|
1016
|
+
// lifted their finger and we stop recording.
|
|
1016
1017
|
//
|
|
1017
|
-
//
|
|
1018
|
-
//
|
|
1019
|
-
//
|
|
1020
|
-
//
|
|
1018
|
+
// Flow:
|
|
1019
|
+
// Hold SPACE → rapid presses arrive → first press starts 500ms timer →
|
|
1020
|
+
// timer fires → recording starts → presses keep coming (consumed) →
|
|
1021
|
+
// user releases → presses stop → 200ms silence → auto-stop recording
|
|
1021
1022
|
//
|
|
1022
|
-
//
|
|
1023
|
-
// rapid press events (key-repeat). We CANNOT use "second space press
|
|
1024
|
-
// = stop" because repeats arrive while holding. Instead, non-Kitty
|
|
1025
|
-
// users must use Ctrl+Shift+V to stop.
|
|
1023
|
+
// Kitty protocol terminals get true key-release events and work natively.
|
|
1026
1024
|
|
|
1027
1025
|
const HOLD_THRESHOLD_MS = 500; // minimum hold time before voice activates
|
|
1026
|
+
const RELEASE_DETECT_MS = 200; // gap in key-repeat that means "released"
|
|
1028
1027
|
let kittyReleaseDetected = false;
|
|
1029
|
-
let spaceDownTime: number | null = null;
|
|
1028
|
+
let spaceDownTime: number | null = null;
|
|
1030
1029
|
let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
|
|
1031
|
-
let spaceConsumed = false;
|
|
1032
|
-
let lastSpacePressTime = 0;
|
|
1030
|
+
let spaceConsumed = false;
|
|
1031
|
+
let lastSpacePressTime = 0;
|
|
1032
|
+
let releaseDetectTimer: ReturnType<typeof setTimeout> | null = null;
|
|
1033
1033
|
|
|
1034
1034
|
function clearHoldTimer() {
|
|
1035
1035
|
if (holdActivationTimer) {
|
|
@@ -1038,6 +1038,49 @@ export default function (pi: ExtensionAPI) {
|
|
|
1038
1038
|
}
|
|
1039
1039
|
}
|
|
1040
1040
|
|
|
1041
|
+
/** Cancel the pending release-detection timeout, if one is armed, and forget its handle. */
function clearReleaseTimer() {
  // Nothing armed — leave state untouched.
  if (!releaseDetectTimer) return;
  clearTimeout(releaseDetectTimer);
  releaseDetectTimer = null;
}
|
|
1047
|
+
|
|
1048
|
+
/**
 * Timeout callback fired when SPACE presses have stopped arriving, which is
 * interpreted as the user releasing the key (for terminals that do not send
 * key-release events).
 *
 * Two outcomes:
 *  - hold threshold not yet reached → treat as a tap and append a space;
 *  - recording in progress → stop the recording.
 */
function onSpaceReleaseDetected() {
  // The timeout that invoked us has already fired; drop the stale handle.
  releaseDetectTimer = null;

  // A press was registered but voice never activated: this was a plain tap.
  const tappedBeforeThreshold = Boolean(spaceDownTime) && !spaceConsumed;
  if (tappedBeforeThreshold) {
    clearHoldTimer();
    spaceDownTime = null;
    spaceConsumed = false;
    if (ctx?.hasUI) {
      // Emulate the keystroke that was consumed when the hold began.
      const existingText = ctx.ui.getEditorText() || "";
      ctx.ui.setEditorText(existingText + " ");
      hideHoldHintWidget();
    }
    return;
  }

  // Only an active, hold-initiated recording is stopped here.
  if (!spaceConsumed || voiceState !== "recording") return;

  isHolding = false;
  spaceConsumed = false;
  spaceDownTime = null;
  stopVoiceRecording("editor");
}
|
|
1073
|
+
|
|
1074
|
+
/**
 * Re-arm release detection. Called on each SPACE press or repeat event:
 * any pending timeout is cancelled, and a fresh one is scheduled only while
 * a hold is in progress (threshold pending, voice activated, or recording).
 */
function resetReleaseDetect() {
  clearReleaseTimer();

  const holdInProgress =
    Boolean(spaceDownTime) || spaceConsumed || voiceState === "recording";
  if (!holdInProgress) return;

  // A silence of RELEASE_DETECT_MS after this press counts as a release.
  releaseDetectTimer = setTimeout(onSpaceReleaseDetected, RELEASE_DETECT_MS);
}
|
|
1083
|
+
|
|
1041
1084
|
function setupHoldToTalk() {
|
|
1042
1085
|
if (!ctx?.hasUI) return;
|
|
1043
1086
|
|
|
@@ -1048,25 +1091,20 @@ export default function (pi: ExtensionAPI) {
|
|
|
1048
1091
|
|
|
1049
1092
|
// ── SPACE handling ──
|
|
1050
1093
|
if (matchesKey(data, "space")) {
|
|
1051
|
-
//
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
clearHoldTimer();
|
|
1055
|
-
spaceDownTime = null;
|
|
1056
|
-
spaceConsumed = false;
|
|
1057
|
-
return undefined; // let the default space character through
|
|
1058
|
-
}
|
|
1094
|
+
// Check editor content — hold-to-talk still works even with content,
|
|
1095
|
+
// but a quick tap types a space as normal
|
|
1096
|
+
const editorHasContent = !!(ctx?.hasUI && ctx.ui.getEditorText()?.trim().length);
|
|
1059
1097
|
|
|
1060
1098
|
// ── Kitty key-release ──
|
|
1061
1099
|
if (isKeyRelease(data)) {
|
|
1062
1100
|
kittyReleaseDetected = true;
|
|
1101
|
+
clearReleaseTimer();
|
|
1063
1102
|
|
|
1064
1103
|
// Released before threshold → type a space character
|
|
1065
1104
|
if (spaceDownTime && !spaceConsumed) {
|
|
1066
1105
|
clearHoldTimer();
|
|
1067
1106
|
spaceDownTime = null;
|
|
1068
1107
|
spaceConsumed = false;
|
|
1069
|
-
// Insert a space into editor
|
|
1070
1108
|
if (ctx?.hasUI) ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1071
1109
|
return { consume: true };
|
|
1072
1110
|
}
|
|
@@ -1088,40 +1126,39 @@ export default function (pi: ExtensionAPI) {
|
|
|
1088
1126
|
// ── Kitty key-repeat: ALWAYS suppress while holding/recording ──
|
|
1089
1127
|
if (isKeyRepeat(data)) {
|
|
1090
1128
|
if (spaceDownTime || spaceConsumed || isHolding || voiceState === "recording") {
|
|
1129
|
+
resetReleaseDetect(); // keep resetting — still holding
|
|
1091
1130
|
return { consume: true };
|
|
1092
1131
|
}
|
|
1093
1132
|
return undefined;
|
|
1094
1133
|
}
|
|
1095
1134
|
|
|
1096
|
-
// === Key PRESS
|
|
1135
|
+
// === Key PRESS ===
|
|
1136
|
+
// In non-Kitty terminals, holding a key sends rapid press events.
|
|
1137
|
+
// We use these to detect "still holding" and the gap to detect "released".
|
|
1097
1138
|
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
// Debounce: ignore rapid presses within 100ms (terminal key-repeat
|
|
1101
|
-
// generates press events in non-Kitty terminals since there's no
|
|
1102
|
-
// key-repeat flag — they all look like fresh presses)
|
|
1103
|
-
if (now - lastSpacePressTime < 100) {
|
|
1104
|
-
lastSpacePressTime = now;
|
|
1105
|
-
return { consume: true }; // suppress repeat
|
|
1106
|
-
}
|
|
1107
|
-
lastSpacePressTime = now;
|
|
1139
|
+
// Reset release detection — user is still holding
|
|
1140
|
+
resetReleaseDetect();
|
|
1108
1141
|
|
|
1109
1142
|
// If transcribing → ignore
|
|
1110
1143
|
if (voiceState === "transcribing") {
|
|
1111
1144
|
return { consume: true };
|
|
1112
1145
|
}
|
|
1113
1146
|
|
|
1114
|
-
// If already recording
|
|
1115
|
-
// In non-Kitty, we can't safely detect "real second press" vs
|
|
1116
|
-
// key-repeat. Use Ctrl+Shift+V instead. Just consume.
|
|
1147
|
+
// If already recording → just consume (release detect handles stop)
|
|
1117
1148
|
if (voiceState === "recording") {
|
|
1118
1149
|
return { consume: true };
|
|
1119
1150
|
}
|
|
1120
1151
|
|
|
1121
|
-
//
|
|
1122
|
-
if (
|
|
1123
|
-
|
|
1152
|
+
// If we already started the hold timer, this is a repeat → consume
|
|
1153
|
+
if (spaceDownTime) {
|
|
1154
|
+
return { consume: true };
|
|
1155
|
+
}
|
|
1156
|
+
|
|
1157
|
+
// Idle, first press → start the hold timer
|
|
1158
|
+
if (voiceState === "idle") {
|
|
1159
|
+
spaceDownTime = Date.now();
|
|
1124
1160
|
spaceConsumed = false;
|
|
1161
|
+
lastSpacePressTime = Date.now();
|
|
1125
1162
|
|
|
1126
1163
|
// Show a subtle "preparing" indicator
|
|
1127
1164
|
if (ctx?.hasUI) {
|
|
@@ -1129,11 +1166,10 @@ export default function (pi: ExtensionAPI) {
|
|
|
1129
1166
|
}
|
|
1130
1167
|
|
|
1131
1168
|
// After threshold: activate voice recording
|
|
1169
|
+
// Works regardless of whether editor has content — hold always activates voice
|
|
1132
1170
|
holdActivationTimer = setTimeout(() => {
|
|
1133
1171
|
holdActivationTimer = null;
|
|
1134
|
-
|
|
1135
|
-
const currentText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
|
|
1136
|
-
if (voiceState === "idle" && spaceDownTime && !(currentText && currentText.trim().length > 0)) {
|
|
1172
|
+
if (voiceState === "idle" && spaceDownTime) {
|
|
1137
1173
|
spaceConsumed = true;
|
|
1138
1174
|
isHolding = true;
|
|
1139
1175
|
startVoiceRecording("editor").then((ok) => {
|
|
@@ -1149,7 +1185,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1149
1185
|
}
|
|
1150
1186
|
}, HOLD_THRESHOLD_MS);
|
|
1151
1187
|
|
|
1152
|
-
return { consume: true };
|
|
1188
|
+
return { consume: true };
|
|
1153
1189
|
}
|
|
1154
1190
|
|
|
1155
1191
|
if (isHolding || spaceConsumed) return { consume: true };
|
|
@@ -1159,14 +1195,13 @@ export default function (pi: ExtensionAPI) {
|
|
|
1159
1195
|
// ── Any other key while holding space (pre-threshold) → cancel hold, insert space ──
|
|
1160
1196
|
if (spaceDownTime && !spaceConsumed && !matchesKey(data, "space")) {
|
|
1161
1197
|
clearHoldTimer();
|
|
1162
|
-
|
|
1198
|
+
clearReleaseTimer();
|
|
1163
1199
|
if (ctx?.hasUI) {
|
|
1164
1200
|
ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1165
1201
|
hideHoldHintWidget();
|
|
1166
1202
|
}
|
|
1167
1203
|
spaceDownTime = null;
|
|
1168
1204
|
spaceConsumed = false;
|
|
1169
|
-
// Don't consume this key — let it through
|
|
1170
1205
|
return undefined;
|
|
1171
1206
|
}
|
|
1172
1207
|
|
|
@@ -1187,7 +1222,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
1187
1222
|
return undefined;
|
|
1188
1223
|
}
|
|
1189
1224
|
|
|
1190
|
-
// Toggle: stop if recording
|
|
1191
1225
|
if (voiceState === "recording") {
|
|
1192
1226
|
isHolding = false;
|
|
1193
1227
|
stopVoiceRecording("btw");
|