@codexstar/pi-listen 1.0.14 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extensions/voice.ts +121 -43
- package/package.json +1 -1
package/extensions/voice.ts
CHANGED
|
@@ -567,6 +567,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
567
567
|
function voiceCleanup() {
|
|
568
568
|
if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
|
|
569
569
|
clearHoldTimer();
|
|
570
|
+
clearReleaseTimer();
|
|
570
571
|
stopRecordingWidgetAnimation();
|
|
571
572
|
if (activeSession) {
|
|
572
573
|
finalizeSession(activeSession);
|
|
@@ -636,23 +637,31 @@ export default function (pi: ExtensionAPI) {
|
|
|
636
637
|
/** Animated recording indicator with live waveform */
|
|
637
638
|
function showRecordingWidget(target: "editor" | "btw") {
|
|
638
639
|
if (!ctx?.hasUI) return;
|
|
639
|
-
let frame = 0;
|
|
640
|
-
const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];
|
|
641
640
|
|
|
642
|
-
//
|
|
641
|
+
// Store initial state — once live transcription arrives,
|
|
642
|
+
// updateLiveTranscriptWidget takes over and we stop the animation.
|
|
643
|
+
(showRecordingWidget as any)._target = target;
|
|
644
|
+
(showRecordingWidget as any)._frame = 0;
|
|
645
|
+
(showRecordingWidget as any)._hasTranscript = false;
|
|
646
|
+
|
|
647
|
+
// Animate the widget every 300ms (only while no transcript is showing)
|
|
643
648
|
const animTimer = setInterval(() => {
|
|
644
|
-
|
|
645
|
-
if (
|
|
646
|
-
|
|
647
|
-
|
|
649
|
+
// Stop animating once live transcript takes over
|
|
650
|
+
if ((showRecordingWidget as any)?._hasTranscript) return;
|
|
651
|
+
|
|
652
|
+
(showRecordingWidget as any)._frame = ((showRecordingWidget as any)._frame || 0) + 1;
|
|
653
|
+
showRecordingWidgetFrame(target, (showRecordingWidget as any)._frame);
|
|
654
|
+
}, 300);
|
|
648
655
|
|
|
649
656
|
// Store the timer so we can clean it up
|
|
650
657
|
(showRecordingWidget as any)._animTimer = animTimer;
|
|
651
658
|
|
|
652
|
-
showRecordingWidgetFrame(target,
|
|
659
|
+
showRecordingWidgetFrame(target, 0);
|
|
653
660
|
}
|
|
654
661
|
|
|
655
|
-
|
|
662
|
+
const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];
|
|
663
|
+
|
|
664
|
+
function showRecordingWidgetFrame(target: "editor" | "btw", frame: number) {
|
|
656
665
|
if (!ctx?.hasUI) return;
|
|
657
666
|
ctx.ui.setWidget("voice-recording", (tui, theme) => {
|
|
658
667
|
return {
|
|
@@ -691,7 +700,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
691
700
|
? theme.fg("dim", " Press Ctrl+Shift+B to stop")
|
|
692
701
|
: kittyReleaseDetected
|
|
693
702
|
? theme.fg("dim", " Release SPACE to finalize")
|
|
694
|
-
: theme.fg("dim", "
|
|
703
|
+
: theme.fg("dim", " Release SPACE to stop");
|
|
695
704
|
|
|
696
705
|
const lines = [
|
|
697
706
|
topBorder,
|
|
@@ -717,6 +726,10 @@ export default function (pi: ExtensionAPI) {
|
|
|
717
726
|
function updateLiveTranscriptWidget(interim: string, finals: string[]) {
|
|
718
727
|
if (!ctx?.hasUI) return;
|
|
719
728
|
|
|
729
|
+
// Stop the recording animation — live transcript takes over
|
|
730
|
+
(showRecordingWidget as any)._hasTranscript = true;
|
|
731
|
+
stopRecordingWidgetAnimation();
|
|
732
|
+
|
|
720
733
|
const finalized = finals.join(" ");
|
|
721
734
|
const displayText = finalized + (interim ? (finalized ? " " : "") + interim : "");
|
|
722
735
|
|
|
@@ -743,10 +756,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
743
756
|
const label = theme.bold(theme.fg("accent", " VOICE "));
|
|
744
757
|
const timeStyled = theme.fg("muted", timeStr);
|
|
745
758
|
const titleLine = ` ${dot} ${label} ${timeStyled}`;
|
|
746
|
-
const hint =
|
|
747
|
-
? theme.fg("dim", " Release SPACE to finalize")
|
|
748
|
-
: theme.fg("dim", " Press SPACE again to stop");
|
|
749
|
-
|
|
759
|
+
const hint = theme.fg("dim", " Release SPACE to stop");
|
|
750
760
|
const lines = [topBorder, side(titleLine)];
|
|
751
761
|
|
|
752
762
|
if (!displayText.trim()) {
|
|
@@ -999,19 +1009,27 @@ export default function (pi: ExtensionAPI) {
|
|
|
999
1009
|
// If SPACE is released before the threshold, a regular space character
|
|
1000
1010
|
// is typed into the editor (normal typing behavior).
|
|
1001
1011
|
//
|
|
1002
|
-
//
|
|
1003
|
-
//
|
|
1012
|
+
// KEY DESIGN for non-Kitty terminals (no key-release events):
|
|
1013
|
+
// Holding a key generates rapid press events (~30ms apart). We detect
|
|
1014
|
+
// "release" by watching for the stream of space presses to STOP.
|
|
1015
|
+
// Once the gap exceeds RELEASE_DETECT_MS (200ms), we know the user
|
|
1016
|
+
// lifted their finger and we stop recording.
|
|
1004
1017
|
//
|
|
1005
|
-
//
|
|
1006
|
-
//
|
|
1007
|
-
//
|
|
1008
|
-
//
|
|
1018
|
+
// Flow:
|
|
1019
|
+
// Hold SPACE → rapid presses arrive → first press starts 500ms timer →
|
|
1020
|
+
// timer fires → recording starts → presses keep coming (consumed) →
|
|
1021
|
+
// user releases → presses stop → 200ms silence → auto-stop recording
|
|
1022
|
+
//
|
|
1023
|
+
// Kitty protocol terminals get true key-release events and work natively.
|
|
1009
1024
|
|
|
1010
1025
|
const HOLD_THRESHOLD_MS = 500; // minimum hold time before voice activates
|
|
1026
|
+
const RELEASE_DETECT_MS = 200; // gap in key-repeat that means "released"
|
|
1011
1027
|
let kittyReleaseDetected = false;
|
|
1012
|
-
let spaceDownTime: number | null = null;
|
|
1028
|
+
let spaceDownTime: number | null = null;
|
|
1013
1029
|
let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
|
|
1014
|
-
let spaceConsumed = false;
|
|
1030
|
+
let spaceConsumed = false;
|
|
1031
|
+
let lastSpacePressTime = 0;
|
|
1032
|
+
let releaseDetectTimer: ReturnType<typeof setTimeout> | null = null;
|
|
1015
1033
|
|
|
1016
1034
|
function clearHoldTimer() {
|
|
1017
1035
|
if (holdActivationTimer) {
|
|
@@ -1020,6 +1038,49 @@ export default function (pi: ExtensionAPI) {
|
|
|
1020
1038
|
}
|
|
1021
1039
|
}
|
|
1022
1040
|
|
|
1041
|
+
function clearReleaseTimer() {
|
|
1042
|
+
if (releaseDetectTimer) {
|
|
1043
|
+
clearTimeout(releaseDetectTimer);
|
|
1044
|
+
releaseDetectTimer = null;
|
|
1045
|
+
}
|
|
1046
|
+
}
|
|
1047
|
+
|
|
1048
|
+
/** Called when we detect the user has released SPACE (non-Kitty) */
|
|
1049
|
+
function onSpaceReleaseDetected() {
|
|
1050
|
+
releaseDetectTimer = null;
|
|
1051
|
+
|
|
1052
|
+
// If we're still in the threshold wait (< 500ms), user just tapped space
|
|
1053
|
+
if (spaceDownTime && !spaceConsumed) {
|
|
1054
|
+
clearHoldTimer();
|
|
1055
|
+
spaceDownTime = null;
|
|
1056
|
+
spaceConsumed = false;
|
|
1057
|
+
// Insert a space character
|
|
1058
|
+
if (ctx?.hasUI) {
|
|
1059
|
+
ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1060
|
+
hideHoldHintWidget();
|
|
1061
|
+
}
|
|
1062
|
+
return;
|
|
1063
|
+
}
|
|
1064
|
+
|
|
1065
|
+
// If we're recording, stop
|
|
1066
|
+
if (spaceConsumed && voiceState === "recording") {
|
|
1067
|
+
isHolding = false;
|
|
1068
|
+
spaceConsumed = false;
|
|
1069
|
+
spaceDownTime = null;
|
|
1070
|
+
stopVoiceRecording("editor");
|
|
1071
|
+
}
|
|
1072
|
+
}
|
|
1073
|
+
|
|
1074
|
+
/** Reset the release detection timer — called on every space press */
|
|
1075
|
+
function resetReleaseDetect() {
|
|
1076
|
+
clearReleaseTimer();
|
|
1077
|
+
// If we're in a hold state (threshold pending or recording),
|
|
1078
|
+
// start a timer to detect release
|
|
1079
|
+
if (spaceDownTime || spaceConsumed || voiceState === "recording") {
|
|
1080
|
+
releaseDetectTimer = setTimeout(onSpaceReleaseDetected, RELEASE_DETECT_MS);
|
|
1081
|
+
}
|
|
1082
|
+
}
|
|
1083
|
+
|
|
1023
1084
|
function setupHoldToTalk() {
|
|
1024
1085
|
if (!ctx?.hasUI) return;
|
|
1025
1086
|
|
|
@@ -1034,6 +1095,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1034
1095
|
const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
|
|
1035
1096
|
if (editorText && editorText.trim().length > 0) {
|
|
1036
1097
|
clearHoldTimer();
|
|
1098
|
+
clearReleaseTimer();
|
|
1037
1099
|
spaceDownTime = null;
|
|
1038
1100
|
spaceConsumed = false;
|
|
1039
1101
|
return undefined; // let the default space character through
|
|
@@ -1042,19 +1104,19 @@ export default function (pi: ExtensionAPI) {
|
|
|
1042
1104
|
// ── Kitty key-release ──
|
|
1043
1105
|
if (isKeyRelease(data)) {
|
|
1044
1106
|
kittyReleaseDetected = true;
|
|
1107
|
+
clearReleaseTimer();
|
|
1045
1108
|
|
|
1046
1109
|
// Released before threshold → type a space character
|
|
1047
1110
|
if (spaceDownTime && !spaceConsumed) {
|
|
1048
1111
|
clearHoldTimer();
|
|
1049
1112
|
spaceDownTime = null;
|
|
1050
1113
|
spaceConsumed = false;
|
|
1051
|
-
// Insert a space into editor
|
|
1052
1114
|
if (ctx?.hasUI) ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1053
1115
|
return { consume: true };
|
|
1054
1116
|
}
|
|
1055
1117
|
|
|
1056
1118
|
// Released after threshold → stop recording (true hold-to-talk)
|
|
1057
|
-
if (spaceConsumed &&
|
|
1119
|
+
if (spaceConsumed && voiceState === "recording") {
|
|
1058
1120
|
isHolding = false;
|
|
1059
1121
|
spaceConsumed = false;
|
|
1060
1122
|
spaceDownTime = null;
|
|
@@ -1067,33 +1129,42 @@ export default function (pi: ExtensionAPI) {
|
|
|
1067
1129
|
return undefined;
|
|
1068
1130
|
}
|
|
1069
1131
|
|
|
1070
|
-
// ── Kitty key-repeat: suppress while holding
|
|
1132
|
+
// ── Kitty key-repeat: ALWAYS suppress while holding/recording ──
|
|
1071
1133
|
if (isKeyRepeat(data)) {
|
|
1072
|
-
if (spaceConsumed || isHolding
|
|
1134
|
+
if (spaceDownTime || spaceConsumed || isHolding || voiceState === "recording") {
|
|
1135
|
+
resetReleaseDetect(); // keep resetting — still holding
|
|
1136
|
+
return { consume: true };
|
|
1137
|
+
}
|
|
1073
1138
|
return undefined;
|
|
1074
1139
|
}
|
|
1075
1140
|
|
|
1076
1141
|
// === Key PRESS ===
|
|
1142
|
+
// In non-Kitty terminals, holding a key sends rapid press events.
|
|
1143
|
+
// We use these to detect "still holding" and the gap to detect "released".
|
|
1077
1144
|
|
|
1078
|
-
//
|
|
1079
|
-
|
|
1080
|
-
isHolding = false;
|
|
1081
|
-
spaceConsumed = false;
|
|
1082
|
-
spaceDownTime = null;
|
|
1083
|
-
clearHoldTimer();
|
|
1084
|
-
stopVoiceRecording("editor");
|
|
1085
|
-
return { consume: true };
|
|
1086
|
-
}
|
|
1145
|
+
// Reset release detection — user is still holding
|
|
1146
|
+
resetReleaseDetect();
|
|
1087
1147
|
|
|
1088
1148
|
// If transcribing → ignore
|
|
1089
1149
|
if (voiceState === "transcribing") {
|
|
1090
1150
|
return { consume: true };
|
|
1091
1151
|
}
|
|
1092
1152
|
|
|
1093
|
-
//
|
|
1094
|
-
if (voiceState === "
|
|
1153
|
+
// If already recording → just consume (release detect handles stop)
|
|
1154
|
+
if (voiceState === "recording") {
|
|
1155
|
+
return { consume: true };
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
// If we already started the hold timer, this is a repeat → consume
|
|
1159
|
+
if (spaceDownTime) {
|
|
1160
|
+
return { consume: true };
|
|
1161
|
+
}
|
|
1162
|
+
|
|
1163
|
+
// Idle, first press → start the hold timer
|
|
1164
|
+
if (voiceState === "idle") {
|
|
1095
1165
|
spaceDownTime = Date.now();
|
|
1096
1166
|
spaceConsumed = false;
|
|
1167
|
+
lastSpacePressTime = Date.now();
|
|
1097
1168
|
|
|
1098
1169
|
// Show a subtle "preparing" indicator
|
|
1099
1170
|
if (ctx?.hasUI) {
|
|
@@ -1103,7 +1174,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
1103
1174
|
// After threshold: activate voice recording
|
|
1104
1175
|
holdActivationTimer = setTimeout(() => {
|
|
1105
1176
|
holdActivationTimer = null;
|
|
1106
|
-
// Double-check: still idle, still holding, editor still empty
|
|
1107
1177
|
const currentText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
|
|
1108
1178
|
if (voiceState === "idle" && spaceDownTime && !(currentText && currentText.trim().length > 0)) {
|
|
1109
1179
|
spaceConsumed = true;
|
|
@@ -1121,7 +1191,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1121
1191
|
}
|
|
1122
1192
|
}, HOLD_THRESHOLD_MS);
|
|
1123
1193
|
|
|
1124
|
-
return { consume: true };
|
|
1194
|
+
return { consume: true };
|
|
1125
1195
|
}
|
|
1126
1196
|
|
|
1127
1197
|
if (isHolding || spaceConsumed) return { consume: true };
|
|
@@ -1131,14 +1201,13 @@ export default function (pi: ExtensionAPI) {
|
|
|
1131
1201
|
// ── Any other key while holding space (pre-threshold) → cancel hold, insert space ──
|
|
1132
1202
|
if (spaceDownTime && !spaceConsumed && !matchesKey(data, "space")) {
|
|
1133
1203
|
clearHoldTimer();
|
|
1134
|
-
|
|
1204
|
+
clearReleaseTimer();
|
|
1135
1205
|
if (ctx?.hasUI) {
|
|
1136
1206
|
ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1137
1207
|
hideHoldHintWidget();
|
|
1138
1208
|
}
|
|
1139
1209
|
spaceDownTime = null;
|
|
1140
1210
|
spaceConsumed = false;
|
|
1141
|
-
// Don't consume this key — let it through
|
|
1142
1211
|
return undefined;
|
|
1143
1212
|
}
|
|
1144
1213
|
|
|
@@ -1159,7 +1228,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
1159
1228
|
return undefined;
|
|
1160
1229
|
}
|
|
1161
1230
|
|
|
1162
|
-
// Toggle: stop if recording
|
|
1163
1231
|
if (voiceState === "recording") {
|
|
1164
1232
|
isHolding = false;
|
|
1165
1233
|
stopVoiceRecording("btw");
|
|
@@ -1326,9 +1394,19 @@ export default function (pi: ExtensionAPI) {
|
|
|
1326
1394
|
return;
|
|
1327
1395
|
}
|
|
1328
1396
|
if (voiceState === "idle") {
|
|
1329
|
-
|
|
1397
|
+
// Direct start — bypass hold threshold
|
|
1398
|
+
spaceConsumed = true;
|
|
1399
|
+
isHolding = true;
|
|
1400
|
+
const ok = await startVoiceRecording("editor");
|
|
1401
|
+
if (!ok) {
|
|
1402
|
+
isHolding = false;
|
|
1403
|
+
spaceConsumed = false;
|
|
1404
|
+
}
|
|
1330
1405
|
} else if (voiceState === "recording") {
|
|
1331
1406
|
isHolding = false;
|
|
1407
|
+
spaceConsumed = false;
|
|
1408
|
+
spaceDownTime = null;
|
|
1409
|
+
clearHoldTimer();
|
|
1332
1410
|
await stopVoiceRecording("editor");
|
|
1333
1411
|
}
|
|
1334
1412
|
},
|