@codexstar/pi-listen 1.0.17 → 1.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extensions/voice.ts +415 -336
- package/package.json +1 -1
package/extensions/voice.ts
CHANGED
|
@@ -1,29 +1,48 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* pi-voice —
|
|
2
|
+
* pi-voice — Enterprise-grade voice STT for Pi CLI.
|
|
3
3
|
*
|
|
4
4
|
* Architecture (modeled after Claude Code's voice pipeline):
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
5
|
+
*
|
|
6
|
+
* STATE MACHINE
|
|
7
|
+
* ─────────────
|
|
8
|
+
* idle → warmup → recording → finalizing → idle
|
|
9
|
+
* ↑ │
|
|
10
|
+
* └─────────┘ (rapid re-press recovery)
|
|
11
|
+
*
|
|
12
|
+
* warmup: Entered while SPACE is held but HOLD_THRESHOLD_MS (500ms) has not yet elapsed.
|
|
13
|
+
* A "keep holding…" hint is shown. If released before
|
|
14
|
+
* the threshold, a normal space character is typed.
|
|
15
|
+
*
|
|
16
|
+
* recording: SoX captures PCM → Deepgram WebSocket streaming.
|
|
17
|
+
* Live interim + final transcripts update the widget.
|
|
18
|
+
* Release SPACE (or press again in toggle mode) → stop.
|
|
19
|
+
*
|
|
20
|
+
* finalizing: CloseStream sent to Deepgram. Waiting for final
|
|
21
|
+
* transcript. Safety timeout auto-completes.
|
|
22
|
+
*
|
|
23
|
+
* HOLD-TO-TALK DETECTION (non-Kitty terminals)
|
|
24
|
+
* ─────────────────────────────────────────────
|
|
25
|
+
* Holding a key sends rapid key-press events (~30ms apart).
|
|
26
|
+
* "Release" is detected when the gap between presses exceeds
|
|
27
|
+
* RELEASE_DETECT_MS (150ms).
|
|
28
|
+
*
|
|
29
|
+
* ENTERPRISE FALLBACKS
|
|
30
|
+
* ────────────────────
|
|
31
|
+
* • Session corruption guard: new recording request during
|
|
32
|
+
* finalizing automatically cancels the stale session first.
|
|
33
|
+
* • Transient failure retry: on WebSocket error during rapid
|
|
34
|
+
* push-to-talk re-press, auto-retry once after 300ms.
|
|
35
|
+
* • Stale transcript cleanup: any prior transcript is cleared
|
|
36
|
+
* before new recording begins.
|
|
37
|
+
* • Silence vs. no-speech: distinguishes "mic captured silence"
|
|
38
|
+
* from "no speech detected" with distinct user messages.
|
|
12
39
|
*
|
|
13
40
|
* Activation:
|
|
14
|
-
* - Hold SPACE (
|
|
15
|
-
* - Ctrl+Shift+V → toggle start/stop (
|
|
41
|
+
* - Hold SPACE (≥500ms) → release to finalize
|
|
42
|
+
* - Ctrl+Shift+V → toggle start/stop (always works)
|
|
16
43
|
* - Ctrl+Shift+B → hold to record → auto-send as /btw
|
|
17
44
|
*
|
|
18
|
-
* Config in ~/.pi/agent/settings.json:
|
|
19
|
-
* {
|
|
20
|
-
* "voice": {
|
|
21
|
-
* "enabled": true,
|
|
22
|
-
* "language": "en",
|
|
23
|
-
* "backend": "deepgram",
|
|
24
|
-
* "model": "nova-3"
|
|
25
|
-
* }
|
|
26
|
-
* }
|
|
45
|
+
* Config in ~/.pi/agent/settings.json under "voice": { ... }
|
|
27
46
|
*/
|
|
28
47
|
|
|
29
48
|
import type {
|
|
@@ -57,7 +76,14 @@ import { buildProvisioningPlan } from "./voice/install";
|
|
|
57
76
|
|
|
58
77
|
// ─── Types ───────────────────────────────────────────────────────────────────
|
|
59
78
|
|
|
60
|
-
|
|
79
|
+
/**
|
|
80
|
+
* Voice state machine — strict transitions only:
|
|
81
|
+
* idle → warmup → recording → finalizing → idle
|
|
82
|
+
* warmup → idle (released before threshold)
|
|
83
|
+
* recording → idle (on error, or when a new recording request aborts the active session)
|
|
84
|
+
* finalizing → idle (on completion or timeout)
|
|
85
|
+
*/
|
|
86
|
+
type VoiceState = "idle" | "warmup" | "recording" | "finalizing";
|
|
61
87
|
|
|
62
88
|
interface BtwExchange {
|
|
63
89
|
question: string;
|
|
@@ -76,7 +102,14 @@ const DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
|
76
102
|
const KEEPALIVE_INTERVAL_MS = 8000;
|
|
77
103
|
const FINALIZE_SAFETY_TIMEOUT_MS = 5000;
|
|
78
104
|
const FINALIZE_NO_DATA_TIMEOUT_MS = 1500;
|
|
79
|
-
const MAX_RECORDING_SECS = 120;
|
|
105
|
+
const MAX_RECORDING_SECS = 120;
|
|
106
|
+
|
|
107
|
+
// Hold-to-talk timing
|
|
108
|
+
const HOLD_THRESHOLD_MS = 500; // Must hold for this long before activation
|
|
109
|
+
const RELEASE_DETECT_MS = 150; // Gap in key-repeat → "released"
|
|
110
|
+
const RETRY_DELAY_MS = 300; // Auto-retry on transient failure during rapid re-press
|
|
111
|
+
const MAX_RETRY_ATTEMPTS = 1; // Max retries per activation attempt
|
|
112
|
+
const CORRUPTION_GUARD_MS = 200; // Min gap between stop and restart
|
|
80
113
|
|
|
81
114
|
const EXT_DIR = path.dirname(new URL(import.meta.url).pathname);
|
|
82
115
|
const PROJECT_ROOT = path.join(EXT_DIR, "..");
|
|
@@ -264,25 +297,17 @@ async function transcribeAudioFile(
|
|
|
264
297
|
interface StreamingSession {
|
|
265
298
|
ws: WebSocket;
|
|
266
299
|
recProcess: ChildProcess;
|
|
267
|
-
interimText: string;
|
|
268
|
-
finalizedParts: string[];
|
|
300
|
+
interimText: string;
|
|
301
|
+
finalizedParts: string[];
|
|
269
302
|
keepAliveTimer: ReturnType<typeof setInterval> | null;
|
|
270
303
|
closed: boolean;
|
|
304
|
+
hadAudioData: boolean; // Track if we received any audio data
|
|
305
|
+
hadSpeech: boolean; // Track if Deepgram detected any speech
|
|
271
306
|
onTranscript: (interim: string, finals: string[]) => void;
|
|
272
|
-
onDone: (fullText: string) => void;
|
|
307
|
+
onDone: (fullText: string, meta: { hadAudio: boolean; hadSpeech: boolean }) => void;
|
|
273
308
|
onError: (err: string) => void;
|
|
274
309
|
}
|
|
275
310
|
|
|
276
|
-
function getDeepgramApiKey(): string | null {
|
|
277
|
-
// Priority: env var → config file → null
|
|
278
|
-
return process.env.DEEPGRAM_API_KEY || null;
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
/**
|
|
282
|
-
* Resolve the Deepgram API key from all sources:
|
|
283
|
-
* 1. process.env.DEEPGRAM_API_KEY (shell)
|
|
284
|
-
* 2. config.deepgramApiKey (settings.json, persisted at setup time)
|
|
285
|
-
*/
|
|
286
311
|
function resolveDeepgramApiKey(config: VoiceConfig): string | null {
|
|
287
312
|
return process.env.DEEPGRAM_API_KEY || config.deepgramApiKey || null;
|
|
288
313
|
}
|
|
@@ -290,7 +315,6 @@ function resolveDeepgramApiKey(config: VoiceConfig): string | null {
|
|
|
290
315
|
function isDeepgramStreaming(config: VoiceConfig): boolean {
|
|
291
316
|
const key = resolveDeepgramApiKey(config);
|
|
292
317
|
if (!key) return false;
|
|
293
|
-
// Use streaming for deepgram backend, or auto mode when deepgram key is available
|
|
294
318
|
return config.backend === "deepgram" || (config.backend === "auto" && !!key);
|
|
295
319
|
}
|
|
296
320
|
|
|
@@ -299,8 +323,8 @@ function buildDeepgramWsUrl(config: VoiceConfig): string {
|
|
|
299
323
|
encoding: ENCODING,
|
|
300
324
|
sample_rate: String(SAMPLE_RATE),
|
|
301
325
|
channels: String(CHANNELS),
|
|
302
|
-
endpointing: "300",
|
|
303
|
-
utterance_end_ms: "1000",
|
|
326
|
+
endpointing: "300",
|
|
327
|
+
utterance_end_ms: "1000",
|
|
304
328
|
language: config.language || "en",
|
|
305
329
|
model: config.model || "nova-3",
|
|
306
330
|
smart_format: "true",
|
|
@@ -313,7 +337,7 @@ function startStreamingSession(
|
|
|
313
337
|
config: VoiceConfig,
|
|
314
338
|
callbacks: {
|
|
315
339
|
onTranscript: (interim: string, finals: string[]) => void;
|
|
316
|
-
onDone: (fullText: string) => void;
|
|
340
|
+
onDone: (fullText: string, meta: { hadAudio: boolean; hadSpeech: boolean }) => void;
|
|
317
341
|
onError: (err: string) => void;
|
|
318
342
|
},
|
|
319
343
|
): StreamingSession | null {
|
|
@@ -328,7 +352,6 @@ function startStreamingSession(
|
|
|
328
352
|
return null;
|
|
329
353
|
}
|
|
330
354
|
|
|
331
|
-
// Start SoX streaming raw PCM to stdout (no file)
|
|
332
355
|
const recProc = spawn("rec", [
|
|
333
356
|
"-q",
|
|
334
357
|
"-r", String(SAMPLE_RATE),
|
|
@@ -336,12 +359,11 @@ function startStreamingSession(
|
|
|
336
359
|
"-b", "16",
|
|
337
360
|
"-e", "signed-integer",
|
|
338
361
|
"-t", "raw",
|
|
339
|
-
"-",
|
|
362
|
+
"-",
|
|
340
363
|
], { stdio: ["pipe", "pipe", "pipe"] });
|
|
341
364
|
|
|
342
|
-
recProc.stderr?.on("data", () => {});
|
|
365
|
+
recProc.stderr?.on("data", () => {});
|
|
343
366
|
|
|
344
|
-
// Connect WebSocket to Deepgram
|
|
345
367
|
const wsUrl = buildDeepgramWsUrl(config);
|
|
346
368
|
const ws = new WebSocket(wsUrl, {
|
|
347
369
|
headers: {
|
|
@@ -356,25 +378,25 @@ function startStreamingSession(
|
|
|
356
378
|
finalizedParts: [],
|
|
357
379
|
keepAliveTimer: null,
|
|
358
380
|
closed: false,
|
|
381
|
+
hadAudioData: false,
|
|
382
|
+
hadSpeech: false,
|
|
359
383
|
onTranscript: callbacks.onTranscript,
|
|
360
384
|
onDone: callbacks.onDone,
|
|
361
385
|
onError: callbacks.onError,
|
|
362
386
|
};
|
|
363
387
|
|
|
364
388
|
ws.onopen = () => {
|
|
365
|
-
// Send initial KeepAlive
|
|
366
389
|
try { ws.send(JSON.stringify({ type: "KeepAlive" })); } catch {}
|
|
367
390
|
|
|
368
|
-
// Start keepalive timer
|
|
369
391
|
session.keepAliveTimer = setInterval(() => {
|
|
370
392
|
if (ws.readyState === WebSocket.OPEN) {
|
|
371
393
|
try { ws.send(JSON.stringify({ type: "KeepAlive" })); } catch {}
|
|
372
394
|
}
|
|
373
395
|
}, KEEPALIVE_INTERVAL_MS);
|
|
374
396
|
|
|
375
|
-
// Pipe SoX stdout → WebSocket as binary frames
|
|
376
397
|
recProc.stdout?.on("data", (chunk: Buffer) => {
|
|
377
398
|
if (ws.readyState === WebSocket.OPEN) {
|
|
399
|
+
session.hadAudioData = true;
|
|
378
400
|
try { ws.send(chunk); } catch {}
|
|
379
401
|
}
|
|
380
402
|
});
|
|
@@ -389,38 +411,27 @@ function startStreamingSession(
|
|
|
389
411
|
const alt = msg.channel?.alternatives?.[0];
|
|
390
412
|
const transcript = alt?.transcript || "";
|
|
391
413
|
|
|
414
|
+
if (transcript.trim()) {
|
|
415
|
+
session.hadSpeech = true;
|
|
416
|
+
}
|
|
417
|
+
|
|
392
418
|
if (msg.is_final) {
|
|
393
|
-
// Final result for this audio segment
|
|
394
419
|
if (transcript.trim()) {
|
|
395
420
|
session.finalizedParts.push(transcript.trim());
|
|
396
421
|
}
|
|
397
422
|
session.interimText = "";
|
|
398
423
|
} else {
|
|
399
|
-
// Interim result — live update
|
|
400
424
|
session.interimText = transcript;
|
|
401
425
|
}
|
|
402
426
|
|
|
403
427
|
session.onTranscript(session.interimText, session.finalizedParts);
|
|
404
|
-
|
|
405
|
-
// If speech_final is true, it's the end of an utterance
|
|
406
|
-
// (similar to TranscriptEndpoint in Claude Code's protocol)
|
|
407
|
-
if (msg.speech_final && transcript.trim()) {
|
|
408
|
-
// Already added to finalizedParts above when is_final was true
|
|
409
|
-
}
|
|
410
|
-
} else if (msg.type === "Metadata") {
|
|
411
|
-
// Connection metadata — ignore
|
|
412
|
-
} else if (msg.type === "UtteranceEnd") {
|
|
413
|
-
// Utterance boundary — Deepgram detected end of speech
|
|
414
|
-
// Nothing extra needed, is_final already handles finalization
|
|
415
428
|
} else if (msg.type === "Error" || msg.type === "error") {
|
|
416
429
|
session.onError(msg.message || msg.description || "Deepgram error");
|
|
417
430
|
}
|
|
418
|
-
} catch
|
|
419
|
-
// Ignore parse errors for binary data
|
|
420
|
-
}
|
|
431
|
+
} catch {}
|
|
421
432
|
};
|
|
422
433
|
|
|
423
|
-
ws.onerror = (
|
|
434
|
+
ws.onerror = () => {
|
|
424
435
|
if (!session.closed) {
|
|
425
436
|
session.onError("WebSocket connection error");
|
|
426
437
|
}
|
|
@@ -437,7 +448,6 @@ function startStreamingSession(
|
|
|
437
448
|
});
|
|
438
449
|
|
|
439
450
|
recProc.on("close", () => {
|
|
440
|
-
// SoX stopped — send CloseStream to Deepgram
|
|
441
451
|
if (ws.readyState === WebSocket.OPEN) {
|
|
442
452
|
try { ws.send(JSON.stringify({ type: "CloseStream" })); } catch {}
|
|
443
453
|
}
|
|
@@ -449,22 +459,20 @@ function startStreamingSession(
|
|
|
449
459
|
function stopStreamingSession(session: StreamingSession): void {
|
|
450
460
|
if (session.closed) return;
|
|
451
461
|
|
|
452
|
-
// Stop the microphone
|
|
453
462
|
try { session.recProcess.kill("SIGTERM"); } catch {}
|
|
454
463
|
|
|
455
|
-
// CloseStream tells Deepgram to flush remaining audio
|
|
456
464
|
if (session.ws.readyState === WebSocket.OPEN) {
|
|
457
465
|
try { session.ws.send(JSON.stringify({ type: "CloseStream" })); } catch {}
|
|
458
466
|
}
|
|
459
467
|
|
|
460
|
-
// Safety
|
|
468
|
+
// Safety timeout
|
|
461
469
|
setTimeout(() => {
|
|
462
470
|
if (!session.closed) {
|
|
463
471
|
finalizeSession(session);
|
|
464
472
|
}
|
|
465
473
|
}, FINALIZE_SAFETY_TIMEOUT_MS);
|
|
466
474
|
|
|
467
|
-
//
|
|
475
|
+
// Quick finalize if no new data
|
|
468
476
|
let lastDataTime = Date.now();
|
|
469
477
|
const origOnMessage = session.ws.onmessage;
|
|
470
478
|
session.ws.onmessage = (event: MessageEvent) => {
|
|
@@ -486,21 +494,32 @@ function finalizeSession(session: StreamingSession): void {
|
|
|
486
494
|
if (session.closed) return;
|
|
487
495
|
session.closed = true;
|
|
488
496
|
|
|
489
|
-
// Clean up keepalive
|
|
490
497
|
if (session.keepAliveTimer) {
|
|
491
498
|
clearInterval(session.keepAliveTimer);
|
|
492
499
|
session.keepAliveTimer = null;
|
|
493
500
|
}
|
|
494
501
|
|
|
495
|
-
// Close WebSocket
|
|
496
502
|
try { session.ws.close(); } catch {}
|
|
497
|
-
|
|
498
|
-
// Kill SoX if still running
|
|
499
503
|
try { session.recProcess.kill("SIGKILL"); } catch {}
|
|
500
504
|
|
|
501
|
-
// Deliver final transcript
|
|
502
505
|
const fullText = session.finalizedParts.join(" ").trim();
|
|
503
|
-
session.onDone(fullText
|
|
506
|
+
session.onDone(fullText, {
|
|
507
|
+
hadAudio: session.hadAudioData,
|
|
508
|
+
hadSpeech: session.hadSpeech,
|
|
509
|
+
});
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
// ─── Abort helper — nuke everything synchronously ────────────────────────────
|
|
513
|
+
|
|
514
|
+
function abortSession(session: StreamingSession | null): void {
|
|
515
|
+
if (!session || session.closed) return;
|
|
516
|
+
session.closed = true;
|
|
517
|
+
if (session.keepAliveTimer) {
|
|
518
|
+
clearInterval(session.keepAliveTimer);
|
|
519
|
+
session.keepAliveTimer = null;
|
|
520
|
+
}
|
|
521
|
+
try { session.ws.close(); } catch {}
|
|
522
|
+
try { session.recProcess.kill("SIGKILL"); } catch {}
|
|
504
523
|
}
|
|
505
524
|
|
|
506
525
|
// ─── Extension ───────────────────────────────────────────────────────────────
|
|
@@ -515,11 +534,20 @@ export default function (pi: ExtensionAPI) {
|
|
|
515
534
|
let recordingStart = 0;
|
|
516
535
|
let statusTimer: ReturnType<typeof setInterval> | null = null;
|
|
517
536
|
let terminalInputUnsub: (() => void) | null = null;
|
|
518
|
-
let isHolding = false;
|
|
519
537
|
|
|
520
538
|
// Streaming session state
|
|
521
539
|
let activeSession: StreamingSession | null = null;
|
|
522
540
|
let currentTarget: "editor" | "btw" = "editor";
|
|
541
|
+
let retryAttempts = 0;
|
|
542
|
+
let lastStopTime = 0; // For corruption guard
|
|
543
|
+
|
|
544
|
+
// Hold-to-talk state
|
|
545
|
+
let kittyReleaseDetected = false;
|
|
546
|
+
let spaceDownTime: number | null = null;
|
|
547
|
+
let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
|
|
548
|
+
let spaceConsumed = false; // True once threshold passed and recording started
|
|
549
|
+
let releaseDetectTimer: ReturnType<typeof setTimeout> | null = null;
|
|
550
|
+
let warmupWidgetTimer: ReturnType<typeof setInterval> | null = null;
|
|
523
551
|
|
|
524
552
|
// ─── BTW State ───────────────────────────────────────────────────────────
|
|
525
553
|
|
|
@@ -548,13 +576,16 @@ export default function (pi: ExtensionAPI) {
|
|
|
548
576
|
ctx.ui.setStatus("voice", `MIC ${modeTag}`);
|
|
549
577
|
break;
|
|
550
578
|
}
|
|
579
|
+
case "warmup":
|
|
580
|
+
ctx.ui.setStatus("voice", "🎙️ HOLD...");
|
|
581
|
+
break;
|
|
551
582
|
case "recording": {
|
|
552
583
|
const secs = Math.round((Date.now() - recordingStart) / 1000);
|
|
553
584
|
ctx.ui.setStatus("voice", `🔴 REC ${secs}s`);
|
|
554
585
|
break;
|
|
555
586
|
}
|
|
556
|
-
case "
|
|
557
|
-
ctx.ui.setStatus("voice", "STT...");
|
|
587
|
+
case "finalizing":
|
|
588
|
+
ctx.ui.setStatus("voice", "⏳ STT...");
|
|
558
589
|
break;
|
|
559
590
|
}
|
|
560
591
|
}
|
|
@@ -564,20 +595,57 @@ export default function (pi: ExtensionAPI) {
|
|
|
564
595
|
updateVoiceStatus();
|
|
565
596
|
}
|
|
566
597
|
|
|
598
|
+
// ─── Cleanup helpers ─────────────────────────────────────────────────────
|
|
599
|
+
|
|
600
|
+
function clearHoldTimer() {
|
|
601
|
+
if (holdActivationTimer) {
|
|
602
|
+
clearTimeout(holdActivationTimer);
|
|
603
|
+
holdActivationTimer = null;
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
function clearReleaseTimer() {
|
|
608
|
+
if (releaseDetectTimer) {
|
|
609
|
+
clearTimeout(releaseDetectTimer);
|
|
610
|
+
releaseDetectTimer = null;
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
function clearWarmupWidget() {
|
|
615
|
+
if (warmupWidgetTimer) {
|
|
616
|
+
clearInterval(warmupWidgetTimer);
|
|
617
|
+
warmupWidgetTimer = null;
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
function clearRecordingAnimTimer() {
|
|
622
|
+
const timer = (showRecordingWidget as any)?._animTimer;
|
|
623
|
+
if (timer) {
|
|
624
|
+
clearInterval(timer);
|
|
625
|
+
(showRecordingWidget as any)._animTimer = null;
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
function hideWidget() {
|
|
630
|
+
if (ctx?.hasUI) ctx.ui.setWidget("voice-recording", undefined);
|
|
631
|
+
}
|
|
632
|
+
|
|
567
633
|
function voiceCleanup() {
|
|
568
634
|
if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
|
|
569
635
|
clearHoldTimer();
|
|
570
636
|
clearReleaseTimer();
|
|
571
|
-
|
|
637
|
+
clearWarmupWidget();
|
|
638
|
+
clearRecordingAnimTimer();
|
|
572
639
|
if (activeSession) {
|
|
573
|
-
|
|
640
|
+
abortSession(activeSession);
|
|
574
641
|
activeSession = null;
|
|
575
642
|
}
|
|
576
643
|
if (legacyRecProcess) { legacyRecProcess.kill("SIGTERM"); legacyRecProcess = null; }
|
|
577
644
|
if (tempFile) { try { fs.unlinkSync(tempFile); } catch {} tempFile = null; }
|
|
578
|
-
isHolding = false;
|
|
579
645
|
spaceConsumed = false;
|
|
580
646
|
spaceDownTime = null;
|
|
647
|
+
retryAttempts = 0;
|
|
648
|
+
hideWidget();
|
|
581
649
|
setVoiceState("idle");
|
|
582
650
|
}
|
|
583
651
|
|
|
@@ -609,58 +677,64 @@ export default function (pi: ExtensionAPI) {
|
|
|
609
677
|
].join("\n"), validated ? "info" : "warning");
|
|
610
678
|
}
|
|
611
679
|
|
|
612
|
-
// ───
|
|
680
|
+
// ─── Warmup Widget ──────────────────────────────────────────────────────
|
|
681
|
+
//
|
|
682
|
+
// During the 500ms hold threshold, show a subtle "keep holding…" hint
|
|
683
|
+
// with a progress indicator. This matches Claude Code's warmup pattern.
|
|
613
684
|
|
|
614
|
-
|
|
615
|
-
function showHoldHintWidget() {
|
|
685
|
+
function showWarmupWidget() {
|
|
616
686
|
if (!ctx?.hasUI) return;
|
|
617
|
-
ctx.ui.setWidget("voice-recording", (tui, theme) => {
|
|
618
|
-
return {
|
|
619
|
-
invalidate() {},
|
|
620
|
-
render(width: number): string[] {
|
|
621
|
-
const bar = theme.fg("muted", "─".repeat(Math.min(width - 2, 60)));
|
|
622
|
-
return [
|
|
623
|
-
bar,
|
|
624
|
-
theme.fg("dim", " Hold " + theme.bold("SPACE") + " for voice input..."),
|
|
625
|
-
bar,
|
|
626
|
-
];
|
|
627
|
-
},
|
|
628
|
-
};
|
|
629
|
-
}, { placement: "aboveEditor" });
|
|
630
|
-
}
|
|
631
687
|
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
688
|
+
const startTime = Date.now();
|
|
689
|
+
|
|
690
|
+
const renderWarmup = () => {
|
|
691
|
+
if (!ctx?.hasUI) return;
|
|
692
|
+
const elapsed = Date.now() - startTime;
|
|
693
|
+
const progress = Math.min(elapsed / HOLD_THRESHOLD_MS, 1);
|
|
694
|
+
const barLen = 20;
|
|
695
|
+
const filled = Math.round(progress * barLen);
|
|
696
|
+
const empty = barLen - filled;
|
|
697
|
+
|
|
698
|
+
ctx.ui.setWidget("voice-recording", (tui, theme) => {
|
|
699
|
+
return {
|
|
700
|
+
invalidate() {},
|
|
701
|
+
render(width: number): string[] {
|
|
702
|
+
const maxW = Math.min(width - 2, 60);
|
|
703
|
+
const bar = theme.fg("accent", "█".repeat(filled)) + theme.fg("muted", "░".repeat(empty));
|
|
704
|
+
const hint = progress < 0.6
|
|
705
|
+
? theme.fg("dim", "Keep holding " + theme.bold("SPACE") + " for voice…")
|
|
706
|
+
: theme.fg("accent", "Almost there… keep holding…");
|
|
707
|
+
const border = theme.fg("border", "─".repeat(maxW));
|
|
708
|
+
return [border, ` ${bar} ${hint}`, border];
|
|
709
|
+
},
|
|
710
|
+
};
|
|
711
|
+
}, { placement: "aboveEditor" });
|
|
712
|
+
};
|
|
713
|
+
|
|
714
|
+
renderWarmup();
|
|
715
|
+
warmupWidgetTimer = setInterval(renderWarmup, 50);
|
|
635
716
|
}
|
|
636
717
|
|
|
637
|
-
|
|
718
|
+
// ─── Recording Widget ───────────────────────────────────────────────────
|
|
719
|
+
|
|
720
|
+
const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];
|
|
721
|
+
|
|
638
722
|
function showRecordingWidget(target: "editor" | "btw") {
|
|
639
723
|
if (!ctx?.hasUI) return;
|
|
640
724
|
|
|
641
|
-
// Store initial state — once live transcription arrives,
|
|
642
|
-
// updateLiveTranscriptWidget takes over and we stop the animation.
|
|
643
|
-
(showRecordingWidget as any)._target = target;
|
|
644
725
|
(showRecordingWidget as any)._frame = 0;
|
|
645
726
|
(showRecordingWidget as any)._hasTranscript = false;
|
|
646
727
|
|
|
647
|
-
// Animate the widget every 300ms (only while no transcript is showing)
|
|
648
728
|
const animTimer = setInterval(() => {
|
|
649
|
-
// Stop animating once live transcript takes over
|
|
650
729
|
if ((showRecordingWidget as any)?._hasTranscript) return;
|
|
651
|
-
|
|
652
730
|
(showRecordingWidget as any)._frame = ((showRecordingWidget as any)._frame || 0) + 1;
|
|
653
731
|
showRecordingWidgetFrame(target, (showRecordingWidget as any)._frame);
|
|
654
732
|
}, 300);
|
|
655
733
|
|
|
656
|
-
// Store the timer so we can clean it up
|
|
657
734
|
(showRecordingWidget as any)._animTimer = animTimer;
|
|
658
|
-
|
|
659
735
|
showRecordingWidgetFrame(target, 0);
|
|
660
736
|
}
|
|
661
737
|
|
|
662
|
-
const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];
|
|
663
|
-
|
|
664
738
|
function showRecordingWidgetFrame(target: "editor" | "btw", frame: number) {
|
|
665
739
|
if (!ctx?.hasUI) return;
|
|
666
740
|
ctx.ui.setWidget("voice-recording", (tui, theme) => {
|
|
@@ -673,7 +747,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
673
747
|
const secs = elapsed % 60;
|
|
674
748
|
const timeStr = mins > 0 ? `${mins}:${String(secs).padStart(2, "0")}` : `${secs}s`;
|
|
675
749
|
|
|
676
|
-
// Animated waveform
|
|
677
750
|
const waveLen = 12;
|
|
678
751
|
let wave = "";
|
|
679
752
|
for (let i = 0; i < waveLen; i++) {
|
|
@@ -702,33 +775,25 @@ export default function (pi: ExtensionAPI) {
|
|
|
702
775
|
? theme.fg("dim", " Release SPACE to finalize")
|
|
703
776
|
: theme.fg("dim", " Release SPACE to stop");
|
|
704
777
|
|
|
705
|
-
|
|
778
|
+
return [
|
|
706
779
|
topBorder,
|
|
707
780
|
theme.fg("borderAccent", "│") + pad(titleLine, maxW) + theme.fg("borderAccent", "│"),
|
|
708
781
|
theme.fg("borderAccent", "│") + pad(hint, maxW) + theme.fg("borderAccent", "│"),
|
|
709
782
|
botBorder,
|
|
710
783
|
];
|
|
711
|
-
return lines;
|
|
712
784
|
},
|
|
713
785
|
};
|
|
714
786
|
}, { placement: "aboveEditor" });
|
|
715
787
|
}
|
|
716
788
|
|
|
717
|
-
|
|
718
|
-
const timer = (showRecordingWidget as any)?._animTimer;
|
|
719
|
-
if (timer) {
|
|
720
|
-
clearInterval(timer);
|
|
721
|
-
(showRecordingWidget as any)._animTimer = null;
|
|
722
|
-
}
|
|
723
|
-
}
|
|
789
|
+
// ─── Live Transcript Widget ─────────────────────────────────────────────
|
|
724
790
|
|
|
725
|
-
/** Show live transcript inside a themed box */
|
|
726
791
|
function updateLiveTranscriptWidget(interim: string, finals: string[]) {
|
|
727
792
|
if (!ctx?.hasUI) return;
|
|
728
793
|
|
|
729
|
-
// Stop the
|
|
794
|
+
// Stop the waveform animation — live transcript takes over
|
|
730
795
|
(showRecordingWidget as any)._hasTranscript = true;
|
|
731
|
-
|
|
796
|
+
clearRecordingAnimTimer();
|
|
732
797
|
|
|
733
798
|
const finalized = finals.join(" ");
|
|
734
799
|
const displayText = finalized + (interim ? (finalized ? " " : "") + interim : "");
|
|
@@ -756,15 +821,14 @@ export default function (pi: ExtensionAPI) {
|
|
|
756
821
|
const label = theme.bold(theme.fg("accent", " VOICE "));
|
|
757
822
|
const timeStyled = theme.fg("muted", timeStr);
|
|
758
823
|
const titleLine = ` ${dot} ${label} ${timeStyled}`;
|
|
759
|
-
const hint = theme.fg("dim", " Release SPACE to
|
|
824
|
+
const hint = theme.fg("dim", " Release SPACE to finalize");
|
|
760
825
|
const lines = [topBorder, side(titleLine)];
|
|
761
826
|
|
|
762
827
|
if (!displayText.trim()) {
|
|
763
|
-
lines.push(side(theme.fg("dim", " Listening
|
|
828
|
+
lines.push(side(theme.fg("dim", " Listening… speak now")));
|
|
764
829
|
} else {
|
|
765
830
|
lines.push(sep);
|
|
766
|
-
|
|
767
|
-
const innerMax = maxW - 4; // padding inside box
|
|
831
|
+
const innerMax = maxW - 4;
|
|
768
832
|
const words = displayText.split(" ");
|
|
769
833
|
const wrappedLines: string[] = [];
|
|
770
834
|
let currentLine = "";
|
|
@@ -779,11 +843,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
779
843
|
}
|
|
780
844
|
if (currentLine) wrappedLines.push(currentLine);
|
|
781
845
|
|
|
782
|
-
// Show last 3 lines of transcript
|
|
783
846
|
const visible = wrappedLines.slice(-3);
|
|
784
847
|
for (let i = 0; i < visible.length; i++) {
|
|
785
848
|
let line = visible[i];
|
|
786
|
-
// Style: finalized parts in normal text, interim in accent
|
|
787
849
|
if (i === visible.length - 1 && interim) {
|
|
788
850
|
line = theme.fg("text", line) + theme.fg("accent", "▍");
|
|
789
851
|
} else {
|
|
@@ -801,8 +863,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
801
863
|
}, { placement: "aboveEditor" });
|
|
802
864
|
}
|
|
803
865
|
|
|
804
|
-
|
|
805
|
-
|
|
866
|
+
// ─── Finalizing Widget ──────────────────────────────────────────────────
|
|
867
|
+
|
|
868
|
+
function showFinalizingWidget() {
|
|
806
869
|
if (!ctx?.hasUI) return;
|
|
807
870
|
ctx.ui.setWidget("voice-recording", (tui, theme) => {
|
|
808
871
|
return {
|
|
@@ -818,7 +881,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
818
881
|
};
|
|
819
882
|
const spinner = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
|
|
820
883
|
const idx = Math.floor(Date.now() / 100) % spinner.length;
|
|
821
|
-
const line = ` ${theme.fg("accent", spinner[idx])} ${theme.fg("dim", "Finalizing transcription
|
|
884
|
+
const line = ` ${theme.fg("accent", spinner[idx])} ${theme.fg("dim", "Finalizing transcription…")}`;
|
|
822
885
|
return [topBorder, side(line), botBorder];
|
|
823
886
|
},
|
|
824
887
|
};
|
|
@@ -828,104 +891,147 @@ export default function (pi: ExtensionAPI) {
|
|
|
828
891
|
// ─── Voice: Start / Stop (Streaming or Legacy) ───────────────────────────
|
|
829
892
|
|
|
830
893
|
async function startVoiceRecording(target: "editor" | "btw" = "editor"): Promise<boolean> {
|
|
831
|
-
if (
|
|
894
|
+
if (!ctx) return false;
|
|
895
|
+
|
|
896
|
+
// ── SESSION CORRUPTION GUARD ──
|
|
897
|
+
// If a previous session is still recording or finalizing, abort it first.
|
|
898
|
+
// This prevents the "slow connection overlaps new recording" bug.
|
|
899
|
+
if (voiceState === "finalizing" || voiceState === "recording") {
|
|
900
|
+
abortSession(activeSession);
|
|
901
|
+
activeSession = null;
|
|
902
|
+
clearRecordingAnimTimer();
|
|
903
|
+
clearWarmupWidget();
|
|
904
|
+
hideWidget();
|
|
905
|
+
setVoiceState("idle");
|
|
906
|
+
// Brief pause to let resources release
|
|
907
|
+
await new Promise((r) => setTimeout(r, CORRUPTION_GUARD_MS));
|
|
908
|
+
}
|
|
909
|
+
|
|
910
|
+
// ── STALE TRANSCRIPT CLEANUP ──
|
|
911
|
+
// Clear any prior transcript from the widget
|
|
912
|
+
hideWidget();
|
|
832
913
|
|
|
833
914
|
currentTarget = target;
|
|
834
915
|
recordingStart = Date.now();
|
|
916
|
+
retryAttempts = 0;
|
|
835
917
|
|
|
836
918
|
if (isDeepgramStreaming(config)) {
|
|
837
|
-
|
|
838
|
-
|
|
919
|
+
return startStreamingRecording(target);
|
|
920
|
+
} else {
|
|
921
|
+
return startLegacyRecording(target);
|
|
922
|
+
}
|
|
923
|
+
}
|
|
839
924
|
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
updateLiveTranscriptWidget(interim, finals);
|
|
843
|
-
updateVoiceStatus();
|
|
844
|
-
},
|
|
845
|
-
onDone: (fullText) => {
|
|
846
|
-
activeSession = null;
|
|
847
|
-
stopRecordingWidgetAnimation();
|
|
848
|
-
ctx?.ui.setWidget("voice-recording", undefined);
|
|
925
|
+
async function startStreamingRecording(target: "editor" | "btw"): Promise<boolean> {
|
|
926
|
+
setVoiceState("recording");
|
|
849
927
|
|
|
850
|
-
|
|
928
|
+
const session = startStreamingSession(config, {
|
|
929
|
+
onTranscript: (interim, finals) => {
|
|
930
|
+
// Live transcript update — this is the key UX feature
|
|
931
|
+
updateLiveTranscriptWidget(interim, finals);
|
|
932
|
+
updateVoiceStatus();
|
|
933
|
+
},
|
|
934
|
+
onDone: (fullText, meta) => {
|
|
935
|
+
activeSession = null;
|
|
936
|
+
clearRecordingAnimTimer();
|
|
937
|
+
hideWidget();
|
|
938
|
+
lastStopTime = Date.now();
|
|
939
|
+
|
|
940
|
+
if (!fullText.trim()) {
|
|
941
|
+
// ── DISTINGUISH SILENCE VS NO SPEECH ──
|
|
942
|
+
if (!meta.hadAudio) {
|
|
943
|
+
ctx?.ui.notify("Microphone captured no audio. Check mic permissions.", "error");
|
|
944
|
+
} else if (!meta.hadSpeech) {
|
|
945
|
+
ctx?.ui.notify("Microphone captured silence — no speech detected.", "warning");
|
|
946
|
+
} else {
|
|
851
947
|
ctx?.ui.notify("No speech detected.", "warning");
|
|
852
|
-
setVoiceState("idle");
|
|
853
|
-
return;
|
|
854
948
|
}
|
|
949
|
+
setVoiceState("idle");
|
|
950
|
+
return;
|
|
951
|
+
}
|
|
855
952
|
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
}
|
|
953
|
+
if (target === "btw") {
|
|
954
|
+
handleBtw(fullText);
|
|
955
|
+
} else {
|
|
956
|
+
if (ctx?.hasUI) {
|
|
957
|
+
const existing = ctx.ui.getEditorText();
|
|
958
|
+
ctx.ui.setEditorText(existing ? existing + " " + fullText : fullText);
|
|
959
|
+
const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
|
|
960
|
+
ctx.ui.notify(
|
|
961
|
+
`STT (${elapsed}s): ${fullText.slice(0, 80)}${fullText.length > 80 ? "…" : ""}`,
|
|
962
|
+
"info",
|
|
963
|
+
);
|
|
868
964
|
}
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
965
|
+
}
|
|
966
|
+
setVoiceState("idle");
|
|
967
|
+
},
|
|
968
|
+
onError: (err) => {
|
|
969
|
+
activeSession = null;
|
|
970
|
+
clearRecordingAnimTimer();
|
|
971
|
+
hideWidget();
|
|
972
|
+
|
|
973
|
+
// ── TRANSIENT FAILURE RETRY ──
|
|
974
|
+
// On any streaming error reported via onError (WebSocket or Deepgram), auto-retry up to MAX_RETRY_ATTEMPTS times after RETRY_DELAY_MS
|
|
975
|
+
if (retryAttempts < MAX_RETRY_ATTEMPTS) {
|
|
976
|
+
retryAttempts++;
|
|
977
|
+
ctx?.ui.notify(`Voice connection error — retrying (${retryAttempts}/${MAX_RETRY_ATTEMPTS})…`, "warning");
|
|
978
|
+
setTimeout(() => {
|
|
979
|
+
if (voiceState !== "idle") {
|
|
980
|
+
setVoiceState("idle");
|
|
981
|
+
}
|
|
982
|
+
startStreamingRecording(target);
|
|
983
|
+
}, RETRY_DELAY_MS);
|
|
984
|
+
return;
|
|
985
|
+
}
|
|
879
986
|
|
|
880
|
-
|
|
987
|
+
ctx?.ui.notify(`STT error: ${err}`, "error");
|
|
881
988
|
setVoiceState("idle");
|
|
882
|
-
|
|
883
|
-
|
|
989
|
+
},
|
|
990
|
+
});
|
|
884
991
|
|
|
885
|
-
|
|
992
|
+
if (!session) {
|
|
993
|
+
setVoiceState("idle");
|
|
994
|
+
return false;
|
|
995
|
+
}
|
|
886
996
|
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
997
|
+
activeSession = session;
|
|
998
|
+
|
|
999
|
+
// Status timer: refreshes the elapsed-time status every second and auto-stops at MAX_RECORDING_SECS
|
|
1000
|
+
statusTimer = setInterval(() => {
|
|
1001
|
+
if (voiceState === "recording") {
|
|
1002
|
+
updateVoiceStatus();
|
|
1003
|
+
const elapsed = (Date.now() - recordingStart) / 1000;
|
|
1004
|
+
if (elapsed >= MAX_RECORDING_SECS) {
|
|
1005
|
+
stopVoiceRecording(target);
|
|
896
1006
|
}
|
|
897
|
-
}
|
|
1007
|
+
}
|
|
1008
|
+
}, 1000);
|
|
898
1009
|
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
1010
|
+
showRecordingWidget(target);
|
|
1011
|
+
return true;
|
|
1012
|
+
}
|
|
902
1013
|
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
1014
|
+
async function startLegacyRecording(target: "editor" | "btw"): Promise<boolean> {
|
|
1015
|
+
if (!ctx) return false;
|
|
1016
|
+
tempFile = path.join(os.tmpdir(), `pi-voice-${Date.now()}.wav`);
|
|
1017
|
+
if (!startLegacyRecordingToFile(tempFile)) {
|
|
1018
|
+
ctx.ui.notify("Voice requires SoX. Install: brew install sox", "error");
|
|
1019
|
+
return false;
|
|
1020
|
+
}
|
|
910
1021
|
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
stopVoiceRecording(target);
|
|
919
|
-
}
|
|
1022
|
+
setVoiceState("recording");
|
|
1023
|
+
statusTimer = setInterval(() => {
|
|
1024
|
+
if (voiceState === "recording") {
|
|
1025
|
+
updateVoiceStatus();
|
|
1026
|
+
const elapsed = (Date.now() - recordingStart) / 1000;
|
|
1027
|
+
if (elapsed >= MAX_RECORDING_SECS) {
|
|
1028
|
+
stopVoiceRecording(target);
|
|
920
1029
|
}
|
|
921
|
-
}, 1000);
|
|
922
|
-
|
|
923
|
-
if (ctx.hasUI) {
|
|
924
|
-
// Show themed recording widget for legacy path
|
|
925
|
-
showRecordingWidget(target);
|
|
926
1030
|
}
|
|
927
|
-
|
|
928
|
-
|
|
1031
|
+
}, 1000);
|
|
1032
|
+
|
|
1033
|
+
showRecordingWidget(target);
|
|
1034
|
+
return true;
|
|
929
1035
|
}
|
|
930
1036
|
|
|
931
1037
|
async function stopVoiceRecording(target: "editor" | "btw" = "editor") {
|
|
@@ -933,34 +1039,35 @@ export default function (pi: ExtensionAPI) {
|
|
|
933
1039
|
if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
|
|
934
1040
|
|
|
935
1041
|
if (activeSession) {
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
showTranscribingWidget();
|
|
1042
|
+
setVoiceState("finalizing");
|
|
1043
|
+
clearRecordingAnimTimer();
|
|
1044
|
+
showFinalizingWidget();
|
|
940
1045
|
stopStreamingSession(activeSession);
|
|
941
1046
|
return;
|
|
942
1047
|
}
|
|
943
1048
|
|
|
944
|
-
//
|
|
1049
|
+
// Legacy path
|
|
945
1050
|
const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
|
|
946
1051
|
const audioFile = tempFile;
|
|
947
|
-
setVoiceState("
|
|
948
|
-
|
|
949
|
-
|
|
1052
|
+
setVoiceState("finalizing");
|
|
1053
|
+
clearRecordingAnimTimer();
|
|
1054
|
+
showFinalizingWidget();
|
|
950
1055
|
|
|
951
1056
|
await stopLegacyRecording();
|
|
952
1057
|
|
|
953
1058
|
if (!audioFile || !fs.existsSync(audioFile)) {
|
|
954
1059
|
ctx.ui.notify("No audio recorded.", "warning");
|
|
1060
|
+
hideWidget();
|
|
955
1061
|
setVoiceState("idle");
|
|
956
1062
|
return;
|
|
957
1063
|
}
|
|
958
1064
|
|
|
959
1065
|
const stats = fs.statSync(audioFile);
|
|
960
1066
|
if (stats.size < 1000) {
|
|
961
|
-
ctx.ui.notify("Recording too short.", "warning");
|
|
1067
|
+
ctx.ui.notify("Recording too short — mic captured silence.", "warning");
|
|
962
1068
|
try { fs.unlinkSync(audioFile); } catch {}
|
|
963
1069
|
tempFile = null;
|
|
1070
|
+
hideWidget();
|
|
964
1071
|
setVoiceState("idle");
|
|
965
1072
|
return;
|
|
966
1073
|
}
|
|
@@ -971,6 +1078,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
971
1078
|
try { fs.unlinkSync(audioFile); } catch {}
|
|
972
1079
|
if (tempFile === audioFile) tempFile = null;
|
|
973
1080
|
|
|
1081
|
+
hideWidget();
|
|
1082
|
+
|
|
974
1083
|
if (result.error) {
|
|
975
1084
|
ctx.ui.notify(`STT error: ${result.error}`, "error");
|
|
976
1085
|
setVoiceState("idle");
|
|
@@ -991,7 +1100,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
991
1100
|
const existing = ctx.ui.getEditorText();
|
|
992
1101
|
ctx.ui.setEditorText(existing ? existing + " " + transcript : transcript);
|
|
993
1102
|
ctx.ui.notify(
|
|
994
|
-
`STT (${elapsed}s): ${transcript.slice(0, 80)}${transcript.length > 80 ? "
|
|
1103
|
+
`STT (${elapsed}s): ${transcript.slice(0, 80)}${transcript.length > 80 ? "…" : ""}`,
|
|
995
1104
|
"info",
|
|
996
1105
|
);
|
|
997
1106
|
}
|
|
@@ -1000,83 +1109,46 @@ export default function (pi: ExtensionAPI) {
|
|
|
1000
1109
|
setVoiceState("idle");
|
|
1001
1110
|
}
|
|
1002
1111
|
|
|
1003
|
-
// ─── Hold-to-
|
|
1112
|
+
// ─── Hold-to-Talk State Machine ─────────────────────────────────────────
|
|
1004
1113
|
//
|
|
1005
|
-
// SPACE
|
|
1006
|
-
// 1. The editor is empty (no text typed yet)
|
|
1007
|
-
// 2. SPACE is held for ≥ HOLD_THRESHOLD_MS (500ms)
|
|
1114
|
+
// SPACE key handling with strict hold-duration detection:
|
|
1008
1115
|
//
|
|
1009
|
-
//
|
|
1010
|
-
//
|
|
1116
|
+
// 1. SPACE press (first) → enter "warmup" state, start 500ms timer
|
|
1117
|
+
// 2. During warmup: show progress bar, consume repeat presses
|
|
1118
|
+
// 3. Timer fires → transition to "recording", start voice capture
|
|
1119
|
+
// 4. SPACE release → stop recording, finalize
|
|
1120
|
+
// 5. If released during warmup → cancel, type a space character
|
|
1011
1121
|
//
|
|
1012
|
-
//
|
|
1013
|
-
// Holding a key generates rapid press events (~30ms apart). We detect
|
|
1014
|
-
// "release" by watching for the stream of space presses to STOP.
|
|
1015
|
-
// Once the gap exceeds RELEASE_DETECT_MS (200ms), we know the user
|
|
1016
|
-
// lifted their finger and we stop recording.
|
|
1017
|
-
//
|
|
1018
|
-
// Flow:
|
|
1019
|
-
// Hold SPACE → rapid presses arrive → first press starts 500ms timer →
|
|
1020
|
-
// timer fires → recording starts → presses keep coming (consumed) →
|
|
1021
|
-
// user releases → presses stop → 200ms silence → auto-stop recording
|
|
1022
|
-
//
|
|
1023
|
-
// Kitty protocol terminals get true key-release events and work natively.
|
|
1024
|
-
|
|
1025
|
-
const HOLD_THRESHOLD_MS = 500; // minimum hold time before voice activates
|
|
1026
|
-
const RELEASE_DETECT_MS = 200; // gap in key-repeat that means "released"
|
|
1027
|
-
let kittyReleaseDetected = false;
|
|
1028
|
-
let spaceDownTime: number | null = null;
|
|
1029
|
-
let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
|
|
1030
|
-
let spaceConsumed = false;
|
|
1031
|
-
let lastSpacePressTime = 0;
|
|
1032
|
-
let releaseDetectTimer: ReturnType<typeof setTimeout> | null = null;
|
|
1033
|
-
|
|
1034
|
-
function clearHoldTimer() {
|
|
1035
|
-
if (holdActivationTimer) {
|
|
1036
|
-
clearTimeout(holdActivationTimer);
|
|
1037
|
-
holdActivationTimer = null;
|
|
1038
|
-
}
|
|
1039
|
-
}
|
|
1040
|
-
|
|
1041
|
-
function clearReleaseTimer() {
|
|
1042
|
-
if (releaseDetectTimer) {
|
|
1043
|
-
clearTimeout(releaseDetectTimer);
|
|
1044
|
-
releaseDetectTimer = null;
|
|
1045
|
-
}
|
|
1046
|
-
}
|
|
1122
|
+
// Non-Kitty detection: rapid press events = "holding", gap > 150ms = "released"
|
|
1047
1123
|
|
|
1048
|
-
/** Called when we detect the user has released SPACE (non-Kitty) */
|
|
1049
1124
|
function onSpaceReleaseDetected() {
|
|
1050
1125
|
releaseDetectTimer = null;
|
|
1051
1126
|
|
|
1052
|
-
//
|
|
1053
|
-
if (
|
|
1127
|
+
// Released during warmup — cancel, type a space
|
|
1128
|
+
if (voiceState === "warmup") {
|
|
1054
1129
|
clearHoldTimer();
|
|
1130
|
+
clearWarmupWidget();
|
|
1131
|
+
hideWidget();
|
|
1132
|
+
setVoiceState("idle");
|
|
1055
1133
|
spaceDownTime = null;
|
|
1056
1134
|
spaceConsumed = false;
|
|
1057
|
-
// Insert a space character
|
|
1058
1135
|
if (ctx?.hasUI) {
|
|
1059
1136
|
ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1060
|
-
hideHoldHintWidget();
|
|
1061
1137
|
}
|
|
1062
1138
|
return;
|
|
1063
1139
|
}
|
|
1064
1140
|
|
|
1065
|
-
//
|
|
1141
|
+
// Released during recording — stop
|
|
1066
1142
|
if (spaceConsumed && voiceState === "recording") {
|
|
1067
|
-
isHolding = false;
|
|
1068
1143
|
spaceConsumed = false;
|
|
1069
1144
|
spaceDownTime = null;
|
|
1070
1145
|
stopVoiceRecording("editor");
|
|
1071
1146
|
}
|
|
1072
1147
|
}
|
|
1073
1148
|
|
|
1074
|
-
/** Reset the release detection timer — called on every space press */
|
|
1075
1149
|
function resetReleaseDetect() {
|
|
1076
1150
|
clearReleaseTimer();
|
|
1077
|
-
|
|
1078
|
-
// start a timer to detect release
|
|
1079
|
-
if (spaceDownTime || spaceConsumed || voiceState === "recording") {
|
|
1151
|
+
if (voiceState === "warmup" || voiceState === "recording" || spaceDownTime || spaceConsumed) {
|
|
1080
1152
|
releaseDetectTimer = setTimeout(onSpaceReleaseDetected, RELEASE_DETECT_MS);
|
|
1081
1153
|
}
|
|
1082
1154
|
}
|
|
@@ -1091,27 +1163,26 @@ export default function (pi: ExtensionAPI) {
|
|
|
1091
1163
|
|
|
1092
1164
|
// ── SPACE handling ──
|
|
1093
1165
|
if (matchesKey(data, "space")) {
|
|
1094
|
-
// Check editor content — hold-to-talk still works even with content,
|
|
1095
|
-
// but a quick tap types a space as normal
|
|
1096
|
-
const editorHasContent = !!(ctx?.hasUI && ctx.ui.getEditorText()?.trim().length);
|
|
1097
1166
|
|
|
1098
1167
|
// ── Kitty key-release ──
|
|
1099
1168
|
if (isKeyRelease(data)) {
|
|
1100
1169
|
kittyReleaseDetected = true;
|
|
1101
1170
|
clearReleaseTimer();
|
|
1102
1171
|
|
|
1103
|
-
// Released
|
|
1104
|
-
if (
|
|
1172
|
+
// Released during warmup → cancel, type a space
|
|
1173
|
+
if (voiceState === "warmup") {
|
|
1105
1174
|
clearHoldTimer();
|
|
1175
|
+
clearWarmupWidget();
|
|
1176
|
+
hideWidget();
|
|
1177
|
+
setVoiceState("idle");
|
|
1106
1178
|
spaceDownTime = null;
|
|
1107
1179
|
spaceConsumed = false;
|
|
1108
1180
|
if (ctx?.hasUI) ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1109
1181
|
return { consume: true };
|
|
1110
1182
|
}
|
|
1111
1183
|
|
|
1112
|
-
// Released
|
|
1184
|
+
// Released during recording → stop
|
|
1113
1185
|
if (spaceConsumed && voiceState === "recording") {
|
|
1114
|
-
isHolding = false;
|
|
1115
1186
|
spaceConsumed = false;
|
|
1116
1187
|
spaceDownTime = null;
|
|
1117
1188
|
stopVoiceRecording("editor");
|
|
@@ -1123,60 +1194,58 @@ export default function (pi: ExtensionAPI) {
|
|
|
1123
1194
|
return undefined;
|
|
1124
1195
|
}
|
|
1125
1196
|
|
|
1126
|
-
// ── Kitty key-repeat:
|
|
1197
|
+
// ── Kitty key-repeat: suppress while in warmup/recording ──
|
|
1127
1198
|
if (isKeyRepeat(data)) {
|
|
1128
|
-
if (
|
|
1129
|
-
resetReleaseDetect();
|
|
1199
|
+
if (voiceState === "warmup" || voiceState === "recording" || voiceState === "finalizing" || spaceConsumed) {
|
|
1200
|
+
resetReleaseDetect();
|
|
1130
1201
|
return { consume: true };
|
|
1131
1202
|
}
|
|
1132
1203
|
return undefined;
|
|
1133
1204
|
}
|
|
1134
1205
|
|
|
1135
1206
|
// === Key PRESS ===
|
|
1136
|
-
// In non-Kitty terminals, holding a key sends rapid press events.
|
|
1137
|
-
// We use these to detect "still holding" and the gap to detect "released".
|
|
1138
|
-
|
|
1139
|
-
// Reset release detection — user is still holding
|
|
1140
1207
|
resetReleaseDetect();
|
|
1141
1208
|
|
|
1142
|
-
// If
|
|
1143
|
-
if (voiceState === "
|
|
1209
|
+
// If finalizing → ignore
|
|
1210
|
+
if (voiceState === "finalizing") {
|
|
1144
1211
|
return { consume: true };
|
|
1145
1212
|
}
|
|
1146
1213
|
|
|
1147
|
-
// If already recording → just consume (release
|
|
1214
|
+
// If already recording → just consume (release handles stop)
|
|
1148
1215
|
if (voiceState === "recording") {
|
|
1149
1216
|
return { consume: true };
|
|
1150
1217
|
}
|
|
1151
1218
|
|
|
1152
|
-
// If
|
|
1153
|
-
if (
|
|
1219
|
+
// If already in warmup → consume (threshold timer is running)
|
|
1220
|
+
if (voiceState === "warmup") {
|
|
1221
|
+
return { consume: true };
|
|
1222
|
+
}
|
|
1223
|
+
|
|
1224
|
+
// If we've already consumed space for this hold → consume
|
|
1225
|
+
if (spaceConsumed || spaceDownTime) {
|
|
1154
1226
|
return { consume: true };
|
|
1155
1227
|
}
|
|
1156
1228
|
|
|
1157
|
-
//
|
|
1229
|
+
// IDLE — first press → start warmup
|
|
1158
1230
|
if (voiceState === "idle") {
|
|
1159
1231
|
spaceDownTime = Date.now();
|
|
1160
1232
|
spaceConsumed = false;
|
|
1161
|
-
lastSpacePressTime = Date.now();
|
|
1162
1233
|
|
|
1163
|
-
//
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
}
|
|
1234
|
+
// Transition to warmup state
|
|
1235
|
+
setVoiceState("warmup");
|
|
1236
|
+
showWarmupWidget();
|
|
1167
1237
|
|
|
1168
1238
|
// After threshold: activate voice recording
|
|
1169
|
-
// Works regardless of whether editor has content — hold always activates voice
|
|
1170
1239
|
holdActivationTimer = setTimeout(() => {
|
|
1171
1240
|
holdActivationTimer = null;
|
|
1172
|
-
if (voiceState === "
|
|
1241
|
+
if (voiceState === "warmup" && spaceDownTime) {
|
|
1242
|
+
clearWarmupWidget();
|
|
1173
1243
|
spaceConsumed = true;
|
|
1174
|
-
isHolding = true;
|
|
1175
1244
|
startVoiceRecording("editor").then((ok) => {
|
|
1176
1245
|
if (!ok) {
|
|
1177
|
-
isHolding = false;
|
|
1178
1246
|
spaceConsumed = false;
|
|
1179
1247
|
spaceDownTime = null;
|
|
1248
|
+
setVoiceState("idle");
|
|
1180
1249
|
}
|
|
1181
1250
|
});
|
|
1182
1251
|
} else {
|
|
@@ -1188,17 +1257,19 @@ export default function (pi: ExtensionAPI) {
|
|
|
1188
1257
|
return { consume: true };
|
|
1189
1258
|
}
|
|
1190
1259
|
|
|
1191
|
-
if (
|
|
1260
|
+
if (spaceConsumed) return { consume: true };
|
|
1192
1261
|
return undefined;
|
|
1193
1262
|
}
|
|
1194
1263
|
|
|
1195
|
-
// ── Any other key
|
|
1196
|
-
if (
|
|
1264
|
+
// ── Any other key during warmup → cancel hold, type a space ──
|
|
1265
|
+
if (voiceState === "warmup" && spaceDownTime && !spaceConsumed) {
|
|
1197
1266
|
clearHoldTimer();
|
|
1198
1267
|
clearReleaseTimer();
|
|
1268
|
+
clearWarmupWidget();
|
|
1269
|
+
hideWidget();
|
|
1270
|
+
setVoiceState("idle");
|
|
1199
1271
|
if (ctx?.hasUI) {
|
|
1200
1272
|
ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1201
|
-
hideHoldHintWidget();
|
|
1202
1273
|
}
|
|
1203
1274
|
spaceDownTime = null;
|
|
1204
1275
|
spaceConsumed = false;
|
|
@@ -1209,8 +1280,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1209
1280
|
if (matchesKey(data, "ctrl+shift+b")) {
|
|
1210
1281
|
if (isKeyRelease(data)) {
|
|
1211
1282
|
kittyReleaseDetected = true;
|
|
1212
|
-
if (
|
|
1213
|
-
isHolding = false;
|
|
1283
|
+
if (voiceState === "recording" && currentTarget === "btw") {
|
|
1214
1284
|
stopVoiceRecording("btw");
|
|
1215
1285
|
return { consume: true };
|
|
1216
1286
|
}
|
|
@@ -1218,25 +1288,23 @@ export default function (pi: ExtensionAPI) {
|
|
|
1218
1288
|
}
|
|
1219
1289
|
|
|
1220
1290
|
if (isKeyRepeat(data)) {
|
|
1221
|
-
if (
|
|
1291
|
+
if (voiceState === "recording" && currentTarget === "btw") return { consume: true };
|
|
1222
1292
|
return undefined;
|
|
1223
1293
|
}
|
|
1224
1294
|
|
|
1225
|
-
if (voiceState === "recording") {
|
|
1226
|
-
isHolding = false;
|
|
1295
|
+
if (voiceState === "recording" && currentTarget === "btw") {
|
|
1227
1296
|
stopVoiceRecording("btw");
|
|
1228
1297
|
return { consume: true };
|
|
1229
1298
|
}
|
|
1230
1299
|
|
|
1231
|
-
if (voiceState === "idle"
|
|
1232
|
-
|
|
1233
|
-
startVoiceRecording("btw").then((ok) => {
|
|
1234
|
-
if (!ok) isHolding = false;
|
|
1235
|
-
});
|
|
1300
|
+
if (voiceState === "idle") {
|
|
1301
|
+
startVoiceRecording("btw");
|
|
1236
1302
|
return { consume: true };
|
|
1237
1303
|
}
|
|
1238
1304
|
|
|
1239
|
-
if (
|
|
1305
|
+
if (voiceState === "recording" || voiceState === "finalizing" || voiceState === "warmup") {
|
|
1306
|
+
return { consume: true };
|
|
1307
|
+
}
|
|
1240
1308
|
return undefined;
|
|
1241
1309
|
}
|
|
1242
1310
|
|
|
@@ -1280,12 +1348,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
1280
1348
|
"",
|
|
1281
1349
|
];
|
|
1282
1350
|
|
|
1283
|
-
lines.push(` Q: ${last.question.slice(0, 100)}${last.question.length > 100 ? "
|
|
1351
|
+
lines.push(` Q: ${last.question.slice(0, 100)}${last.question.length > 100 ? "…" : ""}`);
|
|
1284
1352
|
const answerLines = last.answer.split("\n");
|
|
1285
1353
|
for (const line of answerLines.slice(0, 8)) {
|
|
1286
1354
|
lines.push(` ${line}`);
|
|
1287
1355
|
}
|
|
1288
|
-
if (answerLines.length > 8) lines.push("
|
|
1356
|
+
if (answerLines.length > 8) lines.push(" …");
|
|
1289
1357
|
|
|
1290
1358
|
lines.push("");
|
|
1291
1359
|
lines.push(" /btw:clear to dismiss | /btw:inject to send to agent");
|
|
@@ -1301,9 +1369,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
1301
1369
|
ctx.ui.setWidget("btw", [
|
|
1302
1370
|
" BTW",
|
|
1303
1371
|
"",
|
|
1304
|
-
` Q: ${message.slice(0, 100)}${message.length > 100 ? "
|
|
1372
|
+
` Q: ${message.slice(0, 100)}${message.length > 100 ? "…" : ""}`,
|
|
1305
1373
|
"",
|
|
1306
|
-
" Thinking
|
|
1374
|
+
" Thinking…",
|
|
1307
1375
|
], { placement: "aboveEditor" });
|
|
1308
1376
|
|
|
1309
1377
|
const btwContext = buildBtwContext();
|
|
@@ -1388,16 +1456,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
1388
1456
|
return;
|
|
1389
1457
|
}
|
|
1390
1458
|
if (voiceState === "idle") {
|
|
1391
|
-
// Direct start — bypass hold threshold
|
|
1392
1459
|
spaceConsumed = true;
|
|
1393
|
-
isHolding = true;
|
|
1394
1460
|
const ok = await startVoiceRecording("editor");
|
|
1395
1461
|
if (!ok) {
|
|
1396
|
-
isHolding = false;
|
|
1397
1462
|
spaceConsumed = false;
|
|
1398
1463
|
}
|
|
1399
1464
|
} else if (voiceState === "recording") {
|
|
1400
|
-
isHolding = false;
|
|
1401
1465
|
spaceConsumed = false;
|
|
1402
1466
|
spaceDownTime = null;
|
|
1403
1467
|
clearHoldTimer();
|
|
@@ -1416,9 +1480,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1416
1480
|
configSource = loaded.source;
|
|
1417
1481
|
updateSocketPath(config, currentCwd);
|
|
1418
1482
|
|
|
1419
|
-
// Auto-capture DEEPGRAM_API_KEY from env into config
|
|
1420
|
-
// This ensures streaming works even when Pi is launched from a context
|
|
1421
|
-
// that doesn't source .zshrc (GUI app, tmux, etc.)
|
|
1483
|
+
// Auto-capture DEEPGRAM_API_KEY from env into config
|
|
1422
1484
|
if (process.env.DEEPGRAM_API_KEY && !config.deepgramApiKey) {
|
|
1423
1485
|
config.deepgramApiKey = process.env.DEEPGRAM_API_KEY;
|
|
1424
1486
|
if (configSource !== "default") {
|
|
@@ -1426,7 +1488,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1426
1488
|
}
|
|
1427
1489
|
}
|
|
1428
1490
|
|
|
1429
|
-
//
|
|
1491
|
+
// Try to load DEEPGRAM_API_KEY from shell if not available
|
|
1430
1492
|
if (!resolveDeepgramApiKey(config) && config.backend === "deepgram") {
|
|
1431
1493
|
try {
|
|
1432
1494
|
const result = spawnSync("zsh", ["-ic", "echo $DEEPGRAM_API_KEY"], {
|
|
@@ -1437,7 +1499,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1437
1499
|
const shellKey = result.stdout?.toString().trim();
|
|
1438
1500
|
if (shellKey && shellKey.length > 5) {
|
|
1439
1501
|
config.deepgramApiKey = shellKey;
|
|
1440
|
-
process.env.DEEPGRAM_API_KEY = shellKey;
|
|
1502
|
+
process.env.DEEPGRAM_API_KEY = shellKey;
|
|
1441
1503
|
if (configSource !== "default") {
|
|
1442
1504
|
saveConfig(config, config.scope, currentCwd);
|
|
1443
1505
|
}
|
|
@@ -1448,7 +1510,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
1448
1510
|
if (config.enabled && config.onboarding.completed) {
|
|
1449
1511
|
updateVoiceStatus();
|
|
1450
1512
|
setupHoldToTalk();
|
|
1451
|
-
// Only start daemon for non-streaming backends
|
|
1452
1513
|
if (!isDeepgramStreaming(config)) {
|
|
1453
1514
|
ensureDaemon(config).catch(() => {});
|
|
1454
1515
|
}
|
|
@@ -1509,7 +1570,15 @@ export default function (pi: ExtensionAPI) {
|
|
|
1509
1570
|
ensureDaemon(config).catch(() => {});
|
|
1510
1571
|
}
|
|
1511
1572
|
const mode = isDeepgramStreaming(config) ? "Deepgram streaming" : config.backend;
|
|
1512
|
-
cmdCtx.ui.notify(
|
|
1573
|
+
cmdCtx.ui.notify([
|
|
1574
|
+
`Voice enabled (${mode}).`,
|
|
1575
|
+
"",
|
|
1576
|
+
" Hold SPACE (500ms) → release to transcribe",
|
|
1577
|
+
" Ctrl+Shift+V → toggle recording on/off",
|
|
1578
|
+
" Quick SPACE tap → types a space (no voice)",
|
|
1579
|
+
"",
|
|
1580
|
+
" Live transcription shown while speaking",
|
|
1581
|
+
].join("\n"), "info");
|
|
1513
1582
|
return;
|
|
1514
1583
|
}
|
|
1515
1584
|
|
|
@@ -1524,9 +1593,14 @@ export default function (pi: ExtensionAPI) {
|
|
|
1524
1593
|
|
|
1525
1594
|
if (sub === "stop") {
|
|
1526
1595
|
if (voiceState === "recording") {
|
|
1527
|
-
isHolding = false;
|
|
1528
1596
|
await stopVoiceRecording("editor");
|
|
1529
1597
|
cmdCtx.ui.notify("Recording stopped and transcribed.", "info");
|
|
1598
|
+
} else if (voiceState === "warmup") {
|
|
1599
|
+
clearHoldTimer();
|
|
1600
|
+
clearWarmupWidget();
|
|
1601
|
+
hideWidget();
|
|
1602
|
+
setVoiceState("idle");
|
|
1603
|
+
cmdCtx.ui.notify("Warmup cancelled.", "info");
|
|
1530
1604
|
} else {
|
|
1531
1605
|
cmdCtx.ui.notify("No recording in progress.", "info");
|
|
1532
1606
|
}
|
|
@@ -1534,7 +1608,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1534
1608
|
}
|
|
1535
1609
|
|
|
1536
1610
|
if (sub === "test") {
|
|
1537
|
-
cmdCtx.ui.notify("Testing voice setup
|
|
1611
|
+
cmdCtx.ui.notify("Testing voice setup…", "info");
|
|
1538
1612
|
const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
|
|
1539
1613
|
const dgKey = resolveDeepgramApiKey(config);
|
|
1540
1614
|
const streaming = isDeepgramStreaming(config);
|
|
@@ -1551,11 +1625,15 @@ export default function (pi: ExtensionAPI) {
|
|
|
1551
1625
|
` model status: ${modelReadiness}`,
|
|
1552
1626
|
` language: ${config.language}`,
|
|
1553
1627
|
` streaming: ${streaming ? "YES (Deepgram WS)" : "NO (batch)"}`,
|
|
1554
|
-
` DEEPGRAM_API_KEY: ${dgKey ? "set (" + dgKey.slice(0, 8) + "
|
|
1628
|
+
` DEEPGRAM_API_KEY: ${dgKey ? "set (" + dgKey.slice(0, 8) + "…)" : "NOT SET"}`,
|
|
1555
1629
|
` onboarding: ${config.onboarding.completed ? "complete" : "incomplete"}`,
|
|
1556
1630
|
` python3: ${diagnostics.hasPython ? "OK" : "missing"}`,
|
|
1557
1631
|
` sox/rec: ${diagnostics.hasSox ? "OK" : "missing"}`,
|
|
1558
1632
|
` daemon: ${daemonUp ? "running" : "not running"}`,
|
|
1633
|
+
` state: ${voiceState}`,
|
|
1634
|
+
` hold threshold: ${HOLD_THRESHOLD_MS}ms`,
|
|
1635
|
+
` release detect: ${RELEASE_DETECT_MS}ms`,
|
|
1636
|
+
` kitty protocol: ${kittyReleaseDetected ? "detected" : "not detected"}`,
|
|
1559
1637
|
];
|
|
1560
1638
|
|
|
1561
1639
|
if (diagnostics.hasSox) {
|
|
@@ -1609,14 +1687,15 @@ export default function (pi: ExtensionAPI) {
|
|
|
1609
1687
|
` setup: ${config.onboarding.completed ? `complete (${config.onboarding.source ?? "unknown"})` : "incomplete"}`,
|
|
1610
1688
|
` socket: ${activeSocketPath}`,
|
|
1611
1689
|
` daemon: ${daemonUp ? "running" : "stopped"}${daemonInfo}`,
|
|
1612
|
-
` hold-key: SPACE (
|
|
1690
|
+
` hold-key: SPACE (hold ≥${HOLD_THRESHOLD_MS}ms) or Ctrl+Shift+V (toggle)`,
|
|
1613
1691
|
` btw-key: Ctrl+Shift+B (hold to record → auto-btw)`,
|
|
1692
|
+
` kitty: ${kittyReleaseDetected ? "yes" : "no"}`,
|
|
1614
1693
|
].join("\n"), "info");
|
|
1615
1694
|
return;
|
|
1616
1695
|
}
|
|
1617
1696
|
|
|
1618
1697
|
if (sub === "daemon" || sub === "daemon start") {
|
|
1619
|
-
cmdCtx.ui.notify("Starting STT daemon
|
|
1698
|
+
cmdCtx.ui.notify("Starting STT daemon…", "info");
|
|
1620
1699
|
const ok = await ensureDaemon(config);
|
|
1621
1700
|
cmdCtx.ui.notify(ok ? "Daemon started." : "Failed to start daemon.", ok ? "info" : "error");
|
|
1622
1701
|
return;
|
|
@@ -1858,7 +1937,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1858
1937
|
return;
|
|
1859
1938
|
}
|
|
1860
1939
|
|
|
1861
|
-
cmdCtx.ui.notify("Summarizing BTW thread
|
|
1940
|
+
cmdCtx.ui.notify("Summarizing BTW thread…", "info");
|
|
1862
1941
|
|
|
1863
1942
|
try {
|
|
1864
1943
|
let summary = "";
|