@codexstar/pi-listen 1.0.16 → 1.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extensions/voice.ts +415 -342
- package/package.json +1 -1
package/extensions/voice.ts
CHANGED
|
@@ -1,29 +1,48 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* pi-voice —
|
|
2
|
+
* pi-voice — Enterprise-grade voice STT for Pi CLI.
|
|
3
3
|
*
|
|
4
4
|
* Architecture (modeled after Claude Code's voice pipeline):
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
5
|
+
*
|
|
6
|
+
* STATE MACHINE
|
|
7
|
+
* ─────────────
|
|
8
|
+
* idle → warmup → recording → finalizing → idle
|
|
9
|
+
* ↑ │
|
|
10
|
+
* └─────────┘ (rapid re-press recovery)
|
|
11
|
+
*
|
|
12
|
+
* warmup: User holds SPACE for ≥ HOLD_THRESHOLD_MS (500ms).
|
|
13
|
+
* A "keep holding…" hint is shown. If released before
|
|
14
|
+
* the threshold, a normal space character is typed.
|
|
15
|
+
*
|
|
16
|
+
* recording: SoX captures PCM → Deepgram WebSocket streaming.
|
|
17
|
+
* Live interim + final transcripts update the widget.
|
|
18
|
+
* Release SPACE (or press again in toggle mode) → stop.
|
|
19
|
+
*
|
|
20
|
+
* finalizing: CloseStream sent to Deepgram. Waiting for final
|
|
21
|
+
* transcript. Safety timeout auto-completes.
|
|
22
|
+
*
|
|
23
|
+
* HOLD-TO-TALK DETECTION (non-Kitty terminals)
|
|
24
|
+
* ─────────────────────────────────────────────
|
|
25
|
+
* Holding a key sends rapid key-press events (~30ms apart).
|
|
26
|
+
* "Release" is detected when the gap between presses exceeds
|
|
27
|
+
* RELEASE_DETECT_MS (150ms).
|
|
28
|
+
*
|
|
29
|
+
* ENTERPRISE FALLBACKS
|
|
30
|
+
* ────────────────────
|
|
31
|
+
* • Session corruption guard: new recording request during
|
|
32
|
+
* recording or finalizing automatically cancels the stale session first.
|
|
33
|
+
* • Transient failure retry: on WebSocket error during rapid
|
|
34
|
+
* push-to-talk re-press, auto-retry once after 300ms.
|
|
35
|
+
* • Stale transcript cleanup: any prior transcript is cleared
|
|
36
|
+
* before new recording begins.
|
|
37
|
+
* • Silence vs. no-speech: distinguishes "mic captured silence"
|
|
38
|
+
* from "no speech detected" with distinct user messages.
|
|
12
39
|
*
|
|
13
40
|
* Activation:
|
|
14
|
-
* - Hold SPACE (
|
|
15
|
-
* - Ctrl+Shift+V → toggle start/stop (
|
|
41
|
+
* - Hold SPACE (≥500ms) → release to finalize
|
|
42
|
+
* - Ctrl+Shift+V → toggle start/stop (always works)
|
|
16
43
|
* - Ctrl+Shift+B → hold to record → auto-send as /btw
|
|
17
44
|
*
|
|
18
|
-
* Config in ~/.pi/agent/settings.json:
|
|
19
|
-
* {
|
|
20
|
-
* "voice": {
|
|
21
|
-
* "enabled": true,
|
|
22
|
-
* "language": "en",
|
|
23
|
-
* "backend": "deepgram",
|
|
24
|
-
* "model": "nova-3"
|
|
25
|
-
* }
|
|
26
|
-
* }
|
|
45
|
+
* Config in ~/.pi/agent/settings.json under "voice": { ... }
|
|
27
46
|
*/
|
|
28
47
|
|
|
29
48
|
import type {
|
|
@@ -57,7 +76,14 @@ import { buildProvisioningPlan } from "./voice/install";
|
|
|
57
76
|
|
|
58
77
|
// ─── Types ───────────────────────────────────────────────────────────────────
|
|
59
78
|
|
|
60
|
-
|
|
79
|
+
/**
|
|
80
|
+
* Voice state machine — strict transitions only:
|
|
81
|
+
* idle → warmup → recording → finalizing → idle
|
|
82
|
+
* warmup → idle (released before threshold)
|
|
83
|
+
* recording → idle (on error)
|
|
84
|
+
* finalizing → idle (on completion or timeout)
|
|
85
|
+
*/
|
|
86
|
+
type VoiceState = "idle" | "warmup" | "recording" | "finalizing";
|
|
61
87
|
|
|
62
88
|
interface BtwExchange {
|
|
63
89
|
question: string;
|
|
@@ -76,7 +102,14 @@ const DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
|
76
102
|
const KEEPALIVE_INTERVAL_MS = 8000;
|
|
77
103
|
const FINALIZE_SAFETY_TIMEOUT_MS = 5000;
|
|
78
104
|
const FINALIZE_NO_DATA_TIMEOUT_MS = 1500;
|
|
79
|
-
const MAX_RECORDING_SECS = 120;
|
|
105
|
+
const MAX_RECORDING_SECS = 120;
|
|
106
|
+
|
|
107
|
+
// Hold-to-talk timing
|
|
108
|
+
const HOLD_THRESHOLD_MS = 500; // Must hold for this long before activation
|
|
109
|
+
const RELEASE_DETECT_MS = 150; // Gap in key-repeat → "released"
|
|
110
|
+
const RETRY_DELAY_MS = 300; // Auto-retry on transient failure during rapid re-press
|
|
111
|
+
const MAX_RETRY_ATTEMPTS = 1; // Max retries per activation attempt
|
|
112
|
+
const CORRUPTION_GUARD_MS = 200; // Min gap between stop and restart
|
|
80
113
|
|
|
81
114
|
const EXT_DIR = path.dirname(new URL(import.meta.url).pathname);
|
|
82
115
|
const PROJECT_ROOT = path.join(EXT_DIR, "..");
|
|
@@ -264,25 +297,17 @@ async function transcribeAudioFile(
|
|
|
264
297
|
interface StreamingSession {
|
|
265
298
|
ws: WebSocket;
|
|
266
299
|
recProcess: ChildProcess;
|
|
267
|
-
interimText: string;
|
|
268
|
-
finalizedParts: string[];
|
|
300
|
+
interimText: string;
|
|
301
|
+
finalizedParts: string[];
|
|
269
302
|
keepAliveTimer: ReturnType<typeof setInterval> | null;
|
|
270
303
|
closed: boolean;
|
|
304
|
+
hadAudioData: boolean; // Track if we received any audio data
|
|
305
|
+
hadSpeech: boolean; // Track if Deepgram detected any speech
|
|
271
306
|
onTranscript: (interim: string, finals: string[]) => void;
|
|
272
|
-
onDone: (fullText: string) => void;
|
|
307
|
+
onDone: (fullText: string, meta: { hadAudio: boolean; hadSpeech: boolean }) => void;
|
|
273
308
|
onError: (err: string) => void;
|
|
274
309
|
}
|
|
275
310
|
|
|
276
|
-
function getDeepgramApiKey(): string | null {
|
|
277
|
-
// Priority: env var → config file → null
|
|
278
|
-
return process.env.DEEPGRAM_API_KEY || null;
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
/**
|
|
282
|
-
* Resolve the Deepgram API key from all sources:
|
|
283
|
-
* 1. process.env.DEEPGRAM_API_KEY (shell)
|
|
284
|
-
* 2. config.deepgramApiKey (settings.json, persisted at setup time)
|
|
285
|
-
*/
|
|
286
311
|
function resolveDeepgramApiKey(config: VoiceConfig): string | null {
|
|
287
312
|
return process.env.DEEPGRAM_API_KEY || config.deepgramApiKey || null;
|
|
288
313
|
}
|
|
@@ -290,7 +315,6 @@ function resolveDeepgramApiKey(config: VoiceConfig): string | null {
|
|
|
290
315
|
function isDeepgramStreaming(config: VoiceConfig): boolean {
|
|
291
316
|
const key = resolveDeepgramApiKey(config);
|
|
292
317
|
if (!key) return false;
|
|
293
|
-
// Use streaming for deepgram backend, or auto mode when deepgram key is available
|
|
294
318
|
return config.backend === "deepgram" || (config.backend === "auto" && !!key);
|
|
295
319
|
}
|
|
296
320
|
|
|
@@ -299,8 +323,8 @@ function buildDeepgramWsUrl(config: VoiceConfig): string {
|
|
|
299
323
|
encoding: ENCODING,
|
|
300
324
|
sample_rate: String(SAMPLE_RATE),
|
|
301
325
|
channels: String(CHANNELS),
|
|
302
|
-
endpointing: "300",
|
|
303
|
-
utterance_end_ms: "1000",
|
|
326
|
+
endpointing: "300",
|
|
327
|
+
utterance_end_ms: "1000",
|
|
304
328
|
language: config.language || "en",
|
|
305
329
|
model: config.model || "nova-3",
|
|
306
330
|
smart_format: "true",
|
|
@@ -313,7 +337,7 @@ function startStreamingSession(
|
|
|
313
337
|
config: VoiceConfig,
|
|
314
338
|
callbacks: {
|
|
315
339
|
onTranscript: (interim: string, finals: string[]) => void;
|
|
316
|
-
onDone: (fullText: string) => void;
|
|
340
|
+
onDone: (fullText: string, meta: { hadAudio: boolean; hadSpeech: boolean }) => void;
|
|
317
341
|
onError: (err: string) => void;
|
|
318
342
|
},
|
|
319
343
|
): StreamingSession | null {
|
|
@@ -328,7 +352,6 @@ function startStreamingSession(
|
|
|
328
352
|
return null;
|
|
329
353
|
}
|
|
330
354
|
|
|
331
|
-
// Start SoX streaming raw PCM to stdout (no file)
|
|
332
355
|
const recProc = spawn("rec", [
|
|
333
356
|
"-q",
|
|
334
357
|
"-r", String(SAMPLE_RATE),
|
|
@@ -336,12 +359,11 @@ function startStreamingSession(
|
|
|
336
359
|
"-b", "16",
|
|
337
360
|
"-e", "signed-integer",
|
|
338
361
|
"-t", "raw",
|
|
339
|
-
"-",
|
|
362
|
+
"-",
|
|
340
363
|
], { stdio: ["pipe", "pipe", "pipe"] });
|
|
341
364
|
|
|
342
|
-
recProc.stderr?.on("data", () => {});
|
|
365
|
+
recProc.stderr?.on("data", () => {});
|
|
343
366
|
|
|
344
|
-
// Connect WebSocket to Deepgram
|
|
345
367
|
const wsUrl = buildDeepgramWsUrl(config);
|
|
346
368
|
const ws = new WebSocket(wsUrl, {
|
|
347
369
|
headers: {
|
|
@@ -356,25 +378,25 @@ function startStreamingSession(
|
|
|
356
378
|
finalizedParts: [],
|
|
357
379
|
keepAliveTimer: null,
|
|
358
380
|
closed: false,
|
|
381
|
+
hadAudioData: false,
|
|
382
|
+
hadSpeech: false,
|
|
359
383
|
onTranscript: callbacks.onTranscript,
|
|
360
384
|
onDone: callbacks.onDone,
|
|
361
385
|
onError: callbacks.onError,
|
|
362
386
|
};
|
|
363
387
|
|
|
364
388
|
ws.onopen = () => {
|
|
365
|
-
// Send initial KeepAlive
|
|
366
389
|
try { ws.send(JSON.stringify({ type: "KeepAlive" })); } catch {}
|
|
367
390
|
|
|
368
|
-
// Start keepalive timer
|
|
369
391
|
session.keepAliveTimer = setInterval(() => {
|
|
370
392
|
if (ws.readyState === WebSocket.OPEN) {
|
|
371
393
|
try { ws.send(JSON.stringify({ type: "KeepAlive" })); } catch {}
|
|
372
394
|
}
|
|
373
395
|
}, KEEPALIVE_INTERVAL_MS);
|
|
374
396
|
|
|
375
|
-
// Pipe SoX stdout → WebSocket as binary frames
|
|
376
397
|
recProc.stdout?.on("data", (chunk: Buffer) => {
|
|
377
398
|
if (ws.readyState === WebSocket.OPEN) {
|
|
399
|
+
session.hadAudioData = true;
|
|
378
400
|
try { ws.send(chunk); } catch {}
|
|
379
401
|
}
|
|
380
402
|
});
|
|
@@ -389,38 +411,27 @@ function startStreamingSession(
|
|
|
389
411
|
const alt = msg.channel?.alternatives?.[0];
|
|
390
412
|
const transcript = alt?.transcript || "";
|
|
391
413
|
|
|
414
|
+
if (transcript.trim()) {
|
|
415
|
+
session.hadSpeech = true;
|
|
416
|
+
}
|
|
417
|
+
|
|
392
418
|
if (msg.is_final) {
|
|
393
|
-
// Final result for this audio segment
|
|
394
419
|
if (transcript.trim()) {
|
|
395
420
|
session.finalizedParts.push(transcript.trim());
|
|
396
421
|
}
|
|
397
422
|
session.interimText = "";
|
|
398
423
|
} else {
|
|
399
|
-
// Interim result — live update
|
|
400
424
|
session.interimText = transcript;
|
|
401
425
|
}
|
|
402
426
|
|
|
403
427
|
session.onTranscript(session.interimText, session.finalizedParts);
|
|
404
|
-
|
|
405
|
-
// If speech_final is true, it's the end of an utterance
|
|
406
|
-
// (similar to TranscriptEndpoint in Claude Code's protocol)
|
|
407
|
-
if (msg.speech_final && transcript.trim()) {
|
|
408
|
-
// Already added to finalizedParts above when is_final was true
|
|
409
|
-
}
|
|
410
|
-
} else if (msg.type === "Metadata") {
|
|
411
|
-
// Connection metadata — ignore
|
|
412
|
-
} else if (msg.type === "UtteranceEnd") {
|
|
413
|
-
// Utterance boundary — Deepgram detected end of speech
|
|
414
|
-
// Nothing extra needed, is_final already handles finalization
|
|
415
428
|
} else if (msg.type === "Error" || msg.type === "error") {
|
|
416
429
|
session.onError(msg.message || msg.description || "Deepgram error");
|
|
417
430
|
}
|
|
418
|
-
} catch
|
|
419
|
-
// Ignore parse errors for binary data
|
|
420
|
-
}
|
|
431
|
+
} catch {}
|
|
421
432
|
};
|
|
422
433
|
|
|
423
|
-
ws.onerror = (
|
|
434
|
+
ws.onerror = () => {
|
|
424
435
|
if (!session.closed) {
|
|
425
436
|
session.onError("WebSocket connection error");
|
|
426
437
|
}
|
|
@@ -437,7 +448,6 @@ function startStreamingSession(
|
|
|
437
448
|
});
|
|
438
449
|
|
|
439
450
|
recProc.on("close", () => {
|
|
440
|
-
// SoX stopped — send CloseStream to Deepgram
|
|
441
451
|
if (ws.readyState === WebSocket.OPEN) {
|
|
442
452
|
try { ws.send(JSON.stringify({ type: "CloseStream" })); } catch {}
|
|
443
453
|
}
|
|
@@ -449,22 +459,20 @@ function startStreamingSession(
|
|
|
449
459
|
function stopStreamingSession(session: StreamingSession): void {
|
|
450
460
|
if (session.closed) return;
|
|
451
461
|
|
|
452
|
-
// Stop the microphone
|
|
453
462
|
try { session.recProcess.kill("SIGTERM"); } catch {}
|
|
454
463
|
|
|
455
|
-
// CloseStream tells Deepgram to flush remaining audio
|
|
456
464
|
if (session.ws.readyState === WebSocket.OPEN) {
|
|
457
465
|
try { session.ws.send(JSON.stringify({ type: "CloseStream" })); } catch {}
|
|
458
466
|
}
|
|
459
467
|
|
|
460
|
-
// Safety
|
|
468
|
+
// Safety timeout
|
|
461
469
|
setTimeout(() => {
|
|
462
470
|
if (!session.closed) {
|
|
463
471
|
finalizeSession(session);
|
|
464
472
|
}
|
|
465
473
|
}, FINALIZE_SAFETY_TIMEOUT_MS);
|
|
466
474
|
|
|
467
|
-
//
|
|
475
|
+
// Quick finalize if no new data
|
|
468
476
|
let lastDataTime = Date.now();
|
|
469
477
|
const origOnMessage = session.ws.onmessage;
|
|
470
478
|
session.ws.onmessage = (event: MessageEvent) => {
|
|
@@ -486,21 +494,32 @@ function finalizeSession(session: StreamingSession): void {
|
|
|
486
494
|
if (session.closed) return;
|
|
487
495
|
session.closed = true;
|
|
488
496
|
|
|
489
|
-
// Clean up keepalive
|
|
490
497
|
if (session.keepAliveTimer) {
|
|
491
498
|
clearInterval(session.keepAliveTimer);
|
|
492
499
|
session.keepAliveTimer = null;
|
|
493
500
|
}
|
|
494
501
|
|
|
495
|
-
// Close WebSocket
|
|
496
502
|
try { session.ws.close(); } catch {}
|
|
497
|
-
|
|
498
|
-
// Kill SoX if still running
|
|
499
503
|
try { session.recProcess.kill("SIGKILL"); } catch {}
|
|
500
504
|
|
|
501
|
-
// Deliver final transcript
|
|
502
505
|
const fullText = session.finalizedParts.join(" ").trim();
|
|
503
|
-
session.onDone(fullText
|
|
506
|
+
session.onDone(fullText, {
|
|
507
|
+
hadAudio: session.hadAudioData,
|
|
508
|
+
hadSpeech: session.hadSpeech,
|
|
509
|
+
});
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
// ─── Abort helper — nuke everything synchronously ────────────────────────────
|
|
513
|
+
|
|
514
|
+
function abortSession(session: StreamingSession | null): void {
|
|
515
|
+
if (!session || session.closed) return;
|
|
516
|
+
session.closed = true;
|
|
517
|
+
if (session.keepAliveTimer) {
|
|
518
|
+
clearInterval(session.keepAliveTimer);
|
|
519
|
+
session.keepAliveTimer = null;
|
|
520
|
+
}
|
|
521
|
+
try { session.ws.close(); } catch {}
|
|
522
|
+
try { session.recProcess.kill("SIGKILL"); } catch {}
|
|
504
523
|
}
|
|
505
524
|
|
|
506
525
|
// ─── Extension ───────────────────────────────────────────────────────────────
|
|
@@ -515,11 +534,20 @@ export default function (pi: ExtensionAPI) {
|
|
|
515
534
|
let recordingStart = 0;
|
|
516
535
|
let statusTimer: ReturnType<typeof setInterval> | null = null;
|
|
517
536
|
let terminalInputUnsub: (() => void) | null = null;
|
|
518
|
-
let isHolding = false;
|
|
519
537
|
|
|
520
538
|
// Streaming session state
|
|
521
539
|
let activeSession: StreamingSession | null = null;
|
|
522
540
|
let currentTarget: "editor" | "btw" = "editor";
|
|
541
|
+
let retryAttempts = 0;
|
|
542
|
+
let lastStopTime = 0; // For corruption guard
|
|
543
|
+
|
|
544
|
+
// Hold-to-talk state
|
|
545
|
+
let kittyReleaseDetected = false;
|
|
546
|
+
let spaceDownTime: number | null = null;
|
|
547
|
+
let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
|
|
548
|
+
let spaceConsumed = false; // True once threshold passed and recording started
|
|
549
|
+
let releaseDetectTimer: ReturnType<typeof setTimeout> | null = null;
|
|
550
|
+
let warmupWidgetTimer: ReturnType<typeof setInterval> | null = null;
|
|
523
551
|
|
|
524
552
|
// ─── BTW State ───────────────────────────────────────────────────────────
|
|
525
553
|
|
|
@@ -548,13 +576,16 @@ export default function (pi: ExtensionAPI) {
|
|
|
548
576
|
ctx.ui.setStatus("voice", `MIC ${modeTag}`);
|
|
549
577
|
break;
|
|
550
578
|
}
|
|
579
|
+
case "warmup":
|
|
580
|
+
ctx.ui.setStatus("voice", "🎙️ HOLD...");
|
|
581
|
+
break;
|
|
551
582
|
case "recording": {
|
|
552
583
|
const secs = Math.round((Date.now() - recordingStart) / 1000);
|
|
553
584
|
ctx.ui.setStatus("voice", `🔴 REC ${secs}s`);
|
|
554
585
|
break;
|
|
555
586
|
}
|
|
556
|
-
case "
|
|
557
|
-
ctx.ui.setStatus("voice", "STT...");
|
|
587
|
+
case "finalizing":
|
|
588
|
+
ctx.ui.setStatus("voice", "⏳ STT...");
|
|
558
589
|
break;
|
|
559
590
|
}
|
|
560
591
|
}
|
|
@@ -564,20 +595,57 @@ export default function (pi: ExtensionAPI) {
|
|
|
564
595
|
updateVoiceStatus();
|
|
565
596
|
}
|
|
566
597
|
|
|
598
|
+
// ─── Cleanup helpers ─────────────────────────────────────────────────────
|
|
599
|
+
|
|
600
|
+
function clearHoldTimer() {
|
|
601
|
+
if (holdActivationTimer) {
|
|
602
|
+
clearTimeout(holdActivationTimer);
|
|
603
|
+
holdActivationTimer = null;
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
function clearReleaseTimer() {
|
|
608
|
+
if (releaseDetectTimer) {
|
|
609
|
+
clearTimeout(releaseDetectTimer);
|
|
610
|
+
releaseDetectTimer = null;
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
function clearWarmupWidget() {
|
|
615
|
+
if (warmupWidgetTimer) {
|
|
616
|
+
clearInterval(warmupWidgetTimer);
|
|
617
|
+
warmupWidgetTimer = null;
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
function clearRecordingAnimTimer() {
|
|
622
|
+
const timer = (showRecordingWidget as any)?._animTimer;
|
|
623
|
+
if (timer) {
|
|
624
|
+
clearInterval(timer);
|
|
625
|
+
(showRecordingWidget as any)._animTimer = null;
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
function hideWidget() {
|
|
630
|
+
if (ctx?.hasUI) ctx.ui.setWidget("voice-recording", undefined);
|
|
631
|
+
}
|
|
632
|
+
|
|
567
633
|
function voiceCleanup() {
|
|
568
634
|
if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
|
|
569
635
|
clearHoldTimer();
|
|
570
636
|
clearReleaseTimer();
|
|
571
|
-
|
|
637
|
+
clearWarmupWidget();
|
|
638
|
+
clearRecordingAnimTimer();
|
|
572
639
|
if (activeSession) {
|
|
573
|
-
|
|
640
|
+
abortSession(activeSession);
|
|
574
641
|
activeSession = null;
|
|
575
642
|
}
|
|
576
643
|
if (legacyRecProcess) { legacyRecProcess.kill("SIGTERM"); legacyRecProcess = null; }
|
|
577
644
|
if (tempFile) { try { fs.unlinkSync(tempFile); } catch {} tempFile = null; }
|
|
578
|
-
isHolding = false;
|
|
579
645
|
spaceConsumed = false;
|
|
580
646
|
spaceDownTime = null;
|
|
647
|
+
retryAttempts = 0;
|
|
648
|
+
hideWidget();
|
|
581
649
|
setVoiceState("idle");
|
|
582
650
|
}
|
|
583
651
|
|
|
@@ -609,58 +677,64 @@ export default function (pi: ExtensionAPI) {
|
|
|
609
677
|
].join("\n"), validated ? "info" : "warning");
|
|
610
678
|
}
|
|
611
679
|
|
|
612
|
-
// ───
|
|
680
|
+
// ─── Warmup Widget ──────────────────────────────────────────────────────
|
|
681
|
+
//
|
|
682
|
+
// During the 500ms hold threshold, show a subtle "keep holding…" hint
|
|
683
|
+
// with a progress indicator. This matches Claude Code's warmup pattern.
|
|
613
684
|
|
|
614
|
-
|
|
615
|
-
function showHoldHintWidget() {
|
|
685
|
+
function showWarmupWidget() {
|
|
616
686
|
if (!ctx?.hasUI) return;
|
|
617
|
-
ctx.ui.setWidget("voice-recording", (tui, theme) => {
|
|
618
|
-
return {
|
|
619
|
-
invalidate() {},
|
|
620
|
-
render(width: number): string[] {
|
|
621
|
-
const bar = theme.fg("muted", "─".repeat(Math.min(width - 2, 60)));
|
|
622
|
-
return [
|
|
623
|
-
bar,
|
|
624
|
-
theme.fg("dim", " Hold " + theme.bold("SPACE") + " for voice input..."),
|
|
625
|
-
bar,
|
|
626
|
-
];
|
|
627
|
-
},
|
|
628
|
-
};
|
|
629
|
-
}, { placement: "aboveEditor" });
|
|
630
|
-
}
|
|
631
687
|
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
688
|
+
const startTime = Date.now();
|
|
689
|
+
|
|
690
|
+
const renderWarmup = () => {
|
|
691
|
+
if (!ctx?.hasUI) return;
|
|
692
|
+
const elapsed = Date.now() - startTime;
|
|
693
|
+
const progress = Math.min(elapsed / HOLD_THRESHOLD_MS, 1);
|
|
694
|
+
const barLen = 20;
|
|
695
|
+
const filled = Math.round(progress * barLen);
|
|
696
|
+
const empty = barLen - filled;
|
|
697
|
+
|
|
698
|
+
ctx.ui.setWidget("voice-recording", (tui, theme) => {
|
|
699
|
+
return {
|
|
700
|
+
invalidate() {},
|
|
701
|
+
render(width: number): string[] {
|
|
702
|
+
const maxW = Math.min(width - 2, 60);
|
|
703
|
+
const bar = theme.fg("accent", "█".repeat(filled)) + theme.fg("muted", "░".repeat(empty));
|
|
704
|
+
const hint = progress < 0.6
|
|
705
|
+
? theme.fg("dim", "Keep holding " + theme.bold("SPACE") + " for voice…")
|
|
706
|
+
: theme.fg("accent", "Almost there… keep holding…");
|
|
707
|
+
const border = theme.fg("border", "─".repeat(maxW));
|
|
708
|
+
return [border, ` ${bar} ${hint}`, border];
|
|
709
|
+
},
|
|
710
|
+
};
|
|
711
|
+
}, { placement: "aboveEditor" });
|
|
712
|
+
};
|
|
713
|
+
|
|
714
|
+
renderWarmup();
|
|
715
|
+
warmupWidgetTimer = setInterval(renderWarmup, 50);
|
|
635
716
|
}
|
|
636
717
|
|
|
637
|
-
|
|
718
|
+
// ─── Recording Widget ───────────────────────────────────────────────────
|
|
719
|
+
|
|
720
|
+
const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];
|
|
721
|
+
|
|
638
722
|
function showRecordingWidget(target: "editor" | "btw") {
|
|
639
723
|
if (!ctx?.hasUI) return;
|
|
640
724
|
|
|
641
|
-
// Store initial state — once live transcription arrives,
|
|
642
|
-
// updateLiveTranscriptWidget takes over and we stop the animation.
|
|
643
|
-
(showRecordingWidget as any)._target = target;
|
|
644
725
|
(showRecordingWidget as any)._frame = 0;
|
|
645
726
|
(showRecordingWidget as any)._hasTranscript = false;
|
|
646
727
|
|
|
647
|
-
// Animate the widget every 300ms (only while no transcript is showing)
|
|
648
728
|
const animTimer = setInterval(() => {
|
|
649
|
-
// Stop animating once live transcript takes over
|
|
650
729
|
if ((showRecordingWidget as any)?._hasTranscript) return;
|
|
651
|
-
|
|
652
730
|
(showRecordingWidget as any)._frame = ((showRecordingWidget as any)._frame || 0) + 1;
|
|
653
731
|
showRecordingWidgetFrame(target, (showRecordingWidget as any)._frame);
|
|
654
732
|
}, 300);
|
|
655
733
|
|
|
656
|
-
// Store the timer so we can clean it up
|
|
657
734
|
(showRecordingWidget as any)._animTimer = animTimer;
|
|
658
|
-
|
|
659
735
|
showRecordingWidgetFrame(target, 0);
|
|
660
736
|
}
|
|
661
737
|
|
|
662
|
-
const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];
|
|
663
|
-
|
|
664
738
|
function showRecordingWidgetFrame(target: "editor" | "btw", frame: number) {
|
|
665
739
|
if (!ctx?.hasUI) return;
|
|
666
740
|
ctx.ui.setWidget("voice-recording", (tui, theme) => {
|
|
@@ -673,7 +747,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
673
747
|
const secs = elapsed % 60;
|
|
674
748
|
const timeStr = mins > 0 ? `${mins}:${String(secs).padStart(2, "0")}` : `${secs}s`;
|
|
675
749
|
|
|
676
|
-
// Animated waveform
|
|
677
750
|
const waveLen = 12;
|
|
678
751
|
let wave = "";
|
|
679
752
|
for (let i = 0; i < waveLen; i++) {
|
|
@@ -702,33 +775,25 @@ export default function (pi: ExtensionAPI) {
|
|
|
702
775
|
? theme.fg("dim", " Release SPACE to finalize")
|
|
703
776
|
: theme.fg("dim", " Release SPACE to stop");
|
|
704
777
|
|
|
705
|
-
|
|
778
|
+
return [
|
|
706
779
|
topBorder,
|
|
707
780
|
theme.fg("borderAccent", "│") + pad(titleLine, maxW) + theme.fg("borderAccent", "│"),
|
|
708
781
|
theme.fg("borderAccent", "│") + pad(hint, maxW) + theme.fg("borderAccent", "│"),
|
|
709
782
|
botBorder,
|
|
710
783
|
];
|
|
711
|
-
return lines;
|
|
712
784
|
},
|
|
713
785
|
};
|
|
714
786
|
}, { placement: "aboveEditor" });
|
|
715
787
|
}
|
|
716
788
|
|
|
717
|
-
|
|
718
|
-
const timer = (showRecordingWidget as any)?._animTimer;
|
|
719
|
-
if (timer) {
|
|
720
|
-
clearInterval(timer);
|
|
721
|
-
(showRecordingWidget as any)._animTimer = null;
|
|
722
|
-
}
|
|
723
|
-
}
|
|
789
|
+
// ─── Live Transcript Widget ─────────────────────────────────────────────
|
|
724
790
|
|
|
725
|
-
/** Show live transcript inside a themed box */
|
|
726
791
|
function updateLiveTranscriptWidget(interim: string, finals: string[]) {
|
|
727
792
|
if (!ctx?.hasUI) return;
|
|
728
793
|
|
|
729
|
-
// Stop the
|
|
794
|
+
// Stop the waveform animation — live transcript takes over
|
|
730
795
|
(showRecordingWidget as any)._hasTranscript = true;
|
|
731
|
-
|
|
796
|
+
clearRecordingAnimTimer();
|
|
732
797
|
|
|
733
798
|
const finalized = finals.join(" ");
|
|
734
799
|
const displayText = finalized + (interim ? (finalized ? " " : "") + interim : "");
|
|
@@ -756,15 +821,14 @@ export default function (pi: ExtensionAPI) {
|
|
|
756
821
|
const label = theme.bold(theme.fg("accent", " VOICE "));
|
|
757
822
|
const timeStyled = theme.fg("muted", timeStr);
|
|
758
823
|
const titleLine = ` ${dot} ${label} ${timeStyled}`;
|
|
759
|
-
const hint = theme.fg("dim", " Release SPACE to
|
|
824
|
+
const hint = theme.fg("dim", " Release SPACE to finalize");
|
|
760
825
|
const lines = [topBorder, side(titleLine)];
|
|
761
826
|
|
|
762
827
|
if (!displayText.trim()) {
|
|
763
|
-
lines.push(side(theme.fg("dim", " Listening
|
|
828
|
+
lines.push(side(theme.fg("dim", " Listening… speak now")));
|
|
764
829
|
} else {
|
|
765
830
|
lines.push(sep);
|
|
766
|
-
|
|
767
|
-
const innerMax = maxW - 4; // padding inside box
|
|
831
|
+
const innerMax = maxW - 4;
|
|
768
832
|
const words = displayText.split(" ");
|
|
769
833
|
const wrappedLines: string[] = [];
|
|
770
834
|
let currentLine = "";
|
|
@@ -779,11 +843,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
779
843
|
}
|
|
780
844
|
if (currentLine) wrappedLines.push(currentLine);
|
|
781
845
|
|
|
782
|
-
// Show last 3 lines of transcript
|
|
783
846
|
const visible = wrappedLines.slice(-3);
|
|
784
847
|
for (let i = 0; i < visible.length; i++) {
|
|
785
848
|
let line = visible[i];
|
|
786
|
-
// Style: finalized parts in normal text, interim in accent
|
|
787
849
|
if (i === visible.length - 1 && interim) {
|
|
788
850
|
line = theme.fg("text", line) + theme.fg("accent", "▍");
|
|
789
851
|
} else {
|
|
@@ -801,8 +863,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
801
863
|
}, { placement: "aboveEditor" });
|
|
802
864
|
}
|
|
803
865
|
|
|
804
|
-
|
|
805
|
-
|
|
866
|
+
// ─── Finalizing Widget ──────────────────────────────────────────────────
|
|
867
|
+
|
|
868
|
+
function showFinalizingWidget() {
|
|
806
869
|
if (!ctx?.hasUI) return;
|
|
807
870
|
ctx.ui.setWidget("voice-recording", (tui, theme) => {
|
|
808
871
|
return {
|
|
@@ -818,7 +881,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
818
881
|
};
|
|
819
882
|
const spinner = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
|
|
820
883
|
const idx = Math.floor(Date.now() / 100) % spinner.length;
|
|
821
|
-
const line = ` ${theme.fg("accent", spinner[idx])} ${theme.fg("dim", "Finalizing transcription
|
|
884
|
+
const line = ` ${theme.fg("accent", spinner[idx])} ${theme.fg("dim", "Finalizing transcription…")}`;
|
|
822
885
|
return [topBorder, side(line), botBorder];
|
|
823
886
|
},
|
|
824
887
|
};
|
|
@@ -828,104 +891,147 @@ export default function (pi: ExtensionAPI) {
|
|
|
828
891
|
// ─── Voice: Start / Stop (Streaming or Legacy) ───────────────────────────
|
|
829
892
|
|
|
830
893
|
async function startVoiceRecording(target: "editor" | "btw" = "editor"): Promise<boolean> {
|
|
831
|
-
if (
|
|
894
|
+
if (!ctx) return false;
|
|
895
|
+
|
|
896
|
+
// ── SESSION CORRUPTION GUARD ──
|
|
897
|
+
// If a previous session is still recording or finalizing, abort it first.
|
|
898
|
+
// This prevents the "slow connection overlaps new recording" bug.
|
|
899
|
+
if (voiceState === "finalizing" || voiceState === "recording") {
|
|
900
|
+
abortSession(activeSession);
|
|
901
|
+
activeSession = null;
|
|
902
|
+
clearRecordingAnimTimer();
|
|
903
|
+
clearWarmupWidget();
|
|
904
|
+
hideWidget();
|
|
905
|
+
setVoiceState("idle");
|
|
906
|
+
// Brief pause to let resources release
|
|
907
|
+
await new Promise((r) => setTimeout(r, CORRUPTION_GUARD_MS));
|
|
908
|
+
}
|
|
909
|
+
|
|
910
|
+
// ── STALE TRANSCRIPT CLEANUP ──
|
|
911
|
+
// Clear any prior transcript from the widget
|
|
912
|
+
hideWidget();
|
|
832
913
|
|
|
833
914
|
currentTarget = target;
|
|
834
915
|
recordingStart = Date.now();
|
|
916
|
+
retryAttempts = 0;
|
|
835
917
|
|
|
836
918
|
if (isDeepgramStreaming(config)) {
|
|
837
|
-
|
|
838
|
-
|
|
919
|
+
return startStreamingRecording(target);
|
|
920
|
+
} else {
|
|
921
|
+
return startLegacyRecording(target);
|
|
922
|
+
}
|
|
923
|
+
}
|
|
839
924
|
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
updateLiveTranscriptWidget(interim, finals);
|
|
843
|
-
updateVoiceStatus();
|
|
844
|
-
},
|
|
845
|
-
onDone: (fullText) => {
|
|
846
|
-
activeSession = null;
|
|
847
|
-
stopRecordingWidgetAnimation();
|
|
848
|
-
ctx?.ui.setWidget("voice-recording", undefined);
|
|
925
|
+
async function startStreamingRecording(target: "editor" | "btw"): Promise<boolean> {
|
|
926
|
+
setVoiceState("recording");
|
|
849
927
|
|
|
850
|
-
|
|
928
|
+
const session = startStreamingSession(config, {
|
|
929
|
+
onTranscript: (interim, finals) => {
|
|
930
|
+
// Live transcript update — this is the key UX feature
|
|
931
|
+
updateLiveTranscriptWidget(interim, finals);
|
|
932
|
+
updateVoiceStatus();
|
|
933
|
+
},
|
|
934
|
+
onDone: (fullText, meta) => {
|
|
935
|
+
activeSession = null;
|
|
936
|
+
clearRecordingAnimTimer();
|
|
937
|
+
hideWidget();
|
|
938
|
+
lastStopTime = Date.now();
|
|
939
|
+
|
|
940
|
+
if (!fullText.trim()) {
|
|
941
|
+
// ── DISTINGUISH SILENCE VS NO SPEECH ──
|
|
942
|
+
if (!meta.hadAudio) {
|
|
943
|
+
ctx?.ui.notify("Microphone captured no audio. Check mic permissions.", "error");
|
|
944
|
+
} else if (!meta.hadSpeech) {
|
|
945
|
+
ctx?.ui.notify("Microphone captured silence — no speech detected.", "warning");
|
|
946
|
+
} else {
|
|
851
947
|
ctx?.ui.notify("No speech detected.", "warning");
|
|
852
|
-
setVoiceState("idle");
|
|
853
|
-
return;
|
|
854
948
|
}
|
|
949
|
+
setVoiceState("idle");
|
|
950
|
+
return;
|
|
951
|
+
}
|
|
855
952
|
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
}
|
|
953
|
+
if (target === "btw") {
|
|
954
|
+
handleBtw(fullText);
|
|
955
|
+
} else {
|
|
956
|
+
if (ctx?.hasUI) {
|
|
957
|
+
const existing = ctx.ui.getEditorText();
|
|
958
|
+
ctx.ui.setEditorText(existing ? existing + " " + fullText : fullText);
|
|
959
|
+
const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
|
|
960
|
+
ctx.ui.notify(
|
|
961
|
+
`STT (${elapsed}s): ${fullText.slice(0, 80)}${fullText.length > 80 ? "…" : ""}`,
|
|
962
|
+
"info",
|
|
963
|
+
);
|
|
868
964
|
}
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
965
|
+
}
|
|
966
|
+
setVoiceState("idle");
|
|
967
|
+
},
|
|
968
|
+
onError: (err) => {
|
|
969
|
+
activeSession = null;
|
|
970
|
+
clearRecordingAnimTimer();
|
|
971
|
+
hideWidget();
|
|
972
|
+
|
|
973
|
+
// ── TRANSIENT FAILURE RETRY ──
|
|
974
|
+
// On any session error (WebSocket failure or Deepgram error message), auto-retry up to MAX_RETRY_ATTEMPTS times
|
|
975
|
+
if (retryAttempts < MAX_RETRY_ATTEMPTS) {
|
|
976
|
+
retryAttempts++;
|
|
977
|
+
ctx?.ui.notify(`Voice connection error — retrying (${retryAttempts}/${MAX_RETRY_ATTEMPTS})…`, "warning");
|
|
978
|
+
setTimeout(() => {
|
|
979
|
+
if (voiceState !== "idle") {
|
|
980
|
+
setVoiceState("idle");
|
|
981
|
+
}
|
|
982
|
+
startStreamingRecording(target);
|
|
983
|
+
}, RETRY_DELAY_MS);
|
|
984
|
+
return;
|
|
985
|
+
}
|
|
879
986
|
|
|
880
|
-
|
|
987
|
+
ctx?.ui.notify(`STT error: ${err}`, "error");
|
|
881
988
|
setVoiceState("idle");
|
|
882
|
-
|
|
883
|
-
|
|
989
|
+
},
|
|
990
|
+
});
|
|
991
|
+
|
|
992
|
+
if (!session) {
|
|
993
|
+
setVoiceState("idle");
|
|
994
|
+
return false;
|
|
995
|
+
}
|
|
884
996
|
|
|
885
|
-
|
|
997
|
+
activeSession = session;
|
|
886
998
|
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
stopVoiceRecording(target);
|
|
895
|
-
}
|
|
999
|
+
// Status timer for elapsed time
|
|
1000
|
+
statusTimer = setInterval(() => {
|
|
1001
|
+
if (voiceState === "recording") {
|
|
1002
|
+
updateVoiceStatus();
|
|
1003
|
+
const elapsed = (Date.now() - recordingStart) / 1000;
|
|
1004
|
+
if (elapsed >= MAX_RECORDING_SECS) {
|
|
1005
|
+
stopVoiceRecording(target);
|
|
896
1006
|
}
|
|
897
|
-
}
|
|
1007
|
+
}
|
|
1008
|
+
}, 1000);
|
|
898
1009
|
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
1010
|
+
showRecordingWidget(target);
|
|
1011
|
+
return true;
|
|
1012
|
+
}
|
|
902
1013
|
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
1014
|
+
async function startLegacyRecording(target: "editor" | "btw"): Promise<boolean> {
|
|
1015
|
+
if (!ctx) return false;
|
|
1016
|
+
tempFile = path.join(os.tmpdir(), `pi-voice-${Date.now()}.wav`);
|
|
1017
|
+
if (!startLegacyRecordingToFile(tempFile)) {
|
|
1018
|
+
ctx.ui.notify("Voice requires SoX. Install: brew install sox", "error");
|
|
1019
|
+
return false;
|
|
1020
|
+
}
|
|
910
1021
|
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
stopVoiceRecording(target);
|
|
919
|
-
}
|
|
1022
|
+
setVoiceState("recording");
|
|
1023
|
+
statusTimer = setInterval(() => {
|
|
1024
|
+
if (voiceState === "recording") {
|
|
1025
|
+
updateVoiceStatus();
|
|
1026
|
+
const elapsed = (Date.now() - recordingStart) / 1000;
|
|
1027
|
+
if (elapsed >= MAX_RECORDING_SECS) {
|
|
1028
|
+
stopVoiceRecording(target);
|
|
920
1029
|
}
|
|
921
|
-
}, 1000);
|
|
922
|
-
|
|
923
|
-
if (ctx.hasUI) {
|
|
924
|
-
// Show themed recording widget for legacy path
|
|
925
|
-
showRecordingWidget(target);
|
|
926
1030
|
}
|
|
927
|
-
|
|
928
|
-
|
|
1031
|
+
}, 1000);
|
|
1032
|
+
|
|
1033
|
+
showRecordingWidget(target);
|
|
1034
|
+
return true;
|
|
929
1035
|
}
|
|
930
1036
|
|
|
931
1037
|
async function stopVoiceRecording(target: "editor" | "btw" = "editor") {
|
|
@@ -933,34 +1039,35 @@ export default function (pi: ExtensionAPI) {
|
|
|
933
1039
|
if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
|
|
934
1040
|
|
|
935
1041
|
if (activeSession) {
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
showTranscribingWidget();
|
|
1042
|
+
setVoiceState("finalizing");
|
|
1043
|
+
clearRecordingAnimTimer();
|
|
1044
|
+
showFinalizingWidget();
|
|
940
1045
|
stopStreamingSession(activeSession);
|
|
941
1046
|
return;
|
|
942
1047
|
}
|
|
943
1048
|
|
|
944
|
-
//
|
|
1049
|
+
// Legacy path
|
|
945
1050
|
const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
|
|
946
1051
|
const audioFile = tempFile;
|
|
947
|
-
setVoiceState("
|
|
948
|
-
|
|
949
|
-
|
|
1052
|
+
setVoiceState("finalizing");
|
|
1053
|
+
clearRecordingAnimTimer();
|
|
1054
|
+
showFinalizingWidget();
|
|
950
1055
|
|
|
951
1056
|
await stopLegacyRecording();
|
|
952
1057
|
|
|
953
1058
|
if (!audioFile || !fs.existsSync(audioFile)) {
|
|
954
1059
|
ctx.ui.notify("No audio recorded.", "warning");
|
|
1060
|
+
hideWidget();
|
|
955
1061
|
setVoiceState("idle");
|
|
956
1062
|
return;
|
|
957
1063
|
}
|
|
958
1064
|
|
|
959
1065
|
const stats = fs.statSync(audioFile);
|
|
960
1066
|
if (stats.size < 1000) {
|
|
961
|
-
ctx.ui.notify("Recording too short.", "warning");
|
|
1067
|
+
ctx.ui.notify("Recording too short — mic captured silence.", "warning");
|
|
962
1068
|
try { fs.unlinkSync(audioFile); } catch {}
|
|
963
1069
|
tempFile = null;
|
|
1070
|
+
hideWidget();
|
|
964
1071
|
setVoiceState("idle");
|
|
965
1072
|
return;
|
|
966
1073
|
}
|
|
@@ -971,6 +1078,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
971
1078
|
try { fs.unlinkSync(audioFile); } catch {}
|
|
972
1079
|
if (tempFile === audioFile) tempFile = null;
|
|
973
1080
|
|
|
1081
|
+
hideWidget();
|
|
1082
|
+
|
|
974
1083
|
if (result.error) {
|
|
975
1084
|
ctx.ui.notify(`STT error: ${result.error}`, "error");
|
|
976
1085
|
setVoiceState("idle");
|
|
@@ -991,7 +1100,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
991
1100
|
const existing = ctx.ui.getEditorText();
|
|
992
1101
|
ctx.ui.setEditorText(existing ? existing + " " + transcript : transcript);
|
|
993
1102
|
ctx.ui.notify(
|
|
994
|
-
`STT (${elapsed}s): ${transcript.slice(0, 80)}${transcript.length > 80 ? "
|
|
1103
|
+
`STT (${elapsed}s): ${transcript.slice(0, 80)}${transcript.length > 80 ? "…" : ""}`,
|
|
995
1104
|
"info",
|
|
996
1105
|
);
|
|
997
1106
|
}
|
|
@@ -1000,83 +1109,46 @@ export default function (pi: ExtensionAPI) {
|
|
|
1000
1109
|
setVoiceState("idle");
|
|
1001
1110
|
}
|
|
1002
1111
|
|
|
1003
|
-
// ─── Hold-to-
|
|
1004
|
-
//
|
|
1005
|
-
// SPACE activates voice ONLY when:
|
|
1006
|
-
// 1. The editor is empty (no text typed yet)
|
|
1007
|
-
// 2. SPACE is held for ≥ HOLD_THRESHOLD_MS (500ms)
|
|
1112
|
+
// ─── Hold-to-Talk State Machine ─────────────────────────────────────────
|
|
1008
1113
|
//
|
|
1009
|
-
//
|
|
1010
|
-
// is typed into the editor (normal typing behavior).
|
|
1114
|
+
// SPACE key handling with strict hold-duration detection:
|
|
1011
1115
|
//
|
|
1012
|
-
//
|
|
1013
|
-
//
|
|
1014
|
-
//
|
|
1015
|
-
//
|
|
1016
|
-
//
|
|
1116
|
+
// 1. SPACE press (first) → enter "warmup" state, start 500ms timer
|
|
1117
|
+
// 2. During warmup: show progress bar, consume repeat presses
|
|
1118
|
+
// 3. Timer fires → transition to "recording", start voice capture
|
|
1119
|
+
// 4. SPACE release → stop recording, finalize
|
|
1120
|
+
// 5. If released during warmup → cancel, type a space character
|
|
1017
1121
|
//
|
|
1018
|
-
//
|
|
1019
|
-
// Hold SPACE → rapid presses arrive → first press starts 500ms timer →
|
|
1020
|
-
// timer fires → recording starts → presses keep coming (consumed) →
|
|
1021
|
-
// user releases → presses stop → 200ms silence → auto-stop recording
|
|
1022
|
-
//
|
|
1023
|
-
// Kitty protocol terminals get true key-release events and work natively.
|
|
1024
|
-
|
|
1025
|
-
const HOLD_THRESHOLD_MS = 500; // minimum hold time before voice activates
|
|
1026
|
-
const RELEASE_DETECT_MS = 200; // gap in key-repeat that means "released"
|
|
1027
|
-
let kittyReleaseDetected = false;
|
|
1028
|
-
let spaceDownTime: number | null = null;
|
|
1029
|
-
let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
|
|
1030
|
-
let spaceConsumed = false;
|
|
1031
|
-
let lastSpacePressTime = 0;
|
|
1032
|
-
let releaseDetectTimer: ReturnType<typeof setTimeout> | null = null;
|
|
1033
|
-
|
|
1034
|
-
function clearHoldTimer() {
|
|
1035
|
-
if (holdActivationTimer) {
|
|
1036
|
-
clearTimeout(holdActivationTimer);
|
|
1037
|
-
holdActivationTimer = null;
|
|
1038
|
-
}
|
|
1039
|
-
}
|
|
1040
|
-
|
|
1041
|
-
function clearReleaseTimer() {
|
|
1042
|
-
if (releaseDetectTimer) {
|
|
1043
|
-
clearTimeout(releaseDetectTimer);
|
|
1044
|
-
releaseDetectTimer = null;
|
|
1045
|
-
}
|
|
1046
|
-
}
|
|
1122
|
+
// Non-Kitty detection: rapid press events = "holding", gap > 150ms = "released"
|
|
1047
1123
|
|
|
1048
|
-
/** Called when we detect the user has released SPACE (non-Kitty) */
|
|
1049
1124
|
function onSpaceReleaseDetected() {
|
|
1050
1125
|
releaseDetectTimer = null;
|
|
1051
1126
|
|
|
1052
|
-
//
|
|
1053
|
-
if (
|
|
1127
|
+
// Released during warmup — cancel, type a space
|
|
1128
|
+
if (voiceState === "warmup") {
|
|
1054
1129
|
clearHoldTimer();
|
|
1130
|
+
clearWarmupWidget();
|
|
1131
|
+
hideWidget();
|
|
1132
|
+
setVoiceState("idle");
|
|
1055
1133
|
spaceDownTime = null;
|
|
1056
1134
|
spaceConsumed = false;
|
|
1057
|
-
// Insert a space character
|
|
1058
1135
|
if (ctx?.hasUI) {
|
|
1059
1136
|
ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1060
|
-
hideHoldHintWidget();
|
|
1061
1137
|
}
|
|
1062
1138
|
return;
|
|
1063
1139
|
}
|
|
1064
1140
|
|
|
1065
|
-
//
|
|
1141
|
+
// Released during recording — stop
|
|
1066
1142
|
if (spaceConsumed && voiceState === "recording") {
|
|
1067
|
-
isHolding = false;
|
|
1068
1143
|
spaceConsumed = false;
|
|
1069
1144
|
spaceDownTime = null;
|
|
1070
1145
|
stopVoiceRecording("editor");
|
|
1071
1146
|
}
|
|
1072
1147
|
}
|
|
1073
1148
|
|
|
1074
|
-
/** Reset the release detection timer — called on every space press */
|
|
1075
1149
|
function resetReleaseDetect() {
|
|
1076
1150
|
clearReleaseTimer();
|
|
1077
|
-
|
|
1078
|
-
// start a timer to detect release
|
|
1079
|
-
if (spaceDownTime || spaceConsumed || voiceState === "recording") {
|
|
1151
|
+
if (voiceState === "warmup" || voiceState === "recording" || spaceDownTime || spaceConsumed) {
|
|
1080
1152
|
releaseDetectTimer = setTimeout(onSpaceReleaseDetected, RELEASE_DETECT_MS);
|
|
1081
1153
|
}
|
|
1082
1154
|
}
|
|
@@ -1091,33 +1163,26 @@ export default function (pi: ExtensionAPI) {
|
|
|
1091
1163
|
|
|
1092
1164
|
// ── SPACE handling ──
|
|
1093
1165
|
if (matchesKey(data, "space")) {
|
|
1094
|
-
// RULE: If editor has content, SPACE always types a space — never voice
|
|
1095
|
-
const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
|
|
1096
|
-
if (editorText && editorText.trim().length > 0) {
|
|
1097
|
-
clearHoldTimer();
|
|
1098
|
-
clearReleaseTimer();
|
|
1099
|
-
spaceDownTime = null;
|
|
1100
|
-
spaceConsumed = false;
|
|
1101
|
-
return undefined; // let the default space character through
|
|
1102
|
-
}
|
|
1103
1166
|
|
|
1104
1167
|
// ── Kitty key-release ──
|
|
1105
1168
|
if (isKeyRelease(data)) {
|
|
1106
1169
|
kittyReleaseDetected = true;
|
|
1107
1170
|
clearReleaseTimer();
|
|
1108
1171
|
|
|
1109
|
-
// Released
|
|
1110
|
-
if (
|
|
1172
|
+
// Released during warmup → cancel, type a space
|
|
1173
|
+
if (voiceState === "warmup") {
|
|
1111
1174
|
clearHoldTimer();
|
|
1175
|
+
clearWarmupWidget();
|
|
1176
|
+
hideWidget();
|
|
1177
|
+
setVoiceState("idle");
|
|
1112
1178
|
spaceDownTime = null;
|
|
1113
1179
|
spaceConsumed = false;
|
|
1114
1180
|
if (ctx?.hasUI) ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1115
1181
|
return { consume: true };
|
|
1116
1182
|
}
|
|
1117
1183
|
|
|
1118
|
-
// Released
|
|
1184
|
+
// Released during recording → stop
|
|
1119
1185
|
if (spaceConsumed && voiceState === "recording") {
|
|
1120
|
-
isHolding = false;
|
|
1121
1186
|
spaceConsumed = false;
|
|
1122
1187
|
spaceDownTime = null;
|
|
1123
1188
|
stopVoiceRecording("editor");
|
|
@@ -1129,60 +1194,58 @@ export default function (pi: ExtensionAPI) {
|
|
|
1129
1194
|
return undefined;
|
|
1130
1195
|
}
|
|
1131
1196
|
|
|
1132
|
-
// ── Kitty key-repeat:
|
|
1197
|
+
// ── Kitty key-repeat: suppress while in warmup/recording ──
|
|
1133
1198
|
if (isKeyRepeat(data)) {
|
|
1134
|
-
if (
|
|
1135
|
-
resetReleaseDetect();
|
|
1199
|
+
if (voiceState === "warmup" || voiceState === "recording" || voiceState === "finalizing" || spaceConsumed) {
|
|
1200
|
+
resetReleaseDetect();
|
|
1136
1201
|
return { consume: true };
|
|
1137
1202
|
}
|
|
1138
1203
|
return undefined;
|
|
1139
1204
|
}
|
|
1140
1205
|
|
|
1141
1206
|
// === Key PRESS ===
|
|
1142
|
-
// In non-Kitty terminals, holding a key sends rapid press events.
|
|
1143
|
-
// We use these to detect "still holding" and the gap to detect "released".
|
|
1144
|
-
|
|
1145
|
-
// Reset release detection — user is still holding
|
|
1146
1207
|
resetReleaseDetect();
|
|
1147
1208
|
|
|
1148
|
-
// If
|
|
1149
|
-
if (voiceState === "
|
|
1209
|
+
// If finalizing → ignore
|
|
1210
|
+
if (voiceState === "finalizing") {
|
|
1150
1211
|
return { consume: true };
|
|
1151
1212
|
}
|
|
1152
1213
|
|
|
1153
|
-
// If already recording → just consume (release
|
|
1214
|
+
// If already recording → just consume (release handles stop)
|
|
1154
1215
|
if (voiceState === "recording") {
|
|
1155
1216
|
return { consume: true };
|
|
1156
1217
|
}
|
|
1157
1218
|
|
|
1158
|
-
// If
|
|
1159
|
-
if (
|
|
1219
|
+
// If already in warmup → consume (threshold timer is running)
|
|
1220
|
+
if (voiceState === "warmup") {
|
|
1221
|
+
return { consume: true };
|
|
1222
|
+
}
|
|
1223
|
+
|
|
1224
|
+
// If we've already consumed space for this hold → consume
|
|
1225
|
+
if (spaceConsumed || spaceDownTime) {
|
|
1160
1226
|
return { consume: true };
|
|
1161
1227
|
}
|
|
1162
1228
|
|
|
1163
|
-
//
|
|
1229
|
+
// IDLE — first press → start warmup
|
|
1164
1230
|
if (voiceState === "idle") {
|
|
1165
1231
|
spaceDownTime = Date.now();
|
|
1166
1232
|
spaceConsumed = false;
|
|
1167
|
-
lastSpacePressTime = Date.now();
|
|
1168
1233
|
|
|
1169
|
-
//
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
}
|
|
1234
|
+
// Transition to warmup state
|
|
1235
|
+
setVoiceState("warmup");
|
|
1236
|
+
showWarmupWidget();
|
|
1173
1237
|
|
|
1174
1238
|
// After threshold: activate voice recording
|
|
1175
1239
|
holdActivationTimer = setTimeout(() => {
|
|
1176
1240
|
holdActivationTimer = null;
|
|
1177
|
-
|
|
1178
|
-
|
|
1241
|
+
if (voiceState === "warmup" && spaceDownTime) {
|
|
1242
|
+
clearWarmupWidget();
|
|
1179
1243
|
spaceConsumed = true;
|
|
1180
|
-
isHolding = true;
|
|
1181
1244
|
startVoiceRecording("editor").then((ok) => {
|
|
1182
1245
|
if (!ok) {
|
|
1183
|
-
isHolding = false;
|
|
1184
1246
|
spaceConsumed = false;
|
|
1185
1247
|
spaceDownTime = null;
|
|
1248
|
+
setVoiceState("idle");
|
|
1186
1249
|
}
|
|
1187
1250
|
});
|
|
1188
1251
|
} else {
|
|
@@ -1194,17 +1257,19 @@ export default function (pi: ExtensionAPI) {
|
|
|
1194
1257
|
return { consume: true };
|
|
1195
1258
|
}
|
|
1196
1259
|
|
|
1197
|
-
if (
|
|
1260
|
+
if (spaceConsumed) return { consume: true };
|
|
1198
1261
|
return undefined;
|
|
1199
1262
|
}
|
|
1200
1263
|
|
|
1201
|
-
// ── Any other key
|
|
1202
|
-
if (
|
|
1264
|
+
// ── Any other key during warmup → cancel hold, type a space ──
|
|
1265
|
+
if (voiceState === "warmup" && spaceDownTime && !spaceConsumed) {
|
|
1203
1266
|
clearHoldTimer();
|
|
1204
1267
|
clearReleaseTimer();
|
|
1268
|
+
clearWarmupWidget();
|
|
1269
|
+
hideWidget();
|
|
1270
|
+
setVoiceState("idle");
|
|
1205
1271
|
if (ctx?.hasUI) {
|
|
1206
1272
|
ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1207
|
-
hideHoldHintWidget();
|
|
1208
1273
|
}
|
|
1209
1274
|
spaceDownTime = null;
|
|
1210
1275
|
spaceConsumed = false;
|
|
@@ -1215,8 +1280,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1215
1280
|
if (matchesKey(data, "ctrl+shift+b")) {
|
|
1216
1281
|
if (isKeyRelease(data)) {
|
|
1217
1282
|
kittyReleaseDetected = true;
|
|
1218
|
-
if (
|
|
1219
|
-
isHolding = false;
|
|
1283
|
+
if (voiceState === "recording" && currentTarget === "btw") {
|
|
1220
1284
|
stopVoiceRecording("btw");
|
|
1221
1285
|
return { consume: true };
|
|
1222
1286
|
}
|
|
@@ -1224,25 +1288,23 @@ export default function (pi: ExtensionAPI) {
|
|
|
1224
1288
|
}
|
|
1225
1289
|
|
|
1226
1290
|
if (isKeyRepeat(data)) {
|
|
1227
|
-
if (
|
|
1291
|
+
if (voiceState === "recording" && currentTarget === "btw") return { consume: true };
|
|
1228
1292
|
return undefined;
|
|
1229
1293
|
}
|
|
1230
1294
|
|
|
1231
|
-
if (voiceState === "recording") {
|
|
1232
|
-
isHolding = false;
|
|
1295
|
+
if (voiceState === "recording" && currentTarget === "btw") {
|
|
1233
1296
|
stopVoiceRecording("btw");
|
|
1234
1297
|
return { consume: true };
|
|
1235
1298
|
}
|
|
1236
1299
|
|
|
1237
|
-
if (voiceState === "idle"
|
|
1238
|
-
|
|
1239
|
-
startVoiceRecording("btw").then((ok) => {
|
|
1240
|
-
if (!ok) isHolding = false;
|
|
1241
|
-
});
|
|
1300
|
+
if (voiceState === "idle") {
|
|
1301
|
+
startVoiceRecording("btw");
|
|
1242
1302
|
return { consume: true };
|
|
1243
1303
|
}
|
|
1244
1304
|
|
|
1245
|
-
if (
|
|
1305
|
+
if (voiceState === "recording" || voiceState === "finalizing" || voiceState === "warmup") {
|
|
1306
|
+
return { consume: true };
|
|
1307
|
+
}
|
|
1246
1308
|
return undefined;
|
|
1247
1309
|
}
|
|
1248
1310
|
|
|
@@ -1286,12 +1348,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
1286
1348
|
"",
|
|
1287
1349
|
];
|
|
1288
1350
|
|
|
1289
|
-
lines.push(` Q: ${last.question.slice(0, 100)}${last.question.length > 100 ? "
|
|
1351
|
+
lines.push(` Q: ${last.question.slice(0, 100)}${last.question.length > 100 ? "…" : ""}`);
|
|
1290
1352
|
const answerLines = last.answer.split("\n");
|
|
1291
1353
|
for (const line of answerLines.slice(0, 8)) {
|
|
1292
1354
|
lines.push(` ${line}`);
|
|
1293
1355
|
}
|
|
1294
|
-
if (answerLines.length > 8) lines.push("
|
|
1356
|
+
if (answerLines.length > 8) lines.push(" …");
|
|
1295
1357
|
|
|
1296
1358
|
lines.push("");
|
|
1297
1359
|
lines.push(" /btw:clear to dismiss | /btw:inject to send to agent");
|
|
@@ -1307,9 +1369,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
1307
1369
|
ctx.ui.setWidget("btw", [
|
|
1308
1370
|
" BTW",
|
|
1309
1371
|
"",
|
|
1310
|
-
` Q: ${message.slice(0, 100)}${message.length > 100 ? "
|
|
1372
|
+
` Q: ${message.slice(0, 100)}${message.length > 100 ? "…" : ""}`,
|
|
1311
1373
|
"",
|
|
1312
|
-
" Thinking
|
|
1374
|
+
" Thinking…",
|
|
1313
1375
|
], { placement: "aboveEditor" });
|
|
1314
1376
|
|
|
1315
1377
|
const btwContext = buildBtwContext();
|
|
@@ -1394,16 +1456,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
1394
1456
|
return;
|
|
1395
1457
|
}
|
|
1396
1458
|
if (voiceState === "idle") {
|
|
1397
|
-
// Direct start — bypass hold threshold
|
|
1398
1459
|
spaceConsumed = true;
|
|
1399
|
-
isHolding = true;
|
|
1400
1460
|
const ok = await startVoiceRecording("editor");
|
|
1401
1461
|
if (!ok) {
|
|
1402
|
-
isHolding = false;
|
|
1403
1462
|
spaceConsumed = false;
|
|
1404
1463
|
}
|
|
1405
1464
|
} else if (voiceState === "recording") {
|
|
1406
|
-
isHolding = false;
|
|
1407
1465
|
spaceConsumed = false;
|
|
1408
1466
|
spaceDownTime = null;
|
|
1409
1467
|
clearHoldTimer();
|
|
@@ -1422,9 +1480,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1422
1480
|
configSource = loaded.source;
|
|
1423
1481
|
updateSocketPath(config, currentCwd);
|
|
1424
1482
|
|
|
1425
|
-
// Auto-capture DEEPGRAM_API_KEY from env into config
|
|
1426
|
-
// This ensures streaming works even when Pi is launched from a context
|
|
1427
|
-
// that doesn't source .zshrc (GUI app, tmux, etc.)
|
|
1483
|
+
// Auto-capture DEEPGRAM_API_KEY from env into config
|
|
1428
1484
|
if (process.env.DEEPGRAM_API_KEY && !config.deepgramApiKey) {
|
|
1429
1485
|
config.deepgramApiKey = process.env.DEEPGRAM_API_KEY;
|
|
1430
1486
|
if (configSource !== "default") {
|
|
@@ -1432,7 +1488,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1432
1488
|
}
|
|
1433
1489
|
}
|
|
1434
1490
|
|
|
1435
|
-
//
|
|
1491
|
+
// Try to load DEEPGRAM_API_KEY from shell if not available
|
|
1436
1492
|
if (!resolveDeepgramApiKey(config) && config.backend === "deepgram") {
|
|
1437
1493
|
try {
|
|
1438
1494
|
const result = spawnSync("zsh", ["-ic", "echo $DEEPGRAM_API_KEY"], {
|
|
@@ -1443,7 +1499,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1443
1499
|
const shellKey = result.stdout?.toString().trim();
|
|
1444
1500
|
if (shellKey && shellKey.length > 5) {
|
|
1445
1501
|
config.deepgramApiKey = shellKey;
|
|
1446
|
-
process.env.DEEPGRAM_API_KEY = shellKey;
|
|
1502
|
+
process.env.DEEPGRAM_API_KEY = shellKey;
|
|
1447
1503
|
if (configSource !== "default") {
|
|
1448
1504
|
saveConfig(config, config.scope, currentCwd);
|
|
1449
1505
|
}
|
|
@@ -1454,7 +1510,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
1454
1510
|
if (config.enabled && config.onboarding.completed) {
|
|
1455
1511
|
updateVoiceStatus();
|
|
1456
1512
|
setupHoldToTalk();
|
|
1457
|
-
// Only start daemon for non-streaming backends
|
|
1458
1513
|
if (!isDeepgramStreaming(config)) {
|
|
1459
1514
|
ensureDaemon(config).catch(() => {});
|
|
1460
1515
|
}
|
|
@@ -1515,7 +1570,15 @@ export default function (pi: ExtensionAPI) {
|
|
|
1515
1570
|
ensureDaemon(config).catch(() => {});
|
|
1516
1571
|
}
|
|
1517
1572
|
const mode = isDeepgramStreaming(config) ? "Deepgram streaming" : config.backend;
|
|
1518
|
-
cmdCtx.ui.notify(
|
|
1573
|
+
cmdCtx.ui.notify([
|
|
1574
|
+
`Voice enabled (${mode}).`,
|
|
1575
|
+
"",
|
|
1576
|
+
" Hold SPACE (500ms) → release to transcribe",
|
|
1577
|
+
" Ctrl+Shift+V → toggle recording on/off",
|
|
1578
|
+
" Quick SPACE tap → types a space (no voice)",
|
|
1579
|
+
"",
|
|
1580
|
+
" Live transcription shown while speaking",
|
|
1581
|
+
].join("\n"), "info");
|
|
1519
1582
|
return;
|
|
1520
1583
|
}
|
|
1521
1584
|
|
|
@@ -1530,9 +1593,14 @@ export default function (pi: ExtensionAPI) {
|
|
|
1530
1593
|
|
|
1531
1594
|
if (sub === "stop") {
|
|
1532
1595
|
if (voiceState === "recording") {
|
|
1533
|
-
isHolding = false;
|
|
1534
1596
|
await stopVoiceRecording("editor");
|
|
1535
1597
|
cmdCtx.ui.notify("Recording stopped and transcribed.", "info");
|
|
1598
|
+
} else if (voiceState === "warmup") {
|
|
1599
|
+
clearHoldTimer();
|
|
1600
|
+
clearWarmupWidget();
|
|
1601
|
+
hideWidget();
|
|
1602
|
+
setVoiceState("idle");
|
|
1603
|
+
cmdCtx.ui.notify("Warmup cancelled.", "info");
|
|
1536
1604
|
} else {
|
|
1537
1605
|
cmdCtx.ui.notify("No recording in progress.", "info");
|
|
1538
1606
|
}
|
|
@@ -1540,7 +1608,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1540
1608
|
}
|
|
1541
1609
|
|
|
1542
1610
|
if (sub === "test") {
|
|
1543
|
-
cmdCtx.ui.notify("Testing voice setup
|
|
1611
|
+
cmdCtx.ui.notify("Testing voice setup…", "info");
|
|
1544
1612
|
const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
|
|
1545
1613
|
const dgKey = resolveDeepgramApiKey(config);
|
|
1546
1614
|
const streaming = isDeepgramStreaming(config);
|
|
@@ -1557,11 +1625,15 @@ export default function (pi: ExtensionAPI) {
|
|
|
1557
1625
|
` model status: ${modelReadiness}`,
|
|
1558
1626
|
` language: ${config.language}`,
|
|
1559
1627
|
` streaming: ${streaming ? "YES (Deepgram WS)" : "NO (batch)"}`,
|
|
1560
|
-
` DEEPGRAM_API_KEY: ${dgKey ? "set (" + dgKey.slice(0, 8) + "
|
|
1628
|
+
` DEEPGRAM_API_KEY: ${dgKey ? "set (" + dgKey.slice(0, 8) + "…)" : "NOT SET"}`,
|
|
1561
1629
|
` onboarding: ${config.onboarding.completed ? "complete" : "incomplete"}`,
|
|
1562
1630
|
` python3: ${diagnostics.hasPython ? "OK" : "missing"}`,
|
|
1563
1631
|
` sox/rec: ${diagnostics.hasSox ? "OK" : "missing"}`,
|
|
1564
1632
|
` daemon: ${daemonUp ? "running" : "not running"}`,
|
|
1633
|
+
` state: ${voiceState}`,
|
|
1634
|
+
` hold threshold: ${HOLD_THRESHOLD_MS}ms`,
|
|
1635
|
+
` release detect: ${RELEASE_DETECT_MS}ms`,
|
|
1636
|
+
` kitty protocol: ${kittyReleaseDetected ? "detected" : "not detected"}`,
|
|
1565
1637
|
];
|
|
1566
1638
|
|
|
1567
1639
|
if (diagnostics.hasSox) {
|
|
@@ -1615,14 +1687,15 @@ export default function (pi: ExtensionAPI) {
|
|
|
1615
1687
|
` setup: ${config.onboarding.completed ? `complete (${config.onboarding.source ?? "unknown"})` : "incomplete"}`,
|
|
1616
1688
|
` socket: ${activeSocketPath}`,
|
|
1617
1689
|
` daemon: ${daemonUp ? "running" : "stopped"}${daemonInfo}`,
|
|
1618
|
-
` hold-key: SPACE (
|
|
1690
|
+
` hold-key: SPACE (hold ≥${HOLD_THRESHOLD_MS}ms) or Ctrl+Shift+V (toggle)`,
|
|
1619
1691
|
` btw-key: Ctrl+Shift+B (hold to record → auto-btw)`,
|
|
1692
|
+
` kitty: ${kittyReleaseDetected ? "yes" : "no"}`,
|
|
1620
1693
|
].join("\n"), "info");
|
|
1621
1694
|
return;
|
|
1622
1695
|
}
|
|
1623
1696
|
|
|
1624
1697
|
if (sub === "daemon" || sub === "daemon start") {
|
|
1625
|
-
cmdCtx.ui.notify("Starting STT daemon
|
|
1698
|
+
cmdCtx.ui.notify("Starting STT daemon…", "info");
|
|
1626
1699
|
const ok = await ensureDaemon(config);
|
|
1627
1700
|
cmdCtx.ui.notify(ok ? "Daemon started." : "Failed to start daemon.", ok ? "info" : "error");
|
|
1628
1701
|
return;
|
|
@@ -1864,7 +1937,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1864
1937
|
return;
|
|
1865
1938
|
}
|
|
1866
1939
|
|
|
1867
|
-
cmdCtx.ui.notify("Summarizing BTW thread
|
|
1940
|
+
cmdCtx.ui.notify("Summarizing BTW thread…", "info");
|
|
1868
1941
|
|
|
1869
1942
|
try {
|
|
1870
1943
|
let summary = "";
|