@codexstar/pi-listen 1.0.12 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extensions/voice/config.ts +4 -0
- package/extensions/voice.ts +814 -141
- package/package.json +1 -1
|
@@ -31,6 +31,8 @@ export interface VoiceConfig {
|
|
|
31
31
|
scope: VoiceSettingsScope;
|
|
32
32
|
btwEnabled: boolean;
|
|
33
33
|
onboarding: VoiceOnboardingState;
|
|
34
|
+
/** Deepgram API key — stored in config so it's available even when env var isn't set */
|
|
35
|
+
deepgramApiKey?: string;
|
|
34
36
|
}
|
|
35
37
|
|
|
36
38
|
export interface LoadedVoiceConfig {
|
|
@@ -60,6 +62,7 @@ export const DEFAULT_CONFIG: VoiceConfig = {
|
|
|
60
62
|
model: "small",
|
|
61
63
|
scope: "global",
|
|
62
64
|
btwEnabled: true,
|
|
65
|
+
deepgramApiKey: undefined,
|
|
63
66
|
onboarding: {
|
|
64
67
|
completed: false,
|
|
65
68
|
schemaVersion: VOICE_CONFIG_VERSION,
|
|
@@ -121,6 +124,7 @@ function migrateConfig(rawVoice: any, source: VoiceConfigSource): VoiceConfig {
|
|
|
121
124
|
model: typeof rawVoice.model === "string" ? rawVoice.model : DEFAULT_CONFIG.model,
|
|
122
125
|
scope: (rawVoice.scope as VoiceSettingsScope | undefined) ?? (source === "project" ? "project" : "global"),
|
|
123
126
|
btwEnabled: typeof rawVoice.btwEnabled === "boolean" ? rawVoice.btwEnabled : DEFAULT_CONFIG.btwEnabled,
|
|
127
|
+
deepgramApiKey: typeof rawVoice.deepgramApiKey === "string" ? rawVoice.deepgramApiKey : undefined,
|
|
124
128
|
onboarding: normalizeOnboarding(rawVoice.onboarding, fallbackCompleted),
|
|
125
129
|
};
|
|
126
130
|
}
|
package/extensions/voice.ts
CHANGED
|
@@ -1,22 +1,27 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* pi-voice —
|
|
2
|
+
* pi-voice — Deepgram WebSocket streaming STT for Pi CLI.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* 1.
|
|
6
|
-
*
|
|
7
|
-
* 2.
|
|
8
|
-
* 3.
|
|
4
|
+
* Architecture (modeled after Claude Code's voice pipeline):
|
|
5
|
+
* 1. SoX `rec` captures mic audio as raw PCM (16kHz, mono, 16-bit)
|
|
6
|
+
* and pipes it to stdout (no file).
|
|
7
|
+
* 2. Raw PCM chunks are streamed over a WebSocket to Deepgram Nova 3.
|
|
8
|
+
* 3. Deepgram returns interim + final transcripts in real-time.
|
|
9
|
+
* 4. Interim transcripts update a live widget above the editor.
|
|
10
|
+
* 5. On key-release (or toggle stop), a CloseStream message is sent;
|
|
11
|
+
* final transcript is injected into the editor.
|
|
9
12
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
13
|
+
* Activation:
|
|
14
|
+
* - Hold SPACE (empty editor) → release to finalize
|
|
15
|
+
* - Ctrl+Shift+V → toggle start/stop (fallback for non-Kitty terminals)
|
|
16
|
+
* - Ctrl+Shift+B → hold to record → auto-send as /btw
|
|
12
17
|
*
|
|
13
|
-
* Config in ~/.pi/agent/settings.json
|
|
18
|
+
* Config in ~/.pi/agent/settings.json:
|
|
14
19
|
* {
|
|
15
20
|
* "voice": {
|
|
16
21
|
* "enabled": true,
|
|
17
22
|
* "language": "en",
|
|
18
|
-
* "backend": "
|
|
19
|
-
* "model": "
|
|
23
|
+
* "backend": "deepgram",
|
|
24
|
+
* "model": "nova-3"
|
|
20
25
|
* }
|
|
21
26
|
* }
|
|
22
27
|
*/
|
|
@@ -65,6 +70,14 @@ interface BtwExchange {
|
|
|
65
70
|
// ─── Constants ───────────────────────────────────────────────────────────────
|
|
66
71
|
|
|
67
72
|
const SAMPLE_RATE = 16000;
|
|
73
|
+
const CHANNELS = 1;
|
|
74
|
+
const ENCODING = "linear16";
|
|
75
|
+
const DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
76
|
+
const KEEPALIVE_INTERVAL_MS = 8000;
|
|
77
|
+
const FINALIZE_SAFETY_TIMEOUT_MS = 5000;
|
|
78
|
+
const FINALIZE_NO_DATA_TIMEOUT_MS = 1500;
|
|
79
|
+
const MAX_RECORDING_SECS = 120; // 2 minutes safety cap (streaming is efficient)
|
|
80
|
+
|
|
68
81
|
const EXT_DIR = path.dirname(new URL(import.meta.url).pathname);
|
|
69
82
|
const PROJECT_ROOT = path.join(EXT_DIR, "..");
|
|
70
83
|
const DAEMON_SCRIPT = path.join(PROJECT_ROOT, "daemon.py");
|
|
@@ -74,7 +87,7 @@ function commandExists(cmd: string): boolean {
|
|
|
74
87
|
return spawnSync("which", [cmd], { stdio: "pipe", timeout: 3000 }).status === 0;
|
|
75
88
|
}
|
|
76
89
|
|
|
77
|
-
// ─── Daemon Communication
|
|
90
|
+
// ─── Daemon Communication (kept for non-deepgram local backends) ─────────────
|
|
78
91
|
|
|
79
92
|
let activeSocketPath = getSocketPath({
|
|
80
93
|
scope: DEFAULT_CONFIG.scope,
|
|
@@ -135,8 +148,6 @@ async function isDaemonRunning(socketPath = activeSocketPath): Promise<boolean>
|
|
|
135
148
|
async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
|
|
136
149
|
if (await isDaemonRunning(activeSocketPath)) {
|
|
137
150
|
const status = await daemonSend({ cmd: "status" }, 3000, activeSocketPath);
|
|
138
|
-
// When backend is 'auto', accept any loaded backend — the daemon already
|
|
139
|
-
// resolved 'auto' to a concrete backend, so we don't need to reload.
|
|
140
151
|
if (config.backend === "auto" || (status.backend === config.backend && status.model === config.model)) return true;
|
|
141
152
|
const reloaded = await daemonSend({
|
|
142
153
|
cmd: "load",
|
|
@@ -175,7 +186,6 @@ async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
|
|
|
175
186
|
|
|
176
187
|
proc.on("error", () => resolve(false));
|
|
177
188
|
|
|
178
|
-
// Timeout: if daemon doesn't start in 10s, kill orphan and fall back
|
|
179
189
|
setTimeout(() => {
|
|
180
190
|
if (!started) {
|
|
181
191
|
try { proc.kill(); } catch {}
|
|
@@ -185,46 +195,40 @@ async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
|
|
|
185
195
|
});
|
|
186
196
|
}
|
|
187
197
|
|
|
188
|
-
// ───
|
|
198
|
+
// ─── Legacy file-based transcription (for non-deepgram backends) ─────────────
|
|
189
199
|
|
|
190
|
-
let
|
|
200
|
+
let legacyRecProcess: ChildProcess | null = null;
|
|
191
201
|
|
|
192
|
-
function
|
|
193
|
-
if (
|
|
194
|
-
|
|
195
|
-
|
|
202
|
+
/**
 * Start a SoX `rec` process that records microphone audio into `outPath`
 * (file-based path used by the non-Deepgram backends).
 *
 * A recording that is still tracked in `legacyRecProcess` is sent SIGTERM
 * first, so at most one legacy recorder is tracked at a time.
 *
 * @param outPath Destination audio file passed to `rec`.
 * @returns `false` when the `rec` command is not available, otherwise `true`
 *          once the recorder has been spawned.
 */
function startLegacyRecordingToFile(outPath: string): boolean {
  if (legacyRecProcess) {
    legacyRecProcess.kill("SIGTERM");
    legacyRecProcess = null;
  }
  if (!commandExists("rec")) return false;

  const proc = spawn("rec", [
    "-q", "-r", String(SAMPLE_RATE), "-c", "1", "-b", "16", outPath,
  ], { stdio: ["pipe", "pipe", "pipe"] });
  legacyRecProcess = proc;

  // Consume stderr; SoX diagnostics are intentionally ignored.
  proc.stderr?.on("data", () => {});

  // Only forget the handle when it still refers to THIS process. A late
  // "error" from an older recorder must not drop the reference to a newer one.
  proc.on("error", () => {
    if (legacyRecProcess === proc) legacyRecProcess = null;
  });
  return true;
}
|
|
208
215
|
|
|
209
|
-
function
|
|
216
|
+
/**
 * Stop the legacy (file-based) recorder and wait for it to exit.
 *
 * Sends SIGTERM, then escalates to SIGKILL if the process has not closed
 * within 2 seconds.
 *
 * The process handle is captured up front and the fallback timer is cancelled
 * as soon as the process closes. Without that, a timer left over from a
 * previous stop could fire later and SIGKILL a *new* recording that had been
 * started in the meantime.
 *
 * @returns A promise that resolves once the recorder has closed (or has been
 *          force-killed). Resolves immediately when nothing is recording.
 */
function stopLegacyRecording(): Promise<void> {
  return new Promise((resolve) => {
    const proc = legacyRecProcess;
    if (!proc) { resolve(); return; }

    let settled = false;
    const finish = () => {
      if (settled) return;
      settled = true;
      clearTimeout(killTimer);
      // Leave the shared handle alone if a newer recorder has replaced it.
      if (legacyRecProcess === proc) legacyRecProcess = null;
      resolve();
    };

    // Escalation path: the recorder ignored SIGTERM.
    const killTimer = setTimeout(() => {
      try { proc.kill("SIGKILL"); } catch {}
      finish();
    }, 2000);

    proc.once("close", finish);
    proc.kill("SIGTERM");
  });
}
|
|
220
227
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
async function transcribeAudio(
|
|
228
|
+
async function transcribeAudioFile(
|
|
224
229
|
audioPath: string,
|
|
225
230
|
config: VoiceConfig,
|
|
226
231
|
): Promise<{ text: string; duration: number; error?: string }> {
|
|
227
|
-
// Try daemon first
|
|
228
232
|
if (await isDaemonRunning()) {
|
|
229
233
|
const resp = await daemonSend({
|
|
230
234
|
cmd: "transcribe",
|
|
@@ -238,13 +242,10 @@ async function transcribeAudio(
|
|
|
238
242
|
return resp as { text: string; duration: number };
|
|
239
243
|
}
|
|
240
244
|
}
|
|
241
|
-
|
|
242
|
-
// Fallback: direct subprocess
|
|
243
245
|
return new Promise((resolve) => {
|
|
244
246
|
const args = [TRANSCRIBE_SCRIPT, "--language", config.language, audioPath];
|
|
245
247
|
if (config.backend !== "auto") args.splice(1, 0, "--backend", config.backend);
|
|
246
248
|
if (config.model) args.splice(1, 0, "--model", config.model);
|
|
247
|
-
|
|
248
249
|
const proc = spawn("python3", args, { stdio: ["pipe", "pipe", "pipe"] });
|
|
249
250
|
let stdout = "";
|
|
250
251
|
let stderr = "";
|
|
@@ -258,6 +259,250 @@ async function transcribeAudio(
|
|
|
258
259
|
});
|
|
259
260
|
}
|
|
260
261
|
|
|
262
|
+
// ─── Deepgram WebSocket Streaming ────────────────────────────────────────────
|
|
263
|
+
|
|
264
|
+
/** Mutable state for one live Deepgram streaming transcription. */
interface StreamingSession {
  /** WebSocket connection to Deepgram. */
  ws: WebSocket;
  /** SoX `rec` child process supplying the microphone audio. */
  recProcess: ChildProcess;
  /** Current interim (partial) transcript. */
  interimText: string;
  /** All finalized transcript segments, in arrival order. */
  finalizedParts: string[];
  /** Handle of the periodic KeepAlive sender, or `null` when none is active. */
  keepAliveTimer: ReturnType<typeof setInterval> | null;
  /** Set once the session has been finalized. */
  closed: boolean;
  /** Called with the current interim text and the finalized segments. */
  onTranscript: (interim: string, finals: string[]) => void;
  /** Called with the full transcript text when the session is finalized. */
  onDone: (fullText: string) => void;
  /** Called with a human-readable message when an error is reported. */
  onError: (err: string) => void;
}
|
|
275
|
+
|
|
276
|
+
/**
 * Read the Deepgram API key from the `DEEPGRAM_API_KEY` environment variable.
 *
 * This helper looks at the environment only; it does not consult any config.
 *
 * @returns The key, or `null` when the variable is unset or empty.
 */
function getDeepgramApiKey(): string | null {
  const fromEnv = process.env.DEEPGRAM_API_KEY;
  return fromEnv ? fromEnv : null;
}
|
|
280
|
+
|
|
281
|
+
/**
 * Resolve the Deepgram API key, checking each source in priority order:
 *   1. `process.env.DEEPGRAM_API_KEY`
 *   2. `config.deepgramApiKey`
 *
 * Empty strings are treated as "not set" at both levels.
 *
 * @param config Voice configuration that may carry a stored key.
 * @returns The first non-empty key found, or `null` when neither source has one.
 */
function resolveDeepgramApiKey(config: VoiceConfig): string | null {
  const envKey = process.env.DEEPGRAM_API_KEY;
  if (envKey) return envKey;

  const storedKey = config.deepgramApiKey;
  return storedKey ? storedKey : null;
}
|
|
289
|
+
|
|
290
|
+
/**
 * Decide whether voice input should use the Deepgram streaming path.
 *
 * Streaming needs a resolvable API key, and is used when the backend is
 * explicitly "deepgram" or is "auto".
 *
 * @param config Voice configuration to inspect.
 * @returns `true` when the streaming path should be used.
 */
function isDeepgramStreaming(config: VoiceConfig): boolean {
  // No key means streaming is impossible, whatever the backend setting.
  if (resolveDeepgramApiKey(config) === null) return false;

  switch (config.backend) {
    case "deepgram":
    case "auto":
      return true;
    default:
      return false;
  }
}
|
|
296
|
+
|
|
297
|
+
/**
 * Build the Deepgram live-transcription WebSocket URL for a session.
 *
 * Audio format parameters come from the module constants. Language and model
 * come from `config`, falling back to "en" and "nova-3" when empty.
 *
 * NOTE(review): `config.model` is forwarded verbatim. Confirm that model names
 * meant for other backends (e.g. the "small" default) cannot reach this
 * function when the backend is "auto".
 *
 * @param config Voice configuration supplying language and model.
 * @returns The full `wss://` URL including the query string.
 */
function buildDeepgramWsUrl(config: VoiceConfig): string {
  const language = config.language || "en";
  const model = config.model || "nova-3";

  const query = new URLSearchParams([
    ["encoding", ENCODING],
    ["sample_rate", String(SAMPLE_RATE)],
    ["channels", String(CHANNELS)],
    ["endpointing", "300"],          // ms of silence before phrase boundary
    ["utterance_end_ms", "1000"],    // ms of silence before utterance is complete
    ["language", language],
    ["model", model],
    ["smart_format", "true"],
    ["interim_results", "true"],
  ]);

  return `${DEEPGRAM_WS_URL}?${query.toString()}`;
}
|
|
311
|
+
|
|
312
|
+
/**
 * Start a Deepgram streaming transcription session.
 *
 * Spawns SoX `rec` to capture raw PCM from the microphone on stdout, opens a
 * WebSocket to Deepgram, and forwards every audio chunk as a binary frame once
 * the socket is open. Transcript messages update the session and are reported
 * through `callbacks.onTranscript`.
 *
 * Failure handling: any error (socket error, SoX process error, or an error
 * message from Deepgram) is reported through `onError` and then the session is
 * finalized. That releases the microphone process, the socket and the
 * keepalive timer instead of leaving them running with nothing tracking them.
 * Finalizing also invokes `onDone` with whatever text had been finalized.
 *
 * @param config    Voice configuration (API key source, language, model).
 * @param callbacks Receivers for transcript updates, completion and errors.
 * @returns The live session, or `null` when no API key is available or the
 *          `rec` command is missing (`onError` is called in both cases).
 */
function startStreamingSession(
  config: VoiceConfig,
  callbacks: {
    onTranscript: (interim: string, finals: string[]) => void;
    onDone: (fullText: string) => void;
    onError: (err: string) => void;
  },
): StreamingSession | null {
  const apiKey = resolveDeepgramApiKey(config);
  if (!apiKey) {
    callbacks.onError("DEEPGRAM_API_KEY not set");
    return null;
  }

  if (!commandExists("rec")) {
    callbacks.onError("Voice requires SoX. Install: brew install sox");
    return null;
  }

  // Start SoX streaming raw PCM to stdout (no file)
  const recProc = spawn("rec", [
    "-q",
    "-r", String(SAMPLE_RATE),
    "-c", String(CHANNELS),
    "-b", "16",
    "-e", "signed-integer",
    "-t", "raw",
    "-",  // output to stdout
  ], { stdio: ["pipe", "pipe", "pipe"] });

  recProc.stderr?.on("data", () => {}); // suppress SoX warnings

  // Connect WebSocket to Deepgram
  const wsUrl = buildDeepgramWsUrl(config);
  const ws = new WebSocket(wsUrl, {
    headers: {
      "Authorization": `Token ${apiKey}`,
    },
  } as any);

  const session: StreamingSession = {
    ws,
    recProcess: recProc,
    interimText: "",
    finalizedParts: [],
    keepAliveTimer: null,
    closed: false,
    onTranscript: callbacks.onTranscript,
    onDone: callbacks.onDone,
    onError: callbacks.onError,
  };

  // Best-effort send of a JSON control message; a socket that is not open is skipped.
  const sendControl = (type: string): void => {
    if (ws.readyState !== WebSocket.OPEN) return;
    try { ws.send(JSON.stringify({ type })); } catch {}
  };

  // Report an error once, then release every resource the session holds.
  const abort = (message: string): void => {
    if (session.closed) return;
    session.onError(message);
    finalizeSession(session);
  };

  ws.onopen = () => {
    // Send initial KeepAlive, then repeat on a timer.
    sendControl("KeepAlive");
    session.keepAliveTimer = setInterval(() => sendControl("KeepAlive"), KEEPALIVE_INTERVAL_MS);

    // Pipe SoX stdout → WebSocket as binary frames
    recProc.stdout?.on("data", (chunk: Buffer) => {
      if (ws.readyState === WebSocket.OPEN) {
        try { ws.send(chunk); } catch {}
      }
    });
  };

  ws.onmessage = (event: MessageEvent) => {
    try {
      const msg = typeof event.data === "string" ? JSON.parse(event.data) : null;
      if (!msg) return;

      if (msg.type === "Results") {
        const transcript: string = msg.channel?.alternatives?.[0]?.transcript || "";

        if (msg.is_final) {
          // Final result for this audio segment
          const finalText = transcript.trim();
          if (finalText) session.finalizedParts.push(finalText);
          session.interimText = "";
        } else {
          // Interim result — live update
          session.interimText = transcript;
        }

        session.onTranscript(session.interimText, session.finalizedParts);
      } else if (msg.type === "Error" || msg.type === "error") {
        abort(msg.message || msg.description || "Deepgram error");
      }
      // "Metadata" and "UtteranceEnd" need no handling: is_final results
      // already drive finalization.
    } catch {
      // Ignore malformed frames and anything thrown while handling a message.
    }
  };

  ws.onerror = () => {
    abort("WebSocket connection error");
  };

  ws.onclose = () => {
    if (!session.closed) {
      finalizeSession(session);
    }
  };

  recProc.on("error", (err) => {
    abort(`SoX error: ${err.message}`);
  });

  recProc.on("close", () => {
    // SoX stopped — send CloseStream to Deepgram
    sendControl("CloseStream");
  });

  return session;
}
|
|
448
|
+
|
|
449
|
+
/**
 * Ask a streaming session to wind down and deliver its transcript.
 *
 * Stops the microphone (SIGTERM), sends `CloseStream` so Deepgram flushes the
 * remaining audio, and then arms two watchdogs that finalize the session if
 * the socket does not close by itself:
 *   - a no-data watchdog: no message received for FINALIZE_NO_DATA_TIMEOUT_MS;
 *   - a safety timeout: FINALIZE_SAFETY_TIMEOUT_MS after this call.
 *
 * Both watchdogs are cancelled as soon as the session is closed by any path,
 * so no timer outlives the session.
 *
 * @param session The session to stop; a session that is already closed is ignored.
 */
function stopStreamingSession(session: StreamingSession): void {
  if (session.closed) return;

  // Stop the microphone
  try { session.recProcess.kill("SIGTERM"); } catch {}

  // CloseStream tells Deepgram to flush remaining audio
  if (session.ws.readyState === WebSocket.OPEN) {
    try { session.ws.send(JSON.stringify({ type: "CloseStream" })); } catch {}
  }

  // Track message arrival so the no-data watchdog can tell when Deepgram has gone quiet.
  let lastDataTime = Date.now();
  const origOnMessage = session.ws.onmessage;
  session.ws.onmessage = (event: MessageEvent) => {
    lastDataTime = Date.now();
    if (origOnMessage) origOnMessage.call(session.ws, event);
  };

  const cancelWatchdogs = (): void => {
    clearInterval(noDataCheck);
    clearTimeout(safetyTimer);
  };

  // Safety: finalize after timeout even if Deepgram doesn't respond
  const safetyTimer = setTimeout(() => {
    cancelWatchdogs();
    if (!session.closed) {
      finalizeSession(session);
    }
  }, FINALIZE_SAFETY_TIMEOUT_MS);

  // Shorter timeout: if no new data arrives for 1.5s, assume done
  const noDataCheck = setInterval(() => {
    if (session.closed) {
      // Finalized elsewhere (e.g. the socket closed) — nothing left to watch.
      cancelWatchdogs();
      return;
    }
    if (Date.now() - lastDataTime > FINALIZE_NO_DATA_TIMEOUT_MS) {
      cancelWatchdogs();
      finalizeSession(session);
    }
  }, 500);
}
|
|
484
|
+
|
|
485
|
+
/**
 * Tear down a streaming session and deliver its transcript exactly once.
 *
 * Marks the session closed, stops the keepalive timer, closes the WebSocket,
 * kills the SoX process, and calls `session.onDone` with the transcript.
 * Calling it again on a closed session is a no-op.
 *
 * The delivered text is the finalized segments joined by single spaces. If an
 * interim transcript is still pending (the session is being finalized before
 * that segment was marked final), it is appended so those words are not lost.
 *
 * @param session The session to finalize.
 */
function finalizeSession(session: StreamingSession): void {
  if (session.closed) return;
  session.closed = true;

  // Clean up keepalive
  if (session.keepAliveTimer) {
    clearInterval(session.keepAliveTimer);
    session.keepAliveTimer = null;
  }

  // Close WebSocket
  try { session.ws.close(); } catch {}

  // Kill SoX if still running
  try { session.recProcess.kill("SIGKILL"); } catch {}

  // Deliver final transcript. interimText is reset whenever a segment is
  // finalized, so a non-empty value here is text that never got finalized.
  const pendingInterim = session.interimText.trim();
  const parts = pendingInterim
    ? [...session.finalizedParts, pendingInterim]
    : session.finalizedParts;
  session.onDone(parts.join(" ").trim());
}
|
|
505
|
+
|
|
261
506
|
// ─── Extension ───────────────────────────────────────────────────────────────
|
|
262
507
|
|
|
263
508
|
export default function (pi: ExtensionAPI) {
|
|
@@ -272,6 +517,10 @@ export default function (pi: ExtensionAPI) {
|
|
|
272
517
|
let terminalInputUnsub: (() => void) | null = null;
|
|
273
518
|
let isHolding = false;
|
|
274
519
|
|
|
520
|
+
// Streaming session state
|
|
521
|
+
let activeSession: StreamingSession | null = null;
|
|
522
|
+
let currentTarget: "editor" | "btw" = "editor";
|
|
523
|
+
|
|
275
524
|
// ─── BTW State ───────────────────────────────────────────────────────────
|
|
276
525
|
|
|
277
526
|
let btwThread: BtwExchange[] = [];
|
|
@@ -289,17 +538,19 @@ export default function (pi: ExtensionAPI) {
|
|
|
289
538
|
}
|
|
290
539
|
const modeTag = !config.onboarding.completed
|
|
291
540
|
? "SETUP"
|
|
292
|
-
: config
|
|
293
|
-
? "
|
|
294
|
-
: config.mode === "
|
|
295
|
-
? "
|
|
296
|
-
: "
|
|
541
|
+
: isDeepgramStreaming(config)
|
|
542
|
+
? "STREAM"
|
|
543
|
+
: config.mode === "api"
|
|
544
|
+
? "API"
|
|
545
|
+
: config.mode === "local"
|
|
546
|
+
? "LOCAL"
|
|
547
|
+
: "AUTO";
|
|
297
548
|
ctx.ui.setStatus("voice", `MIC ${modeTag}`);
|
|
298
549
|
break;
|
|
299
550
|
}
|
|
300
551
|
case "recording": {
|
|
301
552
|
const secs = Math.round((Date.now() - recordingStart) / 1000);
|
|
302
|
-
ctx.ui.setStatus("voice",
|
|
553
|
+
ctx.ui.setStatus("voice", `🔴 REC ${secs}s`);
|
|
303
554
|
break;
|
|
304
555
|
}
|
|
305
556
|
case "transcribing":
|
|
@@ -315,9 +566,17 @@ export default function (pi: ExtensionAPI) {
|
|
|
315
566
|
|
|
316
567
|
/**
 * Reset all voice state to idle: stop timers and the widget animation, end any
 * active streaming session or legacy recorder, remove the temp audio file, and
 * clear the SPACE-hold tracking flags.
 */
function voiceCleanup() {
  // Timers and animation first so nothing repaints while we tear down.
  if (statusTimer) {
    clearInterval(statusTimer);
    statusTimer = null;
  }
  clearHoldTimer();
  stopRecordingWidgetAnimation();

  // Streaming path.
  const session = activeSession;
  if (session) {
    finalizeSession(session);
    activeSession = null;
  }

  // Legacy file-based path.
  if (legacyRecProcess) {
    legacyRecProcess.kill("SIGTERM");
    legacyRecProcess = null;
  }
  if (tempFile) {
    try {
      fs.unlinkSync(tempFile);
    } catch {}
    tempFile = null;
  }

  // Input tracking and visible state.
  isHolding = false;
  spaceConsumed = false;
  spaceDownTime = null;
  setVoiceState("idle");
}
|
|
323
582
|
|
|
@@ -332,7 +591,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
332
591
|
const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
|
|
333
592
|
const provisioningPlan = buildProvisioningPlan(nextConfig, diagnostics);
|
|
334
593
|
let validated = provisioningPlan.ready;
|
|
335
|
-
if (validated && nextConfig.enabled) {
|
|
594
|
+
if (validated && nextConfig.enabled && !isDeepgramStreaming(nextConfig)) {
|
|
336
595
|
validated = await ensureDaemon(nextConfig);
|
|
337
596
|
}
|
|
338
597
|
|
|
@@ -349,53 +608,337 @@ export default function (pi: ExtensionAPI) {
|
|
|
349
608
|
].join("\n"), validated ? "info" : "warning");
|
|
350
609
|
}
|
|
351
610
|
|
|
352
|
-
// ───
|
|
611
|
+
// ─── Live Transcript Widget (Component-based, themed) ───────────────────
|
|
353
612
|
|
|
354
|
-
|
|
613
|
+
/**
 * Subtle hint shown during the hold threshold wait.
 *
 * Renders a three-line widget above the editor: a rule, the hint text, a rule.
 * The rule is at most 60 columns wide and never negative, so rendering at a
 * width below 2 cannot throw from `String.prototype.repeat`.
 */
function showHoldHintWidget() {
  if (!ctx?.hasUI) return;
  ctx.ui.setWidget("voice-recording", (tui, theme) => {
    return {
      invalidate() {},
      render(width: number): string[] {
        // Clamp to >= 0: repeat() throws RangeError on a negative count.
        const barWidth = Math.max(0, Math.min(width - 2, 60));
        const bar = theme.fg("muted", "─".repeat(barWidth));
        return [
          bar,
          theme.fg("dim", " Hold " + theme.bold("SPACE") + " for voice input..."),
          bar,
        ];
      },
    };
  }, { placement: "aboveEditor" });
}
|
|
355
630
|
|
|
356
|
-
|
|
357
|
-
if (
|
|
631
|
+
/** Remove the "voice-recording" widget, if a UI is attached. */
function hideHoldHintWidget() {
  if (ctx?.hasUI) {
    ctx.ui.setWidget("voice-recording", undefined);
  }
}
|
|
635
|
+
|
|
636
|
+
/**
 * Animated recording indicator with live waveform.
 *
 * Draws the first frame immediately and then redraws every 200ms. The interval
 * handle is stored on the function object as `_animTimer` so that
 * `stopRecordingWidgetAnimation` can cancel it.
 *
 * An animation that is still running is cancelled before a new one starts;
 * otherwise its handle would be overwritten and the old interval could never
 * be cleared.
 *
 * @param target Which flow is recording; selects the label and hint text.
 */
function showRecordingWidget(target: "editor" | "btw") {
  if (!ctx?.hasUI) return;

  // Never run two animations at once.
  const previousTimer = (showRecordingWidget as any)._animTimer;
  if (previousTimer) {
    clearInterval(previousTimer);
    (showRecordingWidget as any)._animTimer = null;
  }

  let frame = 0;
  const waveChars = ["▁", "▂", "▃", "▅", "▆", "▇", "▆", "▅", "▃", "▂"];

  // Animate the widget every 200ms
  const animTimer = setInterval(() => {
    frame++;
    if (ctx?.hasUI) ctx.ui.setWidget("voice-recording", undefined); // force re-render
    showRecordingWidgetFrame(target, frame, waveChars);
  }, 200);

  // Store the timer so we can clean it up
  (showRecordingWidget as any)._animTimer = animTimer;

  showRecordingWidgetFrame(target, frame, waveChars);
}
|
|
654
|
+
|
|
655
|
+
/**
 * Render one frame of the recording widget above the editor.
 *
 * The widget is a bordered box with a title line (red dot, label, scrolling
 * waveform, elapsed time) and a hint line describing how to stop.
 *
 * @param target    "btw" or "editor"; selects the label and hint text.
 * @param frame     Animation frame counter; offsets the waveform pattern.
 * @param waveChars Glyph cycle the waveform is drawn from.
 */
function showRecordingWidgetFrame(target: "editor" | "btw", frame: number, waveChars: string[]) {
  if (!ctx?.hasUI) return;
  ctx.ui.setWidget("voice-recording", (tui, theme) => {
    return {
      invalidate() {},
      render(width: number): string[] {
        // Inner box width: at most 72 columns and never negative
        // (repeat() throws RangeError on a negative count).
        const maxW = Math.max(0, Math.min(width - 2, 72));
        const elapsed = Math.round((Date.now() - recordingStart) / 1000);
        const mins = Math.floor(elapsed / 60);
        const secs = elapsed % 60;
        const timeStr = mins > 0 ? `${mins}:${String(secs).padStart(2, "0")}` : `${secs}s`;

        // Animated waveform: a 12-glyph window sliding over waveChars.
        const waveLen = 12;
        let wave = "";
        for (let i = 0; i < waveLen; i++) {
          wave += waveChars[(frame + i) % waveChars.length];
        }

        const border = (s: string) => theme.fg("borderAccent", s);
        const topBorder = border("╭" + "─".repeat(maxW) + "╮");
        const botBorder = border("╰" + "─".repeat(maxW) + "╯");
        // Pad to `w` visible columns; ANSI colour sequences don't count.
        const pad = (s: string, w: number) => {
          const visible = s.replace(/\x1b\[[^m]*m/g, "").length;
          return s + " ".repeat(Math.max(0, w - visible));
        };

        const dot = theme.fg("error", "●");
        const label = target === "btw"
          ? theme.bold(theme.fg("accent", " BTW "))
          : theme.bold(theme.fg("accent", " VOICE "));
        const waveStyled = theme.fg("accent", wave);
        const timeStyled = theme.fg("muted", timeStr);

        const titleLine = ` ${dot} ${label} ${waveStyled} ${timeStyled}`;

        const hint = target === "btw"
          ? theme.fg("dim", " Press Ctrl+Shift+B to stop")
          : kittyReleaseDetected
            ? theme.fg("dim", " Release SPACE to finalize")
            : theme.fg("dim", " Press SPACE again to stop");

        return [
          topBorder,
          border("│") + pad(titleLine, maxW) + border("│"),
          border("│") + pad(hint, maxW) + border("│"),
          botBorder,
        ];
      },
    };
  }, { placement: "aboveEditor" });
}
|
|
707
|
+
|
|
708
|
+
/**
 * Cancel the recording widget's animation interval, if one is running.
 * The handle is kept on `showRecordingWidget._animTimer`.
 */
function stopRecordingWidgetAnimation() {
  const holder = showRecordingWidget as any;
  const activeTimer = holder?._animTimer;
  if (!activeTimer) return;

  clearInterval(activeTimer);
  holder._animTimer = null;
}
|
|
715
|
+
|
|
716
|
+
/**
 * Show live transcript inside a themed box.
 *
 * The box has a title line (red dot, label, elapsed time), then either a
 * "listening" placeholder or the last three word-wrapped lines of the
 * transcript, then a hint line. While interim text is present, a cursor glyph
 * is appended to the last transcript line.
 *
 * All widths are clamped to be non-negative so rendering at a very narrow
 * width cannot throw from `String.prototype.repeat`.
 *
 * @param interim Current interim (partial) transcript; may be empty.
 * @param finals  Finalized transcript segments so far.
 */
function updateLiveTranscriptWidget(interim: string, finals: string[]) {
  if (!ctx?.hasUI) return;

  const finalized = finals.join(" ");
  const displayText = finalized + (interim ? (finalized ? " " : "") + interim : "");

  ctx.ui.setWidget("voice-recording", (tui, theme) => {
    return {
      invalidate() {},
      render(width: number): string[] {
        // Inner box width: at most 72 columns and never negative.
        const maxW = Math.max(0, Math.min(width - 2, 72));
        const elapsed = Math.round((Date.now() - recordingStart) / 1000);
        const mins = Math.floor(elapsed / 60);
        const secs = elapsed % 60;
        const timeStr = mins > 0 ? `${mins}:${String(secs).padStart(2, "0")}` : `${secs}s`;

        const border = (s: string) => theme.fg("borderAccent", s);
        const topBorder = border("╭" + "─".repeat(maxW) + "╮");
        const botBorder = border("╰" + "─".repeat(maxW) + "╯");
        const sep = border("│") + border("─".repeat(maxW)) + border("│");
        // Wrap content in side borders, padding to maxW visible columns
        // (ANSI colour sequences don't count).
        const side = (content: string) => {
          const stripped = content.replace(/\x1b\[[^m]*m/g, "");
          const padding = Math.max(0, maxW - stripped.length);
          return border("│") + content + " ".repeat(padding) + border("│");
        };

        const dot = theme.fg("error", "●");
        const label = theme.bold(theme.fg("accent", " VOICE "));
        const timeStyled = theme.fg("muted", timeStr);
        const titleLine = ` ${dot} ${label} ${timeStyled}`;
        const hint = kittyReleaseDetected
          ? theme.fg("dim", " Release SPACE to finalize")
          : theme.fg("dim", " Press SPACE again to stop");

        const lines = [topBorder, side(titleLine)];

        if (!displayText.trim()) {
          lines.push(side(theme.fg("dim", " Listening... speak now")));
        } else {
          lines.push(sep);
          // Word-wrap the transcript text
          const innerMax = Math.max(1, maxW - 4); // padding inside box; at least one column
          const words = displayText.split(" ");
          const wrappedLines: string[] = [];
          let currentLine = "";

          for (const word of words) {
            if ((currentLine + " " + word).trim().length > innerMax && currentLine) {
              wrappedLines.push(currentLine);
              currentLine = word;
            } else {
              currentLine = currentLine ? currentLine + " " + word : word;
            }
          }
          if (currentLine) wrappedLines.push(currentLine);

          // Show last 3 lines of transcript
          const visible = wrappedLines.slice(-3);
          for (let i = 0; i < visible.length; i++) {
            let line = theme.fg("text", visible[i]);
            // Cursor glyph on the last line while interim text is present.
            if (i === visible.length - 1 && interim) {
              line += theme.fg("accent", "▍");
            }
            lines.push(side(" " + line));
          }
        }

        lines.push(side(hint));
        lines.push(botBorder);
        return lines;
      },
    };
  }, { placement: "aboveEditor" });
}
|
|
364
793
|
|
|
794
|
+
/**
 * Transcribing state — show a processing indicator.
 *
 * Renders a one-line bordered box with a spinner glyph and the text
 * "Finalizing transcription...". The spinner glyph is chosen from the current
 * time each time the widget renders. The box width is clamped to be
 * non-negative so a very narrow width cannot throw from
 * `String.prototype.repeat`.
 */
function showTranscribingWidget() {
  if (!ctx?.hasUI) return;
  ctx.ui.setWidget("voice-recording", (tui, theme) => {
    return {
      invalidate() {},
      render(width: number): string[] {
        // Inner box width: at most 72 columns and never negative.
        const maxW = Math.max(0, Math.min(width - 2, 72));
        const border = (s: string) => theme.fg("border", s);
        const topBorder = border("╭" + "─".repeat(maxW) + "╮");
        const botBorder = border("╰" + "─".repeat(maxW) + "╯");
        // Pad to maxW visible columns; ANSI colour sequences don't count.
        const side = (content: string) => {
          const stripped = content.replace(/\x1b\[[^m]*m/g, "");
          const padding = Math.max(0, maxW - stripped.length);
          return border("│") + content + " ".repeat(padding) + border("│");
        };
        const spinner = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
        // One spinner step per 100ms of wall-clock time.
        const idx = Math.floor(Date.now() / 100) % spinner.length;
        const line = ` ${theme.fg("accent", spinner[idx])} ${theme.fg("dim", "Finalizing transcription...")}`;
        return [topBorder, side(line), botBorder];
      },
    };
  }, { placement: "aboveEditor" });
}
|
|
817
|
+
|
|
818
|
+
// ─── Voice: Start / Stop (Streaming or Legacy) ───────────────────────────
|
|
819
|
+
|
|
820
|
+
/**
 * Start a voice recording whose transcript is routed to `target`.
 *
 * Two paths exist, chosen by `isDeepgramStreaming(config)`:
 *  - Streaming: opens a session via `startStreamingSession`; interim results
 *    update the live transcript widget, and the final text is delivered in
 *    `onDone` (appended to the editor, or handed to `handleBtw`).
 *  - Legacy: records to a temp WAV file via `startLegacyRecordingToFile`;
 *    transcription happens later in `stopVoiceRecording`.
 *
 * In both paths a once-per-second interval refreshes the status display and
 * force-stops the recording once `MAX_RECORDING_SECS` has elapsed.
 *
 * @param target - "editor" to append the transcript to the editor text,
 *   "btw" to send it to the BTW side conversation. Defaults to "editor".
 * @returns `true` when recording actually started; `false` when the voice
 *   state is not idle, no context is available, or the backend failed to start.
 */
async function startVoiceRecording(target: "editor" | "btw" = "editor"): Promise<boolean> {
	if (voiceState !== "idle" || !ctx) return false;

	// Cancel the status interval if one is running.
	const clearStatusTimer = () => {
		if (statusTimer) {
			clearInterval(statusTimer);
			statusTimer = null;
		}
	};

	// (Re)arm the status interval. Any previous interval is cleared first so a
	// stale timer can never be overwritten and leaked.
	const startStatusTimer = () => {
		clearStatusTimer();
		statusTimer = setInterval(() => {
			if (voiceState !== "recording") return;
			updateVoiceStatus();
			const elapsedSecs = (Date.now() - recordingStart) / 1000;
			if (elapsedSecs >= MAX_RECORDING_SECS) {
				// Hard cap reached: behave as if the user released the key.
				isHolding = false;
				stopVoiceRecording(target);
			}
		}, 1000);
	};

	// Shared teardown for the streaming callbacks. The interval is cleared here
	// too because the session may end on its own (onDone/onError) without
	// stopVoiceRecording — the only other place that clears it — having run.
	const teardownStreamingSession = () => {
		activeSession = null;
		clearStatusTimer();
		stopRecordingWidgetAnimation();
		ctx?.ui.setWidget("voice-recording", undefined);
	};

	currentTarget = target;
	recordingStart = Date.now();

	if (isDeepgramStreaming(config)) {
		// === STREAMING PATH === (Deepgram WebSocket)
		setVoiceState("recording");

		const session = startStreamingSession(config, {
			onTranscript: (interim, finals) => {
				updateLiveTranscriptWidget(interim, finals);
				updateVoiceStatus();
			},
			onDone: (fullText) => {
				teardownStreamingSession();

				if (!fullText.trim()) {
					ctx?.ui.notify("No speech detected.", "warning");
					setVoiceState("idle");
					return;
				}

				if (target === "btw") {
					handleBtw(fullText);
				} else if (ctx?.hasUI) {
					// Append to whatever is already in the editor.
					const existing = ctx.ui.getEditorText();
					ctx.ui.setEditorText(existing ? existing + " " + fullText : fullText);
					const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
					ctx.ui.notify(
						`STT (${elapsed}s): ${fullText.slice(0, 80)}${fullText.length > 80 ? "..." : ""}`,
						"info",
					);
				}
				setVoiceState("idle");
			},
			onError: (err) => {
				teardownStreamingSession();
				ctx?.ui.notify(`STT error: ${err}`, "error");
				setVoiceState("idle");
			},
		});

		if (!session) {
			// Session could not be created; roll the state back.
			setVoiceState("idle");
			return false;
		}

		activeSession = session;
		startStatusTimer();

		// Show the themed recording widget
		showRecordingWidget(target);
		return true;
	}

	// === LEGACY PATH === (file-based for local backends)
	tempFile = path.join(os.tmpdir(), `pi-voice-${Date.now()}.wav`);
	if (!startLegacyRecordingToFile(tempFile)) {
		ctx.ui.notify("Voice requires SoX. Install: brew install sox", "error");
		return false;
	}

	setVoiceState("recording");
	startStatusTimer();

	if (ctx.hasUI) {
		// Show themed recording widget for legacy path
		showRecordingWidget(target);
	}
	return true;
}
|
|
388
920
|
|
|
389
921
|
async function stopVoiceRecording(target: "editor" | "btw" = "editor") {
|
|
390
922
|
if (voiceState !== "recording" || !ctx) return;
|
|
391
923
|
if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }
|
|
392
924
|
|
|
925
|
+
if (activeSession) {
|
|
926
|
+
// === STREAMING PATH === Stop the stream, finalize will call onDone
|
|
927
|
+
setVoiceState("transcribing");
|
|
928
|
+
stopRecordingWidgetAnimation();
|
|
929
|
+
showTranscribingWidget();
|
|
930
|
+
stopStreamingSession(activeSession);
|
|
931
|
+
return;
|
|
932
|
+
}
|
|
933
|
+
|
|
934
|
+
// === LEGACY PATH ===
|
|
393
935
|
const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
|
|
394
|
-
const audioFile = tempFile;
|
|
936
|
+
const audioFile = tempFile;
|
|
395
937
|
setVoiceState("transcribing");
|
|
396
|
-
|
|
938
|
+
stopRecordingWidgetAnimation();
|
|
939
|
+
showTranscribingWidget();
|
|
397
940
|
|
|
398
|
-
await
|
|
941
|
+
await stopLegacyRecording();
|
|
399
942
|
|
|
400
943
|
if (!audioFile || !fs.existsSync(audioFile)) {
|
|
401
944
|
ctx.ui.notify("No audio recorded.", "warning");
|
|
@@ -412,12 +955,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
412
955
|
return;
|
|
413
956
|
}
|
|
414
957
|
|
|
415
|
-
// Ensure daemon is up before transcribing — await so the warm path
|
|
416
|
-
// is available for this request instead of falling through to the
|
|
417
|
-
// cold subprocess fallback.
|
|
418
958
|
await ensureDaemon(config).catch(() => {});
|
|
419
959
|
|
|
420
|
-
const result = await
|
|
960
|
+
const result = await transcribeAudioFile(audioFile, config);
|
|
421
961
|
try { fs.unlinkSync(audioFile); } catch {}
|
|
422
962
|
if (tempFile === audioFile) tempFile = null;
|
|
423
963
|
|
|
@@ -437,7 +977,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
437
977
|
if (target === "btw") {
|
|
438
978
|
await handleBtw(transcript);
|
|
439
979
|
} else {
|
|
440
|
-
// Inject into editor
|
|
441
980
|
if (ctx.hasUI) {
|
|
442
981
|
const existing = ctx.ui.getEditorText();
|
|
443
982
|
ctx.ui.setEditorText(existing ? existing + " " + transcript : transcript);
|
|
@@ -451,54 +990,163 @@ export default function (pi: ExtensionAPI) {
|
|
|
451
990
|
setVoiceState("idle");
|
|
452
991
|
}
|
|
453
992
|
|
|
454
|
-
// ─── Hold-to-talk
|
|
993
|
+
// ─── Hold-to-talk with Duration Threshold ──────────────────────────────
|
|
994
|
+
//
|
|
995
|
+
// SPACE activates voice ONLY when:
|
|
996
|
+
// 1. The editor is empty (no text typed yet)
|
|
997
|
+
// 2. SPACE is held for ≥ HOLD_THRESHOLD_MS (500ms)
|
|
998
|
+
//
|
|
999
|
+
// If SPACE is released before the threshold, a regular space character
|
|
1000
|
+
// is typed into the editor (normal typing behavior).
|
|
1001
|
+
//
|
|
1002
|
+
// This prevents accidental voice activation when typing and matches
|
|
1003
|
+
// Claude Code's hold-to-talk UX pattern.
|
|
1004
|
+
//
|
|
1005
|
+
// For Kitty protocol terminals: hold → wait threshold → activate →
|
|
1006
|
+
// release → stop recording. True hold-to-talk.
|
|
1007
|
+
// For non-Kitty terminals: hold → wait threshold → activate →
|
|
1008
|
+
// press SPACE again → stop recording. Toggle after activation.
|
|
1009
|
+
|
|
1010
|
+
const HOLD_THRESHOLD_MS = 500; // minimum hold time before voice activates
|
|
1011
|
+
let kittyReleaseDetected = false;
|
|
1012
|
+
let spaceDownTime: number | null = null; // timestamp when SPACE was first pressed
|
|
1013
|
+
let holdActivationTimer: ReturnType<typeof setTimeout> | null = null;
|
|
1014
|
+
let spaceConsumed = false; // whether we've committed to voice (past threshold)
|
|
1015
|
+
|
|
1016
|
+
/**
 * Cancel the pending hold-activation timeout, if any, and reset
 * `holdActivationTimer` to `null`. Does nothing when no timeout is pending.
 */
function clearHoldTimer() {
	if (!holdActivationTimer) return;
	clearTimeout(holdActivationTimer);
	holdActivationTimer = null;
}
|
|
455
1022
|
|
|
456
1023
|
function setupHoldToTalk() {
|
|
457
1024
|
if (!ctx?.hasUI) return;
|
|
458
1025
|
|
|
459
|
-
// Remove previous listener
|
|
460
1026
|
if (terminalInputUnsub) { terminalInputUnsub(); terminalInputUnsub = null; }
|
|
461
1027
|
|
|
462
1028
|
terminalInputUnsub = ctx.ui.onTerminalInput((data: string) => {
|
|
463
1029
|
if (!config.enabled) return undefined;
|
|
464
1030
|
|
|
465
|
-
//
|
|
1031
|
+
// ── SPACE handling ──
|
|
466
1032
|
if (matchesKey(data, "space")) {
|
|
467
|
-
//
|
|
1033
|
+
// RULE: If editor has content, SPACE always types a space — never voice
|
|
468
1034
|
const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
|
|
469
|
-
if (editorText && editorText.trim().length > 0)
|
|
1035
|
+
if (editorText && editorText.trim().length > 0) {
|
|
1036
|
+
clearHoldTimer();
|
|
1037
|
+
spaceDownTime = null;
|
|
1038
|
+
spaceConsumed = false;
|
|
1039
|
+
return undefined; // let the default space character through
|
|
1040
|
+
}
|
|
470
1041
|
|
|
1042
|
+
// ── Kitty key-release ──
|
|
471
1043
|
if (isKeyRelease(data)) {
|
|
472
|
-
|
|
1044
|
+
kittyReleaseDetected = true;
|
|
1045
|
+
|
|
1046
|
+
// Released before threshold → type a space character
|
|
1047
|
+
if (spaceDownTime && !spaceConsumed) {
|
|
1048
|
+
clearHoldTimer();
|
|
1049
|
+
spaceDownTime = null;
|
|
1050
|
+
spaceConsumed = false;
|
|
1051
|
+
// Insert a space into editor
|
|
1052
|
+
if (ctx?.hasUI) ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1053
|
+
return { consume: true };
|
|
1054
|
+
}
|
|
1055
|
+
|
|
1056
|
+
// Released after threshold → stop recording (true hold-to-talk)
|
|
1057
|
+
if (spaceConsumed && isHolding && voiceState === "recording") {
|
|
473
1058
|
isHolding = false;
|
|
1059
|
+
spaceConsumed = false;
|
|
1060
|
+
spaceDownTime = null;
|
|
474
1061
|
stopVoiceRecording("editor");
|
|
475
1062
|
return { consume: true };
|
|
476
1063
|
}
|
|
1064
|
+
|
|
1065
|
+
spaceDownTime = null;
|
|
1066
|
+
spaceConsumed = false;
|
|
477
1067
|
return undefined;
|
|
478
1068
|
}
|
|
479
1069
|
|
|
1070
|
+
// ── Kitty key-repeat: suppress while holding past threshold ──
|
|
480
1071
|
if (isKeyRepeat(data)) {
|
|
481
|
-
if (isHolding) return { consume: true };
|
|
1072
|
+
if (spaceConsumed || isHolding) return { consume: true };
|
|
482
1073
|
return undefined;
|
|
483
1074
|
}
|
|
484
1075
|
|
|
485
|
-
// Key
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
1076
|
+
// === Key PRESS ===
|
|
1077
|
+
|
|
1078
|
+
// If already recording (toggle mode for non-Kitty) → stop
|
|
1079
|
+
if (voiceState === "recording" && spaceConsumed) {
|
|
1080
|
+
isHolding = false;
|
|
1081
|
+
spaceConsumed = false;
|
|
1082
|
+
spaceDownTime = null;
|
|
1083
|
+
clearHoldTimer();
|
|
1084
|
+
stopVoiceRecording("editor");
|
|
491
1085
|
return { consume: true };
|
|
492
1086
|
}
|
|
493
1087
|
|
|
494
|
-
|
|
1088
|
+
// If transcribing → ignore
|
|
1089
|
+
if (voiceState === "transcribing") {
|
|
1090
|
+
return { consume: true };
|
|
1091
|
+
}
|
|
1092
|
+
|
|
1093
|
+
// Idle → start the hold timer
|
|
1094
|
+
if (voiceState === "idle" && !spaceDownTime) {
|
|
1095
|
+
spaceDownTime = Date.now();
|
|
1096
|
+
spaceConsumed = false;
|
|
1097
|
+
|
|
1098
|
+
// Show a subtle "preparing" indicator
|
|
1099
|
+
if (ctx?.hasUI) {
|
|
1100
|
+
showHoldHintWidget();
|
|
1101
|
+
}
|
|
1102
|
+
|
|
1103
|
+
// After threshold: activate voice recording
|
|
1104
|
+
holdActivationTimer = setTimeout(() => {
|
|
1105
|
+
holdActivationTimer = null;
|
|
1106
|
+
// Double-check: still idle, still holding, editor still empty
|
|
1107
|
+
const currentText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
|
|
1108
|
+
if (voiceState === "idle" && spaceDownTime && !(currentText && currentText.trim().length > 0)) {
|
|
1109
|
+
spaceConsumed = true;
|
|
1110
|
+
isHolding = true;
|
|
1111
|
+
startVoiceRecording("editor").then((ok) => {
|
|
1112
|
+
if (!ok) {
|
|
1113
|
+
isHolding = false;
|
|
1114
|
+
spaceConsumed = false;
|
|
1115
|
+
spaceDownTime = null;
|
|
1116
|
+
}
|
|
1117
|
+
});
|
|
1118
|
+
} else {
|
|
1119
|
+
spaceDownTime = null;
|
|
1120
|
+
spaceConsumed = false;
|
|
1121
|
+
}
|
|
1122
|
+
}, HOLD_THRESHOLD_MS);
|
|
1123
|
+
|
|
1124
|
+
return { consume: true }; // consume now — we'll insert space on early release
|
|
1125
|
+
}
|
|
1126
|
+
|
|
1127
|
+
if (isHolding || spaceConsumed) return { consume: true };
|
|
1128
|
+
return undefined;
|
|
1129
|
+
}
|
|
1130
|
+
|
|
1131
|
+
// ── Any other key while holding space (pre-threshold) → cancel hold, insert space ──
|
|
1132
|
+
if (spaceDownTime && !spaceConsumed && !matchesKey(data, "space")) {
|
|
1133
|
+
clearHoldTimer();
|
|
1134
|
+
// Insert the space that was consumed during hold detection
|
|
1135
|
+
if (ctx?.hasUI) {
|
|
1136
|
+
ctx.ui.setEditorText((ctx.ui.getEditorText() || "") + " ");
|
|
1137
|
+
hideHoldHintWidget();
|
|
1138
|
+
}
|
|
1139
|
+
spaceDownTime = null;
|
|
1140
|
+
spaceConsumed = false;
|
|
1141
|
+
// Don't consume this key — let it through
|
|
495
1142
|
return undefined;
|
|
496
1143
|
}
|
|
497
1144
|
|
|
498
|
-
//
|
|
1145
|
+
// ── Ctrl+Shift+B handling (BTW voice) — direct toggle, no hold threshold ──
|
|
499
1146
|
if (matchesKey(data, "ctrl+shift+b")) {
|
|
500
1147
|
if (isKeyRelease(data)) {
|
|
501
|
-
|
|
1148
|
+
kittyReleaseDetected = true;
|
|
1149
|
+
if (isHolding && voiceState === "recording") {
|
|
502
1150
|
isHolding = false;
|
|
503
1151
|
stopVoiceRecording("btw");
|
|
504
1152
|
return { consume: true };
|
|
@@ -511,6 +1159,13 @@ export default function (pi: ExtensionAPI) {
|
|
|
511
1159
|
return undefined;
|
|
512
1160
|
}
|
|
513
1161
|
|
|
1162
|
+
// Toggle: stop if recording
|
|
1163
|
+
if (voiceState === "recording") {
|
|
1164
|
+
isHolding = false;
|
|
1165
|
+
stopVoiceRecording("btw");
|
|
1166
|
+
return { consume: true };
|
|
1167
|
+
}
|
|
1168
|
+
|
|
514
1169
|
if (voiceState === "idle" && !isHolding) {
|
|
515
1170
|
isHolding = true;
|
|
516
1171
|
startVoiceRecording("btw").then((ok) => {
|
|
@@ -523,12 +1178,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
523
1178
|
return undefined;
|
|
524
1179
|
}
|
|
525
1180
|
|
|
526
|
-
// Any other key while holding = cancel
|
|
527
|
-
if (isHolding && voiceState === "recording") {
|
|
528
|
-
// Don't cancel on modifier-only events
|
|
529
|
-
return undefined;
|
|
530
|
-
}
|
|
531
|
-
|
|
532
1181
|
return undefined;
|
|
533
1182
|
});
|
|
534
1183
|
}
|
|
@@ -536,7 +1185,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
536
1185
|
// ─── BTW: Side Conversations ─────────────────────────────────────────────
|
|
537
1186
|
|
|
538
1187
|
function buildBtwContext(): string {
|
|
539
|
-
// Build context from main session + btw thread
|
|
540
1188
|
const systemPrompt = ctx?.getSystemPrompt() ?? "";
|
|
541
1189
|
let btwContext = "You are a helpful side-channel assistant. ";
|
|
542
1190
|
btwContext += "The user is having a parallel conversation while their main Pi agent works. ";
|
|
@@ -570,7 +1218,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
570
1218
|
"",
|
|
571
1219
|
];
|
|
572
1220
|
|
|
573
|
-
// Show last exchange
|
|
574
1221
|
lines.push(` Q: ${last.question.slice(0, 100)}${last.question.length > 100 ? "..." : ""}`);
|
|
575
1222
|
const answerLines = last.answer.split("\n");
|
|
576
1223
|
for (const line of answerLines.slice(0, 8)) {
|
|
@@ -589,7 +1236,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
589
1236
|
|
|
590
1237
|
btwWidgetVisible = true;
|
|
591
1238
|
|
|
592
|
-
// Show thinking state
|
|
593
1239
|
ctx.ui.setWidget("btw", [
|
|
594
1240
|
" BTW",
|
|
595
1241
|
"",
|
|
@@ -598,10 +1244,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
598
1244
|
" Thinking...",
|
|
599
1245
|
], { placement: "aboveEditor" });
|
|
600
1246
|
|
|
601
|
-
// Build context for LLM
|
|
602
1247
|
const btwContext = buildBtwContext();
|
|
603
1248
|
|
|
604
|
-
// Use the model registry to get current model
|
|
605
1249
|
const model = ctx.model;
|
|
606
1250
|
if (!model) {
|
|
607
1251
|
const exchange: BtwExchange = {
|
|
@@ -616,7 +1260,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
616
1260
|
}
|
|
617
1261
|
|
|
618
1262
|
try {
|
|
619
|
-
// Stream the response
|
|
620
1263
|
let answer = "";
|
|
621
1264
|
const eventStream = streamSimple(model, {
|
|
622
1265
|
systemPrompt: btwContext,
|
|
@@ -633,7 +1276,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
633
1276
|
break;
|
|
634
1277
|
}
|
|
635
1278
|
|
|
636
|
-
// Update widget with streaming response
|
|
637
1279
|
const displayLines: string[] = [
|
|
638
1280
|
` BTW`,
|
|
639
1281
|
"",
|
|
@@ -657,7 +1299,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
657
1299
|
pi.appendEntry("btw", exchange);
|
|
658
1300
|
updateBtwWidget();
|
|
659
1301
|
} catch (err: any) {
|
|
660
|
-
// Fallback: send as a follow-up message to the main agent
|
|
661
1302
|
const exchange: BtwExchange = {
|
|
662
1303
|
question: message,
|
|
663
1304
|
answer: `(BTW streaming failed: ${err.message}. Falling back to sendUserMessage.)`,
|
|
@@ -667,7 +1308,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
667
1308
|
pi.appendEntry("btw", exchange);
|
|
668
1309
|
updateBtwWidget();
|
|
669
1310
|
|
|
670
|
-
// Use sendUserMessage as alternative
|
|
671
1311
|
pi.sendUserMessage(
|
|
672
1312
|
`[BTW question]: ${message}`,
|
|
673
1313
|
{ deliverAs: "followUp" },
|
|
@@ -677,7 +1317,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
677
1317
|
|
|
678
1318
|
// ─── Shortcuts ───────────────────────────────────────────────────────────
|
|
679
1319
|
|
|
680
|
-
// Ctrl+Shift+V = toggle voice (fallback for non-Kitty terminals)
|
|
681
1320
|
pi.registerShortcut("ctrl+shift+v", {
|
|
682
1321
|
description: "Toggle voice recording (start/stop)",
|
|
683
1322
|
handler: async (handlerCtx) => {
|
|
@@ -705,12 +1344,42 @@ export default function (pi: ExtensionAPI) {
|
|
|
705
1344
|
configSource = loaded.source;
|
|
706
1345
|
updateSocketPath(config, currentCwd);
|
|
707
1346
|
|
|
708
|
-
//
|
|
709
|
-
//
|
|
1347
|
+
// Auto-capture DEEPGRAM_API_KEY from env into config if not already stored.
|
|
1348
|
+
// This ensures streaming works even when Pi is launched from a context
|
|
1349
|
+
// that doesn't source .zshrc (GUI app, tmux, etc.)
|
|
1350
|
+
if (process.env.DEEPGRAM_API_KEY && !config.deepgramApiKey) {
|
|
1351
|
+
config.deepgramApiKey = process.env.DEEPGRAM_API_KEY;
|
|
1352
|
+
if (configSource !== "default") {
|
|
1353
|
+
saveConfig(config, config.scope, currentCwd);
|
|
1354
|
+
}
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
// Also try to load DEEPGRAM_API_KEY from shell if not in process.env and not in config
|
|
1358
|
+
if (!resolveDeepgramApiKey(config) && config.backend === "deepgram") {
|
|
1359
|
+
try {
|
|
1360
|
+
const result = spawnSync("zsh", ["-ic", "echo $DEEPGRAM_API_KEY"], {
|
|
1361
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
1362
|
+
timeout: 3000,
|
|
1363
|
+
env: { ...process.env, HOME: os.homedir() },
|
|
1364
|
+
});
|
|
1365
|
+
const shellKey = result.stdout?.toString().trim();
|
|
1366
|
+
if (shellKey && shellKey.length > 5) {
|
|
1367
|
+
config.deepgramApiKey = shellKey;
|
|
1368
|
+
process.env.DEEPGRAM_API_KEY = shellKey; // Also set for child processes
|
|
1369
|
+
if (configSource !== "default") {
|
|
1370
|
+
saveConfig(config, config.scope, currentCwd);
|
|
1371
|
+
}
|
|
1372
|
+
}
|
|
1373
|
+
} catch {}
|
|
1374
|
+
}
|
|
1375
|
+
|
|
710
1376
|
if (config.enabled && config.onboarding.completed) {
|
|
711
1377
|
updateVoiceStatus();
|
|
712
1378
|
setupHoldToTalk();
|
|
713
|
-
|
|
1379
|
+
// Only start daemon for non-streaming backends
|
|
1380
|
+
if (!isDeepgramStreaming(config)) {
|
|
1381
|
+
ensureDaemon(config).catch(() => {});
|
|
1382
|
+
}
|
|
714
1383
|
}
|
|
715
1384
|
});
|
|
716
1385
|
|
|
@@ -764,8 +1433,11 @@ export default function (pi: ExtensionAPI) {
|
|
|
764
1433
|
config.enabled = true;
|
|
765
1434
|
updateVoiceStatus();
|
|
766
1435
|
setupHoldToTalk();
|
|
767
|
-
|
|
768
|
-
|
|
1436
|
+
if (!isDeepgramStreaming(config)) {
|
|
1437
|
+
ensureDaemon(config).catch(() => {});
|
|
1438
|
+
}
|
|
1439
|
+
const mode = isDeepgramStreaming(config) ? "Deepgram streaming" : config.backend;
|
|
1440
|
+
cmdCtx.ui.notify(`Voice enabled (${mode}).\n Hold SPACE (empty editor) → release to transcribe\n Ctrl+Shift+V → toggle recording on/off\n Live transcription shown while speaking`, "info");
|
|
769
1441
|
return;
|
|
770
1442
|
}
|
|
771
1443
|
|
|
@@ -779,7 +1451,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
779
1451
|
}
|
|
780
1452
|
|
|
781
1453
|
if (sub === "stop") {
|
|
782
|
-
// Emergency stop — cancel any active recording
|
|
783
1454
|
if (voiceState === "recording") {
|
|
784
1455
|
isHolding = false;
|
|
785
1456
|
await stopVoiceRecording("editor");
|
|
@@ -793,6 +1464,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
793
1464
|
if (sub === "test") {
|
|
794
1465
|
cmdCtx.ui.notify("Testing voice setup...", "info");
|
|
795
1466
|
const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
|
|
1467
|
+
const dgKey = resolveDeepgramApiKey(config);
|
|
1468
|
+
const streaming = isDeepgramStreaming(config);
|
|
796
1469
|
const daemonUp = await isDaemonRunning();
|
|
797
1470
|
const provisioningPlan = buildProvisioningPlan(config, diagnostics);
|
|
798
1471
|
const selectedBackend = diagnostics.backends.find((backend) => backend.name === config.backend);
|
|
@@ -805,6 +1478,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
805
1478
|
` model: ${config.model}`,
|
|
806
1479
|
` model status: ${modelReadiness}`,
|
|
807
1480
|
` language: ${config.language}`,
|
|
1481
|
+
` streaming: ${streaming ? "YES (Deepgram WS)" : "NO (batch)"}`,
|
|
1482
|
+
` DEEPGRAM_API_KEY: ${dgKey ? "set (" + dgKey.slice(0, 8) + "...)" : "NOT SET"}`,
|
|
808
1483
|
` onboarding: ${config.onboarding.completed ? "complete" : "incomplete"}`,
|
|
809
1484
|
` python3: ${diagnostics.hasPython ? "OK" : "missing"}`,
|
|
810
1485
|
` sox/rec: ${diagnostics.hasSox ? "OK" : "missing"}`,
|
|
@@ -826,11 +1501,10 @@ export default function (pi: ExtensionAPI) {
|
|
|
826
1501
|
}
|
|
827
1502
|
}
|
|
828
1503
|
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
lines.push("
|
|
833
|
-
lines.push(...provisioningPlan.manualSteps.map((step) => ` - ${step}`));
|
|
1504
|
+
if (!dgKey && config.backend === "deepgram") {
|
|
1505
|
+
lines.push("");
|
|
1506
|
+
lines.push("⚠️ DEEPGRAM_API_KEY not set! Add to ~/.zshrc or ~/.env.secrets");
|
|
1507
|
+
lines.push(" export DEEPGRAM_API_KEY=your_key_here");
|
|
834
1508
|
}
|
|
835
1509
|
|
|
836
1510
|
cmdCtx.ui.notify(lines.join("\n"), provisioningPlan.ready ? "info" : "warning");
|
|
@@ -847,22 +1521,24 @@ export default function (pi: ExtensionAPI) {
|
|
|
847
1521
|
const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
|
|
848
1522
|
const selectedBackend = diagnostics.backends.find((backend) => backend.name === config.backend);
|
|
849
1523
|
const modelReadiness = getModelReadiness(selectedBackend, config.model);
|
|
1524
|
+
const streaming = isDeepgramStreaming(config);
|
|
850
1525
|
|
|
851
1526
|
cmdCtx.ui.notify([
|
|
852
1527
|
`Voice config:`,
|
|
853
|
-
` enabled:
|
|
854
|
-
` mode:
|
|
855
|
-
` scope:
|
|
856
|
-
` backend:
|
|
857
|
-
` model:
|
|
858
|
-
` model
|
|
859
|
-
` language:
|
|
860
|
-
`
|
|
861
|
-
`
|
|
862
|
-
`
|
|
863
|
-
`
|
|
864
|
-
`
|
|
865
|
-
`
|
|
1528
|
+
` enabled: ${config.enabled}`,
|
|
1529
|
+
` mode: ${config.mode}`,
|
|
1530
|
+
` scope: ${config.scope}`,
|
|
1531
|
+
` backend: ${config.backend}`,
|
|
1532
|
+
` model: ${config.model}`,
|
|
1533
|
+
` model stat: ${modelReadiness}`,
|
|
1534
|
+
` language: ${config.language}`,
|
|
1535
|
+
` streaming: ${streaming ? "YES (Deepgram WebSocket)" : "NO (batch)"}`,
|
|
1536
|
+
` state: ${voiceState}`,
|
|
1537
|
+
` setup: ${config.onboarding.completed ? `complete (${config.onboarding.source ?? "unknown"})` : "incomplete"}`,
|
|
1538
|
+
` socket: ${activeSocketPath}`,
|
|
1539
|
+
` daemon: ${daemonUp ? "running" : "stopped"}${daemonInfo}`,
|
|
1540
|
+
` hold-key: SPACE (editor empty) or Ctrl+Shift+V (toggle)`,
|
|
1541
|
+
` btw-key: Ctrl+Shift+B (hold to record → auto-btw)`,
|
|
866
1542
|
].join("\n"), "info");
|
|
867
1543
|
return;
|
|
868
1544
|
}
|
|
@@ -905,7 +1581,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
905
1581
|
cmdCtx.ui.notify("Voice setup cancelled.", "warning");
|
|
906
1582
|
return;
|
|
907
1583
|
}
|
|
908
|
-
|
|
909
1584
|
await finalizeAndSaveSetup(cmdCtx, result.config, result.selectedScope, result.summaryLines, "setup-command");
|
|
910
1585
|
return;
|
|
911
1586
|
}
|
|
@@ -1013,7 +1688,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
1013
1688
|
},
|
|
1014
1689
|
});
|
|
1015
1690
|
|
|
1016
|
-
// ─── Dedicated setup command
|
|
1691
|
+
// ─── Dedicated setup command ─────────────────────────────────────────────
|
|
1017
1692
|
|
|
1018
1693
|
pi.registerCommand("voice-setup", {
|
|
1019
1694
|
description: "Configure voice input — select backend, model, and language",
|
|
@@ -1081,7 +1756,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
1081
1756
|
|
|
1082
1757
|
pi.sendUserMessage(content, { deliverAs: "followUp" });
|
|
1083
1758
|
|
|
1084
|
-
// Clear after injection
|
|
1085
1759
|
btwThread = [];
|
|
1086
1760
|
btwWidgetVisible = false;
|
|
1087
1761
|
cmdCtx.ui.setWidget("btw", undefined);
|
|
@@ -1106,7 +1780,6 @@ export default function (pi: ExtensionAPI) {
|
|
|
1106
1780
|
threadText += `Q: ${ex.question}\nA: ${ex.answer}\n\n`;
|
|
1107
1781
|
}
|
|
1108
1782
|
|
|
1109
|
-
// Ask the model to summarize
|
|
1110
1783
|
const model = ctx.model;
|
|
1111
1784
|
if (!model) {
|
|
1112
1785
|
cmdCtx.ui.notify("No model available for summarization.", "error");
|