@codexstar/pi-listen 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +283 -0
- package/daemon.py +517 -0
- package/docs/API.md +273 -0
- package/docs/ARCHITECTURE.md +114 -0
- package/docs/backends.md +196 -0
- package/docs/plans/2026-03-12-pi-voice-master-plan.md +613 -0
- package/docs/plans/2026-03-12-pi-voice-model-aware-execution-plan.md +256 -0
- package/docs/plans/2026-03-12-pi-voice-onboarding-remediation-plan.md +391 -0
- package/docs/plans/pi-voice-model-aware-review.md +196 -0
- package/docs/plans/pi-voice-model-detection-qa-plan.md +226 -0
- package/docs/plans/pi-voice-model-detection-research.md +483 -0
- package/docs/plans/pi-voice-onboarding-ux-plan.md +388 -0
- package/docs/plans/pi-voice-release-validation-plan.md +386 -0
- package/docs/plans/pi-voice-remaining-implementation-plan.md +524 -0
- package/docs/plans/pi-voice-review-findings.md +227 -0
- package/docs/plans/pi-voice-technical-remediation-plan.md +613 -0
- package/docs/qa-matrix.md +69 -0
- package/docs/qa-results.md +357 -0
- package/docs/troubleshooting.md +265 -0
- package/extensions/voice/config.ts +206 -0
- package/extensions/voice/diagnostics.ts +212 -0
- package/extensions/voice/install.ts +62 -0
- package/extensions/voice/onboarding.ts +315 -0
- package/extensions/voice.ts +1149 -0
- package/package.json +48 -0
- package/scripts/setup-macos.sh +374 -0
- package/scripts/setup-windows.ps1 +271 -0
- package/transcribe.py +497 -0
|
@@ -0,0 +1,1149 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pi-voice — Voice input + BTW side conversations for Pi CLI.
|
|
3
|
+
*
|
|
4
|
+
* Features:
|
|
5
|
+
* 1. Hold-spacebar to talk (Kitty protocol key release detection)
|
|
6
|
+
* Fallback: Ctrl+Shift+V toggle for non-Kitty terminals
|
|
7
|
+
* 2. BTW side conversations (/btw <msg>, /btw:new, /btw:clear, /btw:inject, /btw:summarize)
|
|
8
|
+
* 3. Voice → BTW glue: Ctrl+Shift+B = hold to record → auto-send as /btw
|
|
9
|
+
*
|
|
10
|
+
* Records audio via SoX, transcribes via persistent daemon (daemon.py) or fallback subprocess.
|
|
11
|
+
* STT backends: faster-whisper, moonshine, whisper.cpp, deepgram, parakeet.
|
|
12
|
+
*
|
|
13
|
+
* Config in ~/.pi/agent/settings.json or <project>/.pi/settings.json:
|
|
14
|
+
* {
|
|
15
|
+
* "voice": {
|
|
16
|
+
* "enabled": true,
|
|
17
|
+
* "language": "en",
|
|
18
|
+
* "backend": "faster-whisper",
|
|
19
|
+
* "model": "small"
|
|
20
|
+
* }
|
|
21
|
+
* }
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import type {
	ExtensionAPI,
	ExtensionContext,
	ExtensionCommandContext,
	MessageRenderOptions,
} from "@mariozechner/pi-coding-agent";
import type { CustomMessage } from "@mariozechner/pi-coding-agent/dist/core/messages.js";
import { streamSimple } from "@mariozechner/pi-ai";
import type { AssistantMessageEvent } from "@mariozechner/pi-ai";
import { isKeyRelease, isKeyRepeat, matchesKey } from "@mariozechner/pi-tui";
import type { Component } from "@mariozechner/pi-tui";
import type { Theme } from "@mariozechner/pi-coding-agent/dist/modes/interactive/theme/theme.js";
import { spawn, spawnSync, type ChildProcess } from "node:child_process";
import * as fs from "node:fs";
import * as net from "node:net";
import * as os from "node:os";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
import {
	DEFAULT_CONFIG,
	getSocketPath,
	loadConfigWithSource,
	needsOnboarding,
	saveConfig,
	type VoiceConfig,
	type VoiceSettingsScope,
} from "./voice/config";
import { getModelReadiness, recommendVoiceSetup, scanEnvironment } from "./voice/diagnostics";
import { finalizeOnboardingConfig, promptFirstRunOnboarding, runVoiceOnboarding } from "./voice/onboarding";
import { buildProvisioningPlan } from "./voice/install";
|
|
53
|
+
|
|
54
|
+
// ─── Types ───────────────────────────────────────────────────────────────────
|
|
55
|
+
|
|
56
|
+
/** Phase of the voice pipeline: waiting, capturing audio, or running speech-to-text. */
type VoiceState = "idle" | "recording" | "transcribing";

/** One question/answer pair in the BTW side-conversation thread. */
interface BtwExchange {
	/** Text the user asked. */
	question: string;
	/** Optional reasoning text (not populated by the code in this part of the file). */
	thinking?: string;
	/** Reply text, or a parenthesised placeholder when no reply could be produced. */
	answer: string;
	/** Name of the model that produced the answer, when one was used. */
	model?: string;
	/** Creation time as returned by `Date.now()`. */
	timestamp: number;
}
|
|
65
|
+
|
|
66
|
+
// ─── Constants ───────────────────────────────────────────────────────────────
|
|
67
|
+
|
|
68
|
+
/** Sample rate handed to the recorder via its `-r` flag. */
const SAMPLE_RATE = 16000;

/**
 * Directory that contains this module.
 *
 * `fileURLToPath` is used rather than `new URL(...).pathname` because the raw
 * pathname stays percent-encoded (a space becomes `%20`) and keeps a leading
 * slash in front of Windows drive letters, neither of which is a usable
 * filesystem path.
 */
const EXT_DIR = path.dirname(fileURLToPath(import.meta.url));

/** Parent of the extension directory; the Python helper scripts are resolved against it. */
const PROJECT_ROOT = path.join(EXT_DIR, "..");

/** Script launched by `ensureDaemon` to start the persistent transcription daemon. */
const DAEMON_SCRIPT = path.join(PROJECT_ROOT, "daemon.py");

/** Script run as a one-shot subprocess when the daemon cannot serve a request. */
const TRANSCRIBE_SCRIPT = path.join(PROJECT_ROOT, "transcribe.py");
|
|
73
|
+
|
|
74
|
+
/**
 * Reports whether `cmd` resolves to an executable on the current PATH.
 *
 * The lookup shells out to the platform's locator (`where` on Windows,
 * `which` everywhere else) and is capped at three seconds. Any failure to
 * run the locator counts as "not found".
 *
 * @param cmd - Bare command name to look up.
 * @returns `true` only when the locator exits with status 0.
 */
function commandExists(cmd: string): boolean {
	if (!cmd) return false;
	const locator = process.platform === "win32" ? "where" : "which";
	const probe = spawnSync(locator, [cmd], { stdio: "pipe", timeout: 3000 });
	return probe.status === 0;
}
|
|
77
|
+
|
|
78
|
+
// ─── Daemon Communication ────────────────────────────────────────────────────
|
|
79
|
+
|
|
80
|
+
// Socket path used by default for daemon requests. Seeded from DEFAULT_CONFIG
// and the process working directory; reassigned by updateSocketPath().
let activeSocketPath = (() => {
	const { scope, backend, model } = DEFAULT_CONFIG;
	return getSocketPath({ scope, cwd: process.cwd(), backend, model });
})();
|
|
86
|
+
|
|
87
|
+
/**
 * Recomputes the module-wide daemon socket path from a config and working directory.
 *
 * @param config - Supplies the scope, backend and model that select the socket.
 * @param cwd - Working directory passed through to `getSocketPath`.
 */
function updateSocketPath(config: VoiceConfig, cwd: string) {
	const { scope, backend, model } = config;
	activeSocketPath = getSocketPath({ scope, cwd, backend, model });
}
|
|
95
|
+
|
|
96
|
+
/**
 * Sends one JSON command to the transcription daemon and resolves with its reply.
 *
 * The command is written as a single newline-terminated JSON line; the first
 * non-empty line of the reply is parsed as the response. The promise never
 * rejects: every failure resolves to an object carrying an `error` string
 * ("Daemon timeout", "Daemon not running", "Daemon closed connection" or
 * "Invalid daemon response").
 *
 * @param cmd - JSON-serialisable command object.
 * @param timeout - Milliseconds to wait before giving up.
 * @param socketPath - Socket to connect to; defaults to the active socket path.
 * @returns The parsed reply object, or `{ error }` on failure.
 */
function daemonSend(
	cmd: Record<string, unknown>,
	timeout = 30000,
	socketPath = activeSocketPath,
): Promise<Record<string, unknown>> {
	return new Promise((resolve) => {
		const sock = net.createConnection(socketPath);
		// Decode on the stream so a multi-byte character split across two
		// chunks is not corrupted by per-chunk toString().
		sock.setEncoding("utf8");

		let buf = "";
		let settled = false;

		// Single exit point: guarantees the timer is cleared and the socket is
		// released no matter which event ends the exchange.
		const finish = (reply: Record<string, unknown>) => {
			if (settled) return;
			settled = true;
			clearTimeout(timer);
			sock.destroy();
			resolve(reply);
		};

		// Only plain JSON objects are valid replies; callers read properties
		// off the result without checking for null or arrays.
		const parseReply = (text: string): Record<string, unknown> => {
			try {
				const parsed: unknown = JSON.parse(text.trim());
				if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
					return parsed as Record<string, unknown>;
				}
			} catch {
				// Malformed JSON is reported below.
			}
			return { error: "Invalid daemon response" };
		};

		const timer = setTimeout(() => finish({ error: "Daemon timeout" }), timeout);

		sock.on("connect", () => {
			sock.write(JSON.stringify(cmd) + "\n");
		});

		sock.on("data", (chunk) => {
			buf += chunk.toString();
			// Consider only newline-terminated lines so trailing output after
			// the first reply line cannot break parsing.
			const completeLines = buf.split("\n").slice(0, -1);
			const firstLine = completeLines.find((line) => line.trim() !== "");
			if (firstLine !== undefined) finish(parseReply(firstLine));
		});

		sock.on("error", () => finish({ error: "Daemon not running" }));

		// The peer hung up before a full line arrived: answer now instead of
		// waiting for the timeout.
		sock.on("close", () => {
			finish(buf.trim() ? parseReply(buf) : { error: "Daemon closed connection" });
		});
	});
}
|
|
130
|
+
|
|
131
|
+
/**
 * Pings the daemon with a two-second budget.
 *
 * @param socketPath - Socket to probe; defaults to the active socket path.
 * @returns `true` only when the reply's `status` is `"ok"`.
 */
async function isDaemonRunning(socketPath = activeSocketPath): Promise<boolean> {
	const { status } = await daemonSend({ cmd: "ping" }, 2000, socketPath);
	return status === "ok";
}
|
|
135
|
+
|
|
136
|
+
/**
 * Makes sure a transcription daemon matching `config` is available.
 *
 * If a daemon already answers on the socket it is reused, and asked to load
 * the configured backend/model when those differ (a backend of "auto" accepts
 * whatever is loaded). Otherwise `daemon.py start` is spawned detached and the
 * function waits up to ten seconds for a `{"status":"started"}` line on its
 * stdout.
 *
 * @param config - Voice configuration supplying backend and model.
 * @returns `true` when a usable daemon is running, `false` otherwise.
 */
async function ensureDaemon(config: VoiceConfig): Promise<boolean> {
	// Pin the path: activeSocketPath may be reassigned while this function awaits.
	const socketPath = activeSocketPath;

	if (await isDaemonRunning(socketPath)) {
		const status = await daemonSend({ cmd: "status" }, 3000, socketPath);
		// When backend is 'auto', accept any loaded backend — the daemon already
		// resolved 'auto' to a concrete backend, so we don't need to reload.
		if (config.backend === "auto" || (status.backend === config.backend && status.model === config.model)) return true;
		const reloaded = await daemonSend({
			cmd: "load",
			backend: config.backend,
			model: config.model,
		}, 15000, socketPath);
		return !reloaded.error;
	}

	return new Promise<boolean>((resolve) => {
		const args = [DAEMON_SCRIPT, "start", "--socket", socketPath];
		if (config.backend !== "auto") {
			args.push("--backend", config.backend);
		}
		if (config.model) {
			args.push("--model", config.model);
		}

		const proc = spawn("python3", args, {
			stdio: ["pipe", "pipe", "pipe"],
			detached: true,
		});

		let settled = false;
		let pending = "";

		// Single exit point so the startup timer is always cleared and a
		// daemon that never came up is not left behind.
		const settle = (ok: boolean) => {
			if (settled) return;
			settled = true;
			clearTimeout(startupTimer);
			if (ok) {
				proc.unref();
			} else {
				try { proc.kill(); } catch {}
			}
			resolve(ok);
		};

		const announcesStart = (text: string): boolean => {
			const candidate = text.trim();
			if (!candidate) return false;
			try {
				const msg: unknown = JSON.parse(candidate);
				return typeof msg === "object" && msg !== null
					&& (msg as { status?: unknown }).status === "started";
			} catch {
				return false; // ordinary log output, not a status line
			}
		};

		proc.stdout?.on("data", (chunk: Buffer) => {
			if (settled) return; // keep draining, nothing left to detect
			pending += chunk.toString();
			// A chunk may hold several lines or only part of one, so test each
			// complete line and also the unterminated tail.
			const lines = pending.split("\n");
			pending = lines.pop() ?? "";
			if (lines.some(announcesStart) || announcesStart(pending)) settle(true);
		});

		// Drain stderr so a chatty daemon cannot block on a full pipe.
		proc.stderr?.resume();

		proc.on("error", () => settle(false));
		// 'close' fires after stdio has ended, so every stdout line has been
		// seen by the time this reports a launcher that exited without starting.
		proc.on("close", () => settle(false));

		// Timeout: if daemon doesn't start in 10s, kill orphan and fall back
		const startupTimer = setTimeout(() => settle(false), 10000);
	});
}
|
|
188
|
+
|
|
189
|
+
// ─── Audio Recording ─────────────────────────────────────────────────────────
|
|
190
|
+
|
|
191
|
+
// Handle to the in-flight `rec` child process, or null when nothing is recording.
let recProcess: ChildProcess | null = null;

/**
 * Starts capturing audio into `outPath` using the `rec` command.
 *
 * Any recorder that is still running is terminated first. The child is
 * launched with `-q -r SAMPLE_RATE -c 1 -b 16`.
 *
 * @param outPath - File the recorder writes to.
 * @returns `false` when `rec` is not on PATH, otherwise `true` once the child has been spawned.
 */
function startRecordingToFile(outPath: string): boolean {
	if (recProcess) {
		recProcess.kill("SIGTERM");
		recProcess = null;
	}

	if (!commandExists("rec")) return false;

	const proc = spawn("rec", [
		"-q", "-r", String(SAMPLE_RATE), "-c", "1", "-b", "16", outPath,
	], { stdio: ["pipe", "pipe", "pipe"] });

	// Consume stderr so the pipe never fills up.
	proc.stderr?.on("data", () => {});
	// Clear the handle only if it still refers to this child; a late error
	// from an old recorder must not drop the handle of a newer one.
	proc.on("error", () => {
		if (recProcess === proc) recProcess = null;
	});

	recProcess = proc;
	return true;
}
|
|
209
|
+
|
|
210
|
+
/**
 * Stops the current recorder and resolves once it has closed.
 *
 * The child receives SIGTERM; if it has not closed within two seconds it is
 * sent SIGKILL and the promise resolves anyway. Resolves immediately when no
 * recorder is active or the recorder has already exited.
 *
 * The timer and the handle reset are bound to the child captured on entry, so
 * a recording started shortly after this call is never killed or forgotten by
 * a stale timer.
 */
function stopRecording(): Promise<void> {
	return new Promise((resolve) => {
		const proc = recProcess;
		if (!proc) { resolve(); return; }

		const release = () => {
			if (recProcess === proc) recProcess = null;
		};

		// Already gone: nothing to signal and no 'close' event left to wait for.
		if (proc.exitCode !== null || proc.signalCode !== null) {
			release();
			resolve();
			return;
		}

		const killTimer = setTimeout(() => {
			proc.kill("SIGKILL");
			release();
			resolve();
		}, 2000);

		proc.once("close", () => {
			clearTimeout(killTimer);
			release();
			resolve();
		});

		proc.kill("SIGTERM");
	});
}
|
|
221
|
+
|
|
222
|
+
// ─── Transcription (daemon or fallback) ──────────────────────────────────────
|
|
223
|
+
|
|
224
|
+
/**
 * Transcribes an audio file, preferring the running daemon and falling back
 * to a one-shot `transcribe.py` subprocess.
 *
 * @param audioPath - Path of the recorded audio file.
 * @param config - Supplies backend, model and language.
 * @returns The daemon's or script's JSON reply; on subprocess failure an
 *          object with empty `text`, zero `duration` and an `error` message.
 */
async function transcribeAudio(
	audioPath: string,
	config: VoiceConfig,
): Promise<{ text: string; duration: number; error?: string }> {
	// Warm path: ask the daemon, and use its answer unless it reports an error.
	const daemonUp = await isDaemonRunning();
	if (daemonUp) {
		const reply = await daemonSend({
			cmd: "transcribe",
			audio: audioPath,
			backend: config.backend,
			model: config.model,
			language: config.language,
			vad: true,
		});
		if (!reply.error) {
			return reply as { text: string; duration: number };
		}
	}

	// Cold path: run the script directly. Optional flags go ahead of --language.
	const scriptArgs = [
		TRANSCRIBE_SCRIPT,
		...(config.model ? ["--model", config.model] : []),
		...(config.backend !== "auto" ? ["--backend", config.backend] : []),
		"--language",
		config.language,
		audioPath,
	];

	return new Promise((resolve) => {
		const child = spawn("python3", scriptArgs, { stdio: ["pipe", "pipe", "pipe"] });
		const outChunks: string[] = [];
		const errChunks: string[] = [];

		child.stdout?.on("data", (d: Buffer) => { outChunks.push(d.toString()); });
		child.stderr?.on("data", (d: Buffer) => { errChunks.push(d.toString()); });

		child.on("close", (code) => {
			try {
				resolve(JSON.parse(outChunks.join("").trim()));
			} catch {
				// stdout was not JSON: surface stderr, or the exit code if stderr is empty.
				resolve({ text: "", duration: 0, error: errChunks.join("") || `Exit ${code}` });
			}
		});
		child.on("error", (err) => {
			resolve({ text: "", duration: 0, error: err.message });
		});
	});
}
|
|
261
|
+
|
|
262
|
+
// ─── Extension ───────────────────────────────────────────────────────────────
|
|
263
|
+
|
|
264
|
+
export default function (pi: ExtensionAPI) {
|
|
265
|
+
let config = DEFAULT_CONFIG;
|
|
266
|
+
let configSource: VoiceSettingsScope | "default" = "default";
|
|
267
|
+
let currentCwd = process.cwd();
|
|
268
|
+
let voiceState: VoiceState = "idle";
|
|
269
|
+
let ctx: ExtensionContext | null = null;
|
|
270
|
+
let tempFile: string | null = null;
|
|
271
|
+
let recordingStart = 0;
|
|
272
|
+
let statusTimer: ReturnType<typeof setInterval> | null = null;
|
|
273
|
+
let terminalInputUnsub: (() => void) | null = null;
|
|
274
|
+
let isHolding = false;
|
|
275
|
+
|
|
276
|
+
// ─── BTW State ───────────────────────────────────────────────────────────
|
|
277
|
+
|
|
278
|
+
let btwThread: BtwExchange[] = [];
|
|
279
|
+
let btwWidgetVisible = false;
|
|
280
|
+
|
|
281
|
+
// ─── Voice UI ────────────────────────────────────────────────────────────
|
|
282
|
+
|
|
283
|
+
/**
 * Refreshes the "voice" status entry to match the current voice state.
 * Does nothing when no UI-capable context is attached.
 */
function updateVoiceStatus() {
	if (!ctx?.hasUI) return;

	if (voiceState === "recording") {
		// Whole seconds since the recording began.
		const secs = Math.round((Date.now() - recordingStart) / 1000);
		ctx.ui.setStatus("voice", `REC ${secs}s`);
		return;
	}

	if (voiceState === "transcribing") {
		ctx.ui.setStatus("voice", "STT...");
		return;
	}

	// Idle: hide the entry when voice is disabled, otherwise show the mode tag.
	if (!config.enabled) {
		ctx.ui.setStatus("voice", undefined);
		return;
	}

	let modeTag = "AUTO";
	if (!config.onboarding.completed) {
		modeTag = "SETUP";
	} else if (config.mode === "api") {
		modeTag = "API";
	} else if (config.mode === "local") {
		modeTag = "LOCAL";
	}
	ctx.ui.setStatus("voice", `MIC ${modeTag}`);
}
|
|
311
|
+
|
|
312
|
+
/**
 * Stores the new voice state and immediately redraws the status entry.
 *
 * @param newState - State to switch to.
 */
function setVoiceState(newState: VoiceState) {
	voiceState = newState;
	updateVoiceStatus();
}
|
|
316
|
+
|
|
317
|
+
/**
 * Tears down any in-progress voice activity: stops the status ticker,
 * terminates the recorder, deletes the temporary audio file, clears the hold
 * flag and returns to the idle state.
 */
function voiceCleanup() {
	if (statusTimer) {
		clearInterval(statusTimer);
		statusTimer = null;
	}

	recProcess?.kill("SIGTERM");
	recProcess = null;

	if (tempFile) {
		try {
			fs.unlinkSync(tempFile);
		} catch {
			// The file may never have been written; nothing to remove.
		}
		tempFile = null;
	}

	isHolding = false;
	setVoiceState("idle");
}
|
|
324
|
+
|
|
325
|
+
/**
 * Validates a freshly chosen configuration, makes it the active one, saves it
 * and tells the user how it went.
 *
 * The setup counts as validated when the provisioning plan is ready and, if
 * voice is enabled, the daemon could also be brought up.
 *
 * @param uiCtx - Context whose UI receives the final notification.
 * @param nextConfig - Configuration produced by onboarding.
 * @param selectedScope - Settings scope the configuration is saved under.
 * @param summaryLines - Lines shown beneath the status header.
 * @param source - Whether this came from the first-run prompt or the setup command.
 */
async function finalizeAndSaveSetup(
	uiCtx: ExtensionContext | ExtensionCommandContext,
	nextConfig: VoiceConfig,
	selectedScope: VoiceSettingsScope,
	summaryLines: string[],
	source: "first-run" | "setup-command",
) {
	// Point daemon calls at the candidate config's socket before validating it.
	updateSocketPath(nextConfig, currentCwd);

	const plan = buildProvisioningPlan(nextConfig, scanEnvironment(TRANSCRIBE_SCRIPT));
	const validated = plan.ready && nextConfig.enabled
		? await ensureDaemon(nextConfig)
		: plan.ready;

	config = finalizeOnboardingConfig(nextConfig, { validated, source });
	configSource = selectedScope;
	updateSocketPath(config, currentCwd);
	const savedPath = saveConfig(config, selectedScope, currentCwd);

	const header = validated
		? "Voice setup complete."
		: "Voice setup saved, but action is still required.";
	const severity = validated ? "info" : "warning";
	const body = [header, ...summaryLines, "", `Saved to ${savedPath}`].join("\n");
	uiCtx.ui.notify(body, severity);
}
|
|
352
|
+
|
|
353
|
+
// ─── Voice: Start / Stop / Transcribe ────────────────────────────────────
|
|
354
|
+
|
|
355
|
+
/**
 * Begins a voice recording into a fresh temporary WAV file.
 *
 * Only starts from the idle state with a context attached. On success the
 * state becomes "recording", a once-per-second status refresh is scheduled
 * and, when a UI is present, a recording widget is shown above the editor.
 *
 * @param target - Where the transcript will go; only affects the widget text here.
 * @returns `true` when recording started, `false` otherwise.
 */
async function startVoiceRecording(target: "editor" | "btw" = "editor"): Promise<boolean> {
	if (voiceState !== "idle" || !ctx) return false;

	const outFile = path.join(os.tmpdir(), `pi-voice-${Date.now()}.wav`);
	if (!startRecordingToFile(outFile)) {
		// Nothing was recorded, so do not keep a path to a file that does not exist.
		tempFile = null;
		ctx.ui.notify("Voice requires SoX. Install: brew install sox", "error");
		return false;
	}
	tempFile = outFile;

	recordingStart = Date.now();
	setVoiceState("recording");

	// Never leave a previous ticker running alongside the new one.
	if (statusTimer) clearInterval(statusTimer);
	statusTimer = setInterval(() => {
		if (voiceState === "recording") updateVoiceStatus();
	}, 1000);

	if (ctx.hasUI) {
		ctx.ui.setWidget("voice-recording", [
			target === "btw"
				? " BTW Recording... release to send"
				: " Recording... release to transcribe",
		], { placement: "aboveEditor" });
	}
	return true;
}
|
|
379
|
+
|
|
380
|
+
/**
 * Ends the current recording, transcribes it and delivers the transcript.
 *
 * With target "btw" the transcript is passed to `handleBtw`; otherwise it is
 * appended to the editor text. The temporary audio file is always removed and
 * the state always leaves "transcribing", even when a step throws.
 *
 * @param target - Destination of the transcript.
 */
async function stopVoiceRecording(target: "editor" | "btw" = "editor") {
	if (voiceState !== "recording" || !ctx) return;
	if (statusTimer) { clearInterval(statusTimer); statusTimer = null; }

	const elapsed = ((Date.now() - recordingStart) / 1000).toFixed(1);
	const audioFile = tempFile; // capture before cleanup can null it

	// Best-effort removal of this recording's file; safe to call repeatedly.
	const discardAudio = () => {
		if (!audioFile) return;
		try { fs.unlinkSync(audioFile); } catch {}
		if (tempFile === audioFile) tempFile = null;
	};

	setVoiceState("transcribing");
	ctx.ui.setWidget("voice-recording", undefined);

	try {
		await stopRecording();

		if (!audioFile) {
			ctx.ui.notify("No audio recorded.", "warning");
			return;
		}

		// A single stat covers both "missing" and "too small", and cannot
		// throw out of this function if the file vanishes in between.
		let size: number;
		try {
			size = fs.statSync(audioFile).size;
		} catch {
			ctx.ui.notify("No audio recorded.", "warning");
			return;
		}
		if (size < 1000) {
			ctx.ui.notify("Recording too short.", "warning");
			return;
		}

		// Ensure daemon is up before transcribing — await so the warm path
		// is available for this request instead of falling through to the
		// cold subprocess fallback.
		await ensureDaemon(config).catch(() => {});

		const result = await transcribeAudio(audioFile, config);
		discardAudio();

		if (result.error) {
			ctx.ui.notify(`STT error: ${result.error}`, "error");
			return;
		}

		const transcript = (result.text || "").trim();
		if (!transcript) {
			ctx.ui.notify("No speech detected.", "warning");
			return;
		}

		if (target === "btw") {
			await handleBtw(transcript);
		} else if (ctx.hasUI) {
			// Inject into editor
			const existing = ctx.ui.getEditorText();
			ctx.ui.setEditorText(existing ? existing + " " + transcript : transcript);
			ctx.ui.notify(
				`STT (${elapsed}s): ${transcript.slice(0, 80)}${transcript.length > 80 ? "..." : ""}`,
				"info",
			);
		}
	} finally {
		discardAudio();
		// Only leave "transcribing". If a cleanup or a new recording changed
		// the state in the meantime, that newer state must win. The cast
		// undoes the compiler's narrowing from the guard at the top.
		if ((voiceState as VoiceState) === "transcribing") setVoiceState("idle");
	}
}
|
|
444
|
+
|
|
445
|
+
// ─── Hold-to-talk via Kitty protocol ─────────────────────────────────────
|
|
446
|
+
|
|
447
|
+
/**
 * Installs the terminal-input listener that implements hold-to-talk.
 *
 * - SPACE (only while the editor is empty): hold to record, release to
 *   transcribe into the editor.
 * - Ctrl+Shift+B: hold to record, release to send the transcript as a BTW
 *   question.
 *
 * Any previously installed listener is removed first. Does nothing without a
 * UI-capable context. Other keys are passed through untouched.
 */
function setupHoldToTalk() {
	if (!ctx?.hasUI) return;

	// Remove previous listener
	if (terminalInputUnsub) { terminalInputUnsub(); terminalInputUnsub = null; }

	// Recording runs in the background of a key event, so failures are
	// reported here instead of surfacing as unhandled promise rejections.
	const reportFailure = (err: unknown) => {
		const reason = err instanceof Error ? err.message : String(err);
		try {
			if (ctx?.hasUI) ctx.ui.notify(`Voice error: ${reason}`, "error");
		} catch {
			// Reporting is best-effort.
		}
	};

	// Shared press / repeat / release handling for both hold keys.
	const handleHoldKey = (data: string, target: "editor" | "btw"): { consume: true } | undefined => {
		if (isKeyRelease(data)) {
			if (!isHolding) return undefined;
			isHolding = false;
			stopVoiceRecording(target).catch(reportFailure);
			return { consume: true };
		}

		if (isKeyRepeat(data)) {
			return isHolding ? { consume: true } : undefined;
		}

		// Key press — start recording
		if (voiceState === "idle" && !isHolding) {
			isHolding = true;
			startVoiceRecording(target)
				.then((ok) => {
					if (!ok) isHolding = false;
				})
				.catch((err: unknown) => {
					isHolding = false;
					reportFailure(err);
				});
			return { consume: true };
		}

		return isHolding ? { consume: true } : undefined;
	};

	terminalInputUnsub = ctx.ui.onTerminalInput((data: string) => {
		if (!config.enabled) return undefined;

		// Hold SPACE → talk → release → transcribe to editor
		if (matchesKey(data, "space")) {
			// Only activate when editor is empty (avoid conflicting with typing)
			const editorText = ctx?.hasUI ? ctx.ui.getEditorText() : "";
			if (editorText && editorText.trim().length > 0) return undefined;
			return handleHoldKey(data, "editor");
		}

		// Hold Ctrl+Shift+B → talk → release → auto-btw
		if (matchesKey(data, "ctrl+shift+b")) {
			return handleHoldKey(data, "btw");
		}

		// Every other key passes through, including while a recording is held.
		return undefined;
	});
}
|
|
526
|
+
|
|
527
|
+
// ─── BTW: Side Conversations ─────────────────────────────────────────────
|
|
528
|
+
|
|
529
|
+
/**
 * Assembles the system prompt for a BTW side question.
 *
 * The prompt is a fixed preamble, then up to the first 2000 characters of the
 * main session's system prompt (when there is one), then the prior BTW
 * exchanges as User/Assistant pairs.
 *
 * @returns The concatenated prompt text.
 */
function buildBtwContext(): string {
	const mainPrompt = ctx?.getSystemPrompt() ?? "";

	const sections: string[] = [
		"You are a helpful side-channel assistant. ",
		"The user is having a parallel conversation while their main Pi agent works. ",
		"Be concise and direct. This is a quick side question, not the main task.\n\n",
	];

	if (mainPrompt) {
		sections.push(`Main session context (for reference):\n${mainPrompt.slice(0, 2000)}\n\n`);
	}

	if (btwThread.length > 0) {
		sections.push("Prior btw thread:\n");
		sections.push(
			...btwThread.map((ex) => `User: ${ex.question}\nAssistant: ${ex.answer}\n\n`),
		);
	}

	return sections.join("");
}
|
|
549
|
+
|
|
550
|
+
/**
 * Redraws the BTW widget above the editor from the latest exchange.
 *
 * The widget is removed when it is flagged hidden or the thread is empty.
 * Otherwise it shows the exchange count, the last question (first 100
 * characters) and up to eight lines of the last answer.
 */
function updateBtwWidget() {
	if (!ctx?.hasUI) return;

	const latest = btwWidgetVisible ? btwThread[btwThread.length - 1] : undefined;
	if (!latest) {
		ctx.ui.setWidget("btw", undefined);
		return;
	}

	const count = btwThread.length;
	const plural = count > 1 ? "s" : "";
	const questionTail = latest.question.length > 100 ? "..." : "";
	const answerLines = latest.answer.split("\n");

	const lines: string[] = [
		` BTW (${count} exchange${plural})`,
		"",
		// Show last exchange
		` Q: ${latest.question.slice(0, 100)}${questionTail}`,
		...answerLines.slice(0, 8).map((line) => ` ${line}`),
	];
	if (answerLines.length > 8) lines.push(" ...");
	lines.push("", " /btw:clear to dismiss | /btw:inject to send to agent");

	ctx.ui.setWidget("btw", lines, { placement: "aboveEditor" });
}
|
|
577
|
+
|
|
578
|
+
/**
 * Answers a BTW side question with the session's current model.
 *
 * Shows a "Thinking..." widget, streams the reply into the widget as it
 * arrives, then records the exchange (thread, session entry, widget). If no
 * model is available a placeholder answer is recorded. If streaming throws,
 * the failure is recorded and the question is forwarded to the main agent as
 * a follow-up message.
 *
 * @param message - The question text.
 */
async function handleBtw(message: string) {
	if (!ctx) return;

	btwWidgetVisible = true;

	// Every outcome is stored the same way: thread, session entry, widget.
	const recordExchange = (exchange: BtwExchange) => {
		btwThread.push(exchange);
		pi.appendEntry("btw", exchange);
		updateBtwWidget();
	};

	// Show thinking state
	ctx.ui.setWidget("btw", [
		" BTW",
		"",
		` Q: ${message.slice(0, 100)}${message.length > 100 ? "..." : ""}`,
		"",
		" Thinking...",
	], { placement: "aboveEditor" });

	// Build context for LLM
	const btwContext = buildBtwContext();

	const model = ctx.model;
	if (!model) {
		recordExchange({
			question: message,
			answer: "(No model available — is the session active?)",
			timestamp: Date.now(),
		});
		return;
	}

	try {
		// Stream the response
		let answer = "";
		const eventStream = streamSimple(model, {
			systemPrompt: btwContext,
			messages: [{ role: "user" as const, content: message, timestamp: Date.now() }],
		});

		for await (const event of eventStream) {
			if (event.type === "text_delta") {
				answer += event.delta;
			} else if (event.type === "error") {
				answer = "(Error during BTW streaming)";
				break;
			} else if (event.type === "done") {
				break;
			}

			// Update widget with streaming response (first ten lines only)
			const displayLines: string[] = [
				` BTW`,
				"",
				` Q: ${message.slice(0, 80)}`,
				"",
				...answer.split("\n").slice(0, 10).map((line) => ` ${line}`),
			];
			ctx.ui.setWidget("btw", displayLines, { placement: "aboveEditor" });
		}

		recordExchange({
			question: message,
			answer: answer.trim() || "(empty response)",
			model: model.name,
			timestamp: Date.now(),
		});
	} catch (err: unknown) {
		// Anything can be thrown, so only read `.message` from real Errors.
		const reason = err instanceof Error ? err.message : String(err);

		// Fallback: send as a follow-up message to the main agent
		recordExchange({
			question: message,
			answer: `(BTW streaming failed: ${reason}. Falling back to sendUserMessage.)`,
			timestamp: Date.now(),
		});

		// Use sendUserMessage as alternative
		pi.sendUserMessage(
			`[BTW question]: ${message}`,
			{ deliverAs: "followUp" },
		);
	}
}
|
|
668
|
+
|
|
669
|
+
// ─── Shortcuts ───────────────────────────────────────────────────────────
|
|
670
|
+
|
|
671
|
+
// Ctrl+Shift+V toggles recording: the fallback for terminals that cannot
// report key releases (non-Kitty).
pi.registerShortcut("ctrl+shift+v", {
	description: "Toggle voice recording (start/stop)",
	handler: async (handlerCtx) => {
		ctx = handlerCtx;

		if (!config.enabled) {
			handlerCtx.ui.notify("Voice disabled. Use /voice on", "warning");
			return;
		}

		switch (voiceState) {
			case "idle":
				await startVoiceRecording("editor");
				break;
			case "recording":
				await stopVoiceRecording("editor");
				break;
			default:
				// Transcription in progress: the shortcut is ignored.
				break;
		}
	},
});
|
|
687
|
+
|
|
688
|
+
// ─── Lifecycle ───────────────────────────────────────────────────────────
|
|
689
|
+
|
|
690
|
+
// On session start: load the config for the session's directory, wire up the
// UI, then either run first-time onboarding or warm the daemon.
pi.on("session_start", async (_event, startCtx) => {
	ctx = startCtx;
	currentCwd = startCtx.cwd;

	const loaded = loadConfigWithSource(startCtx.cwd);
	config = loaded.config;
	configSource = loaded.source;

	updateSocketPath(config, currentCwd);
	updateVoiceStatus();
	setupHoldToTalk();

	const shouldOnboard = needsOnboarding(config, configSource) && startCtx.hasUI;
	if (!shouldOnboard) {
		// Start daemon in background only after the user has completed onboarding.
		if (config.enabled && config.onboarding.completed && !needsOnboarding(config, configSource)) {
			ensureDaemon(config).catch(() => {});
		}
		return;
	}

	const decision = await promptFirstRunOnboarding(startCtx);
	const result = decision.action === "start"
		? await runVoiceOnboarding(startCtx, config, scanEnvironment(TRANSCRIBE_SCRIPT))
		: null;

	if (result) {
		await finalizeAndSaveSetup(startCtx, result.config, result.selectedScope, result.summaryLines, "first-run");
		return;
	}

	// Declined the prompt, or left the wizard without a result: remember the skip.
	config = {
		...config,
		onboarding: {
			...config.onboarding,
			skippedAt: new Date().toISOString(),
		},
	};
	saveConfig(config, config.scope, currentCwd);
	startCtx.ui.notify("Voice setup skipped. Run /voice setup anytime.", "warning");
});
|
|
737
|
+
|
|
738
|
+
// On shutdown: stop any voice activity and detach the terminal-input listener.
pi.on("session_shutdown", async () => {
	voiceCleanup();
	terminalInputUnsub?.();
	terminalInputUnsub = null;
});
|
|
742
|
+
|
|
743
|
+
// On session switch: adopt the new context and its config, drop the BTW
// thread and widget, and reinstall hold-to-talk for the new UI.
pi.on("session_switch", async (_event, switchCtx) => {
	ctx = switchCtx;
	currentCwd = switchCtx.cwd;

	const { config: nextConfig, source: nextSource } = loadConfigWithSource(switchCtx.cwd);
	config = nextConfig;
	configSource = nextSource;
	updateSocketPath(config, currentCwd);

	btwThread = [];
	btwWidgetVisible = false;
	if (ctx.hasUI) {
		ctx.ui.setWidget("btw", undefined);
	}

	setupHoldToTalk();
	updateVoiceStatus();
});
|
|
756
|
+
|
|
757
|
+
// ─── BTW Message Renderer ────────────────────────────────────────────────
|
|
758
|
+
|
|
759
|
+
// Renders a stored BTW exchange as two lines: the question clipped to the
// available width and the answer clipped to 200 characters.
pi.registerMessageRenderer("btw", (message: CustomMessage<BtwExchange>, _options: MessageRenderOptions, _theme: Theme): Component | undefined => {
	const ex = message.details;
	if (!ex) return undefined;

	// Cuts `text` to `limit` characters and marks the cut with an ellipsis.
	const clip = (text: string, limit: number): string =>
		text.length > limit ? text.slice(0, limit) + "…" : text;

	return {
		invalidate() {},
		render(width: number): string[] {
			// Leave four columns of margin, but never clip below 20 characters.
			const maxW = Math.max(width - 4, 20);
			return [
				`BTW Q: ${clip(ex.question, maxW)}`,
				`BTW A: ${clip(ex.answer, 200)}`,
			];
		},
	};
});
|
|
775
|
+
|
|
776
|
+
// ─── /voice command ──────────────────────────────────────────────────────
|
|
777
|
+
|
|
778
|
+
/**
 * `/voice` command. The trimmed, lower-cased argument selects a subcommand:
 *   on / off              enable or disable voice input
 *   test                  report environment checks and attempt a short `rec` capture
 *   info                  show the active configuration and daemon state
 *   daemon | daemon start start the STT daemon
 *   daemon stop           ask the daemon to shut down
 *   daemon status         show the daemon's status reply
 *   setup | reconfigure   run onboarding and save the result
 *   backends              list backends reported by the transcribe script
 *   doctor                compare the current setup with a recommended one
 * Any other argument (including none) falls through to toggling `config.enabled`.
 */
pi.registerCommand("voice", {
  description: "Voice input: /voice [on|off|test|info|setup|reconfigure|doctor|backends|daemon]",
  handler: async (args, cmdCtx) => {
    ctx = cmdCtx;
    const sub = (args || "").trim().toLowerCase();

    if (sub === "on") {
      config.enabled = true;
      updateVoiceStatus();
      setupHoldToTalk();
      // Fire-and-forget: a daemon start failure is deliberately ignored here.
      ensureDaemon(config).catch(() => {});
      cmdCtx.ui.notify("Voice enabled. Hold SPACE (empty editor) to record.", "info");
      return;
    }

    if (sub === "off") {
      config.enabled = false;
      voiceCleanup();
      if (terminalInputUnsub) { terminalInputUnsub(); terminalInputUnsub = null; }
      ctx.ui.setStatus("voice", undefined);
      cmdCtx.ui.notify("Voice disabled.", "info");
      return;
    }

    if (sub === "test") {
      cmdCtx.ui.notify("Testing voice setup...", "info");
      const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
      const daemonUp = await isDaemonRunning();
      const provisioningPlan = buildProvisioningPlan(config, diagnostics);
      const selectedBackend = diagnostics.backends.find((backend) => backend.name === config.backend);
      const modelReadiness = getModelReadiness(selectedBackend, config.model);

      const lines = [
        "Voice test:",
        ` mode: ${config.mode}`,
        ` backend: ${config.backend}`,
        ` model: ${config.model}`,
        ` model status: ${modelReadiness}`,
        ` language: ${config.language}`,
        ` onboarding: ${config.onboarding.completed ? "complete" : "incomplete"}`,
        ` python3: ${diagnostics.hasPython ? "OK" : "missing"}`,
        ` sox/rec: ${diagnostics.hasSox ? "OK" : "missing"}`,
        ` daemon: ${daemonUp ? "running" : "not running"}`,
      ];

      if (diagnostics.hasSox) {
        const testFile = path.join(os.tmpdir(), "pi-voice-test.wav");
        // Remove any leftover from an earlier run so a stale file cannot be
        // mistaken for a fresh sample when this capture produces nothing.
        try { fs.unlinkSync(testFile); } catch {}

        const testProc = spawn("rec", ["-q", "-r", "16000", "-c", "1", "-b", "16", "-d", "1", testFile], { stdio: "pipe" });
        await new Promise<void>((resolve) => {
          // Safety net: stop waiting (and kill the recorder) after two seconds.
          const killTimer = setTimeout(() => { testProc.kill(); resolve(); }, 2000);
          const finish = () => { clearTimeout(killTimer); resolve(); };
          testProc.on("close", finish);
          // A ChildProcess "error" event with no listener is thrown; handle it
          // so a failed spawn is reported below as "missing audio" instead.
          testProc.on("error", finish);
        });

        if (fs.existsSync(testFile)) {
          lines.push(` mic sample: OK (${fs.statSync(testFile).size} bytes)`);
          try { fs.unlinkSync(testFile); } catch {}
        } else {
          lines.push(" mic sample: missing audio");
        }
      }

      lines.push("", "Suggested commands:");
      lines.push(...(provisioningPlan.commands.length > 0 ? provisioningPlan.commands.map((command) => ` - ${command}`) : [" - none"]));
      if (provisioningPlan.manualSteps.length > 0) {
        lines.push("", "Manual steps:");
        lines.push(...provisioningPlan.manualSteps.map((step) => ` - ${step}`));
      }

      cmdCtx.ui.notify(lines.join("\n"), provisioningPlan.ready ? "info" : "warning");
      return;
    }

    if (sub === "info") {
      const daemonUp = await isDaemonRunning();
      let daemonInfo = "";
      if (daemonUp) {
        const status = await daemonSend({ cmd: "status" });
        // Only show details when the reply is not an error, mirroring the
        // `daemon status` branch; otherwise the fields would print as "undefined".
        if (!status.error) {
          daemonInfo = `\n daemon: running (pid ${status.pid}, ${status.requests} requests, ${status.uptime}s uptime)`;
        }
      }
      const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
      const selectedBackend = diagnostics.backends.find((backend) => backend.name === config.backend);
      const modelReadiness = getModelReadiness(selectedBackend, config.model);

      cmdCtx.ui.notify([
        `Voice config:`,
        ` enabled: ${config.enabled}`,
        ` mode: ${config.mode}`,
        ` scope: ${config.scope}`,
        ` backend: ${config.backend}`,
        ` model: ${config.model}`,
        ` model status: ${modelReadiness}`,
        ` language: ${config.language}`,
        ` state: ${voiceState}`,
        ` setup: ${config.onboarding.completed ? `complete (${config.onboarding.source ?? "unknown"})` : "incomplete"}`,
        ` socket: ${activeSocketPath}`,
        ` daemon: ${daemonUp ? "running" : "stopped"}${daemonInfo}`,
        ` hold-key: SPACE (editor empty) or Ctrl+Shift+V (toggle)`,
        ` btw-key: Ctrl+Shift+B (hold to record → auto-btw)`,
      ].join("\n"), "info");
      return;
    }

    if (sub === "daemon" || sub === "daemon start") {
      cmdCtx.ui.notify("Starting STT daemon...", "info");
      const ok = await ensureDaemon(config);
      cmdCtx.ui.notify(ok ? "Daemon started." : "Failed to start daemon.", ok ? "info" : "error");
      return;
    }

    if (sub === "daemon stop") {
      const resp = await daemonSend({ cmd: "shutdown" });
      cmdCtx.ui.notify(resp.error ? "Daemon not running." : "Daemon stopped.", "info");
      return;
    }

    if (sub === "daemon status") {
      const resp = await daemonSend({ cmd: "status" });
      if (resp.error) {
        cmdCtx.ui.notify("Daemon not running.", "info");
      } else {
        cmdCtx.ui.notify([
          `Daemon status:`,
          ` pid: ${resp.pid}`,
          ` uptime: ${resp.uptime}s`,
          ` requests: ${resp.requests}`,
          ` backend: ${resp.backend}`,
          ` model: ${resp.model}`,
          ` model loaded: ${resp.model_loaded}`,
        ].join("\n"), "info");
      }
      return;
    }

    if (sub === "setup" || sub === "reconfigure") {
      const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
      const result = await runVoiceOnboarding(cmdCtx, config, diagnostics);
      if (!result) {
        cmdCtx.ui.notify("Voice setup cancelled.", "warning");
        return;
      }

      await finalizeAndSaveSetup(cmdCtx, result.config, result.selectedScope, result.summaryLines, "setup-command");
      return;
    }

    if (sub === "backends") {
      const backendsJson = spawnSync("python3", [TRANSCRIBE_SCRIPT, "--list-backends"], {
        stdio: ["pipe", "pipe", "pipe"], timeout: 10000,
      });
      // spawnSync reports a failed launch or a timeout via `error`; surface it
      // instead of rendering an empty backend list.
      if (backendsJson.error) {
        cmdCtx.ui.notify("Failed to scan backends.", "error");
        return;
      }
      try {
        const backends = JSON.parse(backendsJson.stdout?.toString() || "[]");
        const lines = backends.flatMap((b: any) => {
          const installedModels = Array.isArray(b.installed_models) ? b.installed_models : [];
          const readiness = b.type === "cloud"
            ? (b.available ? "api ready" : `needs setup: ${b.install}`)
            : installedModels.length > 0
              ? `installed: ${installedModels.join(", ")}`
              : b.available
                ? "no confirmed installed models"
                : `install: ${b.install}`;
          const summary = `${b.available ? "+" : "-"} ${String(b.name).padEnd(16)} ${String(b.type).padEnd(6)} ${readiness}`;
          const detection = b.install_detection ? ` detection: ${b.install_detection}` : undefined;
          return detection ? [summary, detection] : [summary];
        });
        cmdCtx.ui.notify("Backends:\n" + lines.join("\n"), "info");
      } catch {
        // Covers unparsable output as well as output that is not an array.
        cmdCtx.ui.notify("Failed to scan backends.", "error");
      }
      return;
    }

    if (sub === "doctor") {
      const diagnostics = scanEnvironment(TRANSCRIBE_SCRIPT);
      const recommendation = recommendVoiceSetup(diagnostics, "balanced");
      const currentPlan = buildProvisioningPlan(config, diagnostics);
      const currentBackend = diagnostics.backends.find((backend) => backend.name === config.backend);
      const currentModelReadiness = getModelReadiness(currentBackend, config.model);
      // Same config with only mode/backend/model swapped for the recommendation.
      const recommendedConfig: VoiceConfig = {
        ...config,
        mode: recommendation.mode,
        backend: recommendation.backend,
        model: recommendation.model,
      };
      const recommendedPlan = buildProvisioningPlan(recommendedConfig, diagnostics);
      const recommendedBackend = diagnostics.backends.find((backend) => backend.name === recommendation.backend);
      const recommendedModelReadiness = getModelReadiness(recommendedBackend, recommendation.model);
      const availableBackends = diagnostics.backends
        .filter((backend) => backend.available)
        .map((backend) => `${backend.name} (${backend.type})`);
      const lines = [
        "Voice doctor:",
        ` python3: ${diagnostics.hasPython ? "OK" : "missing"}`,
        ` sox/rec: ${diagnostics.hasSox ? "OK" : "missing"}`,
        ` brew: ${diagnostics.hasHomebrew ? "OK" : "missing"}`,
        ` deepgram key: ${diagnostics.hasDeepgramKey ? "configured" : "missing"}`,
        ` available backends: ${availableBackends.length > 0 ? availableBackends.join(", ") : "none"}`,
        "",
        `Current config: ${config.mode}/${config.backend}/${config.model}`,
        `Current model status: ${currentModelReadiness}`,
        "Repair current setup:",
        ...(currentPlan.commands.length > 0 ? currentPlan.commands.map((command) => ` - ${command}`) : [" - no install commands required"]),
        ...(currentPlan.manualSteps.length > 0 ? currentPlan.manualSteps.map((step) => ` - ${step}`) : []),
        "",
        `Recommended alternative: ${recommendation.mode}/${recommendation.backend}/${recommendation.model}`,
        `Recommended model status: ${recommendedModelReadiness}`,
        `Why: ${recommendation.reason}`,
        ...(recommendation.fixableIssues.length > 0 ? ["Fixable issues:", ...recommendation.fixableIssues.map((issue) => ` - ${issue}`)] : ["Fixable issues:", " - none"]),
        "Suggested commands for the recommendation:",
        ...(recommendedPlan.commands.length > 0 ? recommendedPlan.commands.map((command) => ` - ${command}`) : [" - none"]),
        ...(recommendedPlan.manualSteps.length > 0 ? recommendedPlan.manualSteps.map((step) => ` - ${step}`) : []),
      ];
      cmdCtx.ui.notify(lines.join("\n"), !currentPlan.ready || recommendation.fixableIssues.length > 0 ? "warning" : "info");
      return;
    }

    // Default: toggle
    config.enabled = !config.enabled;
    if (!config.enabled) voiceCleanup();
    else setupHoldToTalk();
    updateVoiceStatus();
    cmdCtx.ui.notify(`Voice ${config.enabled ? "enabled" : "disabled"}.`, "info");
  },
});
|
|
1000
|
+
|
|
1001
|
+
// ─── /btw commands ───────────────────────────────────────────────────────
|
|
1002
|
+
|
|
1003
|
+
// `/btw <message>`: forwards the trimmed message to handleBtw(); with no
// message it prints the usage summary for the whole /btw command family.
pi.registerCommand("btw", {
  description: "Side conversation: /btw <message> | /btw:new | /btw:clear | /btw:inject | /btw:summarize",
  handler: async (args, cmdCtx) => {
    ctx = cmdCtx;

    const message = (args || "").trim();
    if (message) {
      await handleBtw(message);
      return;
    }

    // Nothing to send: show the available forms instead.
    const usage = [
      "Usage: /btw <message>\n",
      " /btw:new [msg] — new thread\n",
      " /btw:clear — dismiss widget\n",
      " /btw:inject — push thread to agent\n",
      " /btw:summarize — summarize then inject",
    ].join("");
    cmdCtx.ui.notify(usage, "info");
  },
});
|
|
1024
|
+
|
|
1025
|
+
// `/btw:new [msg]`: empties the BTW thread, records a "btw-reset" entry,
// hides the widget, and, when a message was supplied, immediately starts the
// new thread with it.
pi.registerCommand("btw:new", {
  description: "Start a fresh BTW thread",
  handler: async (args, cmdCtx) => {
    ctx = cmdCtx;

    // Reset in-memory state and log the reset before touching the UI.
    btwThread = [];
    pi.appendEntry("btw-reset", { timestamp: Date.now() });
    btwWidgetVisible = false;
    cmdCtx.ui.setWidget("btw", undefined);
    cmdCtx.ui.notify("BTW thread cleared.", "info");

    const firstMessage = (args || "").trim();
    if (!firstMessage) return;
    await handleBtw(firstMessage);
  },
});
|
|
1041
|
+
|
|
1042
|
+
// `/btw:clear`: empties the BTW thread, removes the widget, and records a
// "btw-reset" entry. Arguments are ignored.
pi.registerCommand("btw:clear", {
  description: "Dismiss BTW widget and clear thread",
  handler: async (_args, cmdCtx) => {
    ctx = cmdCtx;

    // In-memory state first, then the visible widget.
    btwThread = [];
    btwWidgetVisible = false;
    cmdCtx.ui.setWidget("btw", undefined);

    const resetEntry = { timestamp: Date.now() };
    pi.appendEntry("btw-reset", resetEntry);

    cmdCtx.ui.notify("BTW cleared.", "info");
  },
});
|
|
1053
|
+
|
|
1054
|
+
// `/btw:inject [instructions]`: sends the whole BTW thread to the main agent
// as a follow-up user message (optionally followed by extra instructions),
// then clears the thread and its widget.
pi.registerCommand("btw:inject", {
  description: "Push BTW thread into main agent context",
  handler: async (args, cmdCtx) => {
    ctx = cmdCtx;

    if (btwThread.length === 0) {
      cmdCtx.ui.notify("No BTW thread to inject.", "warning");
      return;
    }

    const extra = (args || "").trim();

    // One "Me / Assistant" pair per exchange, each followed by a blank line.
    const transcript = btwThread
      .map((ex) => `Me: ${ex.question}\nAssistant: ${ex.answer}\n\n`)
      .join("");
    const trailer = extra ? `\nAdditional instructions: ${extra}` : "";
    const content =
      "Here is a side conversation I had (via /btw). Please take it into account:\n\n" +
      transcript +
      trailer;

    pi.sendUserMessage(content, { deliverAs: "followUp" });

    // The thread has been handed over; reset local state and log why.
    btwThread = [];
    btwWidgetVisible = false;
    cmdCtx.ui.setWidget("btw", undefined);
    pi.appendEntry("btw-reset", { timestamp: Date.now(), reason: "injected" });
    cmdCtx.ui.notify("BTW thread injected into main context.", "info");
  },
});
|
|
1085
|
+
|
|
1086
|
+
// `/btw:summarize [focus]`: asks the current model for a concise summary of
// the BTW thread, sends it to the main agent as a follow-up user message,
// then clears the thread. If the model returns nothing, or the stream reports
// an error, the full thread text is sent instead of a summary.
pi.registerCommand("btw:summarize", {
  description: "Summarize BTW thread then inject into main agent",
  handler: async (args, cmdCtx) => {
    ctx = cmdCtx;

    if (btwThread.length === 0) {
      cmdCtx.ui.notify("No BTW thread to summarize.", "warning");
      return;
    }

    const instructions = (args || "").trim();
    const threadText = btwThread
      .map((ex) => `Q: ${ex.question}\nA: ${ex.answer}\n\n`)
      .join("");

    // Ask the model to summarize
    const model = ctx.model;
    if (!model) {
      cmdCtx.ui.notify("No model available for summarization.", "error");
      return;
    }

    cmdCtx.ui.notify("Summarizing BTW thread...", "info");

    try {
      let summary = "";
      let streamFailed = false;
      const summaryPrompt = instructions
        ? `Summarize this conversation thread concisely. Focus on: ${instructions}\n\n${threadText}`
        : `Summarize this conversation thread concisely, capturing key decisions and information:\n\n${threadText}`;

      const eventStream = streamSimple(model, {
        systemPrompt: "You are a concise summarizer. Output only the summary, no preamble.",
        messages: [{ role: "user" as const, content: summaryPrompt, timestamp: Date.now() }],
      });

      for await (const event of eventStream) {
        if (event.type === "text_delta") {
          summary += event.delta;
        } else if (event.type === "error") {
          // Remember the failure: text gathered so far may be cut off mid-way.
          streamFailed = true;
          break;
        } else if (event.type === "done") {
          break;
        }
      }

      summary = summary.trim();
      if (streamFailed) {
        // Never inject a possibly truncated summary and then discard the
        // thread; hand over the complete thread text instead.
        cmdCtx.ui.notify("Summarization failed — injecting full thread instead.", "warning");
        summary = threadText;
      } else if (!summary) {
        cmdCtx.ui.notify("Empty summary — injecting full thread instead.", "warning");
        summary = threadText;
      }

      pi.sendUserMessage(
        `Here's a summary of a side conversation I had (via /btw):\n\n${summary}`,
        { deliverAs: "followUp" },
      );

      // The content has been handed over; reset local state and log why.
      btwThread = [];
      btwWidgetVisible = false;
      cmdCtx.ui.setWidget("btw", undefined);
      pi.appendEntry("btw-reset", { timestamp: Date.now(), reason: "summarized" });
      cmdCtx.ui.notify("BTW summary injected.", "info");
    } catch (err: unknown) {
      // The thread is left untouched here so it can still be injected manually.
      const reason = err instanceof Error ? err.message : String(err);
      cmdCtx.ui.notify(`Summarization failed: ${reason}`, "error");
    }
  },
});
|
|
1149
|
+
}
|