pi-voice 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -1
- package/out/cli/cli.js +18310 -449
- package/out/main/index.js +120 -64
- package/package.json +4 -2
package/out/main/index.js
CHANGED
|
@@ -2,16 +2,39 @@ import { systemPreferences, app, BrowserWindow, ipcMain } from "electron";
|
|
|
2
2
|
import { fileURLToPath } from "node:url";
|
|
3
3
|
import { uIOhook, UiohookKey } from "uiohook-napi";
|
|
4
4
|
import { join } from "node:path";
|
|
5
|
+
import { homedir } from "node:os";
|
|
6
|
+
import pino from "pino";
|
|
5
7
|
import { readFileSync, existsSync, mkdirSync, createWriteStream, unlinkSync, writeFileSync } from "node:fs";
|
|
8
|
+
import { z } from "zod";
|
|
6
9
|
import { GoogleGenAI } from "@google/genai";
|
|
7
10
|
import OpenAI, { toFile } from "openai";
|
|
8
11
|
import { WhisperFullParams, WhisperSamplingStrategy, Whisper } from "@napi-rs/whisper";
|
|
9
|
-
import { homedir } from "node:os";
|
|
10
12
|
import { Readable } from "node:stream";
|
|
11
13
|
import { finished } from "node:stream/promises";
|
|
12
14
|
import { spawn } from "node:child_process";
|
|
13
15
|
import { createAgentSession, SessionManager } from "@mariozechner/pi-coding-agent";
|
|
14
16
|
import { createServer } from "node:net";
|
|
17
|
+
function resolveLogPath() {
|
|
18
|
+
const envPath = process.env["PI_VOICE_LOG_PATH"];
|
|
19
|
+
if (envPath) return envPath;
|
|
20
|
+
const configHome = process.env["XDG_CONFIG_HOME"] || join(homedir(), ".config");
|
|
21
|
+
return join(configHome, "pi-voice", "daemon.log");
|
|
22
|
+
}
|
|
23
|
+
const logPath = resolveLogPath();
|
|
24
|
+
const logger = pino(
|
|
25
|
+
{
|
|
26
|
+
level: "debug"
|
|
27
|
+
},
|
|
28
|
+
pino.multistream([
|
|
29
|
+
// Console output (human-readable via stdout)
|
|
30
|
+
{ level: "debug", stream: process.stdout },
|
|
31
|
+
// File output (JSON, auto-creates parent directories)
|
|
32
|
+
{
|
|
33
|
+
level: "debug",
|
|
34
|
+
stream: pino.destination({ dest: logPath, mkdir: true, sync: false })
|
|
35
|
+
}
|
|
36
|
+
])
|
|
37
|
+
);
|
|
15
38
|
function getReleaseCodes(binding) {
|
|
16
39
|
const codes = [binding.keycode];
|
|
17
40
|
if (binding.ctrl) {
|
|
@@ -67,14 +90,14 @@ class FnHook {
|
|
|
67
90
|
});
|
|
68
91
|
uIOhook.start();
|
|
69
92
|
this.started = true;
|
|
70
|
-
|
|
93
|
+
logger.info({ key: this.displayName }, "Started monitoring key");
|
|
71
94
|
}
|
|
72
95
|
stop() {
|
|
73
96
|
if (!this.started) return;
|
|
74
97
|
uIOhook.stop();
|
|
75
98
|
this.started = false;
|
|
76
99
|
this.active = false;
|
|
77
|
-
|
|
100
|
+
logger.info("Stopped monitoring key");
|
|
78
101
|
}
|
|
79
102
|
get isFnDown() {
|
|
80
103
|
return this.active;
|
|
@@ -216,50 +239,68 @@ function formatKeyDisplay(binding) {
|
|
|
216
239
|
}
|
|
217
240
|
const DEFAULT_KEY_STRING = "meta+shift+i";
|
|
218
241
|
const DEFAULT_PROVIDER = "local";
|
|
219
|
-
const VALID_PROVIDERS = ["local", "gemini", "openai"];
|
|
220
242
|
function defaultConfig() {
|
|
243
|
+
const binding = parseKeyBinding(DEFAULT_KEY_STRING);
|
|
221
244
|
return {
|
|
222
|
-
key: parseKeyBinding(DEFAULT_KEY_STRING),
|
|
223
|
-
keyDisplay: formatKeyDisplay(parseKeyBinding(DEFAULT_KEY_STRING)),
|
|
245
|
+
key: binding,
|
|
246
|
+
keyDisplay: formatKeyDisplay(binding),
|
|
224
247
|
provider: DEFAULT_PROVIDER
|
|
225
248
|
};
|
|
226
249
|
}
|
|
250
|
+
const configFileSchema = z.object({
|
|
251
|
+
key: z.string().refine(
|
|
252
|
+
(v) => {
|
|
253
|
+
try {
|
|
254
|
+
parseKeyBinding(v);
|
|
255
|
+
return true;
|
|
256
|
+
} catch {
|
|
257
|
+
return false;
|
|
258
|
+
}
|
|
259
|
+
},
|
|
260
|
+
{ message: "Invalid key binding" }
|
|
261
|
+
).optional().default(DEFAULT_KEY_STRING),
|
|
262
|
+
provider: z.enum(["local", "gemini", "openai"]).optional().default(DEFAULT_PROVIDER)
|
|
263
|
+
});
|
|
264
|
+
class ConfigError extends Error {
|
|
265
|
+
constructor(configPath, details) {
|
|
266
|
+
super(`Invalid config at ${configPath}:
|
|
267
|
+
${details}`);
|
|
268
|
+
this.configPath = configPath;
|
|
269
|
+
this.details = details;
|
|
270
|
+
this.name = "ConfigError";
|
|
271
|
+
}
|
|
272
|
+
}
|
|
227
273
|
function loadConfig(cwd) {
|
|
228
274
|
const configPath = join(cwd, ".pi", "pi-voice.json");
|
|
275
|
+
let raw;
|
|
229
276
|
try {
|
|
230
|
-
|
|
231
|
-
const json = JSON.parse(raw);
|
|
232
|
-
let binding;
|
|
233
|
-
let display;
|
|
234
|
-
if (typeof json.key === "string") {
|
|
235
|
-
binding = parseKeyBinding(json.key);
|
|
236
|
-
display = formatKeyDisplay(binding);
|
|
237
|
-
} else {
|
|
238
|
-
if (json.key !== void 0) {
|
|
239
|
-
console.warn(`[Config] "key" must be a string in ${configPath}, using default`);
|
|
240
|
-
}
|
|
241
|
-
binding = parseKeyBinding(DEFAULT_KEY_STRING);
|
|
242
|
-
display = formatKeyDisplay(binding);
|
|
243
|
-
}
|
|
244
|
-
let provider = DEFAULT_PROVIDER;
|
|
245
|
-
if (typeof json.provider === "string") {
|
|
246
|
-
const p = json.provider.toLowerCase();
|
|
247
|
-
if (VALID_PROVIDERS.includes(p)) {
|
|
248
|
-
provider = p;
|
|
249
|
-
} else {
|
|
250
|
-
console.warn(`[Config] Unknown provider "${json.provider}" in ${configPath}, using default "${DEFAULT_PROVIDER}"`);
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
console.log(`[Config] Loaded config: key=${display}, provider=${provider} from ${configPath}`);
|
|
254
|
-
return { key: binding, keyDisplay: display, provider };
|
|
277
|
+
raw = readFileSync(configPath, "utf-8");
|
|
255
278
|
} catch (err) {
|
|
256
279
|
if (err.code === "ENOENT") {
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
console.warn(`[Config] Failed to load ${configPath}, using default:`, err);
|
|
280
|
+
logger.info({ configPath }, "No config file found, using defaults");
|
|
281
|
+
return defaultConfig();
|
|
260
282
|
}
|
|
261
|
-
|
|
283
|
+
throw new ConfigError(configPath, `Failed to read file: ${err.message}`);
|
|
262
284
|
}
|
|
285
|
+
let json;
|
|
286
|
+
try {
|
|
287
|
+
json = JSON.parse(raw);
|
|
288
|
+
} catch {
|
|
289
|
+
throw new ConfigError(configPath, "Invalid JSON syntax");
|
|
290
|
+
}
|
|
291
|
+
const result = configFileSchema.safeParse(json);
|
|
292
|
+
if (!result.success) {
|
|
293
|
+
const details = result.error.issues.map((issue) => {
|
|
294
|
+
const path = issue.path.length > 0 ? `"${issue.path.join(".")}"` : "(root)";
|
|
295
|
+
return ` - ${path}: ${issue.message}`;
|
|
296
|
+
}).join("\n");
|
|
297
|
+
throw new ConfigError(configPath, details);
|
|
298
|
+
}
|
|
299
|
+
const parsed = result.data;
|
|
300
|
+
const binding = parseKeyBinding(parsed.key);
|
|
301
|
+
const display = formatKeyDisplay(binding);
|
|
302
|
+
logger.info({ key: display, provider: parsed.provider, configPath }, "Loaded config");
|
|
303
|
+
return { key: binding, keyDisplay: display, provider: parsed.provider };
|
|
263
304
|
}
|
|
264
305
|
const DEFAULT_MODEL = "medium-q5_0";
|
|
265
306
|
const HF_BASE_URL = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
|
|
@@ -271,7 +312,7 @@ function modelFileName(model) {
|
|
|
271
312
|
}
|
|
272
313
|
async function downloadModel(model, destPath) {
|
|
273
314
|
const url = `${HF_BASE_URL}/${modelFileName(model)}`;
|
|
274
|
-
|
|
315
|
+
logger.info({ model, url }, "Downloading Whisper model");
|
|
275
316
|
const response = await fetch(url, { method: "GET", redirect: "follow" });
|
|
276
317
|
if (!response.ok) {
|
|
277
318
|
throw new Error(
|
|
@@ -323,7 +364,7 @@ async function downloadModel(model, destPath) {
|
|
|
323
364
|
process.stderr.write("\n");
|
|
324
365
|
const { renameSync } = await import("node:fs");
|
|
325
366
|
renameSync(tmpPath, destPath);
|
|
326
|
-
|
|
367
|
+
logger.info({ destPath }, "Whisper model saved");
|
|
327
368
|
} catch (err) {
|
|
328
369
|
try {
|
|
329
370
|
unlinkSync(tmpPath);
|
|
@@ -380,9 +421,9 @@ async function getWhisperInstance() {
|
|
|
380
421
|
if (whisperInitPromise) return whisperInitPromise;
|
|
381
422
|
whisperInitPromise = (async () => {
|
|
382
423
|
const modelPath = await resolveModelPath();
|
|
383
|
-
|
|
424
|
+
logger.info({ modelPath }, "Loading Whisper model");
|
|
384
425
|
whisperInstance = new Whisper(modelPath);
|
|
385
|
-
|
|
426
|
+
logger.info("Whisper model loaded");
|
|
386
427
|
return whisperInstance;
|
|
387
428
|
})();
|
|
388
429
|
return whisperInitPromise;
|
|
@@ -447,7 +488,7 @@ async function transcribe(audioData, provider = "local") {
|
|
|
447
488
|
text = await transcribeGemini(Buffer.from(audioData));
|
|
448
489
|
break;
|
|
449
490
|
}
|
|
450
|
-
|
|
491
|
+
logger.info({ provider, text }, "Transcribed");
|
|
451
492
|
return text;
|
|
452
493
|
}
|
|
453
494
|
let geminiClient = null;
|
|
@@ -525,8 +566,9 @@ async function* synthesizeStreamGemini(text) {
|
|
|
525
566
|
totalBytes += leftover.length;
|
|
526
567
|
yield leftover;
|
|
527
568
|
}
|
|
528
|
-
|
|
529
|
-
|
|
569
|
+
logger.info(
|
|
570
|
+
{ provider: "gemini", totalBytes, text: text.substring(0, 50) },
|
|
571
|
+
"Streamed PCM audio"
|
|
530
572
|
);
|
|
531
573
|
}
|
|
532
574
|
async function* synthesizeStreamOpenAI(text) {
|
|
@@ -549,8 +591,9 @@ async function* synthesizeStreamOpenAI(text) {
|
|
|
549
591
|
yield chunk;
|
|
550
592
|
offset = end;
|
|
551
593
|
}
|
|
552
|
-
|
|
553
|
-
|
|
594
|
+
logger.info(
|
|
595
|
+
{ provider: "openai", totalBytes, text: text.substring(0, 50) },
|
|
596
|
+
"Streamed PCM audio"
|
|
554
597
|
);
|
|
555
598
|
}
|
|
556
599
|
function speakLocal(text) {
|
|
@@ -571,8 +614,9 @@ function speakLocal(text) {
|
|
|
571
614
|
});
|
|
572
615
|
child.on("close", (code) => {
|
|
573
616
|
if (code === 0) {
|
|
574
|
-
|
|
575
|
-
|
|
617
|
+
logger.info(
|
|
618
|
+
{ provider: "local", text: text.substring(0, 50) },
|
|
619
|
+
"Spoke text"
|
|
576
620
|
);
|
|
577
621
|
resolve();
|
|
578
622
|
} else {
|
|
@@ -603,13 +647,13 @@ function setSessionCwd(cwd) {
|
|
|
603
647
|
}
|
|
604
648
|
async function getOrCreateSession() {
|
|
605
649
|
if (session) return session;
|
|
606
|
-
|
|
650
|
+
logger.info({ cwd: sessionCwd }, "Creating new agent session");
|
|
607
651
|
const result = await createAgentSession({
|
|
608
652
|
cwd: sessionCwd,
|
|
609
653
|
sessionManager: SessionManager.inMemory()
|
|
610
654
|
});
|
|
611
655
|
session = result.session;
|
|
612
|
-
|
|
656
|
+
logger.info("Agent session created");
|
|
613
657
|
return session;
|
|
614
658
|
}
|
|
615
659
|
async function prompt(text, options) {
|
|
@@ -618,7 +662,7 @@ async function prompt(text, options) {
|
|
|
618
662
|
if (event.type === "message_update" && event.assistantMessageEvent.type === "text_end") {
|
|
619
663
|
const content = event.assistantMessageEvent.content.trim();
|
|
620
664
|
if (content.length > 0) {
|
|
621
|
-
|
|
665
|
+
logger.info({ content }, "Agent response");
|
|
622
666
|
options?.onTextEnd?.(content);
|
|
623
667
|
}
|
|
624
668
|
}
|
|
@@ -633,7 +677,7 @@ function dispose() {
|
|
|
633
677
|
if (session) {
|
|
634
678
|
session.dispose();
|
|
635
679
|
session = null;
|
|
636
|
-
|
|
680
|
+
logger.info("Agent session disposed");
|
|
637
681
|
}
|
|
638
682
|
}
|
|
639
683
|
const IPC = {
|
|
@@ -706,7 +750,7 @@ function startDaemonServer(handler) {
|
|
|
706
750
|
});
|
|
707
751
|
});
|
|
708
752
|
server.listen(socketPath);
|
|
709
|
-
|
|
753
|
+
logger.info({ socketPath }, "DaemonIPC listening");
|
|
710
754
|
return socketPath;
|
|
711
755
|
}
|
|
712
756
|
function stopDaemonServer() {
|
|
@@ -721,7 +765,7 @@ function stopDaemonServer() {
|
|
|
721
765
|
} catch {
|
|
722
766
|
}
|
|
723
767
|
}
|
|
724
|
-
|
|
768
|
+
logger.info("DaemonIPC server stopped");
|
|
725
769
|
}
|
|
726
770
|
const workingCwd = process.env["PI_VOICE_CWD"] || process.cwd();
|
|
727
771
|
let mainWindow = null;
|
|
@@ -730,7 +774,7 @@ let currentState = "idle";
|
|
|
730
774
|
setSessionCwd(workingCwd);
|
|
731
775
|
function setState(state, message) {
|
|
732
776
|
currentState = state;
|
|
733
|
-
|
|
777
|
+
logger.info({ state, message }, "State changed");
|
|
734
778
|
}
|
|
735
779
|
function createWindow() {
|
|
736
780
|
mainWindow = new BrowserWindow({
|
|
@@ -764,7 +808,7 @@ function setupIpcHandlers(provider) {
|
|
|
764
808
|
ipcMain.on(IPC.RECORDING_DATA, async (_event, data) => {
|
|
765
809
|
if (currentState !== "recording") return;
|
|
766
810
|
if (data.byteLength < 1e3) {
|
|
767
|
-
|
|
811
|
+
logger.info("Recording too short, ignoring");
|
|
768
812
|
setState("idle", "Recording too short");
|
|
769
813
|
return;
|
|
770
814
|
}
|
|
@@ -830,7 +874,7 @@ function setupIpcHandlers(provider) {
|
|
|
830
874
|
}
|
|
831
875
|
} catch (err) {
|
|
832
876
|
const msg = err instanceof Error ? err.message : String(err);
|
|
833
|
-
|
|
877
|
+
logger.error({ err: msg }, "Pipeline error");
|
|
834
878
|
setState("error", msg);
|
|
835
879
|
setTimeout(() => {
|
|
836
880
|
if (currentState === "error") setState("idle");
|
|
@@ -838,7 +882,7 @@ function setupIpcHandlers(provider) {
|
|
|
838
882
|
}
|
|
839
883
|
});
|
|
840
884
|
ipcMain.on(IPC.RECORDING_ERROR, (_event, error) => {
|
|
841
|
-
|
|
885
|
+
logger.error({ err: error }, "Recording error");
|
|
842
886
|
setState("error", error);
|
|
843
887
|
setTimeout(() => {
|
|
844
888
|
if (currentState === "error") setState("idle");
|
|
@@ -856,8 +900,9 @@ function setupFnHook(config) {
|
|
|
856
900
|
{
|
|
857
901
|
onFnDown: () => {
|
|
858
902
|
if (currentState !== "idle") {
|
|
859
|
-
|
|
860
|
-
|
|
903
|
+
logger.info(
|
|
904
|
+
{ key: config.keyDisplay, state: currentState },
|
|
905
|
+
"Key pressed but not idle, ignoring"
|
|
861
906
|
);
|
|
862
907
|
return;
|
|
863
908
|
}
|
|
@@ -876,7 +921,7 @@ function setupFnHook(config) {
|
|
|
876
921
|
fnHook.start();
|
|
877
922
|
} catch (err) {
|
|
878
923
|
const msg = err instanceof Error ? err.message : String(err);
|
|
879
|
-
|
|
924
|
+
logger.error({ err: msg }, "FnHook error");
|
|
880
925
|
setState("error", msg);
|
|
881
926
|
}
|
|
882
927
|
}
|
|
@@ -900,7 +945,7 @@ function handleDaemonCommand(command) {
|
|
|
900
945
|
}
|
|
901
946
|
}
|
|
902
947
|
function gracefulShutdown() {
|
|
903
|
-
|
|
948
|
+
logger.info("Shutting down...");
|
|
904
949
|
fnHook?.stop();
|
|
905
950
|
dispose();
|
|
906
951
|
stopDaemonServer();
|
|
@@ -912,17 +957,28 @@ process.on("SIGTERM", () => {
|
|
|
912
957
|
});
|
|
913
958
|
const gotLock = app.requestSingleInstanceLock();
|
|
914
959
|
if (!gotLock) {
|
|
915
|
-
|
|
960
|
+
logger.warn("Another instance is already running. Exiting.");
|
|
916
961
|
app.quit();
|
|
917
962
|
}
|
|
918
963
|
app.whenReady().then(async () => {
|
|
919
|
-
|
|
964
|
+
let config;
|
|
965
|
+
try {
|
|
966
|
+
config = loadConfig(workingCwd);
|
|
967
|
+
} catch (err) {
|
|
968
|
+
if (err instanceof ConfigError) {
|
|
969
|
+
logger.error({ err: err.message }, "Config error");
|
|
970
|
+
} else {
|
|
971
|
+
logger.error({ err: err instanceof Error ? err.message : String(err) }, "Failed to load config");
|
|
972
|
+
}
|
|
973
|
+
app.quit();
|
|
974
|
+
return;
|
|
975
|
+
}
|
|
920
976
|
if (config.provider === "local") {
|
|
921
977
|
try {
|
|
922
978
|
await resolveModelPath();
|
|
923
979
|
} catch (err) {
|
|
924
980
|
const msg = err instanceof Error ? err.message : String(err);
|
|
925
|
-
|
|
981
|
+
logger.error({ err: msg }, "Failed to prepare Whisper model");
|
|
926
982
|
app.quit();
|
|
927
983
|
return;
|
|
928
984
|
}
|
|
@@ -932,7 +988,7 @@ app.whenReady().then(async () => {
|
|
|
932
988
|
setupFnHook(config);
|
|
933
989
|
startDaemonServer(handleDaemonCommand);
|
|
934
990
|
saveRuntimeState(workingCwd);
|
|
935
|
-
|
|
991
|
+
logger.info({ cwd: workingCwd }, "pi-voice daemon started");
|
|
936
992
|
});
|
|
937
993
|
app.on("window-all-closed", () => {
|
|
938
994
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-voice",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Voice interface for pi coding agent",
|
|
5
5
|
"author": "Yuku Kotani",
|
|
6
6
|
"license": "MIT",
|
|
@@ -37,7 +37,9 @@
|
|
|
37
37
|
"@napi-rs/whisper": "^0.0.4",
|
|
38
38
|
"electron": "^40.2.1",
|
|
39
39
|
"openai": "^6.10.0",
|
|
40
|
-
"
|
|
40
|
+
"pino": "^10.3.1",
|
|
41
|
+
"uiohook-napi": "^1.5.4",
|
|
42
|
+
"zod": "^4.3.6"
|
|
41
43
|
},
|
|
42
44
|
"trustedDependencies": [
|
|
43
45
|
"electron",
|