pi-voice 0.1.1 → 0.2.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
package/out/main/index.js CHANGED
@@ -2,16 +2,39 @@ import { systemPreferences, app, BrowserWindow, ipcMain } from "electron";
2
2
  import { fileURLToPath } from "node:url";
3
3
  import { uIOhook, UiohookKey } from "uiohook-napi";
4
4
  import { join } from "node:path";
5
+ import { homedir } from "node:os";
6
+ import pino from "pino";
5
7
  import { readFileSync, existsSync, mkdirSync, createWriteStream, unlinkSync, writeFileSync } from "node:fs";
8
+ import { z } from "zod";
6
9
  import { GoogleGenAI } from "@google/genai";
7
10
  import OpenAI, { toFile } from "openai";
8
11
  import { WhisperFullParams, WhisperSamplingStrategy, Whisper } from "@napi-rs/whisper";
9
- import { homedir } from "node:os";
10
12
  import { Readable } from "node:stream";
11
13
  import { finished } from "node:stream/promises";
12
14
  import { spawn } from "node:child_process";
13
15
  import { createAgentSession, SessionManager } from "@mariozechner/pi-coding-agent";
14
16
  import { createServer } from "node:net";
17
+ function resolveLogPath() {
18
+ const envPath = process.env["PI_VOICE_LOG_PATH"];
19
+ if (envPath) return envPath;
20
+ const configHome = process.env["XDG_CONFIG_HOME"] || join(homedir(), ".config");
21
+ return join(configHome, "pi-voice", "daemon.log");
22
+ }
23
+ const logPath = resolveLogPath();
24
+ const logger = pino(
25
+ {
26
+ level: "debug"
27
+ },
28
+ pino.multistream([
29
+ // Console output (human-readable via stdout)
30
+ { level: "debug", stream: process.stdout },
31
+ // File output (JSON, auto-creates parent directories)
32
+ {
33
+ level: "debug",
34
+ stream: pino.destination({ dest: logPath, mkdir: true, sync: false })
35
+ }
36
+ ])
37
+ );
15
38
  function getReleaseCodes(binding) {
16
39
  const codes = [binding.keycode];
17
40
  if (binding.ctrl) {
@@ -67,14 +90,14 @@ class FnHook {
67
90
  });
68
91
  uIOhook.start();
69
92
  this.started = true;
70
- console.log(`[FnHook] Started monitoring ${this.displayName}`);
93
+ logger.info({ key: this.displayName }, "Started monitoring key");
71
94
  }
72
95
  stop() {
73
96
  if (!this.started) return;
74
97
  uIOhook.stop();
75
98
  this.started = false;
76
99
  this.active = false;
77
- console.log("[FnHook] Stopped monitoring");
100
+ logger.info("Stopped monitoring key");
78
101
  }
79
102
  get isFnDown() {
80
103
  return this.active;
@@ -216,50 +239,68 @@ function formatKeyDisplay(binding) {
216
239
  }
217
240
  const DEFAULT_KEY_STRING = "meta+shift+i";
218
241
  const DEFAULT_PROVIDER = "local";
219
- const VALID_PROVIDERS = ["local", "gemini", "openai"];
220
242
  function defaultConfig() {
243
+ const binding = parseKeyBinding(DEFAULT_KEY_STRING);
221
244
  return {
222
- key: parseKeyBinding(DEFAULT_KEY_STRING),
223
- keyDisplay: formatKeyDisplay(parseKeyBinding(DEFAULT_KEY_STRING)),
245
+ key: binding,
246
+ keyDisplay: formatKeyDisplay(binding),
224
247
  provider: DEFAULT_PROVIDER
225
248
  };
226
249
  }
250
+ const configFileSchema = z.object({
251
+ key: z.string().refine(
252
+ (v) => {
253
+ try {
254
+ parseKeyBinding(v);
255
+ return true;
256
+ } catch {
257
+ return false;
258
+ }
259
+ },
260
+ { message: "Invalid key binding" }
261
+ ).optional().default(DEFAULT_KEY_STRING),
262
+ provider: z.enum(["local", "gemini", "openai"]).optional().default(DEFAULT_PROVIDER)
263
+ });
264
+ class ConfigError extends Error {
265
+ constructor(configPath, details) {
266
+ super(`Invalid config at ${configPath}:
267
+ ${details}`);
268
+ this.configPath = configPath;
269
+ this.details = details;
270
+ this.name = "ConfigError";
271
+ }
272
+ }
227
273
  function loadConfig(cwd) {
228
274
  const configPath = join(cwd, ".pi", "pi-voice.json");
275
+ let raw;
229
276
  try {
230
- const raw = readFileSync(configPath, "utf-8");
231
- const json = JSON.parse(raw);
232
- let binding;
233
- let display;
234
- if (typeof json.key === "string") {
235
- binding = parseKeyBinding(json.key);
236
- display = formatKeyDisplay(binding);
237
- } else {
238
- if (json.key !== void 0) {
239
- console.warn(`[Config] "key" must be a string in ${configPath}, using default`);
240
- }
241
- binding = parseKeyBinding(DEFAULT_KEY_STRING);
242
- display = formatKeyDisplay(binding);
243
- }
244
- let provider = DEFAULT_PROVIDER;
245
- if (typeof json.provider === "string") {
246
- const p = json.provider.toLowerCase();
247
- if (VALID_PROVIDERS.includes(p)) {
248
- provider = p;
249
- } else {
250
- console.warn(`[Config] Unknown provider "${json.provider}" in ${configPath}, using default "${DEFAULT_PROVIDER}"`);
251
- }
252
- }
253
- console.log(`[Config] Loaded config: key=${display}, provider=${provider} from ${configPath}`);
254
- return { key: binding, keyDisplay: display, provider };
277
+ raw = readFileSync(configPath, "utf-8");
255
278
  } catch (err) {
256
279
  if (err.code === "ENOENT") {
257
- console.log(`[Config] No config file found at ${configPath}, using default (${DEFAULT_KEY_STRING})`);
258
- } else {
259
- console.warn(`[Config] Failed to load ${configPath}, using default:`, err);
280
+ logger.info({ configPath }, "No config file found, using defaults");
281
+ return defaultConfig();
260
282
  }
261
- return defaultConfig();
283
+ throw new ConfigError(configPath, `Failed to read file: ${err.message}`);
262
284
  }
285
+ let json;
286
+ try {
287
+ json = JSON.parse(raw);
288
+ } catch {
289
+ throw new ConfigError(configPath, "Invalid JSON syntax");
290
+ }
291
+ const result = configFileSchema.safeParse(json);
292
+ if (!result.success) {
293
+ const details = result.error.issues.map((issue) => {
294
+ const path = issue.path.length > 0 ? `"${issue.path.join(".")}"` : "(root)";
295
+ return ` - ${path}: ${issue.message}`;
296
+ }).join("\n");
297
+ throw new ConfigError(configPath, details);
298
+ }
299
+ const parsed = result.data;
300
+ const binding = parseKeyBinding(parsed.key);
301
+ const display = formatKeyDisplay(binding);
302
+ logger.info({ key: display, provider: parsed.provider, configPath }, "Loaded config");
303
+ return { key: binding, keyDisplay: display, provider: parsed.provider };
263
304
  }
264
305
  const DEFAULT_MODEL = "medium-q5_0";
265
306
  const HF_BASE_URL = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
@@ -271,7 +312,7 @@ function modelFileName(model) {
271
312
  }
272
313
  async function downloadModel(model, destPath) {
273
314
  const url = `${HF_BASE_URL}/${modelFileName(model)}`;
274
- console.log(`[Whisper] Downloading model "${model}" from ${url} ...`);
315
+ logger.info({ model, url }, "Downloading Whisper model");
275
316
  const response = await fetch(url, { method: "GET", redirect: "follow" });
276
317
  if (!response.ok) {
277
318
  throw new Error(
@@ -323,7 +364,7 @@ async function downloadModel(model, destPath) {
323
364
  process.stderr.write("\n");
324
365
  const { renameSync } = await import("node:fs");
325
366
  renameSync(tmpPath, destPath);
326
- console.log(`[Whisper] Model saved to ${destPath}`);
367
+ logger.info({ destPath }, "Whisper model saved");
327
368
  } catch (err) {
328
369
  try {
329
370
  unlinkSync(tmpPath);
@@ -380,9 +421,9 @@ async function getWhisperInstance() {
380
421
  if (whisperInitPromise) return whisperInitPromise;
381
422
  whisperInitPromise = (async () => {
382
423
  const modelPath = await resolveModelPath();
383
- console.log("[STT:local] Loading Whisper model from", modelPath, "...");
424
+ logger.info({ modelPath }, "Loading Whisper model");
384
425
  whisperInstance = new Whisper(modelPath);
385
- console.log("[STT:local] Whisper model loaded");
426
+ logger.info("Whisper model loaded");
386
427
  return whisperInstance;
387
428
  })();
388
429
  return whisperInitPromise;
@@ -447,7 +488,7 @@ async function transcribe(audioData, provider = "local") {
447
488
  text = await transcribeGemini(Buffer.from(audioData));
448
489
  break;
449
490
  }
450
- console.log(`[STT:${provider}] Transcribed: "${text}"`);
491
+ logger.info({ provider, text }, "Transcribed");
451
492
  return text;
452
493
  }
453
494
  let geminiClient = null;
@@ -525,8 +566,9 @@ async function* synthesizeStreamGemini(text) {
525
566
  totalBytes += leftover.length;
526
567
  yield leftover;
527
568
  }
528
- console.log(
529
- `[TTS:gemini] Streamed ${totalBytes} bytes of PCM audio for "${text.substring(0, 50)}..."`
569
+ logger.info(
570
+ { provider: "gemini", totalBytes, text: text.substring(0, 50) },
571
+ "Streamed PCM audio"
530
572
  );
531
573
  }
532
574
  async function* synthesizeStreamOpenAI(text) {
@@ -549,8 +591,9 @@ async function* synthesizeStreamOpenAI(text) {
549
591
  yield chunk;
550
592
  offset = end;
551
593
  }
552
- console.log(
553
- `[TTS:openai] Streamed ${totalBytes} bytes of PCM audio for "${text.substring(0, 50)}..."`
594
+ logger.info(
595
+ { provider: "openai", totalBytes, text: text.substring(0, 50) },
596
+ "Streamed PCM audio"
554
597
  );
555
598
  }
556
599
  function speakLocal(text) {
@@ -571,8 +614,9 @@ function speakLocal(text) {
571
614
  });
572
615
  child.on("close", (code) => {
573
616
  if (code === 0) {
574
- console.log(
575
- `[TTS:local] Spoke "${text.substring(0, 50)}${text.length > 50 ? "..." : ""}"`
617
+ logger.info(
618
+ { provider: "local", text: text.substring(0, 50) },
619
+ "Spoke text"
576
620
  );
577
621
  resolve();
578
622
  } else {
@@ -603,13 +647,13 @@ function setSessionCwd(cwd) {
603
647
  }
604
648
  async function getOrCreateSession() {
605
649
  if (session) return session;
606
- console.log(`[PiSession] Creating new agent session (cwd: ${sessionCwd})...`);
650
+ logger.info({ cwd: sessionCwd }, "Creating new agent session");
607
651
  const result = await createAgentSession({
608
652
  cwd: sessionCwd,
609
653
  sessionManager: SessionManager.inMemory()
610
654
  });
611
655
  session = result.session;
612
- console.log("[PiSession] Session created");
656
+ logger.info("Agent session created");
613
657
  return session;
614
658
  }
615
659
  async function prompt(text, options) {
@@ -618,7 +662,7 @@ async function prompt(text, options) {
618
662
  if (event.type === "message_update" && event.assistantMessageEvent.type === "text_end") {
619
663
  const content = event.assistantMessageEvent.content.trim();
620
664
  if (content.length > 0) {
621
- console.log(`[PiSession] Response: ${content}`);
665
+ logger.info({ content }, "Agent response");
622
666
  options?.onTextEnd?.(content);
623
667
  }
624
668
  }
@@ -633,7 +677,7 @@ function dispose() {
633
677
  if (session) {
634
678
  session.dispose();
635
679
  session = null;
636
- console.log("[PiSession] Session disposed");
680
+ logger.info("Agent session disposed");
637
681
  }
638
682
  }
639
683
  const IPC = {
@@ -706,7 +750,7 @@ function startDaemonServer(handler) {
706
750
  });
707
751
  });
708
752
  server.listen(socketPath);
709
- console.log(`[DaemonIPC] Listening on ${socketPath}`);
753
+ logger.info({ socketPath }, "DaemonIPC listening");
710
754
  return socketPath;
711
755
  }
712
756
  function stopDaemonServer() {
@@ -721,7 +765,7 @@ function stopDaemonServer() {
721
765
  } catch {
722
766
  }
723
767
  }
724
- console.log("[DaemonIPC] Server stopped");
768
+ logger.info("DaemonIPC server stopped");
725
769
  }
726
770
  const workingCwd = process.env["PI_VOICE_CWD"] || process.cwd();
727
771
  let mainWindow = null;
@@ -730,7 +774,7 @@ let currentState = "idle";
730
774
  setSessionCwd(workingCwd);
731
775
  function setState(state, message) {
732
776
  currentState = state;
733
- console.log(`[Main] State: ${state}${message ? ` - ${message}` : ""}`);
777
+ logger.info({ state, message }, "State changed");
734
778
  }
735
779
  function createWindow() {
736
780
  mainWindow = new BrowserWindow({
@@ -764,7 +808,7 @@ function setupIpcHandlers(provider) {
764
808
  ipcMain.on(IPC.RECORDING_DATA, async (_event, data) => {
765
809
  if (currentState !== "recording") return;
766
810
  if (data.byteLength < 1e3) {
767
- console.log("[Main] Recording too short, ignoring");
811
+ logger.info("Recording too short, ignoring");
768
812
  setState("idle", "Recording too short");
769
813
  return;
770
814
  }
@@ -830,7 +874,7 @@ function setupIpcHandlers(provider) {
830
874
  }
831
875
  } catch (err) {
832
876
  const msg = err instanceof Error ? err.message : String(err);
833
- console.error("[Main] Pipeline error:", msg);
877
+ logger.error({ err: msg }, "Pipeline error");
834
878
  setState("error", msg);
835
879
  setTimeout(() => {
836
880
  if (currentState === "error") setState("idle");
@@ -838,7 +882,7 @@ function setupIpcHandlers(provider) {
838
882
  }
839
883
  });
840
884
  ipcMain.on(IPC.RECORDING_ERROR, (_event, error) => {
841
- console.error("[Main] Recording error:", error);
885
+ logger.error({ err: error }, "Recording error");
842
886
  setState("error", error);
843
887
  setTimeout(() => {
844
888
  if (currentState === "error") setState("idle");
@@ -856,8 +900,9 @@ function setupFnHook(config) {
856
900
  {
857
901
  onFnDown: () => {
858
902
  if (currentState !== "idle") {
859
- console.log(
860
- `[Main] ${config.keyDisplay} pressed but state is ${currentState}, ignoring`
903
+ logger.info(
904
+ { key: config.keyDisplay, state: currentState },
905
+ "Key pressed but not idle, ignoring"
861
906
  );
862
907
  return;
863
908
  }
@@ -876,7 +921,7 @@ function setupFnHook(config) {
876
921
  fnHook.start();
877
922
  } catch (err) {
878
923
  const msg = err instanceof Error ? err.message : String(err);
879
- console.error("[Main] FnHook error:", msg);
924
+ logger.error({ err: msg }, "FnHook error");
880
925
  setState("error", msg);
881
926
  }
882
927
  }
@@ -900,7 +945,7 @@ function handleDaemonCommand(command) {
900
945
  }
901
946
  }
902
947
  function gracefulShutdown() {
903
- console.log("[Main] Shutting down...");
948
+ logger.info("Shutting down...");
904
949
  fnHook?.stop();
905
950
  dispose();
906
951
  stopDaemonServer();
@@ -912,17 +957,28 @@ process.on("SIGTERM", () => {
912
957
  });
913
958
  const gotLock = app.requestSingleInstanceLock();
914
959
  if (!gotLock) {
915
- console.log("[Main] Another instance is already running. Exiting.");
960
+ logger.warn("Another instance is already running. Exiting.");
916
961
  app.quit();
917
962
  }
918
963
  app.whenReady().then(async () => {
919
- const config = loadConfig(workingCwd);
964
+ let config;
965
+ try {
966
+ config = loadConfig(workingCwd);
967
+ } catch (err) {
968
+ if (err instanceof ConfigError) {
969
+ logger.error({ err: err.message }, "Config error");
970
+ } else {
971
+ logger.error({ err: err instanceof Error ? err.message : String(err) }, "Failed to load config");
972
+ }
973
+ app.quit();
974
+ return;
975
+ }
920
976
  if (config.provider === "local") {
921
977
  try {
922
978
  await resolveModelPath();
923
979
  } catch (err) {
924
980
  const msg = err instanceof Error ? err.message : String(err);
925
- console.error("[Main] Failed to prepare Whisper model:", msg);
981
+ logger.error({ err: msg }, "Failed to prepare Whisper model");
926
982
  app.quit();
927
983
  return;
928
984
  }
@@ -932,7 +988,7 @@ app.whenReady().then(async () => {
932
988
  setupFnHook(config);
933
989
  startDaemonServer(handleDaemonCommand);
934
990
  saveRuntimeState(workingCwd);
935
- console.log(`[Main] pi-voice daemon started (cwd: ${workingCwd})`);
991
+ logger.info({ cwd: workingCwd }, "pi-voice daemon started");
936
992
  });
937
993
  app.on("window-all-closed", () => {
938
994
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-voice",
3
- "version": "0.1.1",
3
+ "version": "0.2.0",
4
4
  "description": "Voice interface for pi coding agent",
5
5
  "author": "Yuku Kotani",
6
6
  "license": "MIT",
@@ -37,7 +37,9 @@
37
37
  "@napi-rs/whisper": "^0.0.4",
38
38
  "electron": "^40.2.1",
39
39
  "openai": "^6.10.0",
40
- "uiohook-napi": "^1.5.4"
40
+ "pino": "^10.3.1",
41
+ "uiohook-napi": "^1.5.4",
42
+ "zod": "^4.3.6"
41
43
  },
42
44
  "trustedDependencies": [
43
45
  "electron",