pi-voice 0.1.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/out/main/index.js CHANGED
@@ -2,16 +2,39 @@ import { systemPreferences, app, BrowserWindow, ipcMain } from "electron";
2
2
  import { fileURLToPath } from "node:url";
3
3
  import { uIOhook, UiohookKey } from "uiohook-napi";
4
4
  import { join } from "node:path";
5
+ import { homedir } from "node:os";
6
+ import pino from "pino";
5
7
  import { readFileSync, existsSync, mkdirSync, createWriteStream, unlinkSync, writeFileSync } from "node:fs";
6
- import { GoogleGenAI } from "@google/genai";
8
+ import { z } from "zod";
7
9
  import OpenAI, { toFile } from "openai";
8
10
  import { WhisperFullParams, WhisperSamplingStrategy, Whisper } from "@napi-rs/whisper";
9
- import { homedir } from "node:os";
11
+ import { GoogleGenAI } from "@google/genai";
10
12
  import { Readable } from "node:stream";
11
13
  import { finished } from "node:stream/promises";
12
14
  import { spawn } from "node:child_process";
13
15
  import { createAgentSession, SessionManager } from "@mariozechner/pi-coding-agent";
14
16
  import { createServer } from "node:net";
17
+ function resolveLogPath() {
18
+ const envPath = process.env["PI_VOICE_LOG_PATH"];
19
+ if (envPath) return envPath;
20
+ const configHome = process.env["XDG_CONFIG_HOME"] || join(homedir(), ".config");
21
+ return join(configHome, "pi-voice", "daemon.log");
22
+ }
23
+ const logPath = resolveLogPath();
24
+ const logger = pino(
25
+ {
26
+ level: "debug"
27
+ },
28
+ pino.multistream([
29
+ // Console output (human-readable via stdout)
30
+ { level: "debug", stream: process.stdout },
31
+ // File output (JSON, auto-creates parent directories)
32
+ {
33
+ level: "debug",
34
+ stream: pino.destination({ dest: logPath, mkdir: true, sync: false })
35
+ }
36
+ ])
37
+ );
15
38
  function getReleaseCodes(binding) {
16
39
  const codes = [binding.keycode];
17
40
  if (binding.ctrl) {
@@ -67,14 +90,14 @@ class FnHook {
67
90
  });
68
91
  uIOhook.start();
69
92
  this.started = true;
70
- console.log(`[FnHook] Started monitoring ${this.displayName}`);
93
+ logger.info({ key: this.displayName }, "Started monitoring key");
71
94
  }
72
95
  stop() {
73
96
  if (!this.started) return;
74
97
  uIOhook.stop();
75
98
  this.started = false;
76
99
  this.active = false;
77
- console.log("[FnHook] Stopped monitoring");
100
+ logger.info("Stopped monitoring key");
78
101
  }
79
102
  get isFnDown() {
80
103
  return this.active;
@@ -216,50 +239,88 @@ function formatKeyDisplay(binding) {
216
239
  }
217
240
  const DEFAULT_KEY_STRING = "meta+shift+i";
218
241
  const DEFAULT_PROVIDER = "local";
219
- const VALID_PROVIDERS = ["local", "gemini", "openai"];
220
242
  function defaultConfig() {
243
+ const binding = parseKeyBinding(DEFAULT_KEY_STRING);
221
244
  return {
222
- key: parseKeyBinding(DEFAULT_KEY_STRING),
223
- keyDisplay: formatKeyDisplay(parseKeyBinding(DEFAULT_KEY_STRING)),
245
+ key: binding,
246
+ keyDisplay: formatKeyDisplay(binding),
224
247
  provider: DEFAULT_PROVIDER
225
248
  };
226
249
  }
250
+ const configFileSchema = z.object({
251
+ key: z.string().refine(
252
+ (v) => {
253
+ try {
254
+ parseKeyBinding(v);
255
+ return true;
256
+ } catch {
257
+ return false;
258
+ }
259
+ },
260
+ { message: "Invalid key binding" }
261
+ ).optional().default(DEFAULT_KEY_STRING),
262
+ provider: z.enum(["local", "gemini", "openai"]).optional().default(DEFAULT_PROVIDER)
263
+ });
264
+ class ConfigError extends Error {
265
+ constructor(configPath, details) {
266
+ super(`Invalid config at ${configPath}:
267
+ ${details}`);
268
+ this.configPath = configPath;
269
+ this.details = details;
270
+ this.name = "ConfigError";
271
+ }
272
+ }
227
273
  function loadConfig(cwd) {
228
274
  const configPath = join(cwd, ".pi", "pi-voice.json");
275
+ let raw;
229
276
  try {
230
- const raw = readFileSync(configPath, "utf-8");
231
- const json = JSON.parse(raw);
232
- let binding;
233
- let display;
234
- if (typeof json.key === "string") {
235
- binding = parseKeyBinding(json.key);
236
- display = formatKeyDisplay(binding);
237
- } else {
238
- if (json.key !== void 0) {
239
- console.warn(`[Config] "key" must be a string in ${configPath}, using default`);
240
- }
241
- binding = parseKeyBinding(DEFAULT_KEY_STRING);
242
- display = formatKeyDisplay(binding);
243
- }
244
- let provider = DEFAULT_PROVIDER;
245
- if (typeof json.provider === "string") {
246
- const p = json.provider.toLowerCase();
247
- if (VALID_PROVIDERS.includes(p)) {
248
- provider = p;
249
- } else {
250
- console.warn(`[Config] Unknown provider "${json.provider}" in ${configPath}, using default "${DEFAULT_PROVIDER}"`);
251
- }
252
- }
253
- console.log(`[Config] Loaded config: key=${display}, provider=${provider} from ${configPath}`);
254
- return { key: binding, keyDisplay: display, provider };
277
+ raw = readFileSync(configPath, "utf-8");
255
278
  } catch (err) {
256
279
  if (err.code === "ENOENT") {
257
- console.log(`[Config] No config file found at ${configPath}, using default (${DEFAULT_KEY_STRING})`);
258
- } else {
259
- console.warn(`[Config] Failed to load ${configPath}, using default:`, err);
280
+ logger.info({ configPath }, "No config file found, using defaults");
281
+ return defaultConfig();
260
282
  }
261
- return defaultConfig();
283
+ throw new ConfigError(configPath, `Failed to read file: ${err.message}`);
262
284
  }
285
+ let json;
286
+ try {
287
+ json = JSON.parse(raw);
288
+ } catch {
289
+ throw new ConfigError(configPath, "Invalid JSON syntax");
290
+ }
291
+ const result = configFileSchema.safeParse(json);
292
+ if (!result.success) {
293
+ const details = result.error.issues.map((issue) => {
294
+ const path = issue.path.length > 0 ? `"${issue.path.join(".")}"` : "(root)";
295
+ return ` - ${path}: ${issue.message}`;
296
+ }).join("\n");
297
+ throw new ConfigError(configPath, details);
298
+ }
299
+ const parsed = result.data;
300
+ const binding = parseKeyBinding(parsed.key);
301
+ const display = formatKeyDisplay(binding);
302
+ logger.info({ key: display, provider: parsed.provider, configPath }, "Loaded config");
303
+ return { key: binding, keyDisplay: display, provider: parsed.provider };
304
+ }
305
+ let geminiClient = null;
306
+ function getGeminiClient() {
307
+ if (geminiClient) return geminiClient;
308
+ const forceVertexOff = process.env.GOOGLE_GENAI_USE_VERTEXAI === "false";
309
+ const project = process.env.GOOGLE_CLOUD_PROJECT;
310
+ const location = process.env.GOOGLE_CLOUD_LOCATION ?? "us-central1";
311
+ const apiKey = process.env.GEMINI_API_KEY ?? process.env.GOOGLE_API_KEY;
312
+ if (project && !forceVertexOff) {
313
+ logger.info({ project, location }, "Initializing Gemini client (Vertex AI)");
314
+ geminiClient = new GoogleGenAI({ vertexai: true, project, location });
315
+ } else if (apiKey) {
316
+ logger.info("Initializing Gemini client (API key)");
317
+ geminiClient = new GoogleGenAI({ apiKey });
318
+ } else {
319
+ throw new Error(
320
+ "Gemini provider requires either GOOGLE_CLOUD_PROJECT (for Vertex AI) or GEMINI_API_KEY / GOOGLE_API_KEY (for Gemini API)."
321
+ );
322
+ }
323
+ return geminiClient;
263
324
  }
264
325
  const DEFAULT_MODEL = "medium-q5_0";
265
326
  const HF_BASE_URL = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
@@ -271,7 +332,7 @@ function modelFileName(model) {
271
332
  }
272
333
  async function downloadModel(model, destPath) {
273
334
  const url = `${HF_BASE_URL}/${modelFileName(model)}`;
274
- console.log(`[Whisper] Downloading model "${model}" from ${url} ...`);
335
+ logger.info({ model, url }, "Downloading Whisper model");
275
336
  const response = await fetch(url, { method: "GET", redirect: "follow" });
276
337
  if (!response.ok) {
277
338
  throw new Error(
@@ -323,7 +384,7 @@ async function downloadModel(model, destPath) {
323
384
  process.stderr.write("\n");
324
385
  const { renameSync } = await import("node:fs");
325
386
  renameSync(tmpPath, destPath);
326
- console.log(`[Whisper] Model saved to ${destPath}`);
387
+ logger.info({ destPath }, "Whisper model saved");
327
388
  } catch (err) {
328
389
  try {
329
390
  unlinkSync(tmpPath);
@@ -352,17 +413,6 @@ async function resolveModelPath() {
352
413
  await downloadModel(model, destPath);
353
414
  return destPath;
354
415
  }
355
- let geminiClient$1 = null;
356
- function getGeminiClient$1() {
357
- if (geminiClient$1) return geminiClient$1;
358
- const project = process.env.GOOGLE_CLOUD_PROJECT;
359
- const location = process.env.GOOGLE_CLOUD_LOCATION ?? "us-central1";
360
- if (!project) {
361
- throw new Error("GOOGLE_CLOUD_PROJECT environment variable is required");
362
- }
363
- geminiClient$1 = new GoogleGenAI({ vertexai: true, project, location });
364
- return geminiClient$1;
365
- }
366
416
  let openaiClient$1 = null;
367
417
  function getOpenAIClient$1() {
368
418
  if (openaiClient$1) return openaiClient$1;
@@ -380,15 +430,15 @@ async function getWhisperInstance() {
380
430
  if (whisperInitPromise) return whisperInitPromise;
381
431
  whisperInitPromise = (async () => {
382
432
  const modelPath = await resolveModelPath();
383
- console.log("[STT:local] Loading Whisper model from", modelPath, "...");
433
+ logger.info({ modelPath }, "Loading Whisper model");
384
434
  whisperInstance = new Whisper(modelPath);
385
- console.log("[STT:local] Whisper model loaded");
435
+ logger.info("Whisper model loaded");
386
436
  return whisperInstance;
387
437
  })();
388
438
  return whisperInitPromise;
389
439
  }
390
440
  async function transcribeGemini(audioBuffer) {
391
- const client = getGeminiClient$1();
441
+ const client = getGeminiClient();
392
442
  const base64Audio = audioBuffer.toString("base64");
393
443
  const response = await client.models.generateContent({
394
444
  model: "gemini-2.5-flash",
@@ -447,20 +497,9 @@ async function transcribe(audioData, provider = "local") {
447
497
  text = await transcribeGemini(Buffer.from(audioData));
448
498
  break;
449
499
  }
450
- console.log(`[STT:${provider}] Transcribed: "${text}"`);
500
+ logger.info({ provider, text }, "Transcribed");
451
501
  return text;
452
502
  }
453
- let geminiClient = null;
454
- function getGeminiClient() {
455
- if (geminiClient) return geminiClient;
456
- const project = process.env.GOOGLE_CLOUD_PROJECT;
457
- const location = process.env.GOOGLE_CLOUD_LOCATION ?? "us-central1";
458
- if (!project) {
459
- throw new Error("GOOGLE_CLOUD_PROJECT environment variable is required");
460
- }
461
- geminiClient = new GoogleGenAI({ vertexai: true, project, location });
462
- return geminiClient;
463
- }
464
503
  let openaiClient = null;
465
504
  function getOpenAIClient() {
466
505
  if (openaiClient) return openaiClient;
@@ -525,8 +564,9 @@ async function* synthesizeStreamGemini(text) {
525
564
  totalBytes += leftover.length;
526
565
  yield leftover;
527
566
  }
528
- console.log(
529
- `[TTS:gemini] Streamed ${totalBytes} bytes of PCM audio for "${text.substring(0, 50)}..."`
567
+ logger.info(
568
+ { provider: "gemini", totalBytes, text: text.substring(0, 50) },
569
+ "Streamed PCM audio"
530
570
  );
531
571
  }
532
572
  async function* synthesizeStreamOpenAI(text) {
@@ -549,8 +589,9 @@ async function* synthesizeStreamOpenAI(text) {
549
589
  yield chunk;
550
590
  offset = end;
551
591
  }
552
- console.log(
553
- `[TTS:openai] Streamed ${totalBytes} bytes of PCM audio for "${text.substring(0, 50)}..."`
592
+ logger.info(
593
+ { provider: "openai", totalBytes, text: text.substring(0, 50) },
594
+ "Streamed PCM audio"
554
595
  );
555
596
  }
556
597
  function speakLocal(text) {
@@ -571,8 +612,9 @@ function speakLocal(text) {
571
612
  });
572
613
  child.on("close", (code) => {
573
614
  if (code === 0) {
574
- console.log(
575
- `[TTS:local] Spoke "${text.substring(0, 50)}${text.length > 50 ? "..." : ""}"`
615
+ logger.info(
616
+ { provider: "local", text: text.substring(0, 50) },
617
+ "Spoke text"
576
618
  );
577
619
  resolve();
578
620
  } else {
@@ -603,13 +645,13 @@ function setSessionCwd(cwd) {
603
645
  }
604
646
  async function getOrCreateSession() {
605
647
  if (session) return session;
606
- console.log(`[PiSession] Creating new agent session (cwd: ${sessionCwd})...`);
648
+ logger.info({ cwd: sessionCwd }, "Creating new agent session");
607
649
  const result = await createAgentSession({
608
650
  cwd: sessionCwd,
609
651
  sessionManager: SessionManager.inMemory()
610
652
  });
611
653
  session = result.session;
612
- console.log("[PiSession] Session created");
654
+ logger.info("Agent session created");
613
655
  return session;
614
656
  }
615
657
  async function prompt(text, options) {
@@ -618,7 +660,7 @@ async function prompt(text, options) {
618
660
  if (event.type === "message_update" && event.assistantMessageEvent.type === "text_end") {
619
661
  const content = event.assistantMessageEvent.content.trim();
620
662
  if (content.length > 0) {
621
- console.log(`[PiSession] Response: ${content}`);
663
+ logger.info({ content }, "Agent response");
622
664
  options?.onTextEnd?.(content);
623
665
  }
624
666
  }
@@ -633,7 +675,7 @@ function dispose() {
633
675
  if (session) {
634
676
  session.dispose();
635
677
  session = null;
636
- console.log("[PiSession] Session disposed");
678
+ logger.info("Agent session disposed");
637
679
  }
638
680
  }
639
681
  const IPC = {
@@ -706,7 +748,7 @@ function startDaemonServer(handler) {
706
748
  });
707
749
  });
708
750
  server.listen(socketPath);
709
- console.log(`[DaemonIPC] Listening on ${socketPath}`);
751
+ logger.info({ socketPath }, "DaemonIPC listening");
710
752
  return socketPath;
711
753
  }
712
754
  function stopDaemonServer() {
@@ -721,7 +763,7 @@ function stopDaemonServer() {
721
763
  } catch {
722
764
  }
723
765
  }
724
- console.log("[DaemonIPC] Server stopped");
766
+ logger.info("DaemonIPC server stopped");
725
767
  }
726
768
  const workingCwd = process.env["PI_VOICE_CWD"] || process.cwd();
727
769
  let mainWindow = null;
@@ -730,7 +772,7 @@ let currentState = "idle";
730
772
  setSessionCwd(workingCwd);
731
773
  function setState(state, message) {
732
774
  currentState = state;
733
- console.log(`[Main] State: ${state}${message ? ` - ${message}` : ""}`);
775
+ logger.info({ state, message }, "State changed");
734
776
  }
735
777
  function createWindow() {
736
778
  mainWindow = new BrowserWindow({
@@ -764,7 +806,7 @@ function setupIpcHandlers(provider) {
764
806
  ipcMain.on(IPC.RECORDING_DATA, async (_event, data) => {
765
807
  if (currentState !== "recording") return;
766
808
  if (data.byteLength < 1e3) {
767
- console.log("[Main] Recording too short, ignoring");
809
+ logger.info("Recording too short, ignoring");
768
810
  setState("idle", "Recording too short");
769
811
  return;
770
812
  }
@@ -830,7 +872,7 @@ function setupIpcHandlers(provider) {
830
872
  }
831
873
  } catch (err) {
832
874
  const msg = err instanceof Error ? err.message : String(err);
833
- console.error("[Main] Pipeline error:", msg);
875
+ logger.error({ err: msg }, "Pipeline error");
834
876
  setState("error", msg);
835
877
  setTimeout(() => {
836
878
  if (currentState === "error") setState("idle");
@@ -838,7 +880,7 @@ function setupIpcHandlers(provider) {
838
880
  }
839
881
  });
840
882
  ipcMain.on(IPC.RECORDING_ERROR, (_event, error) => {
841
- console.error("[Main] Recording error:", error);
883
+ logger.error({ err: error }, "Recording error");
842
884
  setState("error", error);
843
885
  setTimeout(() => {
844
886
  if (currentState === "error") setState("idle");
@@ -856,8 +898,9 @@ function setupFnHook(config) {
856
898
  {
857
899
  onFnDown: () => {
858
900
  if (currentState !== "idle") {
859
- console.log(
860
- `[Main] ${config.keyDisplay} pressed but state is ${currentState}, ignoring`
901
+ logger.info(
902
+ { key: config.keyDisplay, state: currentState },
903
+ "Key pressed but not idle, ignoring"
861
904
  );
862
905
  return;
863
906
  }
@@ -876,7 +919,7 @@ function setupFnHook(config) {
876
919
  fnHook.start();
877
920
  } catch (err) {
878
921
  const msg = err instanceof Error ? err.message : String(err);
879
- console.error("[Main] FnHook error:", msg);
922
+ logger.error({ err: msg }, "FnHook error");
880
923
  setState("error", msg);
881
924
  }
882
925
  }
@@ -900,7 +943,7 @@ function handleDaemonCommand(command) {
900
943
  }
901
944
  }
902
945
  function gracefulShutdown() {
903
- console.log("[Main] Shutting down...");
946
+ logger.info("Shutting down...");
904
947
  fnHook?.stop();
905
948
  dispose();
906
949
  stopDaemonServer();
@@ -912,17 +955,28 @@ process.on("SIGTERM", () => {
912
955
  });
913
956
  const gotLock = app.requestSingleInstanceLock();
914
957
  if (!gotLock) {
915
- console.log("[Main] Another instance is already running. Exiting.");
958
+ logger.warn("Another instance is already running. Exiting.");
916
959
  app.quit();
917
960
  }
918
961
  app.whenReady().then(async () => {
919
- const config = loadConfig(workingCwd);
962
+ let config;
963
+ try {
964
+ config = loadConfig(workingCwd);
965
+ } catch (err) {
966
+ if (err instanceof ConfigError) {
967
+ logger.error({ err: err.message }, "Config error");
968
+ } else {
969
+ logger.error({ err: err instanceof Error ? err.message : String(err) }, "Failed to load config");
970
+ }
971
+ app.quit();
972
+ return;
973
+ }
920
974
  if (config.provider === "local") {
921
975
  try {
922
976
  await resolveModelPath();
923
977
  } catch (err) {
924
978
  const msg = err instanceof Error ? err.message : String(err);
925
- console.error("[Main] Failed to prepare Whisper model:", msg);
979
+ logger.error({ err: msg }, "Failed to prepare Whisper model");
926
980
  app.quit();
927
981
  return;
928
982
  }
@@ -932,7 +986,7 @@ app.whenReady().then(async () => {
932
986
  setupFnHook(config);
933
987
  startDaemonServer(handleDaemonCommand);
934
988
  saveRuntimeState(workingCwd);
935
- console.log(`[Main] pi-voice daemon started (cwd: ${workingCwd})`);
989
+ logger.info({ cwd: workingCwd }, "pi-voice daemon started");
936
990
  });
937
991
  app.on("window-all-closed", () => {
938
992
  });
package/package.json CHANGED
@@ -1,9 +1,13 @@
1
1
  {
2
2
  "name": "pi-voice",
3
- "version": "0.1.1",
3
+ "version": "0.3.3",
4
4
  "description": "Voice interface for pi coding agent",
5
5
  "author": "Yuku Kotani",
6
6
  "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "https://github.com/yukukotani/pi-voice"
10
+ },
7
11
  "type": "module",
8
12
  "main": "./out/main/index.js",
9
13
  "bin": {
@@ -37,7 +41,9 @@
37
41
  "@napi-rs/whisper": "^0.0.4",
38
42
  "electron": "^40.2.1",
39
43
  "openai": "^6.10.0",
40
- "uiohook-napi": "^1.5.4"
44
+ "pino": "^10.3.1",
45
+ "uiohook-napi": "^1.5.4",
46
+ "zod": "^4.3.6"
41
47
  },
42
48
  "trustedDependencies": [
43
49
  "electron",