pi-voice 0.2.0 → 0.3.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -52,14 +52,14 @@ You can configure pi-voice in `.pi/pi-voice.json`:
52
52
  | Key | Description |
53
53
  | --- | --- |
54
54
  | `key` | Push-to-talk shortcut. Combine modifiers (`ctrl`, `shift`, `alt`/`opt`, `meta`/`cmd`) and a main key with `+`. Examples: `"ctrl+t"`, `"alt+space"`, `"ctrl+shift+r"`. Default: `"meta+shift+i"`. |
55
- | `provider` | Speech provider for STT & TTS. `"local"`, `"gemini"` (Vertex AI), or `"openai"`. Default: `"local"`. |
55
+ | `provider` | Speech provider for STT & TTS. `"local"`, `"gemini"` (Vertex AI or Gemini API), or `"openai"`. Default: `"local"`. |
56
56
 
57
57
  ### Environment variables
58
58
 
59
59
  | Provider | Required variables |
60
60
  | --- | --- |
61
61
  | `local` | None (model is auto-downloaded on first launch). Optional: `WHISPER_MODEL_PATH` (custom model path), `WHISPER_MODEL` (model name, default `medium-q5_0`), `SAY_VOICE` (macOS `say` voice name, e.g. `"Kyoko"`). |
62
- | `gemini` | `GOOGLE_CLOUD_PROJECT`, `GOOGLE_CLOUD_LOCATION` (optional, default `us-central1`) |
62
+ | `gemini` | **Vertex AI:** `GOOGLE_CLOUD_PROJECT`, `GOOGLE_CLOUD_LOCATION` (optional, default `us-central1`). **Gemini API:** `GEMINI_API_KEY` or `GOOGLE_API_KEY`. If `GOOGLE_CLOUD_PROJECT` is set, Vertex AI is used; set `GOOGLE_GENAI_USE_VERTEXAI=false` to force API key mode. |
63
63
  | `openai` | `OPENAI_API_KEY` |
64
64
 
65
65
  #### Logging
package/out/cli/cli.js CHANGED
@@ -1952,7 +1952,7 @@ var require_indexes = __commonJS((exports, module) => {
1952
1952
 
1953
1953
  // node_modules/thread-stream/index.js
1954
1954
  var require_thread_stream = __commonJS((exports, module) => {
1955
- var __dirname = "/Users/yukukotani/ghq/github.com/yukukotani/pi-voice/node_modules/thread-stream";
1955
+ var __dirname = "/home/runner/work/pi-voice/pi-voice/node_modules/thread-stream";
1956
1956
  var { version } = require_package();
1957
1957
  var { EventEmitter } = __require("events");
1958
1958
  var { Worker } = __require("worker_threads");
@@ -2377,7 +2377,7 @@ var require_thread_stream = __commonJS((exports, module) => {
2377
2377
 
2378
2378
  // node_modules/pino/lib/transport.js
2379
2379
  var require_transport = __commonJS((exports, module) => {
2380
- var __dirname = "/Users/yukukotani/ghq/github.com/yukukotani/pi-voice/node_modules/pino/lib";
2380
+ var __dirname = "/home/runner/work/pi-voice/pi-voice/node_modules/pino/lib";
2381
2381
  var { createRequire: createRequire2 } = __require("module");
2382
2382
  var { existsSync: existsSync2 } = __require("node:fs");
2383
2383
  var getCallers = require_caller();
@@ -4492,7 +4492,7 @@ var require_node_gyp_build2 = __commonJS((exports, module) => {
4492
4492
 
4493
4493
  // node_modules/uiohook-napi/dist/index.js
4494
4494
  var require_dist = __commonJS((exports) => {
4495
- var __dirname = "/Users/yukukotani/ghq/github.com/yukukotani/pi-voice/node_modules/uiohook-napi/dist";
4495
+ var __dirname = "/home/runner/work/pi-voice/pi-voice/node_modules/uiohook-napi/dist";
4496
4496
  Object.defineProperty(exports, "__esModule", { value: true });
4497
4497
  exports.uIOhook = exports.UiohookKey = exports.WheelDirection = exports.EventType = undefined;
4498
4498
  var events_1 = __require("events");
package/out/main/index.js CHANGED
@@ -6,9 +6,9 @@ import { homedir } from "node:os";
6
6
  import pino from "pino";
7
7
  import { readFileSync, existsSync, mkdirSync, createWriteStream, unlinkSync, writeFileSync } from "node:fs";
8
8
  import { z } from "zod";
9
- import { GoogleGenAI } from "@google/genai";
10
9
  import OpenAI, { toFile } from "openai";
11
10
  import { WhisperFullParams, WhisperSamplingStrategy, Whisper } from "@napi-rs/whisper";
11
+ import { GoogleGenAI } from "@google/genai";
12
12
  import { Readable } from "node:stream";
13
13
  import { finished } from "node:stream/promises";
14
14
  import { spawn } from "node:child_process";
@@ -302,6 +302,26 @@ function loadConfig(cwd) {
302
302
  logger.info({ key: display, provider: parsed.provider, configPath }, "Loaded config");
303
303
  return { key: binding, keyDisplay: display, provider: parsed.provider };
304
304
  }
305
+ let geminiClient = null;
306
+ function getGeminiClient() {
307
+ if (geminiClient) return geminiClient;
308
+ const forceVertexOff = process.env.GOOGLE_GENAI_USE_VERTEXAI === "false";
309
+ const project = process.env.GOOGLE_CLOUD_PROJECT;
310
+ const location = process.env.GOOGLE_CLOUD_LOCATION ?? "us-central1";
311
+ const apiKey = process.env.GEMINI_API_KEY ?? process.env.GOOGLE_API_KEY;
312
+ if (project && !forceVertexOff) {
313
+ logger.info({ project, location }, "Initializing Gemini client (Vertex AI)");
314
+ geminiClient = new GoogleGenAI({ vertexai: true, project, location });
315
+ } else if (apiKey) {
316
+ logger.info("Initializing Gemini client (API key)");
317
+ geminiClient = new GoogleGenAI({ apiKey });
318
+ } else {
319
+ throw new Error(
320
+ "Gemini provider requires either GOOGLE_CLOUD_PROJECT (for Vertex AI) or GEMINI_API_KEY / GOOGLE_API_KEY (for Gemini API)."
321
+ );
322
+ }
323
+ return geminiClient;
324
+ }
305
325
  const DEFAULT_MODEL = "medium-q5_0";
306
326
  const HF_BASE_URL = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
307
327
  function modelCacheDir() {
@@ -393,17 +413,6 @@ async function resolveModelPath() {
393
413
  await downloadModel(model, destPath);
394
414
  return destPath;
395
415
  }
396
- let geminiClient$1 = null;
397
- function getGeminiClient$1() {
398
- if (geminiClient$1) return geminiClient$1;
399
- const project = process.env.GOOGLE_CLOUD_PROJECT;
400
- const location = process.env.GOOGLE_CLOUD_LOCATION ?? "us-central1";
401
- if (!project) {
402
- throw new Error("GOOGLE_CLOUD_PROJECT environment variable is required");
403
- }
404
- geminiClient$1 = new GoogleGenAI({ vertexai: true, project, location });
405
- return geminiClient$1;
406
- }
407
416
  let openaiClient$1 = null;
408
417
  function getOpenAIClient$1() {
409
418
  if (openaiClient$1) return openaiClient$1;
@@ -429,7 +438,7 @@ async function getWhisperInstance() {
429
438
  return whisperInitPromise;
430
439
  }
431
440
  async function transcribeGemini(audioBuffer) {
432
- const client = getGeminiClient$1();
441
+ const client = getGeminiClient();
433
442
  const base64Audio = audioBuffer.toString("base64");
434
443
  const response = await client.models.generateContent({
435
444
  model: "gemini-2.5-flash",
@@ -491,17 +500,6 @@ async function transcribe(audioData, provider = "local") {
491
500
  logger.info({ provider, text }, "Transcribed");
492
501
  return text;
493
502
  }
494
- let geminiClient = null;
495
- function getGeminiClient() {
496
- if (geminiClient) return geminiClient;
497
- const project = process.env.GOOGLE_CLOUD_PROJECT;
498
- const location = process.env.GOOGLE_CLOUD_LOCATION ?? "us-central1";
499
- if (!project) {
500
- throw new Error("GOOGLE_CLOUD_PROJECT environment variable is required");
501
- }
502
- geminiClient = new GoogleGenAI({ vertexai: true, project, location });
503
- return geminiClient;
504
- }
505
503
  let openaiClient = null;
506
504
  function getOpenAIClient() {
507
505
  if (openaiClient) return openaiClient;
package/package.json CHANGED
@@ -1,9 +1,13 @@
1
1
  {
2
2
  "name": "pi-voice",
3
- "version": "0.2.0",
3
+ "version": "0.3.3",
4
4
  "description": "Voice interface for pi coding agent",
5
5
  "author": "Yuku Kotani",
6
6
  "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "https://github.com/yukukotani/pi-voice"
10
+ },
7
11
  "type": "module",
8
12
  "main": "./out/main/index.js",
9
13
  "bin": {