pi-voice 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,8 +7,6 @@ const IPC = {
7
7
  PLAY_AUDIO_STREAM_START: "play-audio-stream-start",
8
8
  PLAY_AUDIO_STREAM_CHUNK: "play-audio-stream-chunk",
9
9
  PLAY_AUDIO_STREAM_END: "play-audio-stream-end",
10
- STATE_CHANGED: "state-changed",
11
- STATUS_MESSAGE: "status-message",
12
10
  // renderer -> main
13
11
  RECORDING_DATA: "recording-data",
14
12
  RECORDING_ERROR: "recording-error",
@@ -16,7 +14,7 @@ const IPC = {
16
14
  };
17
15
  const api = {
18
16
  onStartRecording: (callback) => {
19
- electron.ipcRenderer.on(IPC.START_RECORDING, () => callback());
17
+ electron.ipcRenderer.on(IPC.START_RECORDING, (_event, format) => callback(format ?? "webm"));
20
18
  },
21
19
  onStopRecording: (callback) => {
22
20
  electron.ipcRenderer.on(IPC.STOP_RECORDING, () => callback());
@@ -36,12 +34,6 @@ const api = {
36
34
  onPlayAudioStreamEnd: (callback) => {
37
35
  electron.ipcRenderer.on(IPC.PLAY_AUDIO_STREAM_END, () => callback());
38
36
  },
39
- onStateChanged: (callback) => {
40
- electron.ipcRenderer.on(IPC.STATE_CHANGED, (_event, state) => callback(state));
41
- },
42
- onStatusMessage: (callback) => {
43
- electron.ipcRenderer.on(IPC.STATUS_MESSAGE, (_event, message) => callback(message));
44
- },
45
37
  sendRecordingData: (data) => {
46
38
  electron.ipcRenderer.send(IPC.RECORDING_DATA, data);
47
39
  },
@@ -0,0 +1,209 @@
1
// Sound-effect assets, resolved relative to this module's own URL.
const toggleOnUrl = new URL("toggle_on-D9c1Kpa8.wav", import.meta.url).href;
const toggleOffUrl = new URL("toggle_off-DBeRrNFR.wav", import.meta.url).href;

// State for the MediaRecorder ("webm") capture path.
let mediaRecorder = null;
let audioChunks = [];

// AudioContext created lazily on first use and then reused.
let audioContext = null;

// State for the raw PCM capture path.
let pcmStream = null;
let pcmSourceNode = null;
let pcmProcessorNode = null;
let pcmChunks = [];
let pcmRecording = false;

// Sample rate (Hz) that PCM recordings are resampled to before being sent.
const WHISPER_SAMPLE_RATE = 16000;
12
/**
 * Fetch an audio file, decode it and play it once through the shared
 * AudioContext at twice its original gain.
 *
 * Playback is fire-and-forget: the function returns immediately and any
 * failure while fetching, decoding or starting playback is only logged.
 *
 * @param {string} url - Location of the audio file to play.
 * @returns {void}
 */
function playSoundEffect(url) {
  // Create the shared context on first use.
  if (!audioContext) {
    audioContext = new AudioContext();
  }
  const ctx = audioContext;

  void (async () => {
    try {
      const response = await fetch(url);
      const encoded = await response.arrayBuffer();
      const decoded = await ctx.decodeAudioData(encoded);

      const player = ctx.createBufferSource();
      player.buffer = decoded;

      // Route through a gain node so the effect plays at 2x amplitude.
      const boost = ctx.createGain();
      boost.gain.value = 2;
      player.connect(boost);
      boost.connect(ctx.destination);

      player.start();
    } catch (err) {
      console.error("Failed to play sound effect:", err);
    }
  })();
}
27
/**
 * Resample a sequence of audio samples from one sample rate to another.
 *
 * When reducing the rate, each output sample is the mean of the source
 * samples it replaces. Picking (or linearly interpolating between) isolated
 * source samples would fold content above the new Nyquist frequency back
 * into the output; averaging attenuates it first. When increasing the rate,
 * samples are linearly interpolated.
 *
 * @param {Float32Array} buffer - Source samples.
 * @param {number} sourceSampleRate - Rate of `buffer`; must be finite and > 0.
 * @param {number} targetSampleRate - Desired rate; must be finite and > 0.
 * @returns {Float32Array} `buffer` itself when both rates are equal,
 *   otherwise a new array of `Math.round(buffer.length / ratio)` samples.
 * @throws {RangeError} If the rates differ and either one is not a positive
 *   finite number.
 */
function downsample(buffer, sourceSampleRate, targetSampleRate) {
  if (sourceSampleRate === targetSampleRate) return buffer;

  if (
    !Number.isFinite(sourceSampleRate) ||
    !Number.isFinite(targetSampleRate) ||
    sourceSampleRate <= 0 ||
    targetSampleRate <= 0
  ) {
    throw new RangeError(
      `Invalid sample rates: ${sourceSampleRate} -> ${targetSampleRate}`
    );
  }

  const ratio = sourceSampleRate / targetSampleRate;
  const newLength = Math.round(buffer.length / ratio);
  const result = new Float32Array(newLength);

  if (ratio > 1) {
    // Downsampling: average the window of source samples behind each output.
    for (let i = 0; i < newLength; i++) {
      const start = Math.floor(i * ratio);
      // Always cover at least one sample and never run past the input.
      const end = Math.max(
        start + 1,
        Math.min(Math.floor((i + 1) * ratio), buffer.length)
      );
      let sum = 0;
      for (let j = start; j < end; j++) {
        sum += buffer[j];
      }
      result[i] = sum / (end - start);
    }
    return result;
  }

  // Upsampling: linear interpolation between neighbouring source samples.
  for (let i = 0; i < newLength; i++) {
    const srcIndex = i * ratio;
    const lo = Math.floor(srcIndex);
    const hi = Math.min(lo + 1, buffer.length - 1);
    const frac = srcIndex - lo;
    result[i] = buffer[lo] * (1 - frac) + buffer[hi] * frac;
  }
  return result;
}
41
/**
 * Start capturing `stream` with a MediaRecorder producing WebM/Opus.
 *
 * When the recorder stops, the stream's tracks are stopped and the recorded
 * audio is sent through `window.piVoice.sendRecordingData`, or a message is
 * sent through `window.piVoice.sendRecordingError` if nothing was captured
 * or the captured data could not be read.
 *
 * @param {MediaStream} stream - Microphone stream to record.
 * @returns {void}
 */
function startWebmRecording(stream) {
  // Each recording owns its chunk list. The stop event is delivered
  // asynchronously, so a recording started right after this one would
  // otherwise reset the shared array before this handler reads it.
  const chunks = [];
  audioChunks = chunks;

  mediaRecorder = new MediaRecorder(stream, {
    mimeType: "audio/webm;codecs=opus"
  });

  mediaRecorder.ondataavailable = (event) => {
    if (event.data.size > 0) {
      chunks.push(event.data);
    }
  };

  mediaRecorder.onstop = async () => {
    stream.getTracks().forEach((track) => track.stop());
    if (chunks.length === 0) {
      window.piVoice.sendRecordingError("No audio data captured");
      return;
    }
    try {
      const blob = new Blob(chunks, { type: "audio/webm" });
      const arrayBuffer = await blob.arrayBuffer();
      window.piVoice.sendRecordingData(arrayBuffer);
    } catch (err) {
      // Nothing awaits this handler, so report the failure instead of
      // leaving an unhandled rejection and no reply at all.
      const msg = err instanceof Error ? err.message : String(err);
      window.piVoice.sendRecordingError(`Failed to read recorded audio: ${msg}`);
    }
  };

  // Emit a data chunk every 100 ms.
  mediaRecorder.start(100);
}
63
/**
 * Stop the current MediaRecorder, if one exists and is not already inactive.
 * Delivery of the recorded data happens in the recorder's `onstop` handler.
 *
 * @returns {void}
 */
function stopWebmRecording() {
  if (!mediaRecorder) return;
  if (mediaRecorder.state === "inactive") return;
  mediaRecorder.stop();
}
68
/**
 * Start capturing raw samples from `stream` into `pcmChunks`.
 *
 * The stream is routed through a ScriptProcessorNode (buffer size 4096, one
 * input and one output channel) and a copy of channel 0 of every processed
 * block is appended to `pcmChunks` while `pcmRecording` is true.
 *
 * @param {MediaStream} stream - Microphone stream to capture.
 * @returns {void}
 */
function startPcmRecording(stream) {
  // Create the shared context on first use.
  if (!audioContext) {
    audioContext = new AudioContext();
  }
  const ctx = audioContext;

  pcmStream = stream;
  pcmChunks = [];
  pcmRecording = true;

  pcmSourceNode = ctx.createMediaStreamSource(stream);
  pcmProcessorNode = ctx.createScriptProcessor(4096, 1, 1);
  pcmProcessorNode.onaudioprocess = ({ inputBuffer }) => {
    if (pcmRecording) {
      // Store a copy of the samples, not a reference to the event's buffer.
      pcmChunks.push(inputBuffer.getChannelData(0).slice());
    }
  };

  pcmSourceNode.connect(pcmProcessorNode);
  // NOTE(review): presumably connected to the destination so the processor
  // keeps receiving audio callbacks; confirm before removing.
  pcmProcessorNode.connect(ctx.destination);
}
84
/**
 * Stop the PCM capture, then deliver what was recorded.
 *
 * The audio nodes are disconnected and the stream's tracks stopped. If no
 * samples were collected an error is sent through
 * `window.piVoice.sendRecordingError`; otherwise the chunks are joined,
 * resampled to `WHISPER_SAMPLE_RATE` and the resulting ArrayBuffer is sent
 * through `window.piVoice.sendRecordingData`.
 *
 * @returns {void}
 */
function stopPcmRecording() {
  pcmRecording = false;
  pcmProcessorNode?.disconnect();
  pcmSourceNode?.disconnect();
  pcmStream?.getTracks().forEach((track) => track.stop());

  // Drop the references to the capture graph once the result has been sent.
  const releaseCaptureGraph = () => {
    pcmProcessorNode = null;
    pcmSourceNode = null;
    pcmStream = null;
  };

  if (pcmChunks.length === 0) {
    window.piVoice.sendRecordingError("No audio data captured");
    releaseCaptureGraph();
    return;
  }

  // Join all captured blocks into one contiguous sample array.
  let sampleTotal = 0;
  for (const part of pcmChunks) {
    sampleTotal += part.length;
  }
  const merged = new Float32Array(sampleTotal);
  let writeAt = 0;
  pcmChunks.forEach((part) => {
    merged.set(part, writeAt);
    writeAt += part.length;
  });

  // Falls back to 48 kHz when no AudioContext exists.
  const inputRate = audioContext?.sampleRate ?? 48e3;
  const resampled = downsample(merged, inputRate, WHISPER_SAMPLE_RATE);
  window.piVoice.sendRecordingData(resampled.buffer);

  pcmChunks = [];
  releaseCaptureGraph();
}
111
// Format ("pcm" or anything else for WebM) of the most recent start request.
let currentRecordingFormat = "webm";
// Incremented on every start and stop request. A start request whose number
// no longer matches after awaiting the microphone has been superseded.
let recordingRequestId = 0;
// True while a start request is still waiting for getUserMedia to settle.
let microphonePending = false;

/**
 * Start-recording handler: plays the "on" sound, acquires the microphone and
 * starts the capture path selected by `format`.
 *
 * If a stop (or another start) arrives before the microphone is available,
 * the acquired stream is released and no recording is started. Without this
 * the recorder would start after the stop request and never be stopped.
 */
window.piVoice.onStartRecording(async (format) => {
  playSoundEffect(toggleOnUrl);
  currentRecordingFormat = format;

  const requestId = ++recordingRequestId;
  microphonePending = true;
  let stream = null;

  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    if (requestId !== recordingRequestId) {
      // Superseded while waiting: release the microphone right away.
      stream.getTracks().forEach((track) => track.stop());
      return;
    }
    microphonePending = false;
    if (format === "pcm") {
      startPcmRecording(stream);
    } else {
      startWebmRecording(stream);
    }
  } catch (err) {
    // Also reached when recorder setup throws after the microphone was
    // acquired, so make sure the stream does not stay open.
    stream?.getTracks().forEach((track) => track.stop());
    // A superseded request has already been answered by the stop handler
    // or will be answered by the newer start request.
    if (requestId !== recordingRequestId) return;
    microphonePending = false;
    const msg = err instanceof Error ? err.message : String(err);
    window.piVoice.sendRecordingError(`Microphone access failed: ${msg}`);
  }
});

/**
 * Stop-recording handler: plays the "off" sound and stops the capture path
 * that the last start request selected.
 */
window.piVoice.onStopRecording(() => {
  playSoundEffect(toggleOffUrl);

  // Invalidate any start request still waiting for the microphone.
  recordingRequestId++;
  if (microphonePending) {
    microphonePending = false;
    // Nothing was recorded yet; answer the stop so the other side is not
    // left waiting for data that will never come.
    window.piVoice.sendRecordingError("No audio data captured");
    return;
  }

  if (currentRecordingFormat === "pcm") {
    stopPcmRecording();
  } else {
    stopWebmRecording();
  }
});
135
// Format of the stream currently being played, as announced by the
// stream-start message. Defaults apply until the first start message.
let streamSampleRate = 24e3;
let streamChannels = 1;
let streamBitsPerSample = 16;
// AudioContext time at which the next chunk should begin playing.
let streamNextPlayTime = 0;
// True once the stream-end message for the current stream has arrived.
let streamEnded = false;
// Buffer sources of the current stream that were started and have not ended.
const streamSources = new Set();

/**
 * Abort playback of the current stream and reset the scheduling state.
 *
 * Every still-registered source is detached from its `onended` handler,
 * stopped and disconnected, so that audio from a previous stream can neither
 * overlap the next one nor report completion on its behalf.
 *
 * @returns {void}
 */
function stopStreamPlayback() {
  for (const source of streamSources) {
    source.onended = null;
    try {
      source.stop();
    } catch (err) {
      // Best effort: a source that cannot be stopped is simply dropped.
    }
    source.disconnect();
  }
  streamSources.clear();
  streamEnded = false;
  streamNextPlayTime = 0;
}

/**
 * Stream-start handler: resets playback state and records the stream format.
 */
window.piVoice.onPlayAudioStreamStart((meta) => {
  try {
    if (!audioContext) {
      audioContext = new AudioContext();
    }
    stopStreamPlayback();
    streamSampleRate = meta.sampleRate;
    streamChannels = meta.channels;
    streamBitsPerSample = meta.bitsPerSample;
  } catch (err) {
    console.error("Stream start error:", err);
  }
});

/**
 * Stream-chunk handler: decodes one chunk of interleaved little-endian
 * 16-bit PCM and schedules it directly after the previously scheduled chunk.
 */
window.piVoice.onPlayAudioStreamChunk((pcmData) => {
  try {
    if (!audioContext) {
      audioContext = new AudioContext();
    }
    // Samples are read as 16-bit integers below; any other announced size
    // would be decoded as noise, so refuse it instead.
    if (streamBitsPerSample !== 16) {
      console.error(
        `Stream chunk playback error: unsupported sample size ${streamBitsPerSample} bits`
      );
      return;
    }

    const raw = pcmData instanceof ArrayBuffer ? pcmData : new Uint8Array(pcmData).buffer;
    const bytesPerSample = streamBitsPerSample / 8;
    // Whole frames only: a trailing partial frame would otherwise make the
    // loop below read past the end of the buffer and drop the entire chunk.
    const sampleCount = Math.floor(raw.byteLength / (bytesPerSample * streamChannels));
    if (!(sampleCount > 0)) return;

    const audioBuffer = audioContext.createBuffer(
      streamChannels,
      sampleCount,
      streamSampleRate
    );
    const view = new DataView(raw);
    for (let ch = 0; ch < streamChannels; ch++) {
      const channelData = audioBuffer.getChannelData(ch);
      for (let i = 0; i < sampleCount; i++) {
        const byteOffset = (i * streamChannels + ch) * bytesPerSample;
        // Scale signed 16-bit samples to the [-1, 1) float range.
        channelData[i] = view.getInt16(byteOffset, true) / 32768;
      }
    }

    const source = audioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(audioContext.destination);
    source.onended = () => {
      streamSources.delete(source);
      if (streamEnded && streamSources.size === 0) {
        window.piVoice.sendPlaybackDone();
      }
    };

    // Never schedule in the past; otherwise queue right behind the last chunk.
    const now = audioContext.currentTime;
    if (streamNextPlayTime < now) {
      streamNextPlayTime = now;
    }
    source.start(streamNextPlayTime);
    streamNextPlayTime += audioBuffer.duration;
    streamSources.add(source);
  } catch (err) {
    console.error("Stream chunk playback error:", err);
  }
});

/**
 * Stream-end handler: marks the stream as complete and reports completion
 * immediately when nothing is left playing.
 */
window.piVoice.onPlayAudioStreamEnd(() => {
  streamEnded = true;
  if (streamSources.size === 0) {
    window.piVoice.sendPlaybackDone();
  }
});
@@ -1,147 +1,10 @@
1
1
  <!DOCTYPE html>
2
- <html lang="ja">
2
+ <html>
3
3
  <head>
4
4
  <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>pi-voice</title>
7
- <style>
8
- * {
9
- margin: 0;
10
- padding: 0;
11
- box-sizing: border-box;
12
- }
13
-
14
- body {
15
- font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
16
- background: #1a1a2e;
17
- color: #e0e0e0;
18
- display: flex;
19
- flex-direction: column;
20
- align-items: center;
21
- justify-content: center;
22
- height: 100vh;
23
- user-select: none;
24
- -webkit-app-region: drag;
25
- overflow: hidden;
26
- }
27
-
28
- .container {
29
- display: flex;
30
- flex-direction: column;
31
- align-items: center;
32
- gap: 24px;
33
- }
34
-
35
- .indicator {
36
- width: 120px;
37
- height: 120px;
38
- border-radius: 50%;
39
- background: #2a2a4a;
40
- display: flex;
41
- align-items: center;
42
- justify-content: center;
43
- transition: all 0.3s ease;
44
- position: relative;
45
- }
46
-
47
- .indicator::after {
48
- content: '';
49
- position: absolute;
50
- width: 100%;
51
- height: 100%;
52
- border-radius: 50%;
53
- border: 3px solid transparent;
54
- transition: all 0.3s ease;
55
- }
56
-
57
- /* State styles */
58
- .state-idle .indicator {
59
- background: #2a2a4a;
60
- }
61
-
62
- .state-recording .indicator {
63
- background: #e74c3c;
64
- box-shadow: 0 0 30px rgba(231, 76, 60, 0.5);
65
- animation: pulse 1s ease-in-out infinite;
66
- }
67
-
68
- .state-transcribing .indicator {
69
- background: #f39c12;
70
- animation: spin 1.5s linear infinite;
71
- }
72
-
73
- .state-thinking .indicator {
74
- background: #3498db;
75
- animation: think 2s ease-in-out infinite;
76
- }
77
-
78
- .state-speaking .indicator {
79
- background: #2ecc71;
80
- animation: speak 0.5s ease-in-out infinite alternate;
81
- }
82
-
83
- .state-error .indicator {
84
- background: #c0392b;
85
- }
86
-
87
- @keyframes pulse {
88
- 0%, 100% { transform: scale(1); }
89
- 50% { transform: scale(1.08); }
90
- }
91
-
92
- @keyframes spin {
93
- from { transform: rotate(0deg); }
94
- to { transform: rotate(360deg); }
95
- }
96
-
97
- @keyframes think {
98
- 0%, 100% { opacity: 1; }
99
- 50% { opacity: 0.5; }
100
- }
101
-
102
- @keyframes speak {
103
- from { transform: scale(1); }
104
- to { transform: scale(1.05); }
105
- }
106
-
107
- .icon {
108
- font-size: 48px;
109
- line-height: 1;
110
- }
111
-
112
- .state-label {
113
- font-size: 16px;
114
- font-weight: 600;
115
- text-transform: uppercase;
116
- letter-spacing: 2px;
117
- }
118
-
119
- .status-message {
120
- font-size: 13px;
121
- color: #888;
122
- max-width: 300px;
123
- text-align: center;
124
- word-break: break-word;
125
- min-height: 20px;
126
- }
127
-
128
- .hint {
129
- font-size: 11px;
130
- color: #555;
131
- position: fixed;
132
- bottom: 16px;
133
- }
134
- </style>
135
- <script type="module" crossorigin src="./assets/index-dks-nI81.js"></script>
5
+ <title>pi-voice audio worker</title>
6
+ <script type="module" crossorigin src="./assets/index-CdX3ylbA.js"></script>
136
7
  </head>
137
8
  <body>
138
- <div class="container" id="app">
139
- <div class="indicator" id="indicator">
140
- <span class="icon" id="icon">&#x23F8;</span>
141
- </div>
142
- <div class="state-label" id="stateLabel">IDLE</div>
143
- <div class="status-message" id="statusMessage">Hold Fn to speak</div>
144
- </div>
145
- <div class="hint">Fn key: push-to-talk</div>
146
9
  </body>
147
10
  </html>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-voice",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "Voice interface for pi coding agent",
5
5
  "author": "Yuku Kotani",
6
6
  "license": "MIT",
@@ -11,8 +11,7 @@
11
11
  },
12
12
  "files": [
13
13
  "bin/",
14
- "out/",
15
- "build/"
14
+ "out/"
16
15
  ],
17
16
  "scripts": {
18
17
  "cli": "bun src/cli.ts",
@@ -23,14 +22,11 @@
23
22
  "build:cli": "bun build src/cli.ts --outdir out/cli --target node --format esm --external electron",
24
23
  "preview": "electron-vite preview",
25
24
  "prepack": "bun run build",
26
- "dist": "bun run build && electron-builder --mac --config",
27
- "dist:dir": "bun run build && electron-builder --mac --dir --config",
28
25
  "prepublish": "bun run build"
29
26
  },
30
27
  "devDependencies": {
31
28
  "@types/bun": "latest",
32
- "electron-vite": "^3.1.0",
33
- "electron-builder": "^26.0.0"
29
+ "electron-vite": "^3.1.0"
34
30
  },
35
31
  "peerDependencies": {
36
32
  "typescript": "^5"
@@ -38,12 +34,16 @@
38
34
  "dependencies": {
39
35
  "@google/genai": "^1.40.0",
40
36
  "@mariozechner/pi-coding-agent": "^0.52.7",
37
+ "@napi-rs/whisper": "^0.0.4",
41
38
  "electron": "^40.2.1",
42
- "iohook-macos": "^1.2.1"
39
+ "openai": "^6.10.0",
40
+ "pino": "^10.3.1",
41
+ "uiohook-napi": "^1.5.4",
42
+ "zod": "^4.3.6"
43
43
  },
44
44
  "trustedDependencies": [
45
45
  "electron",
46
- "iohook-macos",
46
+ "uiohook-napi",
47
47
  "protobufjs"
48
48
  ]
49
49
  }
@@ -1,14 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
- <plist version="1.0">
4
- <dict>
5
- <key>com.apple.security.cs.allow-jit</key>
6
- <true/>
7
- <key>com.apple.security.cs.allow-unsigned-executable-memory</key>
8
- <true/>
9
- <key>com.apple.security.device.audio-input</key>
10
- <true/>
11
- <key>com.apple.security.automation.apple-events</key>
12
- <true/>
13
- </dict>
14
- </plist>
@@ -1,162 +0,0 @@
1
- const toggleOnUrl = "" + new URL("toggle_on-D9c1Kpa8.wav", import.meta.url).href;
2
- const toggleOffUrl = "" + new URL("toggle_off-DBeRrNFR.wav", import.meta.url).href;
3
- document.getElementById("indicator");
4
- const icon = document.getElementById("icon");
5
- const stateLabel = document.getElementById("stateLabel");
6
- const statusMessage = document.getElementById("statusMessage");
7
- let mediaRecorder = null;
8
- let audioChunks = [];
9
- let audioContext = null;
10
- function playSoundEffect(url) {
11
- const audio = new Audio(url);
12
- audio.play().catch((err) => {
13
- console.error("Failed to play sound effect:", err);
14
- });
15
- }
16
- const stateConfig = {
17
- idle: { icon: "⏸", label: "IDLE", defaultMessage: "Hold Fn to speak" },
18
- recording: {
19
- icon: "🔴",
20
- label: "RECORDING",
21
- defaultMessage: "Listening..."
22
- },
23
- transcribing: {
24
- icon: "🔄",
25
- label: "TRANSCRIBING",
26
- defaultMessage: "Converting speech to text..."
27
- },
28
- thinking: {
29
- icon: "🧠",
30
- label: "THINKING",
31
- defaultMessage: "pi is thinking..."
32
- },
33
- speaking: {
34
- icon: "🔊",
35
- label: "SPEAKING",
36
- defaultMessage: "Playing response..."
37
- },
38
- error: { icon: "⚠", label: "ERROR", defaultMessage: "An error occurred" }
39
- };
40
- window.piVoice.onStateChanged((state) => {
41
- document.body.className = "";
42
- document.body.classList.add(`state-${state}`);
43
- const config = stateConfig[state];
44
- if (config) {
45
- icon.textContent = config.icon;
46
- stateLabel.textContent = config.label;
47
- statusMessage.textContent = config.defaultMessage;
48
- }
49
- });
50
- window.piVoice.onStatusMessage((message) => {
51
- statusMessage.textContent = message;
52
- });
53
- window.piVoice.onStartRecording(async () => {
54
- playSoundEffect(toggleOnUrl);
55
- try {
56
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
57
- audioChunks = [];
58
- mediaRecorder = new MediaRecorder(stream, {
59
- mimeType: "audio/webm;codecs=opus"
60
- });
61
- mediaRecorder.ondataavailable = (event) => {
62
- if (event.data.size > 0) {
63
- audioChunks.push(event.data);
64
- }
65
- };
66
- mediaRecorder.onstop = async () => {
67
- stream.getTracks().forEach((track) => track.stop());
68
- if (audioChunks.length === 0) {
69
- window.piVoice.sendRecordingError("No audio data captured");
70
- return;
71
- }
72
- const blob = new Blob(audioChunks, { type: "audio/webm" });
73
- const arrayBuffer = await blob.arrayBuffer();
74
- window.piVoice.sendRecordingData(arrayBuffer);
75
- };
76
- mediaRecorder.start(100);
77
- } catch (err) {
78
- const msg = err instanceof Error ? err.message : String(err);
79
- window.piVoice.sendRecordingError(`Microphone access failed: ${msg}`);
80
- }
81
- });
82
- window.piVoice.onStopRecording(() => {
83
- playSoundEffect(toggleOffUrl);
84
- if (mediaRecorder && mediaRecorder.state !== "inactive") {
85
- mediaRecorder.stop();
86
- }
87
- });
88
- let streamSampleRate = 24e3;
89
- let streamChannels = 1;
90
- let streamBitsPerSample = 16;
91
- let streamNextPlayTime = 0;
92
- let streamActiveSources = 0;
93
- let streamEnded = false;
94
- function stopStreamPlayback() {
95
- streamActiveSources = 0;
96
- streamEnded = false;
97
- streamNextPlayTime = 0;
98
- }
99
- window.piVoice.onPlayAudioStreamStart((meta) => {
100
- try {
101
- if (!audioContext) {
102
- audioContext = new AudioContext();
103
- }
104
- stopStreamPlayback();
105
- streamSampleRate = meta.sampleRate;
106
- streamChannels = meta.channels;
107
- streamBitsPerSample = meta.bitsPerSample;
108
- streamNextPlayTime = 0;
109
- streamEnded = false;
110
- } catch (err) {
111
- console.error("Stream start error:", err);
112
- }
113
- });
114
- window.piVoice.onPlayAudioStreamChunk((pcmData) => {
115
- try {
116
- if (!audioContext) {
117
- audioContext = new AudioContext();
118
- }
119
- const raw = pcmData instanceof ArrayBuffer ? pcmData : new Uint8Array(pcmData).buffer;
120
- const bytesPerSample = streamBitsPerSample / 8;
121
- const sampleCount = raw.byteLength / bytesPerSample / streamChannels;
122
- if (sampleCount <= 0) return;
123
- const audioBuffer = audioContext.createBuffer(
124
- streamChannels,
125
- sampleCount,
126
- streamSampleRate
127
- );
128
- const view = new DataView(raw);
129
- for (let ch = 0; ch < streamChannels; ch++) {
130
- const channelData = audioBuffer.getChannelData(ch);
131
- for (let i = 0; i < sampleCount; i++) {
132
- const byteOffset = (i * streamChannels + ch) * bytesPerSample;
133
- const int16 = view.getInt16(byteOffset, true);
134
- channelData[i] = int16 / 32768;
135
- }
136
- }
137
- const source = audioContext.createBufferSource();
138
- source.buffer = audioBuffer;
139
- source.connect(audioContext.destination);
140
- const now = audioContext.currentTime;
141
- if (streamNextPlayTime < now) {
142
- streamNextPlayTime = now;
143
- }
144
- source.start(streamNextPlayTime);
145
- streamNextPlayTime += audioBuffer.duration;
146
- streamActiveSources++;
147
- source.onended = () => {
148
- streamActiveSources--;
149
- if (streamEnded && streamActiveSources <= 0) {
150
- window.piVoice.sendPlaybackDone();
151
- }
152
- };
153
- } catch (err) {
154
- console.error("Stream chunk playback error:", err);
155
- }
156
- });
157
- window.piVoice.onPlayAudioStreamEnd(() => {
158
- streamEnded = true;
159
- if (streamActiveSources <= 0) {
160
- window.piVoice.sendPlaybackDone();
161
- }
162
- });