audio-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +411 -0
- package/dist/audio/capture.d.ts +28 -0
- package/dist/audio/capture.js +2 -0
- package/dist/audio/capture.js.map +1 -0
- package/dist/audio/devices.d.ts +16 -0
- package/dist/audio/devices.js +65 -0
- package/dist/audio/devices.js.map +1 -0
- package/dist/audio/helper-capture.d.ts +36 -0
- package/dist/audio/helper-capture.js +135 -0
- package/dist/audio/helper-capture.js.map +1 -0
- package/dist/audio/helper-path.d.ts +10 -0
- package/dist/audio/helper-path.js +47 -0
- package/dist/audio/helper-path.js.map +1 -0
- package/dist/audio/wav.d.ts +52 -0
- package/dist/audio/wav.js +174 -0
- package/dist/audio/wav.js.map +1 -0
- package/dist/bin/audio-capture-helper +0 -0
- package/dist/config.d.ts +14 -0
- package/dist/config.js +27 -0
- package/dist/config.js.map +1 -0
- package/dist/errors.d.ts +18 -0
- package/dist/errors.js +21 -0
- package/dist/errors.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +10 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +14 -0
- package/dist/logger.js +34 -0
- package/dist/logger.js.map +1 -0
- package/dist/paths.d.ts +15 -0
- package/dist/paths.js +36 -0
- package/dist/paths.js.map +1 -0
- package/dist/server.d.ts +1 -0
- package/dist/server.js +112 -0
- package/dist/server.js.map +1 -0
- package/dist/session/manager.d.ts +58 -0
- package/dist/session/manager.js +184 -0
- package/dist/session/manager.js.map +1 -0
- package/dist/session/store.d.ts +35 -0
- package/dist/session/store.js +72 -0
- package/dist/session/store.js.map +1 -0
- package/dist/tools/index.d.ts +16 -0
- package/dist/tools/index.js +180 -0
- package/dist/tools/index.js.map +1 -0
- package/package.json +58 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 audio-mcp contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
# audio-mcp
|
|
2
|
+
|
|
3
|
+
Local MCP (Model Context Protocol) server for **macOS** that captures
|
|
4
|
+
**microphone input and/or system audio output** into explicit, user-defined
|
|
5
|
+
sessions and exposes the raw WAV audio to AI agents through MCP tools.
|
|
6
|
+
|
|
7
|
+
When capturing both sources at once, audio-mcp produces a **stereo WAV**
|
|
8
|
+
with mic on the left channel and system output on the right channel —
|
|
9
|
+
preserving both signals losslessly in a single file.
|
|
10
|
+
|
|
11
|
+
## What this is — and what it is *not*
|
|
12
|
+
|
|
13
|
+
**It is:**
|
|
14
|
+
|
|
15
|
+
- A session-based audio recorder (explicit `start_session` / `stop_session`)
|
|
16
|
+
- A local-only MCP server (stdio transport)
|
|
17
|
+
- A way to hand raw WAV audio to a multimodal AI agent for analysis
|
|
18
|
+
- Able to capture both mic input AND system output, individually or together
|
|
19
|
+
|
|
20
|
+
**It is *not*:**
|
|
21
|
+
|
|
22
|
+
- A transcription or speech-to-text pipeline — no STT is performed
|
|
23
|
+
- A background daemon — it only records inside explicit sessions you start
|
|
24
|
+
- A network service — no data leaves your machine from the server itself
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## How it works
|
|
29
|
+
|
|
30
|
+
audio-mcp ships a small bundled Swift helper binary
|
|
31
|
+
(`audio-capture-helper`, signed with a Developer ID cert and notarized
|
|
32
|
+
by Apple) that uses:
|
|
33
|
+
|
|
34
|
+
- **ScreenCaptureKit** (SCStream) for system-audio output capture
|
|
35
|
+
- **AVFoundation** (AVCaptureSession) for microphone input
|
|
36
|
+
|
|
37
|
+
Audio is streamed from the helper to the Node MCP server as raw PCM and
|
|
38
|
+
written incrementally to a WAV file. Nothing leaves your machine unless
|
|
39
|
+
your AI agent sends a `get_audio` result to an external model.
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Prerequisites
|
|
44
|
+
|
|
45
|
+
- **macOS 13 (Ventura) or later** — required by ScreenCaptureKit audio
|
|
46
|
+
- **Node.js 18 or later**
|
|
47
|
+
- Microphone and/or Screen Recording permission granted to your MCP client
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Install
|
|
52
|
+
|
|
53
|
+
### With `npx` (recommended)
|
|
54
|
+
|
|
55
|
+
No install needed — this config runs it on demand:
|
|
56
|
+
|
|
57
|
+
```json
|
|
58
|
+
{
|
|
59
|
+
"mcpServers": {
|
|
60
|
+
"audio": {
|
|
61
|
+
"command": "npx",
|
|
62
|
+
"args": ["-y", "audio-mcp"]
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Global install
|
|
69
|
+
|
|
70
|
+
```sh
|
|
71
|
+
npm install -g audio-mcp
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Homebrew tap
|
|
75
|
+
|
|
76
|
+
```sh
|
|
77
|
+
brew install bugorbn/audio-mcp/audio-mcp
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## First-launch permissions
|
|
83
|
+
|
|
84
|
+
audio-mcp requires up to two macOS permissions (depending on what you
|
|
85
|
+
capture). They're granted to the parent MCP client app (Claude Desktop,
|
|
86
|
+
Cursor, etc.), not to the helper binary itself.
|
|
87
|
+
|
|
88
|
+
The bundled Swift helper is **signed with a Developer ID certificate and
|
|
89
|
+
notarized by Apple**, so there's no Gatekeeper prompt on first launch.
|
|
90
|
+
|
|
91
|
+
### 1. Microphone permission (`capture: "mic"` or `"both"`)
|
|
92
|
+
|
|
93
|
+
macOS will prompt the first time you start a session. Grant it.
|
|
94
|
+
Re-enable at: **System Settings → Privacy & Security → Microphone →
|
|
95
|
+
\[your MCP client]**.
|
|
96
|
+
|
|
97
|
+
### 2. Screen Recording permission (`capture: "system"` or `"both"`)
|
|
98
|
+
|
|
99
|
+
macOS requires this for system audio capture via ScreenCaptureKit.
|
|
100
|
+
Grant on first use. Re-enable at: **System Settings → Privacy &
|
|
101
|
+
Security → Screen Recording → \[your MCP client]** — you'll need to
|
|
102
|
+
restart the MCP client after changing this.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## MCP client configuration
|
|
107
|
+
|
|
108
|
+
### Claude Desktop
|
|
109
|
+
|
|
110
|
+
`~/Library/Application Support/Claude/claude_desktop_config.json`:
|
|
111
|
+
|
|
112
|
+
```json
|
|
113
|
+
{
|
|
114
|
+
"mcpServers": {
|
|
115
|
+
"audio": {
|
|
116
|
+
"command": "npx",
|
|
117
|
+
"args": ["-y", "audio-mcp"]
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Cursor
|
|
124
|
+
|
|
125
|
+
`~/.cursor/mcp.json`:
|
|
126
|
+
|
|
127
|
+
```json
|
|
128
|
+
{
|
|
129
|
+
"mcpServers": {
|
|
130
|
+
"audio": {
|
|
131
|
+
"command": "npx",
|
|
132
|
+
"args": ["-y", "audio-mcp"]
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## Tools
|
|
141
|
+
|
|
142
|
+
All tools return structured JSON. Errors come back as
|
|
143
|
+
`{ "error": { "code": "...", "message": "..." } }` with one of:
|
|
144
|
+
`SESSION_ALREADY_ACTIVE`, `SESSION_NOT_FOUND`, `SESSION_STILL_ACTIVE`,
|
|
145
|
+
`AUDIO_DEVICE_ERROR`, `CHUNK_TOO_LARGE`, `NOT_IMPLEMENTED`, `INVALID_INPUT`.
|
|
146
|
+
|
|
147
|
+
### `start_session`
|
|
148
|
+
|
|
149
|
+
Start a new recording. Fails if a session is already active.
|
|
150
|
+
|
|
151
|
+
**Input:**
|
|
152
|
+
|
|
153
|
+
```json
|
|
154
|
+
{
|
|
155
|
+
"label": "meeting with Sam",
|
|
156
|
+
"source": "MacBook Pro Microphone",
|
|
157
|
+
"capture": "both"
|
|
158
|
+
}
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
All fields optional. `capture` is one of:
|
|
162
|
+
|
|
163
|
+
- `"mic"` — mono WAV, microphone only
|
|
164
|
+
- `"system"` — mono WAV, system output only (requires Screen Recording permission)
|
|
165
|
+
- `"both"` — **stereo WAV**, L=mic R=system (default)
|
|
166
|
+
|
|
167
|
+
`source` accepts either a device uniqueID (from `list_audio_sources`)
|
|
168
|
+
or a case-insensitive substring of the device name. Only applies when `capture` is `"mic"` or `"both"`.
|
|
169
|
+
|
|
170
|
+
**Output:**
|
|
171
|
+
|
|
172
|
+
```json
|
|
173
|
+
{
|
|
174
|
+
"session_id": "f3d0…",
|
|
175
|
+
"label": "meeting with Sam",
|
|
176
|
+
"source": "MacBook Pro Microphone + system audio",
|
|
177
|
+
"capture_mode": "both",
|
|
178
|
+
"started_at": "2026-04-05T10:00:00.000Z",
|
|
179
|
+
"sample_rate": 16000,
|
|
180
|
+
"channels": 2,
|
|
181
|
+
"format": "wav"
|
|
182
|
+
}
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### `stop_session`
|
|
186
|
+
|
|
187
|
+
Finalize the WAV file and record duration/size.
|
|
188
|
+
|
|
189
|
+
**Input:** `{ "session_id": "f3d0…" }`
|
|
190
|
+
|
|
191
|
+
**Output:**
|
|
192
|
+
|
|
193
|
+
```json
|
|
194
|
+
{
|
|
195
|
+
"session_id": "f3d0…",
|
|
196
|
+
"label": "meeting with Sam",
|
|
197
|
+
"started_at": "2026-04-05T10:00:00.000Z",
|
|
198
|
+
"stopped_at": "2026-04-05T10:02:22.500Z",
|
|
199
|
+
"duration_seconds": 142.5,
|
|
200
|
+
"file_size_bytes": 9136684,
|
|
201
|
+
"path": "/Users/<you>/.audio-mcp/sessions/f3d0….wav"
|
|
202
|
+
}
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### `get_audio`
|
|
206
|
+
|
|
207
|
+
Return a base64-encoded WAV slice of a recorded session. Max 300
|
|
208
|
+
seconds per call — chunk larger ranges into multiple calls.
|
|
209
|
+
|
|
210
|
+
**Input:**
|
|
211
|
+
|
|
212
|
+
```json
|
|
213
|
+
{
|
|
214
|
+
"session_id": "f3d0…",
|
|
215
|
+
"start_second": 0,
|
|
216
|
+
"end_second": 60,
|
|
217
|
+
"format": "wav"
|
|
218
|
+
}
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
`format` defaults to `"wav"`. `"opus"` returns `NOT_IMPLEMENTED` in v0.1 and is reserved for a later release.
|
|
222
|
+
|
|
223
|
+
**Output:**
|
|
224
|
+
|
|
225
|
+
```json
|
|
226
|
+
{
|
|
227
|
+
"session_id": "f3d0…",
|
|
228
|
+
"start_second": 0,
|
|
229
|
+
"end_second": 60,
|
|
230
|
+
"duration_seconds": 60,
|
|
231
|
+
"format": "wav",
|
|
232
|
+
"sample_rate": 16000,
|
|
233
|
+
"channels": 2,
|
|
234
|
+
"audio_base64": "UklGR…",
|
|
235
|
+
"size_bytes": 3840044
|
|
236
|
+
}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
For stereo sessions the returned WAV preserves the L=mic / R=system
|
|
240
|
+
layout so downstream tools (or the agent itself) can separate them.
|
|
241
|
+
|
|
242
|
+
**Live sessions:** `get_audio` also works while a session is still
|
|
243
|
+
recording. The response clamps `end_second` to the number of seconds
|
|
244
|
+
currently on disk, so an agent can poll every few seconds to stream
|
|
245
|
+
audio out of a live recording. `list_sessions` and `get_session`
|
|
246
|
+
report live `file_size_bytes` and `duration_seconds` for active
|
|
247
|
+
sessions to help agents track progress.
|
|
248
|
+
|
|
249
|
+
### `list_sessions`
|
|
250
|
+
|
|
251
|
+
List all recorded sessions, newest first. No input. Each item includes
|
|
252
|
+
`capture_mode` and `channels`.
|
|
253
|
+
|
|
254
|
+
### `get_session`
|
|
255
|
+
|
|
256
|
+
Return metadata for a single session. Input: `{ "session_id": "…" }`.
|
|
257
|
+
|
|
258
|
+
### `list_audio_sources`
|
|
259
|
+
|
|
260
|
+
Enumerate available microphone input devices. System audio is captured
|
|
261
|
+
via the `capture` parameter rather than as a device. No input.
|
|
262
|
+
|
|
263
|
+
### `delete_session`
|
|
264
|
+
|
|
265
|
+
Permanently delete a session's WAV file and metadata. Refuses active
|
|
266
|
+
sessions.
|
|
267
|
+
|
|
268
|
+
---
|
|
269
|
+
|
|
270
|
+
## Example agent workflows
|
|
271
|
+
|
|
272
|
+
**Record and summarise a video call (mic + speakers):**
|
|
273
|
+
|
|
274
|
+
> 1. "Start a recording called 'standup' capturing both mic and system audio."
|
|
275
|
+
> 2. *(call happens…)*
|
|
276
|
+
> 3. "Stop the recording."
|
|
277
|
+
> 4. "Get the first 5 minutes and summarise what was discussed."
|
|
278
|
+
|
|
279
|
+
The agent calls `start_session` with `capture="both"`, then chunks
|
|
280
|
+
`get_audio` across the session. Because the file is stereo (L=mic,
|
|
281
|
+
R=system), the agent can reason about who said what.
|
|
282
|
+
|
|
283
|
+
**Dictate a voice memo (mic only):**
|
|
284
|
+
|
|
285
|
+
> "Start a mic-only recording called 'weekly plan'."
|
|
286
|
+
> "Stop the recording."
|
|
287
|
+
> "Analyse my weekly plan recording."
|
|
288
|
+
|
|
289
|
+
**Capture just system audio:**
|
|
290
|
+
|
|
291
|
+
> "Start a system-only recording called 'podcast clip'."
|
|
292
|
+
> "Stop it."
|
|
293
|
+
> "Get the audio and identify the speakers."
|
|
294
|
+
|
|
295
|
+
**Live monitoring (poll during recording):**
|
|
296
|
+
|
|
297
|
+
> "Start recording both mic and system."
|
|
298
|
+
> *(talk for 30 seconds)*
|
|
299
|
+
> "Get the last 10 seconds and tell me if I mentioned pricing."
|
|
300
|
+
> *(keep talking)*
|
|
301
|
+
> "Get the next 10 seconds."
|
|
302
|
+
> "Stop the recording."
|
|
303
|
+
|
|
304
|
+
---
|
|
305
|
+
|
|
306
|
+
## Audio format
|
|
307
|
+
|
|
308
|
+
| Parameter | Value |
|
|
309
|
+
| -------------- | ----------------- |
|
|
310
|
+
| Sample rate | 16,000 Hz |
|
|
311
|
+
| Channels | 1 (mic or system) / 2 (both, L=mic R=system) |
|
|
312
|
+
| Bit depth | 16-bit PCM |
|
|
313
|
+
| Container | WAV (RIFF) |
|
|
314
|
+
| Max session | No hard limit |
|
|
315
|
+
|
|
316
|
+
Sessions are streamed incrementally to disk — large recordings do not
|
|
317
|
+
load into memory.
|
|
318
|
+
|
|
319
|
+
---
|
|
320
|
+
|
|
321
|
+
## Storage layout
|
|
322
|
+
|
|
323
|
+
```
|
|
324
|
+
~/.audio-mcp/
|
|
325
|
+
├── config.json # user configuration
|
|
326
|
+
├── audio-mcp.log # rolling log, max 10 MB
|
|
327
|
+
└── sessions/
|
|
328
|
+
├── <uuid>.wav # audio file per session
|
|
329
|
+
└── <uuid>.json # session metadata
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
### `config.json`
|
|
333
|
+
|
|
334
|
+
```json
|
|
335
|
+
{
|
|
336
|
+
"default_source": null,
|
|
337
|
+
"default_capture_mode": "both",
|
|
338
|
+
"sample_rate": 16000,
|
|
339
|
+
"sessions_dir": null
|
|
340
|
+
}
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
All fields optional. `default_capture_mode` picks what
|
|
344
|
+
`start_session` defaults to when no `capture` is specified.
|
|
345
|
+
Sessions run for as long as you like — there is no built-in
|
|
346
|
+
time limit. Stop them explicitly with `stop_session`.
|
|
347
|
+
|
|
348
|
+
---
|
|
349
|
+
|
|
350
|
+
## Privacy
|
|
351
|
+
|
|
352
|
+
- The server has **no network code**. It only reads audio from the
|
|
353
|
+
helper subprocess, writes to `~/.audio-mcp/`, and talks to the MCP
|
|
354
|
+
client over stdio.
|
|
355
|
+
- Audio files are stored unencrypted on your local disk. Delete
|
|
356
|
+
sessions you no longer need with the `delete_session` tool.
|
|
357
|
+
- If you send audio from `get_audio` to a hosted model, that is under
|
|
358
|
+
the control of your agent / MCP client — not this server.
|
|
359
|
+
|
|
360
|
+
---
|
|
361
|
+
|
|
362
|
+
## Troubleshooting
|
|
363
|
+
|
|
364
|
+
**`AUDIO_DEVICE_ERROR` mentioning Microphone permission**
|
|
365
|
+
→ System Settings → Privacy & Security → Microphone → enable your MCP
|
|
366
|
+
client, then restart the client.
|
|
367
|
+
|
|
368
|
+
**`AUDIO_DEVICE_ERROR` mentioning Screen Recording permission**
|
|
369
|
+
→ System Settings → Privacy & Security → Screen Recording → enable
|
|
370
|
+
your MCP client, then **restart the client** (required by macOS).
|
|
371
|
+
|
|
372
|
+
**Gatekeeper blocks the helper binary anyway** (extremely rare — only
|
|
373
|
+
on offline machines or where Apple's notary service is unreachable)
|
|
374
|
+
→ `xattr -d com.apple.quarantine <path-to-audio-capture-helper>`, or
|
|
375
|
+
approve via System Settings → Privacy & Security.
|
|
376
|
+
|
|
377
|
+
**`CHUNK_TOO_LARGE`**
|
|
378
|
+
→ Split your request into ≤ 300-second slices.
|
|
379
|
+
|
|
380
|
+
**Session did not stop cleanly**
|
|
381
|
+
→ If the server process was killed mid-recording, the metadata file
|
|
382
|
+
may still have `is_active: true`. You can safely delete the session
|
|
383
|
+
JSON + WAV from `~/.audio-mcp/sessions/` by hand.
|
|
384
|
+
|
|
385
|
+
**System audio sounds silent**
|
|
386
|
+
→ On macOS, ScreenCaptureKit only captures audio that is actively
|
|
387
|
+
playing through the system output. If nothing's playing, the right
|
|
388
|
+
channel will be silent — that's expected.
|
|
389
|
+
|
|
390
|
+
---
|
|
391
|
+
|
|
392
|
+
## Contributing
|
|
393
|
+
|
|
394
|
+
Development setup:
|
|
395
|
+
|
|
396
|
+
```sh
|
|
397
|
+
npm install
|
|
398
|
+
npm run build:helper # requires Xcode command line tools for Swift
|
|
399
|
+
npm run build # tsc + helper build
|
|
400
|
+
npm test # Node/vitest tests
|
|
401
|
+
npm run test:swift # Swift/XCTest tests
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
Contributions welcome. Please open an issue first for anything beyond
|
|
405
|
+
a small fix.
|
|
406
|
+
|
|
407
|
+
---
|
|
408
|
+
|
|
409
|
+
## License
|
|
410
|
+
|
|
411
|
+
MIT — see `LICENSE`.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/** Which audio source(s) a capture reads from. */
export type CaptureMode = "mic" | "system" | "both";
/** Settings handed to a `CaptureDevice` when a capture is started. */
export interface CaptureOptions {
    /** Source selection; determines how many channels are produced. */
    captureMode: CaptureMode;
    /** Channel count: 1 for "mic" or "system", 2 for "both". Must agree with `captureMode`. */
    channels: number;
    /** Sample rate of the captured audio (the README documents 16,000 Hz). */
    sampleRate: number;
    /** AVCaptureDevice uniqueID of the microphone to use; when undefined the system default input is used. */
    micDeviceId?: string;
    /** PID of the parent MCP client, to be excluded from system audio capture. */
    excludePid?: number;
}
|
|
12
|
+
/**
 * Description of one input device, as returned by `listInputDevices()`.
 * Field names are snake_case because they mirror the helper's JSON output.
 */
export interface DeviceSummary {
    /** Device identifier; `resolveMicDevice()` tries an exact match on this first. */
    id: string;
    /** Device name; `resolveMicDevice()` falls back to a substring match on this. */
    name: string;
    /** Whether the helper reported this device as the default input. */
    is_default: boolean;
    /** Channel count reported for the device. */
    channels: number;
    /** Sample rates reported for the device. */
    sample_rates: Array<number>;
}
|
|
19
|
+
/**
 * A source of raw PCM audio. Frames are Int16 little-endian (interleaved
 * when stereo) and are delivered to the callback registered via `onData`.
 *
 * `HelperProcessCapture` is the implementation that spawns the bundled
 * Swift helper binary; tests substitute a `MockCapture`.
 */
export interface CaptureDevice {
    /** Register the consumer that receives each chunk of PCM data. */
    onData(cb: (chunk: Buffer) => void): void;
    /** Begin capturing with the given options. */
    start(opts: CaptureOptions): Promise<void>;
    /** End the capture. */
    stop(): Promise<void>;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"capture.js","sourceRoot":"","sources":["../../src/audio/capture.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { DeviceSummary } from "./capture.js";
|
|
2
|
+
/**
 * List the available input devices.
 *
 * The list is obtained by running `audio-capture-helper list-devices` and
 * parsing the JSON it writes to stdout. System audio is not a device in
 * this list — it is selected through the capture mode instead.
 */
export declare function listInputDevices(): Promise<Array<DeviceSummary>>;
|
|
8
|
+
/**
 * Map a user-supplied `source` onto a concrete input device.
 *
 * `source` may be a device uniqueID or a case-insensitive substring of a
 * device name. A `deviceId` of `undefined` in the result means "use the
 * system default".
 */
export declare function resolveMicDevice(source?: string | null): Promise<{ deviceId: string | undefined; deviceName: string }>;
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { execFile } from "node:child_process";
|
|
2
|
+
import { promisify } from "node:util";
|
|
3
|
+
import { HELPER_BIN, verifyHelper } from "./helper-path.js";
|
|
4
|
+
import { AudioMcpError } from "../errors.js";
|
|
5
|
+
const execFileP = promisify(execFile);
|
|
6
|
+
/**
 * List the microphone input devices reported by the bundled helper.
 *
 * Runs `audio-capture-helper list-devices` and parses the JSON it prints on
 * stdout. System audio is selected through the capture mode, so it never
 * appears in this list.
 *
 * @returns {Promise<Array<object>>} One `{ id, name, is_default, channels,
 *   sample_rates }` summary per entry of the helper's `input_devices` array,
 *   with the field values copied through unchanged.
 * @throws {AudioMcpError} `AUDIO_DEVICE_ERROR` when `verifyHelper()` reports a
 *   problem, when running the helper fails (it is given a 5000 ms timeout and
 *   a 1 MiB output cap), when stdout is not valid JSON, or when the JSON does
 *   not contain an `input_devices` array of objects.
 */
export async function listInputDevices() {
    const bad = verifyHelper();
    if (bad) {
        throw new AudioMcpError("AUDIO_DEVICE_ERROR", bad.message);
    }
    let stdout;
    try {
        ({ stdout } = await execFileP(HELPER_BIN, ["list-devices"], {
            timeout: 5000,
            maxBuffer: 1 * 1024 * 1024,
        }));
    }
    catch (err) {
        throw new AudioMcpError("AUDIO_DEVICE_ERROR", `list-devices failed: ${err?.message ?? String(err)}`);
    }
    let parsed;
    try {
        parsed = JSON.parse(stdout);
    }
    catch (err) {
        throw new AudioMcpError("AUDIO_DEVICE_ERROR", `list-devices output not JSON: ${err?.message ?? String(err)}`);
    }
    // Valid JSON is not necessarily the expected document (`null`, `{}`, a
    // bare array, ...). Check the shape here so a malformed reply surfaces as
    // AUDIO_DEVICE_ERROR like every other failure, not as a raw TypeError.
    const inputDevices = parsed?.input_devices;
    const isRecord = (entry) => entry !== null && typeof entry === "object";
    if (!Array.isArray(inputDevices) || !inputDevices.every(isRecord)) {
        throw new AudioMcpError("AUDIO_DEVICE_ERROR", 'list-devices output is missing a valid "input_devices" array');
    }
    // Copy only the known fields so extra keys in the helper output are dropped.
    return inputDevices.map((d) => ({
        id: d.id,
        name: d.name,
        is_default: d.is_default,
        channels: d.channels,
        sample_rates: d.sample_rates,
    }));
}
|
|
42
|
+
/**
 * Map a user-supplied `source` onto a concrete input device.
 *
 * Lookup order:
 *   1. No `source` (undefined, null or empty): use the system default; the
 *      returned `deviceId` is `undefined` and `deviceName` is the name of the
 *      device flagged `is_default`, or "system default" if none is flagged.
 *   2. A device whose `id` equals `source` exactly.
 *   3. The first device whose name contains `source`, ignoring case.
 *
 * @param source Device uniqueID or part of a device name.
 * @returns `{ deviceId, deviceName }` for the selected device.
 * @throws {AudioMcpError} `AUDIO_DEVICE_ERROR` when nothing matches `source`,
 *   or when `listInputDevices()` itself fails.
 */
export async function resolveMicDevice(source) {
    // Every path needs the device list, so fetch it once up front.
    const inputs = await listInputDevices();
    if (!source) {
        const fallback = inputs.find((dev) => dev.is_default);
        return { deviceId: undefined, deviceName: fallback?.name ?? "system default" };
    }
    // An exact uniqueID match takes priority over any name match.
    const byId = inputs.find((dev) => dev.id === source);
    if (byId) {
        return { deviceId: byId.id, deviceName: byId.name };
    }
    const wanted = source.toLowerCase();
    for (const dev of inputs) {
        if (dev.name.toLowerCase().includes(wanted)) {
            return { deviceId: dev.id, deviceName: dev.name };
        }
    }
    throw new AudioMcpError("AUDIO_DEVICE_ERROR", `No input device matching "${source}"`);
}
|
|
65
|
+
//# sourceMappingURL=devices.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"devices.js","sourceRoot":"","sources":["../../src/audio/devices.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,MAAM,SAAS,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AActC;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB;IACpC,MAAM,GAAG,GAAG,YAAY,EAAE,CAAC;IAC3B,IAAI,GAAG,EAAE,CAAC;QACR,MAAM,IAAI,aAAa,CAAC,oBAAoB,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;IAC7D,CAAC;IACD,IAAI,MAAc,CAAC;IACnB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,UAAU,EAAE,CAAC,cAAc,CAAC,EAAE;YAC3D,OAAO,EAAE,IAAI;YACb,SAAS,EAAE,CAAC,GAAG,IAAI,GAAG,IAAI;SAC3B,CAAC,CAAC;QACH,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;IACzB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,aAAa,CACrB,oBAAoB,EACpB,wBAAyB,GAAa,CAAC,OAAO,EAAE,CACjD,CAAC;IACJ,CAAC;IACD,IAAI,MAA+B,CAAC;IACpC,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAA4B,CAAC;IACzD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,aAAa,CACrB,oBAAoB,EACpB,iCAAkC,GAAa,CAAC,OAAO,EAAE,CAC1D,CAAC;IACJ,CAAC;IACD,OAAO,MAAM,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACtC,EAAE,EAAE,CAAC,CAAC,EAAE;QACR,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,YAAY,EAAE,CAAC,CAAC,YAAY;KAC7B,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,MAAsB;IAI3D,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,OAAO,GAAG,MAAM,gBAAgB,EAAE,CAAC;QACzC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;QAC9C,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,GAAG,EAAE,IAAI,IAAI,gBAAgB,EAAE,CAAC;IAC5E,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,gBAAgB,EAAE,CAAC;IACzC,8BAA8B;IAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,MAAM,CAAC,CAAC;IACnD,IAAI,KAAK;QAAE,OAAO,EAAE,QAAQ,EAAE,KAAK,CAAC,EAAE,EAAE,UAAU,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC;IACjE,MAAM,MAAM,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC
,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;IACvE,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,MAAM,IAAI,aAAa,CAAC,oBAAoB,EAAE,6BAA6B,MAAM,GAAG,CAAC,CAAC;IACxF,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,GAAG,CAAC,EAAE,EAAE,UAAU,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;AACpD,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type { CaptureDevice, CaptureOptions } from "./capture.js";
|
|
2
|
+
/** Options accepted by the `HelperProcessCapture` constructor. */
export interface HelperProcessCaptureOptions {
    /**
     * Full `[executable, ...args]` prefix used to spawn the helper, in place
     * of the default `[HELPER_BIN]` (the bundled Swift binary). Tests inject
     * something like `["node", "/path/to/stub-helper.js"]`.
     */
    command?: string[];
}
/**
 * `CaptureDevice` that runs the bundled Swift audio-capture-helper as a
 * child process and passes its stdout (raw PCM) to the `onData` callback.
 *
 * Lifecycle:
 *   1. `start()` checks that the binary exists, spawns it with the
 *      requested CLI flags, waits briefly so that immediate
 *      permission/Gatekeeper failures are caught, then returns.
 *   2. While the helper runs, every `stdout` chunk is forwarded verbatim.
 *      JSON-lines written to stderr are buffered and surfaced on error.
 *   3. `stop()` sends SIGTERM and waits for the child to exit.
 */
export declare class HelperProcessCapture implements CaptureDevice {
    private readonly command;
    private child;
    private dataCb;
    private stderrBuf;
    private exitPromise;
    constructor(opts?: HelperProcessCaptureOptions);
    /** Register the consumer that receives each stdout chunk. */
    onData(cb: (chunk: Buffer) => void): void;
    /** Spawn the helper and begin capturing (lifecycle step 1). */
    start(opts: CaptureOptions): Promise<void>;
    /** Terminate the helper and wait for it to exit (lifecycle step 3). */
    stop(): Promise<void>;
    private waitForEarlyFail;
    private binaryErrorToAudioMcpError;
    private translateExit;
    private extractStderrMessage;
}
|