@moxxy/cli 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@moxxy/cli",
3
- "version": "1.3.1",
3
+ "version": "1.4.0",
4
4
  "description": "CLI for the Moxxy agentic framework — manage agents, skills, plugins, channels, and vaults from the terminal",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -50,5 +50,8 @@
50
50
  "marked": "^15.0.0",
51
51
  "react": "^19.0.0",
52
52
  "tsx": "^4.21.0"
53
+ },
54
+ "devDependencies": {
55
+ "react-devtools-core": "file:./stubs/react-devtools-core"
53
56
  }
54
57
  }
package/src/api-client.js CHANGED
@@ -127,6 +127,46 @@ export class ApiClient {
127
127
  return this.request(`/v1/agents/${encodeURIComponent(agentId)}/runs`, 'POST', { task });
128
128
  }
129
129
 
130
+ /**
131
+ * Upload a recorded voice clip to the gateway. The server transcribes it
132
+ * via the configured STT provider and immediately starts a run with the
133
+ * transcript as the task. Returns `{ transcript, run_id, status, ... }`.
134
+ */
135
+ async startRunWithAudio(agentId, { data, mime = 'audio/wav', filename = 'voice.wav' }) {
136
+ const form = new FormData();
137
+ const blob = new Blob([data], { type: mime });
138
+ form.append('audio', blob, filename);
139
+
140
+ const headers = {};
141
+ if (this.token) {
142
+ headers['authorization'] = `Bearer ${this.token}`;
143
+ }
144
+ // NOTE: do NOT set content-type — fetch will compute the multipart
145
+ // boundary for us.
146
+
147
+ const url = this.buildUrl(`/v1/agents/${encodeURIComponent(agentId)}/runs/audio`);
148
+ let resp;
149
+ try {
150
+ resp = await fetch(url, { method: 'POST', headers, body: form });
151
+ } catch (err) {
152
+ if (isConnectionError(err)) throw gatewayDownError();
153
+ throw err;
154
+ }
155
+ if (!resp.ok) {
156
+ const err = await resp.json().catch(() => ({
157
+ error: 'unknown',
158
+ message: resp.statusText,
159
+ }));
160
+ const error = new Error(err.message || `API error ${resp.status}`);
161
+ error.status = resp.status;
162
+ error.code = err.error;
163
+ throw error;
164
+ }
165
+ const text = await resp.text();
166
+ if (!text) return {};
167
+ return JSON.parse(text);
168
+ }
169
+
130
170
  async stopAgent(agentId) {
131
171
  return this.request(`/v1/agents/${encodeURIComponent(agentId)}/stop`, 'POST');
132
172
  }
@@ -296,6 +336,37 @@ export class ApiClient {
296
336
  async setAgentTemplate(name, template) {
297
337
  return this.request(`/v1/agents/${encodeURIComponent(name)}/template`, 'PATCH', { template });
298
338
  }
339
+
340
+ // --- Settings: Speech-to-text ---------------------------------------
341
+
342
+ /**
343
+ * Fetch the currently-active STT configuration from the gateway.
344
+ * Returns `{ enabled: false }` when voice messages are off, or
345
+ * `{ enabled: true, provider, model, secret_ref, ... }` otherwise.
346
+ * The API never returns the raw API key.
347
+ */
348
+ async getSttSettings() {
349
+ return this.request('/v1/settings/stt', 'GET');
350
+ }
351
+
352
+ /**
353
+ * Configure (or reconfigure) speech-to-text.
354
+ *
355
+ * Pass `api_key` to provision a fresh vault secret; omit it to reuse an
356
+ * existing `secret_ref`. The running gateway swaps providers in-place —
357
+ * no restart needed.
358
+ */
359
+ async updateSttSettings(body) {
360
+ return this.request('/v1/settings/stt', 'PUT', body);
361
+ }
362
+
363
+ /**
364
+ * Disable voice messages. Removes the `stt` block from settings.yaml
365
+ * and clears the in-memory provider. Does NOT delete the vault secret.
366
+ */
367
+ async deleteSttSettings() {
368
+ return this.request('/v1/settings/stt', 'DELETE');
369
+ }
299
370
  }
300
371
 
301
372
  export function createApiClient(baseUrl, token, authMode = 'token') {
package/src/cli.js CHANGED
@@ -94,8 +94,9 @@ Usage:
94
94
  moxxy chat [--agent <id>] Alias for tui
95
95
  moxxy events tail [--agent <id>] [--run <id>] [--json]
96
96
  moxxy settings network-mode [safe|unsafe] Get or set network mode
97
- moxxy settings get [--key <k>] [--json] View settings
98
- moxxy settings set --key <k> --value <v> Set a setting
97
+ moxxy settings stt [status|enable|disable] Configure voice messages (speech-to-text)
98
+ moxxy settings get [--key <k>] [--json] View settings
99
+ moxxy settings set --key <k> --value <v> Set a setting
99
100
  moxxy doctor Diagnose installation
100
101
  moxxy update [--check] [--force] [--json] Check for and install updates
101
102
  moxxy update --rollback Restore previous gateway version
@@ -807,9 +807,183 @@ export async function runInit(client, args) {
807
807
  }
808
808
  }
809
809
 
810
+ // Step 8: Voice messages (optional)
811
+ p.note(
812
+ 'Voice messages let users send audio to the agent on any channel\n' +
813
+ '(Telegram voice notes, the TUI /voice command, or direct audio upload\n' +
814
+ 'to the gateway). The audio is transcribed to text before the agent\n' +
815
+ 'sees it. The agent does not reply with voice.',
816
+ 'Voice Messages (Speech-to-Text)'
817
+ );
818
+
819
+ const enableVoice = await p.confirm({
820
+ message: 'Enable voice messages?',
821
+ initialValue: false,
822
+ });
823
+ handleCancel(enableVoice);
824
+
825
+ if (enableVoice) {
826
+ const sttProvider = await p.select({
827
+ message: 'Speech-to-text provider',
828
+ options: [
829
+ {
830
+ value: 'whisper',
831
+ label: 'OpenAI Whisper',
832
+ hint: 'Cloud API, requires an OpenAI key',
833
+ },
834
+ { value: '__skip__', label: 'Skip', hint: 'configure later' },
835
+ ],
836
+ });
837
+ handleCancel(sttProvider);
838
+
839
+ if (sttProvider === 'whisper') {
840
+ const configured = await configureWhisperStt(client, moxxyHome);
841
+ if (configured) {
842
+ p.log.success('Voice messages enabled (OpenAI Whisper).');
843
+ } else {
844
+ p.log.warn('Voice setup skipped. Retry later with: moxxy init');
845
+ }
846
+ }
847
+ }
848
+
810
849
  p.outro('Setup complete. Run moxxy to see available commands.');
811
850
  }
812
851
 
852
+ // ---------------------------------------------------------------------------
853
+ // Speech-to-text (voice message) helpers
854
+ // ---------------------------------------------------------------------------
855
+
856
+ const STT_WHISPER_BACKEND_KEY = 'moxxy_stt_whisper';
857
+ const STT_WHISPER_KEY_NAME = 'STT_WHISPER_API_KEY';
858
+ const OPENAI_PROVIDER_BACKEND_KEY = 'moxxy_provider_openai';
859
+
860
+ /**
861
+ * Configure Whisper STT: either reuse an existing OpenAI vault secret or
862
+ * prompt for a new key, then persist an `stt` block to settings.yaml.
863
+ * Returns true on success, false if the user bailed or storage failed.
864
+ */
865
+ async function configureWhisperStt(client, moxxyHome) {
866
+ // Look for an existing vault entry we can reuse. Prefer a secret already
867
+ // backing the OpenAI provider install so users don't enter the same key
868
+ // twice.
869
+ let reuseBackendKey = null;
870
+ try {
871
+ const secrets = await client.listSecrets();
872
+ const existing = (secrets || []).find(
873
+ (s) => s.backend_key === OPENAI_PROVIDER_BACKEND_KEY,
874
+ );
875
+ if (existing) {
876
+ const reuse = await p.confirm({
877
+ message: 'Reuse your existing OpenAI API key for Whisper?',
878
+ initialValue: true,
879
+ });
880
+ handleCancel(reuse);
881
+ if (reuse) reuseBackendKey = OPENAI_PROVIDER_BACKEND_KEY;
882
+ }
883
+ } catch (err) {
884
+ // Vault listing may fail if the gateway is down — fall through to prompt.
885
+ p.log.warn(`Could not check existing vault secrets: ${err.message}`);
886
+ }
887
+
888
+ let secretRef = reuseBackendKey;
889
+
890
+ if (!secretRef) {
891
+ const apiKey = await p.password({
892
+ message: 'Enter your OpenAI API key (used for Whisper transcription)',
893
+ validate: (val) => {
894
+ if (!val || !val.trim()) return 'API key cannot be empty';
895
+ },
896
+ });
897
+ handleCancel(apiKey);
898
+
899
+ try {
900
+ await withSpinner(
901
+ 'Storing API key in vault...',
902
+ async () => {
903
+ await client.createSecret({
904
+ key_name: STT_WHISPER_KEY_NAME,
905
+ backend_key: STT_WHISPER_BACKEND_KEY,
906
+ policy_label: 'stt-provider',
907
+ value: apiKey.trim(),
908
+ });
909
+ },
910
+ 'Whisper API key stored.',
911
+ );
912
+ secretRef = STT_WHISPER_BACKEND_KEY;
913
+ } catch (err) {
914
+ p.log.error(`Failed to store API key: ${err.message}`);
915
+ return false;
916
+ }
917
+ }
918
+
919
+ try {
920
+ saveSttSetting(moxxyHome, {
921
+ provider: 'whisper',
922
+ model: 'whisper-1',
923
+ secret_ref: secretRef,
924
+ });
925
+ } catch (err) {
926
+ p.log.error(`Failed to write settings.yaml: ${err.message}`);
927
+ return false;
928
+ }
929
+
930
+ return true;
931
+ }
932
+
933
+ /**
934
+ * Write (or clear) the `stt` block in `{moxxy_home}/settings.yaml`.
935
+ *
936
+ * Pass `null` to remove the block. Pass an object with at least `provider`,
937
+ * `model`, and `secret_ref` to write a fresh block. Any prior `stt:` block
938
+ * is removed in full — including nested indented child lines — before the
939
+ * new block is appended, so repeated runs don't accumulate stale entries.
940
+ */
941
+ export function saveSttSetting(moxxyHome, config) {
942
+ const settingsFile = join(moxxyHome, 'settings.yaml');
943
+
944
+ let existing = '';
945
+ try {
946
+ existing = readFileSync(settingsFile, 'utf-8');
947
+ } catch { /* no existing settings */ }
948
+
949
+ // Strip any previous `stt:` block. A block is the `stt:` line plus all
950
+ // subsequent indented (leading whitespace) lines — standard flow YAML.
951
+ const kept = [];
952
+ let inSttBlock = false;
953
+ for (const line of existing.split('\n')) {
954
+ if (inSttBlock) {
955
+ if (/^\s+\S/.test(line) || line.trim() === '') {
956
+ // indented child or blank line: still inside the block
957
+ if (line.trim() === '') {
958
+ inSttBlock = false;
959
+ kept.push(line);
960
+ }
961
+ continue;
962
+ }
963
+ inSttBlock = false;
964
+ }
965
+ if (/^stt:\s*$/.test(line) || /^stt:\s/.test(line)) {
966
+ inSttBlock = true;
967
+ continue;
968
+ }
969
+ kept.push(line);
970
+ }
971
+
972
+ // Drop trailing empty lines so we can cleanly append.
973
+ while (kept.length > 0 && kept[kept.length - 1].trim() === '') kept.pop();
974
+
975
+ if (config) {
976
+ kept.push('stt:');
977
+ kept.push(` provider: ${config.provider}`);
978
+ kept.push(` model: ${config.model}`);
979
+ kept.push(` secret_ref: ${config.secret_ref}`);
980
+ if (config.api_base) kept.push(` api_base: ${config.api_base}`);
981
+ }
982
+
983
+ mkdirSync(moxxyHome, { recursive: true });
984
+ writeFileSync(settingsFile, kept.join('\n') + '\n');
985
+ }
986
+
813
987
  // ---------------------------------------------------------------------------
814
988
  // Browser rendering helpers
815
989
  // ---------------------------------------------------------------------------
@@ -175,7 +175,103 @@ async function settingsBrowserRendering(flags) {
175
175
  }
176
176
  }
177
177
 
178
- export async function runSettings(_client, args) {
178
+ /**
179
+ * Speech-to-text (voice message) settings. Unlike network_mode and
180
+ * browser_rendering, STT is configured through the gateway's
181
+ * `/v1/settings/stt` API so the running bridge picks up the new provider
182
+ * without a restart AND the vault-stored API key is owned by the gateway.
183
+ */
184
+ async function settingsStt(client, flags, positional) {
185
+ if (!client) {
186
+ throw new Error('STT commands require a running gateway. Start it with: moxxy gateway start');
187
+ }
188
+
189
+ const sub = positional || 'status';
190
+
191
+ switch (sub) {
192
+ case 'status':
193
+ case 'get':
194
+ case 'show': {
195
+ const resp = await client.getSttSettings();
196
+ if (flags.json) {
197
+ console.log(JSON.stringify(resp, null, 2));
198
+ return;
199
+ }
200
+ if (!resp.enabled) {
201
+ p.log.info('Voice messages: disabled.');
202
+ p.log.info('Enable with: moxxy settings stt enable');
203
+ return;
204
+ }
205
+ p.log.info('Voice messages: enabled');
206
+ p.log.info(` provider: ${resp.provider}`);
207
+ p.log.info(` model: ${resp.model}`);
208
+ p.log.info(` secret_ref: ${resp.secret_ref}`);
209
+ if (resp.api_base) p.log.info(` api_base: ${resp.api_base}`);
210
+ p.log.info(` max_bytes: ${resp.max_bytes}`);
211
+ p.log.info(` max_seconds: ${resp.max_seconds}`);
212
+ return;
213
+ }
214
+
215
+ case 'enable':
216
+ case 'configure':
217
+ case 'set': {
218
+ // Non-interactive: `moxxy settings stt enable --api-key sk-... [--provider whisper] [--model whisper-1]`
219
+ const providerName = flags.provider || 'whisper';
220
+ const modelName = flags.model || 'whisper-1';
221
+ const apiBase = flags['api-base'] || flags.api_base || null;
222
+ let apiKey = flags['api-key'] || flags.api_key || null;
223
+ const secretRef = flags['secret-ref'] || flags.secret_ref || null;
224
+
225
+ if (!apiKey && !secretRef) {
226
+ if (!isInteractive()) {
227
+ throw new Error(
228
+ 'Provide --api-key <key>, or --secret-ref <backend_key> to reuse an existing vault entry.',
229
+ );
230
+ }
231
+ const keyInput = await p.password({
232
+ message: 'OpenAI API key for Whisper',
233
+ validate: (v) => {
234
+ if (!v || !v.trim()) return 'API key cannot be empty';
235
+ },
236
+ });
237
+ if (p.isCancel(keyInput)) return;
238
+ apiKey = keyInput;
239
+ }
240
+
241
+ const body = { provider: providerName, model: modelName };
242
+ if (apiKey) body.api_key = apiKey.trim();
243
+ if (apiBase) body.api_base = apiBase;
244
+ if (secretRef) body.secret_ref = secretRef;
245
+
246
+ const resp = await client.updateSttSettings(body);
247
+ if (flags.json) {
248
+ console.log(JSON.stringify(resp, null, 2));
249
+ } else {
250
+ p.log.success(`Voice messages enabled (${resp.provider}, ${resp.model}).`);
251
+ }
252
+ return;
253
+ }
254
+
255
+ case 'disable':
256
+ case 'off':
257
+ case 'clear': {
258
+ const resp = await client.deleteSttSettings();
259
+ if (flags.json) {
260
+ console.log(JSON.stringify(resp, null, 2));
261
+ } else {
262
+ p.log.success('Voice messages disabled.');
263
+ }
264
+ return;
265
+ }
266
+
267
+ default:
268
+ throw new Error(
269
+ `Unknown stt action '${sub}'. Use: status | enable [--api-key <key>] | disable`,
270
+ );
271
+ }
272
+ }
273
+
274
+ export async function runSettings(client, args) {
179
275
  const { action, flags } = parseSettingsCommand(args);
180
276
 
181
277
  // Collect first positional arg after the action for convenience
@@ -198,6 +294,10 @@ export async function runSettings(_client, args) {
198
294
  case 'browser-rendering':
199
295
  await settingsBrowserRendering(flags);
200
296
  break;
297
+ case 'stt':
298
+ case 'voice':
299
+ await settingsStt(client, flags, flags._positional);
300
+ break;
201
301
  default:
202
302
  if (isInteractive() && !action) {
203
303
  // Interactive: show settings menu
@@ -206,18 +306,20 @@ export async function runSettings(_client, args) {
206
306
  options: [
207
307
  { value: 'network-mode', label: 'Network mode', hint: 'safe / unsafe domain access' },
208
308
  { value: 'browser-rendering', label: 'Browser rendering', hint: 'headless Chrome for JS-heavy sites' },
309
+ { value: 'stt', label: 'Voice (STT)', hint: 'speech-to-text provider' },
209
310
  { value: 'get', label: 'View all settings', hint: 'show current configuration' },
210
311
  ],
211
312
  });
212
313
  if (p.isCancel(selected)) return;
213
- await runSettings(_client, [selected]);
314
+ await runSettings(client, [selected]);
214
315
  } else {
215
316
  throw new Error(
216
317
  'Usage: moxxy settings <action>\n' +
217
- ' network-mode [safe|unsafe] Get or set network mode\n' +
218
- ' browser-rendering [true|false] Enable/disable headless Chrome rendering\n' +
219
- ' get [--key <k>] View settings\n' +
220
- ' set --key <k> --value <v> Set a setting'
318
+ ' network-mode [safe|unsafe] Get or set network mode\n' +
319
+ ' browser-rendering [true|false] Enable/disable headless Chrome rendering\n' +
320
+ ' stt [status|enable|disable] [--api-key <key>] Configure voice messages (speech-to-text)\n' +
321
+ ' get [--key <k>] View settings\n' +
322
+ ' set --key <k> --value <v> Set a setting'
221
323
  );
222
324
  }
223
325
  }
@@ -1,5 +1,6 @@
1
- import { useReducer, useCallback } from 'react';
1
+ import { useReducer, useCallback, useRef } from 'react';
2
2
  import { SLASH_COMMANDS } from '../slash-commands.js';
3
+ import { startRecording } from '../voice-recorder.js';
3
4
 
4
5
  const INITIAL_STATE = { type: 'idle' };
5
6
 
@@ -23,6 +24,8 @@ function reducer(state, action) {
23
24
  return { type: 'mcp_test_id' };
24
25
  case 'template_assign_slug':
25
26
  return { type: 'template_assign_slug' };
27
+ case 'voice_recording':
28
+ return { type: 'voice_recording' };
26
29
  case 'reset':
27
30
  return INITIAL_STATE;
28
31
  default:
@@ -54,9 +57,49 @@ export function useCommandHandler({
54
57
  onOpenTemplateAssignWizard,
55
58
  }) {
56
59
  const [twoStep, dispatch] = useReducer(reducer, INITIAL_STATE);
60
+ const voiceHandleRef = useRef(null);
57
61
 
58
62
  const handleSubmit = useCallback(async (text) => {
59
63
  const task = text.trim().replace(/^\/{2,}/, '/');
64
+
65
+ // While a recording is active, ANY submit (including bare Enter) stops
66
+ // it and ships the clip. This must run before the empty-text early return
67
+ // below so hitting Enter with no text still ends the capture.
68
+ if (twoStep.type === 'voice_recording') {
69
+ const handle = voiceHandleRef.current;
70
+ dispatch({ type: 'reset' });
71
+ voiceHandleRef.current = null;
72
+ if (!handle) {
73
+ eventsHandler.addSystemMessage('No active recording.');
74
+ return;
75
+ }
76
+ try {
77
+ const clip = await handle.stop();
78
+ eventsHandler.addSystemMessage('Transcribing voice message…');
79
+ if (!agent) {
80
+ eventsHandler.addSystemMessage('No agent connected. Cannot run task.');
81
+ return;
82
+ }
83
+ try {
84
+ const result = await client.startRunWithAudio(agent.name, clip);
85
+ const transcript = (result && result.transcript) || '[voice]';
86
+ eventsHandler.addUserMessage(transcript);
87
+ if (onAgentUpdate) onAgentUpdate({ status: 'running' });
88
+ } catch (err) {
89
+ if (err.isGatewayDown) {
90
+ eventsHandler.addSystemMessage(err.message);
91
+ } else {
92
+ eventsHandler.addSystemMessage(`Voice error: ${err.message}`);
93
+ }
94
+ }
95
+ } catch (err) {
96
+ eventsHandler.addSystemMessage(`Recording failed: ${err.message}`);
97
+ } finally {
98
+ handle.cleanup();
99
+ }
100
+ return;
101
+ }
102
+
60
103
  if (!task) return;
61
104
 
62
105
  // Pending ask: agent asked for user input
@@ -419,6 +462,25 @@ export function useCommandHandler({
419
462
  }
420
463
  return;
421
464
  }
465
+ if (task === '/voice') {
466
+ if (voiceHandleRef.current) {
467
+ // Defensive: treat a second /voice as a stop even if state drifted.
468
+ dispatch({ type: 'voice_recording' });
469
+ return;
470
+ }
471
+ try {
472
+ const handle = await startRecording();
473
+ voiceHandleRef.current = handle;
474
+ dispatch({ type: 'voice_recording' });
475
+ eventsHandler.addSystemMessage(
476
+ `Recording (${handle.tool})… press Enter or /voice again to stop.`,
477
+ );
478
+ } catch (err) {
479
+ eventsHandler.addSystemMessage(`Cannot record voice: ${err.message}`);
480
+ }
481
+ return;
482
+ }
483
+
422
484
  if (task === '/template clear') {
423
485
  try {
424
486
  await client.setAgentTemplate(agentId, null);
@@ -9,6 +9,7 @@ export const SLASH_COMMANDS = [
9
9
  { name: '/vault', description: 'Open vault actions', aliases: ['/vault delete'] },
10
10
  { name: '/mcp', description: 'Open MCP actions', aliases: [] },
11
11
  { name: '/template', description: 'Open template actions', aliases: [] },
12
+ { name: '/voice', description: 'Record a voice message (needs sox or ffmpeg)', aliases: [] },
12
13
  ];
13
14
 
14
15
  export function matchCommands(input) {
@@ -0,0 +1,117 @@
1
+ import { spawn } from 'node:child_process';
2
+ import fs from 'node:fs';
3
+ import os from 'node:os';
4
+ import path from 'node:path';
5
+ import { promisify } from 'node:util';
6
+ import { execFile } from 'node:child_process';
7
+
8
+ const execFileP = promisify(execFile);
9
+
10
+ /**
11
+ * Check whether a binary is on PATH. Returns the absolute path, or null.
12
+ */
13
+ async function which(name) {
14
+ try {
15
+ const { stdout } = await execFileP('which', [name]);
16
+ const p = stdout.trim();
17
+ return p || null;
18
+ } catch {
19
+ return null;
20
+ }
21
+ }
22
+
23
+ /**
24
+ * Detect an available recording tool. Prefers `sox` (via `rec`) because it
25
+ * speaks WAV out of the box and handles Ctrl-C gracefully. Falls back to
26
+ * `ffmpeg` with a platform-appropriate input device. Returns `null` if
27
+ * neither is present.
28
+ */
29
+ export async function detectRecorder() {
30
+ const rec = await which('rec');
31
+ if (rec) return { tool: 'rec', bin: rec };
32
+ const sox = await which('sox');
33
+ if (sox) return { tool: 'sox', bin: sox };
34
+ const ffmpeg = await which('ffmpeg');
35
+ if (ffmpeg) return { tool: 'ffmpeg', bin: ffmpeg };
36
+ return null;
37
+ }
38
+
39
+ function ffmpegArgs(outPath) {
40
+ const platform = process.platform;
41
+ if (platform === 'darwin') {
42
+ // `avfoundation` default audio input is `:0`.
43
+ return ['-loglevel', 'error', '-f', 'avfoundation', '-i', ':0', '-ac', '1', '-ar', '16000', '-y', outPath];
44
+ }
45
+ // Linux: assume ALSA `default` — user can symlink their own if needed.
46
+ return ['-loglevel', 'error', '-f', 'alsa', '-i', 'default', '-ac', '1', '-ar', '16000', '-y', outPath];
47
+ }
48
+
49
+ /**
50
+ * Start a recording. Returns a handle with `stop()` that resolves to
51
+ * `{ path, data, mime }`. The caller owns cleanup of the temp file via
52
+ * `cleanup()`.
53
+ *
54
+ * The audio is written to a platform temp file so that even if the recorder
55
+ * dies mid-stream we never lose the buffer.
56
+ */
57
+ export async function startRecording() {
58
+ const recorder = await detectRecorder();
59
+ if (!recorder) {
60
+ throw new Error('No recorder found. Install `sox` (recommended) or `ffmpeg`.');
61
+ }
62
+
63
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'moxxy-voice-'));
64
+ const outPath = path.join(tmpDir, 'voice.wav');
65
+
66
+ let args;
67
+ if (recorder.tool === 'rec' || recorder.tool === 'sox') {
68
+ // `rec` is sox with sensible defaults; `sox` requires `-d` for default input.
69
+ args = recorder.tool === 'rec'
70
+ ? ['-q', '-c', '1', '-r', '16000', outPath]
71
+ : ['-q', '-d', '-c', '1', '-r', '16000', outPath];
72
+ } else {
73
+ args = ffmpegArgs(outPath);
74
+ }
75
+
76
+ const child = spawn(recorder.bin, args, { stdio: ['ignore', 'ignore', 'pipe'] });
77
+ let stderr = '';
78
+ child.stderr.on('data', chunk => {
79
+ stderr += chunk.toString();
80
+ });
81
+
82
+ let exited = false;
83
+ const exitPromise = new Promise((resolve) => {
84
+ child.on('exit', (code, signal) => {
85
+ exited = true;
86
+ resolve({ code, signal });
87
+ });
88
+ });
89
+
90
+ return {
91
+ tool: recorder.tool,
92
+ outPath,
93
+ async stop() {
94
+ if (!exited) {
95
+ // SIGINT is important: ffmpeg and sox both flush the output file
96
+ // cleanly on SIGINT. SIGTERM/KILL can leave a truncated WAV header.
97
+ try { child.kill('SIGINT'); } catch {}
98
+ }
99
+ await exitPromise;
100
+
101
+ if (!fs.existsSync(outPath)) {
102
+ throw new Error(`Recorder produced no output file. stderr: ${stderr.trim() || '<empty>'}`);
103
+ }
104
+ const data = fs.readFileSync(outPath);
105
+ if (data.length < 44) {
106
+ // 44 bytes is the minimum WAV header.
107
+ throw new Error('Recording too short or empty.');
108
+ }
109
+ return { path: outPath, data, mime: 'audio/wav', filename: 'voice.wav' };
110
+ },
111
+ cleanup() {
112
+ try {
113
+ fs.rmSync(tmpDir, { recursive: true, force: true });
114
+ } catch {}
115
+ },
116
+ };
117
+ }