@moxxy/cli 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -1
- package/src/api-client.js +71 -0
- package/src/cli.js +3 -2
- package/src/commands/init.js +174 -0
- package/src/commands/settings.js +108 -6
- package/src/tui/hooks/use-command-handler.js +63 -1
- package/src/tui/slash-commands.js +1 -0
- package/src/tui/voice-recorder.js +117 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@moxxy/cli",
|
|
3
|
-
"version": "1.3.1",
|
|
3
|
+
"version": "1.4.0",
|
|
4
4
|
"description": "CLI for the Moxxy agentic framework — manage agents, skills, plugins, channels, and vaults from the terminal",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -50,5 +50,8 @@
|
|
|
50
50
|
"marked": "^15.0.0",
|
|
51
51
|
"react": "^19.0.0",
|
|
52
52
|
"tsx": "^4.21.0"
|
|
53
|
+
},
|
|
54
|
+
"devDependencies": {
|
|
55
|
+
"react-devtools-core": "file:./stubs/react-devtools-core"
|
|
53
56
|
}
|
|
54
57
|
}
|
package/src/api-client.js
CHANGED
|
@@ -127,6 +127,46 @@ export class ApiClient {
|
|
|
127
127
|
return this.request(`/v1/agents/${encodeURIComponent(agentId)}/runs`, 'POST', { task });
|
|
128
128
|
}
|
|
129
129
|
|
|
130
|
+
/**
|
|
131
|
+
* Upload a recorded voice clip to the gateway. The server transcribes it
|
|
132
|
+
* via the configured STT provider and immediately starts a run with the
|
|
133
|
+
* transcript as the task. Returns `{ transcript, run_id, status, ... }`.
|
|
134
|
+
*/
|
|
135
|
+
async startRunWithAudio(agentId, { data, mime = 'audio/wav', filename = 'voice.wav' }) {
|
|
136
|
+
const form = new FormData();
|
|
137
|
+
const blob = new Blob([data], { type: mime });
|
|
138
|
+
form.append('audio', blob, filename);
|
|
139
|
+
|
|
140
|
+
const headers = {};
|
|
141
|
+
if (this.token) {
|
|
142
|
+
headers['authorization'] = `Bearer ${this.token}`;
|
|
143
|
+
}
|
|
144
|
+
// NOTE: do NOT set content-type — fetch will compute the multipart
|
|
145
|
+
// boundary for us.
|
|
146
|
+
|
|
147
|
+
const url = this.buildUrl(`/v1/agents/${encodeURIComponent(agentId)}/runs/audio`);
|
|
148
|
+
let resp;
|
|
149
|
+
try {
|
|
150
|
+
resp = await fetch(url, { method: 'POST', headers, body: form });
|
|
151
|
+
} catch (err) {
|
|
152
|
+
if (isConnectionError(err)) throw gatewayDownError();
|
|
153
|
+
throw err;
|
|
154
|
+
}
|
|
155
|
+
if (!resp.ok) {
|
|
156
|
+
const err = await resp.json().catch(() => ({
|
|
157
|
+
error: 'unknown',
|
|
158
|
+
message: resp.statusText,
|
|
159
|
+
}));
|
|
160
|
+
const error = new Error(err.message || `API error ${resp.status}`);
|
|
161
|
+
error.status = resp.status;
|
|
162
|
+
error.code = err.error;
|
|
163
|
+
throw error;
|
|
164
|
+
}
|
|
165
|
+
const text = await resp.text();
|
|
166
|
+
if (!text) return {};
|
|
167
|
+
return JSON.parse(text);
|
|
168
|
+
}
|
|
169
|
+
|
|
130
170
|
async stopAgent(agentId) {
|
|
131
171
|
return this.request(`/v1/agents/${encodeURIComponent(agentId)}/stop`, 'POST');
|
|
132
172
|
}
|
|
@@ -296,6 +336,37 @@ export class ApiClient {
|
|
|
296
336
|
async setAgentTemplate(name, template) {
|
|
297
337
|
return this.request(`/v1/agents/${encodeURIComponent(name)}/template`, 'PATCH', { template });
|
|
298
338
|
}
|
|
339
|
+
|
|
340
|
+
// --- Settings: Speech-to-text ---------------------------------------
|
|
341
|
+
|
|
342
|
+
/**
|
|
343
|
+
* Fetch the currently-active STT configuration from the gateway.
|
|
344
|
+
* Returns `{ enabled: false }` when voice messages are off, or
|
|
345
|
+
* `{ enabled: true, provider, model, secret_ref, ... }` otherwise.
|
|
346
|
+
* The API never returns the raw API key.
|
|
347
|
+
*/
|
|
348
|
+
async getSttSettings() {
|
|
349
|
+
return this.request('/v1/settings/stt', 'GET');
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
/**
|
|
353
|
+
* Configure (or reconfigure) speech-to-text.
|
|
354
|
+
*
|
|
355
|
+
* Pass `api_key` to provision a fresh vault secret; omit it to reuse an
|
|
356
|
+
* existing `secret_ref`. The running gateway swaps providers in-place —
|
|
357
|
+
* no restart needed.
|
|
358
|
+
*/
|
|
359
|
+
async updateSttSettings(body) {
|
|
360
|
+
return this.request('/v1/settings/stt', 'PUT', body);
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
/**
|
|
364
|
+
* Disable voice messages. Removes the `stt` block from settings.yaml
|
|
365
|
+
* and clears the in-memory provider. Does NOT delete the vault secret.
|
|
366
|
+
*/
|
|
367
|
+
async deleteSttSettings() {
|
|
368
|
+
return this.request('/v1/settings/stt', 'DELETE');
|
|
369
|
+
}
|
|
299
370
|
}
|
|
300
371
|
|
|
301
372
|
export function createApiClient(baseUrl, token, authMode = 'token') {
|
package/src/cli.js
CHANGED
|
@@ -94,8 +94,9 @@ Usage:
|
|
|
94
94
|
moxxy chat [--agent <id>] Alias for tui
|
|
95
95
|
moxxy events tail [--agent <id>] [--run <id>] [--json]
|
|
96
96
|
moxxy settings network-mode [safe|unsafe] Get or set network mode
|
|
97
|
-
moxxy settings get [--key <k>] [--json] View settings
|
|
98
|
-
moxxy settings set --key <k> --value <v> Set a setting
|
|
97
|
+
moxxy settings stt [status|enable|disable] Configure voice messages (speech-to-text)
|
|
98
|
+
moxxy settings get [--key <k>] [--json] View settings
|
|
99
|
+
moxxy settings set --key <k> --value <v> Set a setting
|
|
99
100
|
moxxy doctor Diagnose installation
|
|
100
101
|
moxxy update [--check] [--force] [--json] Check for and install updates
|
|
101
102
|
moxxy update --rollback Restore previous gateway version
|
package/src/commands/init.js
CHANGED
|
@@ -807,9 +807,183 @@ export async function runInit(client, args) {
|
|
|
807
807
|
}
|
|
808
808
|
}
|
|
809
809
|
|
|
810
|
+
// Step 8: Voice messages (optional)
|
|
811
|
+
p.note(
|
|
812
|
+
'Voice messages let users send audio to the agent on any channel\n' +
|
|
813
|
+
'(Telegram voice notes, the TUI /voice command, or direct audio upload\n' +
|
|
814
|
+
'to the gateway). The audio is transcribed to text before the agent\n' +
|
|
815
|
+
'sees it. The agent does not reply with voice.',
|
|
816
|
+
'Voice Messages (Speech-to-Text)'
|
|
817
|
+
);
|
|
818
|
+
|
|
819
|
+
const enableVoice = await p.confirm({
|
|
820
|
+
message: 'Enable voice messages?',
|
|
821
|
+
initialValue: false,
|
|
822
|
+
});
|
|
823
|
+
handleCancel(enableVoice);
|
|
824
|
+
|
|
825
|
+
if (enableVoice) {
|
|
826
|
+
const sttProvider = await p.select({
|
|
827
|
+
message: 'Speech-to-text provider',
|
|
828
|
+
options: [
|
|
829
|
+
{
|
|
830
|
+
value: 'whisper',
|
|
831
|
+
label: 'OpenAI Whisper',
|
|
832
|
+
hint: 'Cloud API, requires an OpenAI key',
|
|
833
|
+
},
|
|
834
|
+
{ value: '__skip__', label: 'Skip', hint: 'configure later' },
|
|
835
|
+
],
|
|
836
|
+
});
|
|
837
|
+
handleCancel(sttProvider);
|
|
838
|
+
|
|
839
|
+
if (sttProvider === 'whisper') {
|
|
840
|
+
const configured = await configureWhisperStt(client, moxxyHome);
|
|
841
|
+
if (configured) {
|
|
842
|
+
p.log.success('Voice messages enabled (OpenAI Whisper).');
|
|
843
|
+
} else {
|
|
844
|
+
p.log.warn('Voice setup skipped. Retry later with: moxxy init');
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
}
|
|
848
|
+
|
|
810
849
|
p.outro('Setup complete. Run moxxy to see available commands.');
|
|
811
850
|
}
|
|
812
851
|
|
|
852
|
+
// ---------------------------------------------------------------------------
|
|
853
|
+
// Speech-to-text (voice message) helpers
|
|
854
|
+
// ---------------------------------------------------------------------------
|
|
855
|
+
|
|
856
|
+
const STT_WHISPER_BACKEND_KEY = 'moxxy_stt_whisper';
|
|
857
|
+
const STT_WHISPER_KEY_NAME = 'STT_WHISPER_API_KEY';
|
|
858
|
+
const OPENAI_PROVIDER_BACKEND_KEY = 'moxxy_provider_openai';
|
|
859
|
+
|
|
860
|
+
/**
|
|
861
|
+
* Configure Whisper STT: either reuse an existing OpenAI vault secret or
|
|
862
|
+
* prompt for a new key, then persist an `stt` block to settings.yaml.
|
|
863
|
+
* Returns true on success, false if the user bailed or storage failed.
|
|
864
|
+
*/
|
|
865
|
+
async function configureWhisperStt(client, moxxyHome) {
|
|
866
|
+
// Look for an existing vault entry we can reuse. Prefer a secret already
|
|
867
|
+
// backing the OpenAI provider install so users don't enter the same key
|
|
868
|
+
// twice.
|
|
869
|
+
let reuseBackendKey = null;
|
|
870
|
+
try {
|
|
871
|
+
const secrets = await client.listSecrets();
|
|
872
|
+
const existing = (secrets || []).find(
|
|
873
|
+
(s) => s.backend_key === OPENAI_PROVIDER_BACKEND_KEY,
|
|
874
|
+
);
|
|
875
|
+
if (existing) {
|
|
876
|
+
const reuse = await p.confirm({
|
|
877
|
+
message: 'Reuse your existing OpenAI API key for Whisper?',
|
|
878
|
+
initialValue: true,
|
|
879
|
+
});
|
|
880
|
+
handleCancel(reuse);
|
|
881
|
+
if (reuse) reuseBackendKey = OPENAI_PROVIDER_BACKEND_KEY;
|
|
882
|
+
}
|
|
883
|
+
} catch (err) {
|
|
884
|
+
// Vault listing may fail if the gateway is down — fall through to prompt.
|
|
885
|
+
p.log.warn(`Could not check existing vault secrets: ${err.message}`);
|
|
886
|
+
}
|
|
887
|
+
|
|
888
|
+
let secretRef = reuseBackendKey;
|
|
889
|
+
|
|
890
|
+
if (!secretRef) {
|
|
891
|
+
const apiKey = await p.password({
|
|
892
|
+
message: 'Enter your OpenAI API key (used for Whisper transcription)',
|
|
893
|
+
validate: (val) => {
|
|
894
|
+
if (!val || !val.trim()) return 'API key cannot be empty';
|
|
895
|
+
},
|
|
896
|
+
});
|
|
897
|
+
handleCancel(apiKey);
|
|
898
|
+
|
|
899
|
+
try {
|
|
900
|
+
await withSpinner(
|
|
901
|
+
'Storing API key in vault...',
|
|
902
|
+
async () => {
|
|
903
|
+
await client.createSecret({
|
|
904
|
+
key_name: STT_WHISPER_KEY_NAME,
|
|
905
|
+
backend_key: STT_WHISPER_BACKEND_KEY,
|
|
906
|
+
policy_label: 'stt-provider',
|
|
907
|
+
value: apiKey.trim(),
|
|
908
|
+
});
|
|
909
|
+
},
|
|
910
|
+
'Whisper API key stored.',
|
|
911
|
+
);
|
|
912
|
+
secretRef = STT_WHISPER_BACKEND_KEY;
|
|
913
|
+
} catch (err) {
|
|
914
|
+
p.log.error(`Failed to store API key: ${err.message}`);
|
|
915
|
+
return false;
|
|
916
|
+
}
|
|
917
|
+
}
|
|
918
|
+
|
|
919
|
+
try {
|
|
920
|
+
saveSttSetting(moxxyHome, {
|
|
921
|
+
provider: 'whisper',
|
|
922
|
+
model: 'whisper-1',
|
|
923
|
+
secret_ref: secretRef,
|
|
924
|
+
});
|
|
925
|
+
} catch (err) {
|
|
926
|
+
p.log.error(`Failed to write settings.yaml: ${err.message}`);
|
|
927
|
+
return false;
|
|
928
|
+
}
|
|
929
|
+
|
|
930
|
+
return true;
|
|
931
|
+
}
|
|
932
|
+
|
|
933
|
+
/**
|
|
934
|
+
* Write (or clear) the `stt` block in `{moxxy_home}/settings.yaml`.
|
|
935
|
+
*
|
|
936
|
+
* Pass `null` to remove the block. Pass an object with at least `provider`,
|
|
937
|
+
* `model`, and `secret_ref` to write a fresh block. Any prior `stt:` block
|
|
938
|
+
* is removed in full — including nested indented child lines — before the
|
|
939
|
+
* new block is appended, so repeated runs don't accumulate stale entries.
|
|
940
|
+
*/
|
|
941
|
+
export function saveSttSetting(moxxyHome, config) {
|
|
942
|
+
const settingsFile = join(moxxyHome, 'settings.yaml');
|
|
943
|
+
|
|
944
|
+
let existing = '';
|
|
945
|
+
try {
|
|
946
|
+
existing = readFileSync(settingsFile, 'utf-8');
|
|
947
|
+
} catch { /* no existing settings */ }
|
|
948
|
+
|
|
949
|
+
// Strip any previous `stt:` block. A block is the `stt:` line plus all
|
|
950
|
+
// subsequent indented (leading whitespace) lines — standard flow YAML.
|
|
951
|
+
const kept = [];
|
|
952
|
+
let inSttBlock = false;
|
|
953
|
+
for (const line of existing.split('\n')) {
|
|
954
|
+
if (inSttBlock) {
|
|
955
|
+
if (/^\s+\S/.test(line) || line.trim() === '') {
|
|
956
|
+
// indented child or blank line: still inside the block
|
|
957
|
+
if (line.trim() === '') {
|
|
958
|
+
inSttBlock = false;
|
|
959
|
+
kept.push(line);
|
|
960
|
+
}
|
|
961
|
+
continue;
|
|
962
|
+
}
|
|
963
|
+
inSttBlock = false;
|
|
964
|
+
}
|
|
965
|
+
if (/^stt:\s*$/.test(line) || /^stt:\s/.test(line)) {
|
|
966
|
+
inSttBlock = true;
|
|
967
|
+
continue;
|
|
968
|
+
}
|
|
969
|
+
kept.push(line);
|
|
970
|
+
}
|
|
971
|
+
|
|
972
|
+
// Drop trailing empty lines so we can cleanly append.
|
|
973
|
+
while (kept.length > 0 && kept[kept.length - 1].trim() === '') kept.pop();
|
|
974
|
+
|
|
975
|
+
if (config) {
|
|
976
|
+
kept.push('stt:');
|
|
977
|
+
kept.push(` provider: ${config.provider}`);
|
|
978
|
+
kept.push(` model: ${config.model}`);
|
|
979
|
+
kept.push(` secret_ref: ${config.secret_ref}`);
|
|
980
|
+
if (config.api_base) kept.push(` api_base: ${config.api_base}`);
|
|
981
|
+
}
|
|
982
|
+
|
|
983
|
+
mkdirSync(moxxyHome, { recursive: true });
|
|
984
|
+
writeFileSync(settingsFile, kept.join('\n') + '\n');
|
|
985
|
+
}
|
|
986
|
+
|
|
813
987
|
// ---------------------------------------------------------------------------
|
|
814
988
|
// Browser rendering helpers
|
|
815
989
|
// ---------------------------------------------------------------------------
|
package/src/commands/settings.js
CHANGED
|
@@ -175,7 +175,103 @@ async function settingsBrowserRendering(flags) {
|
|
|
175
175
|
}
|
|
176
176
|
}
|
|
177
177
|
|
|
178
|
-
export async function runSettings(_client, args) {
|
|
178
|
+
/**
|
|
179
|
+
* Speech-to-text (voice message) settings. Unlike network_mode and
|
|
180
|
+
* browser_rendering, STT is configured through the gateway's
|
|
181
|
+
* `/v1/settings/stt` API so the running bridge picks up the new provider
|
|
182
|
+
* without a restart AND the vault-stored API key is owned by the gateway.
|
|
183
|
+
*/
|
|
184
|
+
async function settingsStt(client, flags, positional) {
|
|
185
|
+
if (!client) {
|
|
186
|
+
throw new Error('STT commands require a running gateway. Start it with: moxxy gateway start');
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
const sub = positional || 'status';
|
|
190
|
+
|
|
191
|
+
switch (sub) {
|
|
192
|
+
case 'status':
|
|
193
|
+
case 'get':
|
|
194
|
+
case 'show': {
|
|
195
|
+
const resp = await client.getSttSettings();
|
|
196
|
+
if (flags.json) {
|
|
197
|
+
console.log(JSON.stringify(resp, null, 2));
|
|
198
|
+
return;
|
|
199
|
+
}
|
|
200
|
+
if (!resp.enabled) {
|
|
201
|
+
p.log.info('Voice messages: disabled.');
|
|
202
|
+
p.log.info('Enable with: moxxy settings stt enable');
|
|
203
|
+
return;
|
|
204
|
+
}
|
|
205
|
+
p.log.info('Voice messages: enabled');
|
|
206
|
+
p.log.info(` provider: ${resp.provider}`);
|
|
207
|
+
p.log.info(` model: ${resp.model}`);
|
|
208
|
+
p.log.info(` secret_ref: ${resp.secret_ref}`);
|
|
209
|
+
if (resp.api_base) p.log.info(` api_base: ${resp.api_base}`);
|
|
210
|
+
p.log.info(` max_bytes: ${resp.max_bytes}`);
|
|
211
|
+
p.log.info(` max_seconds: ${resp.max_seconds}`);
|
|
212
|
+
return;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
case 'enable':
|
|
216
|
+
case 'configure':
|
|
217
|
+
case 'set': {
|
|
218
|
+
// Non-interactive: `moxxy settings stt enable --api-key sk-... [--provider whisper] [--model whisper-1]`
|
|
219
|
+
const providerName = flags.provider || 'whisper';
|
|
220
|
+
const modelName = flags.model || 'whisper-1';
|
|
221
|
+
const apiBase = flags['api-base'] || flags.api_base || null;
|
|
222
|
+
let apiKey = flags['api-key'] || flags.api_key || null;
|
|
223
|
+
const secretRef = flags['secret-ref'] || flags.secret_ref || null;
|
|
224
|
+
|
|
225
|
+
if (!apiKey && !secretRef) {
|
|
226
|
+
if (!isInteractive()) {
|
|
227
|
+
throw new Error(
|
|
228
|
+
'Provide --api-key <key>, or --secret-ref <backend_key> to reuse an existing vault entry.',
|
|
229
|
+
);
|
|
230
|
+
}
|
|
231
|
+
const keyInput = await p.password({
|
|
232
|
+
message: 'OpenAI API key for Whisper',
|
|
233
|
+
validate: (v) => {
|
|
234
|
+
if (!v || !v.trim()) return 'API key cannot be empty';
|
|
235
|
+
},
|
|
236
|
+
});
|
|
237
|
+
if (p.isCancel(keyInput)) return;
|
|
238
|
+
apiKey = keyInput;
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
const body = { provider: providerName, model: modelName };
|
|
242
|
+
if (apiKey) body.api_key = apiKey.trim();
|
|
243
|
+
if (apiBase) body.api_base = apiBase;
|
|
244
|
+
if (secretRef) body.secret_ref = secretRef;
|
|
245
|
+
|
|
246
|
+
const resp = await client.updateSttSettings(body);
|
|
247
|
+
if (flags.json) {
|
|
248
|
+
console.log(JSON.stringify(resp, null, 2));
|
|
249
|
+
} else {
|
|
250
|
+
p.log.success(`Voice messages enabled (${resp.provider}, ${resp.model}).`);
|
|
251
|
+
}
|
|
252
|
+
return;
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
case 'disable':
|
|
256
|
+
case 'off':
|
|
257
|
+
case 'clear': {
|
|
258
|
+
const resp = await client.deleteSttSettings();
|
|
259
|
+
if (flags.json) {
|
|
260
|
+
console.log(JSON.stringify(resp, null, 2));
|
|
261
|
+
} else {
|
|
262
|
+
p.log.success('Voice messages disabled.');
|
|
263
|
+
}
|
|
264
|
+
return;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
default:
|
|
268
|
+
throw new Error(
|
|
269
|
+
`Unknown stt action '${sub}'. Use: status | enable [--api-key <key>] | disable`,
|
|
270
|
+
);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
export async function runSettings(client, args) {
|
|
179
275
|
const { action, flags } = parseSettingsCommand(args);
|
|
180
276
|
|
|
181
277
|
// Collect first positional arg after the action for convenience
|
|
@@ -198,6 +294,10 @@ export async function runSettings(_client, args) {
|
|
|
198
294
|
case 'browser-rendering':
|
|
199
295
|
await settingsBrowserRendering(flags);
|
|
200
296
|
break;
|
|
297
|
+
case 'stt':
|
|
298
|
+
case 'voice':
|
|
299
|
+
await settingsStt(client, flags, flags._positional);
|
|
300
|
+
break;
|
|
201
301
|
default:
|
|
202
302
|
if (isInteractive() && !action) {
|
|
203
303
|
// Interactive: show settings menu
|
|
@@ -206,18 +306,20 @@ export async function runSettings(_client, args) {
|
|
|
206
306
|
options: [
|
|
207
307
|
{ value: 'network-mode', label: 'Network mode', hint: 'safe / unsafe domain access' },
|
|
208
308
|
{ value: 'browser-rendering', label: 'Browser rendering', hint: 'headless Chrome for JS-heavy sites' },
|
|
309
|
+
{ value: 'stt', label: 'Voice (STT)', hint: 'speech-to-text provider' },
|
|
209
310
|
{ value: 'get', label: 'View all settings', hint: 'show current configuration' },
|
|
210
311
|
],
|
|
211
312
|
});
|
|
212
313
|
if (p.isCancel(selected)) return;
|
|
213
|
-
await runSettings(_client, [selected]);
|
|
314
|
+
await runSettings(client, [selected]);
|
|
214
315
|
} else {
|
|
215
316
|
throw new Error(
|
|
216
317
|
'Usage: moxxy settings <action>\n' +
|
|
217
|
-
' network-mode [safe|unsafe] Get or set network mode\n' +
|
|
218
|
-
' browser-rendering [true|false] Enable/disable headless Chrome rendering\n' +
|
|
219
|
-
' get [--key <k>] View settings\n' +
|
|
220
|
-
' set --key <k> --value <v> Set a setting'
|
|
318
|
+
' network-mode [safe|unsafe] Get or set network mode\n' +
|
|
319
|
+
' browser-rendering [true|false] Enable/disable headless Chrome rendering\n' +
|
|
320
|
+
' stt [status|enable|disable] [--api-key <key>] Configure voice messages (speech-to-text)\n' +
|
|
321
|
+
' get [--key <k>] View settings\n' +
|
|
322
|
+
' set --key <k> --value <v> Set a setting'
|
|
221
323
|
);
|
|
222
324
|
}
|
|
223
325
|
}
|
package/src/tui/hooks/use-command-handler.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import { useReducer, useCallback } from 'react';
|
|
1
|
+
import { useReducer, useCallback, useRef } from 'react';
|
|
2
2
|
import { SLASH_COMMANDS } from '../slash-commands.js';
|
|
3
|
+
import { startRecording } from '../voice-recorder.js';
|
|
3
4
|
|
|
4
5
|
const INITIAL_STATE = { type: 'idle' };
|
|
5
6
|
|
|
@@ -23,6 +24,8 @@ function reducer(state, action) {
|
|
|
23
24
|
return { type: 'mcp_test_id' };
|
|
24
25
|
case 'template_assign_slug':
|
|
25
26
|
return { type: 'template_assign_slug' };
|
|
27
|
+
case 'voice_recording':
|
|
28
|
+
return { type: 'voice_recording' };
|
|
26
29
|
case 'reset':
|
|
27
30
|
return INITIAL_STATE;
|
|
28
31
|
default:
|
|
@@ -54,9 +57,49 @@ export function useCommandHandler({
|
|
|
54
57
|
onOpenTemplateAssignWizard,
|
|
55
58
|
}) {
|
|
56
59
|
const [twoStep, dispatch] = useReducer(reducer, INITIAL_STATE);
|
|
60
|
+
const voiceHandleRef = useRef(null);
|
|
57
61
|
|
|
58
62
|
const handleSubmit = useCallback(async (text) => {
|
|
59
63
|
const task = text.trim().replace(/^\/{2,}/, '/');
|
|
64
|
+
|
|
65
|
+
// While a recording is active, ANY submit (including bare Enter) stops
|
|
66
|
+
// it and ships the clip. This must run before the empty-text early return
|
|
67
|
+
// below so hitting Enter with no text still ends the capture.
|
|
68
|
+
if (twoStep.type === 'voice_recording') {
|
|
69
|
+
const handle = voiceHandleRef.current;
|
|
70
|
+
dispatch({ type: 'reset' });
|
|
71
|
+
voiceHandleRef.current = null;
|
|
72
|
+
if (!handle) {
|
|
73
|
+
eventsHandler.addSystemMessage('No active recording.');
|
|
74
|
+
return;
|
|
75
|
+
}
|
|
76
|
+
try {
|
|
77
|
+
const clip = await handle.stop();
|
|
78
|
+
eventsHandler.addSystemMessage('Transcribing voice message…');
|
|
79
|
+
if (!agent) {
|
|
80
|
+
eventsHandler.addSystemMessage('No agent connected. Cannot run task.');
|
|
81
|
+
return;
|
|
82
|
+
}
|
|
83
|
+
try {
|
|
84
|
+
const result = await client.startRunWithAudio(agent.name, clip);
|
|
85
|
+
const transcript = (result && result.transcript) || '[voice]';
|
|
86
|
+
eventsHandler.addUserMessage(transcript);
|
|
87
|
+
if (onAgentUpdate) onAgentUpdate({ status: 'running' });
|
|
88
|
+
} catch (err) {
|
|
89
|
+
if (err.isGatewayDown) {
|
|
90
|
+
eventsHandler.addSystemMessage(err.message);
|
|
91
|
+
} else {
|
|
92
|
+
eventsHandler.addSystemMessage(`Voice error: ${err.message}`);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
} catch (err) {
|
|
96
|
+
eventsHandler.addSystemMessage(`Recording failed: ${err.message}`);
|
|
97
|
+
} finally {
|
|
98
|
+
handle.cleanup();
|
|
99
|
+
}
|
|
100
|
+
return;
|
|
101
|
+
}
|
|
102
|
+
|
|
60
103
|
if (!task) return;
|
|
61
104
|
|
|
62
105
|
// Pending ask: agent asked for user input
|
|
@@ -419,6 +462,25 @@ export function useCommandHandler({
|
|
|
419
462
|
}
|
|
420
463
|
return;
|
|
421
464
|
}
|
|
465
|
+
if (task === '/voice') {
|
|
466
|
+
if (voiceHandleRef.current) {
|
|
467
|
+
// Defensive: treat a second /voice as a stop even if state drifted.
|
|
468
|
+
dispatch({ type: 'voice_recording' });
|
|
469
|
+
return;
|
|
470
|
+
}
|
|
471
|
+
try {
|
|
472
|
+
const handle = await startRecording();
|
|
473
|
+
voiceHandleRef.current = handle;
|
|
474
|
+
dispatch({ type: 'voice_recording' });
|
|
475
|
+
eventsHandler.addSystemMessage(
|
|
476
|
+
`Recording (${handle.tool})… press Enter or /voice again to stop.`,
|
|
477
|
+
);
|
|
478
|
+
} catch (err) {
|
|
479
|
+
eventsHandler.addSystemMessage(`Cannot record voice: ${err.message}`);
|
|
480
|
+
}
|
|
481
|
+
return;
|
|
482
|
+
}
|
|
483
|
+
|
|
422
484
|
if (task === '/template clear') {
|
|
423
485
|
try {
|
|
424
486
|
await client.setAgentTemplate(agentId, null);
|
package/src/tui/slash-commands.js
CHANGED
|
@@ -9,6 +9,7 @@ export const SLASH_COMMANDS = [
|
|
|
9
9
|
{ name: '/vault', description: 'Open vault actions', aliases: ['/vault delete'] },
|
|
10
10
|
{ name: '/mcp', description: 'Open MCP actions', aliases: [] },
|
|
11
11
|
{ name: '/template', description: 'Open template actions', aliases: [] },
|
|
12
|
+
{ name: '/voice', description: 'Record a voice message (needs sox or ffmpeg)', aliases: [] },
|
|
12
13
|
];
|
|
13
14
|
|
|
14
15
|
export function matchCommands(input) {
|
package/src/tui/voice-recorder.js
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { spawn } from 'node:child_process';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import os from 'node:os';
|
|
4
|
+
import path from 'node:path';
|
|
5
|
+
import { promisify } from 'node:util';
|
|
6
|
+
import { execFile } from 'node:child_process';
|
|
7
|
+
|
|
8
|
+
const execFileP = promisify(execFile);
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Check whether a binary is on PATH. Returns the absolute path, or null.
|
|
12
|
+
*/
|
|
13
|
+
async function which(name) {
|
|
14
|
+
try {
|
|
15
|
+
const { stdout } = await execFileP('which', [name]);
|
|
16
|
+
const p = stdout.trim();
|
|
17
|
+
return p || null;
|
|
18
|
+
} catch {
|
|
19
|
+
return null;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Detect an available recording tool. Prefers `sox` (via `rec`) because it
|
|
25
|
+
* speaks WAV out of the box and handles Ctrl-C gracefully. Falls back to
|
|
26
|
+
* `ffmpeg` with a platform-appropriate input device. Returns `null` if
|
|
27
|
+
* neither is present.
|
|
28
|
+
*/
|
|
29
|
+
export async function detectRecorder() {
|
|
30
|
+
const rec = await which('rec');
|
|
31
|
+
if (rec) return { tool: 'rec', bin: rec };
|
|
32
|
+
const sox = await which('sox');
|
|
33
|
+
if (sox) return { tool: 'sox', bin: sox };
|
|
34
|
+
const ffmpeg = await which('ffmpeg');
|
|
35
|
+
if (ffmpeg) return { tool: 'ffmpeg', bin: ffmpeg };
|
|
36
|
+
return null;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
function ffmpegArgs(outPath) {
|
|
40
|
+
const platform = process.platform;
|
|
41
|
+
if (platform === 'darwin') {
|
|
42
|
+
// `avfoundation` default audio input is `:0`.
|
|
43
|
+
return ['-loglevel', 'error', '-f', 'avfoundation', '-i', ':0', '-ac', '1', '-ar', '16000', '-y', outPath];
|
|
44
|
+
}
|
|
45
|
+
// Linux: assume ALSA `default` — user can symlink their own if needed.
|
|
46
|
+
return ['-loglevel', 'error', '-f', 'alsa', '-i', 'default', '-ac', '1', '-ar', '16000', '-y', outPath];
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Start a recording. Returns a handle with `stop()` that resolves to
|
|
51
|
+
* `{ path, data, mime }`. The caller owns cleanup of the temp file via
|
|
52
|
+
* `cleanup()`.
|
|
53
|
+
*
|
|
54
|
+
* The audio is written to a platform temp file so that even if the recorder
|
|
55
|
+
* dies mid-stream we never lose the buffer.
|
|
56
|
+
*/
|
|
57
|
+
export async function startRecording() {
|
|
58
|
+
const recorder = await detectRecorder();
|
|
59
|
+
if (!recorder) {
|
|
60
|
+
throw new Error('No recorder found. Install `sox` (recommended) or `ffmpeg`.');
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'moxxy-voice-'));
|
|
64
|
+
const outPath = path.join(tmpDir, 'voice.wav');
|
|
65
|
+
|
|
66
|
+
let args;
|
|
67
|
+
if (recorder.tool === 'rec' || recorder.tool === 'sox') {
|
|
68
|
+
// `rec` is sox with sensible defaults; `sox` requires `-d` for default input.
|
|
69
|
+
args = recorder.tool === 'rec'
|
|
70
|
+
? ['-q', '-c', '1', '-r', '16000', outPath]
|
|
71
|
+
: ['-q', '-d', '-c', '1', '-r', '16000', outPath];
|
|
72
|
+
} else {
|
|
73
|
+
args = ffmpegArgs(outPath);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
const child = spawn(recorder.bin, args, { stdio: ['ignore', 'ignore', 'pipe'] });
|
|
77
|
+
let stderr = '';
|
|
78
|
+
child.stderr.on('data', chunk => {
|
|
79
|
+
stderr += chunk.toString();
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
let exited = false;
|
|
83
|
+
const exitPromise = new Promise((resolve) => {
|
|
84
|
+
child.on('exit', (code, signal) => {
|
|
85
|
+
exited = true;
|
|
86
|
+
resolve({ code, signal });
|
|
87
|
+
});
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
return {
|
|
91
|
+
tool: recorder.tool,
|
|
92
|
+
outPath,
|
|
93
|
+
async stop() {
|
|
94
|
+
if (!exited) {
|
|
95
|
+
// SIGINT is important: ffmpeg and sox both flush the output file
|
|
96
|
+
// cleanly on SIGINT. SIGTERM/KILL can leave a truncated WAV header.
|
|
97
|
+
try { child.kill('SIGINT'); } catch {}
|
|
98
|
+
}
|
|
99
|
+
await exitPromise;
|
|
100
|
+
|
|
101
|
+
if (!fs.existsSync(outPath)) {
|
|
102
|
+
throw new Error(`Recorder produced no output file. stderr: ${stderr.trim() || '<empty>'}`);
|
|
103
|
+
}
|
|
104
|
+
const data = fs.readFileSync(outPath);
|
|
105
|
+
if (data.length < 44) {
|
|
106
|
+
// 44 bytes is the minimum WAV header.
|
|
107
|
+
throw new Error('Recording too short or empty.');
|
|
108
|
+
}
|
|
109
|
+
return { path: outPath, data, mime: 'audio/wav', filename: 'voice.wav' };
|
|
110
|
+
},
|
|
111
|
+
cleanup() {
|
|
112
|
+
try {
|
|
113
|
+
fs.rmSync(tmpDir, { recursive: true, force: true });
|
|
114
|
+
} catch {}
|
|
115
|
+
},
|
|
116
|
+
};
|
|
117
|
+
}
|