@moxxy/cli 1.3.2 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/api-client.js +71 -0
- package/src/cli.js +3 -2
- package/src/commands/channel.js +88 -25
- package/src/commands/init.js +302 -65
- package/src/commands/settings.js +108 -6
- package/src/tui/hooks/use-command-handler.js +63 -1
- package/src/tui/slash-commands.js +1 -0
- package/src/tui/voice-recorder.js +117 -0
package/package.json
CHANGED
package/src/api-client.js
CHANGED
|
@@ -127,6 +127,46 @@ export class ApiClient {
|
|
|
127
127
|
return this.request(`/v1/agents/${encodeURIComponent(agentId)}/runs`, 'POST', { task });
|
|
128
128
|
}
|
|
129
129
|
|
|
130
|
+
/**
|
|
131
|
+
* Upload a recorded voice clip to the gateway. The server transcribes it
|
|
132
|
+
* via the configured STT provider and immediately starts a run with the
|
|
133
|
+
* transcript as the task. Returns `{ transcript, run_id, status, ... }`.
|
|
134
|
+
*/
|
|
135
|
+
async startRunWithAudio(agentId, { data, mime = 'audio/wav', filename = 'voice.wav' }) {
|
|
136
|
+
const form = new FormData();
|
|
137
|
+
const blob = new Blob([data], { type: mime });
|
|
138
|
+
form.append('audio', blob, filename);
|
|
139
|
+
|
|
140
|
+
const headers = {};
|
|
141
|
+
if (this.token) {
|
|
142
|
+
headers['authorization'] = `Bearer ${this.token}`;
|
|
143
|
+
}
|
|
144
|
+
// NOTE: do NOT set content-type — fetch will compute the multipart
|
|
145
|
+
// boundary for us.
|
|
146
|
+
|
|
147
|
+
const url = this.buildUrl(`/v1/agents/${encodeURIComponent(agentId)}/runs/audio`);
|
|
148
|
+
let resp;
|
|
149
|
+
try {
|
|
150
|
+
resp = await fetch(url, { method: 'POST', headers, body: form });
|
|
151
|
+
} catch (err) {
|
|
152
|
+
if (isConnectionError(err)) throw gatewayDownError();
|
|
153
|
+
throw err;
|
|
154
|
+
}
|
|
155
|
+
if (!resp.ok) {
|
|
156
|
+
const err = await resp.json().catch(() => ({
|
|
157
|
+
error: 'unknown',
|
|
158
|
+
message: resp.statusText,
|
|
159
|
+
}));
|
|
160
|
+
const error = new Error(err.message || `API error ${resp.status}`);
|
|
161
|
+
error.status = resp.status;
|
|
162
|
+
error.code = err.error;
|
|
163
|
+
throw error;
|
|
164
|
+
}
|
|
165
|
+
const text = await resp.text();
|
|
166
|
+
if (!text) return {};
|
|
167
|
+
return JSON.parse(text);
|
|
168
|
+
}
|
|
169
|
+
|
|
130
170
|
async stopAgent(agentId) {
|
|
131
171
|
return this.request(`/v1/agents/${encodeURIComponent(agentId)}/stop`, 'POST');
|
|
132
172
|
}
|
|
@@ -296,6 +336,37 @@ export class ApiClient {
|
|
|
296
336
|
async setAgentTemplate(name, template) {
|
|
297
337
|
return this.request(`/v1/agents/${encodeURIComponent(name)}/template`, 'PATCH', { template });
|
|
298
338
|
}
|
|
339
|
+
|
|
340
|
+
// --- Settings: Speech-to-text ---------------------------------------
|
|
341
|
+
|
|
342
|
+
/**
|
|
343
|
+
* Fetch the currently-active STT configuration from the gateway.
|
|
344
|
+
* Returns `{ enabled: false }` when voice messages are off, or
|
|
345
|
+
* `{ enabled: true, provider, model, secret_ref, ... }` otherwise.
|
|
346
|
+
* The API never returns the raw API key.
|
|
347
|
+
*/
|
|
348
|
+
async getSttSettings() {
|
|
349
|
+
return this.request('/v1/settings/stt', 'GET');
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
/**
|
|
353
|
+
* Configure (or reconfigure) speech-to-text.
|
|
354
|
+
*
|
|
355
|
+
* Pass `api_key` to provision a fresh vault secret; omit it to reuse an
|
|
356
|
+
* existing `secret_ref`. The running gateway swaps providers in-place —
|
|
357
|
+
* no restart needed.
|
|
358
|
+
*/
|
|
359
|
+
async updateSttSettings(body) {
|
|
360
|
+
return this.request('/v1/settings/stt', 'PUT', body);
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
/**
|
|
364
|
+
* Disable voice messages. Removes the `stt` block from settings.yaml
|
|
365
|
+
* and clears the in-memory provider. Does NOT delete the vault secret.
|
|
366
|
+
*/
|
|
367
|
+
async deleteSttSettings() {
|
|
368
|
+
return this.request('/v1/settings/stt', 'DELETE');
|
|
369
|
+
}
|
|
299
370
|
}
|
|
300
371
|
|
|
301
372
|
export function createApiClient(baseUrl, token, authMode = 'token') {
|
package/src/cli.js
CHANGED
|
@@ -94,8 +94,9 @@ Usage:
|
|
|
94
94
|
moxxy chat [--agent <id>] Alias for tui
|
|
95
95
|
moxxy events tail [--agent <id>] [--run <id>] [--json]
|
|
96
96
|
moxxy settings network-mode [safe|unsafe] Get or set network mode
|
|
97
|
-
moxxy settings
|
|
98
|
-
moxxy settings
|
|
97
|
+
moxxy settings stt [status|enable|disable] Configure voice messages (speech-to-text)
|
|
98
|
+
moxxy settings get [--key <k>] [--json] View settings
|
|
99
|
+
moxxy settings set --key <k> --value <v> Set a setting
|
|
99
100
|
moxxy doctor Diagnose installation
|
|
100
101
|
moxxy update [--check] [--force] [--json] Check for and install updates
|
|
101
102
|
moxxy update --rollback Restore previous gateway version
|
package/src/commands/channel.js
CHANGED
|
@@ -49,16 +49,12 @@ async function createChannel(client, args) {
|
|
|
49
49
|
message: 'Channel type',
|
|
50
50
|
options: [
|
|
51
51
|
{ value: 'telegram', label: 'Telegram', hint: 'BotFather bot token required' },
|
|
52
|
-
{ value: 'discord', label: 'Discord', hint: '
|
|
52
|
+
{ value: 'discord', label: 'Discord', hint: 'Discord bot token required' },
|
|
53
|
+
{ value: 'whatsapp', label: 'WhatsApp', hint: 'WhatsApp Business API token required' },
|
|
53
54
|
],
|
|
54
55
|
});
|
|
55
56
|
handleCancel(channelType);
|
|
56
57
|
|
|
57
|
-
if (channelType === 'discord') {
|
|
58
|
-
p.log.info('Discord channel support is coming soon.');
|
|
59
|
-
return;
|
|
60
|
-
}
|
|
61
|
-
|
|
62
58
|
// Step 1: Select agent to bind
|
|
63
59
|
let agentId;
|
|
64
60
|
try {
|
|
@@ -80,33 +76,84 @@ async function createChannel(client, args) {
|
|
|
80
76
|
return;
|
|
81
77
|
}
|
|
82
78
|
|
|
83
|
-
// Step 2: Get
|
|
84
|
-
|
|
85
|
-
'1. Open Telegram and talk to @BotFather\n' +
|
|
86
|
-
'2. Send /newbot and follow the prompts\n' +
|
|
87
|
-
'3. Copy the bot token',
|
|
88
|
-
'Telegram Bot Setup'
|
|
89
|
-
);
|
|
79
|
+
// Step 2: Get credentials based on channel type
|
|
80
|
+
let botToken, displayName, config;
|
|
90
81
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
82
|
+
if (channelType === 'telegram') {
|
|
83
|
+
p.note(
|
|
84
|
+
'1. Open Telegram and talk to @BotFather\n' +
|
|
85
|
+
'2. Send /newbot and follow the prompts\n' +
|
|
86
|
+
'3. Copy the bot token',
|
|
87
|
+
'Telegram Bot Setup'
|
|
88
|
+
);
|
|
89
|
+
|
|
90
|
+
botToken = await p.password({
|
|
91
|
+
message: 'Paste your Telegram bot token',
|
|
92
|
+
});
|
|
93
|
+
handleCancel(botToken);
|
|
94
|
+
} else if (channelType === 'discord') {
|
|
95
|
+
p.note(
|
|
96
|
+
'1. Go to https://discord.com/developers/applications\n' +
|
|
97
|
+
'2. Create a new application → Bot → copy the bot token\n' +
|
|
98
|
+
'3. Enable MESSAGE CONTENT intent under Bot → Privileged Intents\n' +
|
|
99
|
+
'4. Invite the bot to your server with the Messages scope',
|
|
100
|
+
'Discord Bot Setup'
|
|
101
|
+
);
|
|
102
|
+
|
|
103
|
+
botToken = await p.password({
|
|
104
|
+
message: 'Paste your Discord bot token',
|
|
105
|
+
});
|
|
106
|
+
handleCancel(botToken);
|
|
107
|
+
} else if (channelType === 'whatsapp') {
|
|
108
|
+
p.note(
|
|
109
|
+
'1. Go to https://developers.facebook.com and create an app\n' +
|
|
110
|
+
'2. Add the WhatsApp product to your app\n' +
|
|
111
|
+
'3. Copy the permanent access token and Phone Number ID\n' +
|
|
112
|
+
'4. Configure the webhook URL to: <your-moxxy-url>/v1/channels/whatsapp/webhook',
|
|
113
|
+
'WhatsApp Business API Setup'
|
|
114
|
+
);
|
|
115
|
+
|
|
116
|
+
botToken = await p.password({
|
|
117
|
+
message: 'Paste your WhatsApp access token',
|
|
118
|
+
});
|
|
119
|
+
handleCancel(botToken);
|
|
120
|
+
|
|
121
|
+
const phoneNumberId = await p.text({
|
|
122
|
+
message: 'Phone Number ID (from WhatsApp Business API)',
|
|
123
|
+
});
|
|
124
|
+
handleCancel(phoneNumberId);
|
|
125
|
+
|
|
126
|
+
const verifyToken = await p.text({
|
|
127
|
+
message: 'Webhook verify token (you choose this, used to verify the webhook)',
|
|
128
|
+
placeholder: 'my-verify-token',
|
|
129
|
+
});
|
|
130
|
+
handleCancel(verifyToken);
|
|
95
131
|
|
|
96
|
-
|
|
132
|
+
config = {
|
|
133
|
+
phone_number_id: phoneNumberId,
|
|
134
|
+
verify_token: verifyToken || undefined,
|
|
135
|
+
};
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
displayName = await p.text({
|
|
97
139
|
message: 'Display name for this channel',
|
|
98
|
-
placeholder: 'My
|
|
140
|
+
placeholder: channelType === 'telegram' ? 'My Telegram Bot' :
|
|
141
|
+
channelType === 'discord' ? 'My Discord Bot' : 'My WhatsApp Bot',
|
|
99
142
|
});
|
|
100
143
|
handleCancel(displayName);
|
|
101
144
|
|
|
145
|
+
const defaultName = channelType === 'telegram' ? 'Telegram Bot' :
|
|
146
|
+
channelType === 'discord' ? 'Discord Bot' : 'WhatsApp Bot';
|
|
147
|
+
|
|
102
148
|
// Step 3: Create channel
|
|
103
149
|
let channel;
|
|
104
150
|
try {
|
|
105
151
|
channel = await withSpinner('Creating channel...', () =>
|
|
106
152
|
client.request('/v1/channels', 'POST', {
|
|
107
153
|
channel_type: channelType,
|
|
108
|
-
display_name: displayName ||
|
|
154
|
+
display_name: displayName || defaultName,
|
|
109
155
|
bot_token: botToken,
|
|
156
|
+
...(config ? { config } : {}),
|
|
110
157
|
}), 'Channel created.');
|
|
111
158
|
|
|
112
159
|
showResult('Channel Created', {
|
|
@@ -121,11 +168,27 @@ async function createChannel(client, args) {
|
|
|
121
168
|
}
|
|
122
169
|
|
|
123
170
|
// Step 4: Wait for pairing code
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
171
|
+
if (channelType === 'telegram') {
|
|
172
|
+
p.note(
|
|
173
|
+
'1. Open your Telegram bot and send /start\n' +
|
|
174
|
+
'2. Copy the 6-digit pairing code',
|
|
175
|
+
'Pair your chat'
|
|
176
|
+
);
|
|
177
|
+
} else if (channelType === 'discord') {
|
|
178
|
+
p.note(
|
|
179
|
+
'1. Send a message to your Discord bot or in a channel it can see\n' +
|
|
180
|
+
'2. The bot will respond with a pairing code if not yet paired\n' +
|
|
181
|
+
'3. Copy the 6-digit pairing code',
|
|
182
|
+
'Pair your chat'
|
|
183
|
+
);
|
|
184
|
+
} else if (channelType === 'whatsapp') {
|
|
185
|
+
p.note(
|
|
186
|
+
'1. Send a message to your WhatsApp number\n' +
|
|
187
|
+
'2. The bot will respond with a pairing code\n' +
|
|
188
|
+
'3. Copy the 6-digit pairing code',
|
|
189
|
+
'Pair your chat'
|
|
190
|
+
);
|
|
191
|
+
}
|
|
129
192
|
|
|
130
193
|
const code = await p.text({
|
|
131
194
|
message: 'Enter 6-digit pairing code',
|
package/src/commands/init.js
CHANGED
|
@@ -650,12 +650,12 @@ export async function runInit(client, args) {
|
|
|
650
650
|
|
|
651
651
|
// Step 6: Channel setup (optional)
|
|
652
652
|
p.note(
|
|
653
|
-
'Channels enable agent communication via Telegram or
|
|
653
|
+
'Channels enable agent communication via Telegram, Discord, or WhatsApp.\n' +
|
|
654
654
|
'You can set up channels later with: moxxy channel create',
|
|
655
655
|
'Channels'
|
|
656
656
|
);
|
|
657
657
|
const setupChannel = await p.confirm({
|
|
658
|
-
message: 'Set up a messaging channel
|
|
658
|
+
message: 'Set up a messaging channel?',
|
|
659
659
|
initialValue: false,
|
|
660
660
|
});
|
|
661
661
|
handleCancel(setupChannel);
|
|
@@ -665,11 +665,14 @@ export async function runInit(client, args) {
|
|
|
665
665
|
message: 'Channel type',
|
|
666
666
|
options: [
|
|
667
667
|
{ value: 'telegram', label: 'Telegram', hint: 'BotFather bot token required' },
|
|
668
|
-
{ value: 'discord', label: 'Discord', hint: '
|
|
668
|
+
{ value: 'discord', label: 'Discord', hint: 'Discord bot token required' },
|
|
669
|
+
{ value: 'whatsapp', label: 'WhatsApp', hint: 'WhatsApp Business API token required' },
|
|
669
670
|
],
|
|
670
671
|
});
|
|
671
672
|
handleCancel(channelType);
|
|
672
673
|
|
|
674
|
+
let botToken, displayName, channelConfig;
|
|
675
|
+
|
|
673
676
|
if (channelType === 'telegram') {
|
|
674
677
|
p.note(
|
|
675
678
|
'1. Open Telegram and talk to @BotFather\n' +
|
|
@@ -678,88 +681,148 @@ export async function runInit(client, args) {
|
|
|
678
681
|
'Telegram Bot Setup'
|
|
679
682
|
);
|
|
680
683
|
|
|
681
|
-
|
|
684
|
+
botToken = await p.password({
|
|
682
685
|
message: 'Paste your Telegram bot token',
|
|
683
686
|
});
|
|
684
687
|
handleCancel(botToken);
|
|
688
|
+
} else if (channelType === 'discord') {
|
|
689
|
+
p.note(
|
|
690
|
+
'1. Go to https://discord.com/developers/applications\n' +
|
|
691
|
+
'2. Create a new application → Bot → copy the bot token\n' +
|
|
692
|
+
'3. Enable MESSAGE CONTENT intent under Bot → Privileged Intents\n' +
|
|
693
|
+
'4. Invite the bot to your server with the Messages scope',
|
|
694
|
+
'Discord Bot Setup'
|
|
695
|
+
);
|
|
685
696
|
|
|
686
|
-
|
|
687
|
-
message: '
|
|
688
|
-
placeholder: 'My Moxxy Bot',
|
|
697
|
+
botToken = await p.password({
|
|
698
|
+
message: 'Paste your Discord bot token',
|
|
689
699
|
});
|
|
690
|
-
handleCancel(
|
|
700
|
+
handleCancel(botToken);
|
|
701
|
+
} else if (channelType === 'whatsapp') {
|
|
702
|
+
p.note(
|
|
703
|
+
'1. Go to https://developers.facebook.com and create an app\n' +
|
|
704
|
+
'2. Add the WhatsApp product to your app\n' +
|
|
705
|
+
'3. Copy the permanent access token and Phone Number ID\n' +
|
|
706
|
+
'4. Configure the webhook URL to: <your-moxxy-url>/v1/channels/whatsapp/webhook',
|
|
707
|
+
'WhatsApp Business API Setup'
|
|
708
|
+
);
|
|
691
709
|
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
710
|
+
botToken = await p.password({
|
|
711
|
+
message: 'Paste your WhatsApp access token',
|
|
712
|
+
});
|
|
713
|
+
handleCancel(botToken);
|
|
714
|
+
|
|
715
|
+
const phoneNumberId = await p.text({
|
|
716
|
+
message: 'Phone Number ID (from WhatsApp Business API)',
|
|
717
|
+
});
|
|
718
|
+
handleCancel(phoneNumberId);
|
|
719
|
+
|
|
720
|
+
const verifyToken = await p.text({
|
|
721
|
+
message: 'Webhook verify token (you choose this, used to verify the webhook)',
|
|
722
|
+
placeholder: 'my-verify-token',
|
|
723
|
+
});
|
|
724
|
+
handleCancel(verifyToken);
|
|
725
|
+
|
|
726
|
+
channelConfig = {
|
|
727
|
+
phone_number_id: phoneNumberId,
|
|
728
|
+
verify_token: verifyToken || undefined,
|
|
729
|
+
};
|
|
730
|
+
}
|
|
699
731
|
|
|
700
|
-
|
|
732
|
+
const defaultName = channelType === 'telegram' ? 'Telegram Bot' :
|
|
733
|
+
channelType === 'discord' ? 'Discord Bot' : 'WhatsApp Bot';
|
|
734
|
+
const channelLabel = channelType === 'telegram' ? 'Telegram' :
|
|
735
|
+
channelType === 'discord' ? 'Discord' : 'WhatsApp';
|
|
701
736
|
|
|
702
|
-
|
|
737
|
+
displayName = await p.text({
|
|
738
|
+
message: 'Display name for this channel',
|
|
739
|
+
placeholder: `My ${channelLabel} Bot`,
|
|
740
|
+
});
|
|
741
|
+
handleCancel(displayName);
|
|
742
|
+
|
|
743
|
+
try {
|
|
744
|
+
const result = await withSpinner(`Registering ${channelLabel} channel...`, () =>
|
|
745
|
+
client.request('/v1/channels', 'POST', {
|
|
746
|
+
channel_type: channelType,
|
|
747
|
+
display_name: displayName || defaultName,
|
|
748
|
+
bot_token: botToken,
|
|
749
|
+
...(channelConfig ? { config: channelConfig } : {}),
|
|
750
|
+
}), 'Channel registered.');
|
|
751
|
+
|
|
752
|
+
showResult(`${channelLabel} Channel`, { ID: result.id, Status: result.status });
|
|
753
|
+
|
|
754
|
+
// Interactive pairing
|
|
755
|
+
if (channelType === 'telegram') {
|
|
703
756
|
p.note(
|
|
704
757
|
'1. Open your Telegram bot and send /start\n' +
|
|
705
758
|
'2. You will receive a 6-digit pairing code',
|
|
706
759
|
'Pair your chat'
|
|
707
760
|
);
|
|
761
|
+
} else if (channelType === 'discord') {
|
|
762
|
+
p.note(
|
|
763
|
+
'1. Send a message to your Discord bot or in a channel it can see\n' +
|
|
764
|
+
'2. Type /start to receive a 6-digit pairing code',
|
|
765
|
+
'Pair your chat'
|
|
766
|
+
);
|
|
767
|
+
} else if (channelType === 'whatsapp') {
|
|
768
|
+
p.note(
|
|
769
|
+
'1. Send /start to your WhatsApp number\n' +
|
|
770
|
+
'2. You will receive a 6-digit pairing code',
|
|
771
|
+
'Pair your chat'
|
|
772
|
+
);
|
|
773
|
+
}
|
|
708
774
|
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
775
|
+
const pairCode = await p.text({
|
|
776
|
+
message: 'Enter the 6-digit pairing code',
|
|
777
|
+
placeholder: '123456',
|
|
778
|
+
validate: (v) => {
|
|
779
|
+
if (!v || v.trim().length === 0) return 'Code is required';
|
|
780
|
+
},
|
|
781
|
+
});
|
|
782
|
+
handleCancel(pairCode);
|
|
717
783
|
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
p.log.info(`Pair later with: moxxy channel pair --code ${pairCode} --agent <agent-id>`);
|
|
726
|
-
} else {
|
|
727
|
-
agentId = await p.select({
|
|
728
|
-
message: 'Select agent to bind',
|
|
729
|
-
options: agents.map(a => ({
|
|
730
|
-
value: a.name,
|
|
731
|
-
label: `${a.name} (${a.provider_id}/${a.model_id})`,
|
|
732
|
-
})),
|
|
733
|
-
});
|
|
734
|
-
handleCancel(agentId);
|
|
735
|
-
}
|
|
736
|
-
} catch (err) {
|
|
737
|
-
p.log.warn(`Could not list agents: ${err.message}`);
|
|
784
|
+
// Pick an agent to bind
|
|
785
|
+
let agentId;
|
|
786
|
+
try {
|
|
787
|
+
const agents = await withSpinner('Fetching agents...', () =>
|
|
788
|
+
client.listAgents(), 'Agents loaded.');
|
|
789
|
+
if (!agents || agents.length === 0) {
|
|
790
|
+
p.log.warn('No agents found. Create one first with: moxxy agent create');
|
|
738
791
|
p.log.info(`Pair later with: moxxy channel pair --code ${pairCode} --agent <agent-id>`);
|
|
792
|
+
} else {
|
|
793
|
+
agentId = await p.select({
|
|
794
|
+
message: 'Select agent to bind',
|
|
795
|
+
options: agents.map(a => ({
|
|
796
|
+
value: a.name,
|
|
797
|
+
label: `${a.name} (${a.provider_id}/${a.model_id})`,
|
|
798
|
+
})),
|
|
799
|
+
});
|
|
800
|
+
handleCancel(agentId);
|
|
739
801
|
}
|
|
802
|
+
} catch (err) {
|
|
803
|
+
p.log.warn(`Could not list agents: ${err.message}`);
|
|
804
|
+
p.log.info(`Pair later with: moxxy channel pair --code ${pairCode} --agent <agent-id>`);
|
|
805
|
+
}
|
|
740
806
|
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
}
|
|
807
|
+
if (agentId) {
|
|
808
|
+
try {
|
|
809
|
+
const pairResult = await withSpinner('Pairing...', () =>
|
|
810
|
+
client.request(`/v1/channels/${result.id}/pair`, 'POST', {
|
|
811
|
+
code: pairCode,
|
|
812
|
+
agent_id: agentId,
|
|
813
|
+
}), 'Paired successfully.');
|
|
814
|
+
showResult('Channel Paired', {
|
|
815
|
+
'Binding ID': pairResult.id,
|
|
816
|
+
Agent: pairResult.agent_id,
|
|
817
|
+
'External Chat': pairResult.external_chat_id,
|
|
818
|
+
});
|
|
819
|
+
} catch (err) {
|
|
820
|
+
p.log.error(`Failed to pair: ${err.message}`);
|
|
821
|
+
p.log.info(`Try again with: moxxy channel pair --code ${pairCode} --agent ${agentId}`);
|
|
757
822
|
}
|
|
758
|
-
} catch (err) {
|
|
759
|
-
p.log.error(`Failed to register channel: ${err.message}`);
|
|
760
823
|
}
|
|
761
|
-
}
|
|
762
|
-
p.log.
|
|
824
|
+
} catch (err) {
|
|
825
|
+
p.log.error(`Failed to register channel: ${err.message}`);
|
|
763
826
|
}
|
|
764
827
|
}
|
|
765
828
|
|
|
@@ -807,9 +870,183 @@ export async function runInit(client, args) {
|
|
|
807
870
|
}
|
|
808
871
|
}
|
|
809
872
|
|
|
873
|
+
// Step 8: Voice messages (optional)
|
|
874
|
+
p.note(
|
|
875
|
+
'Voice messages let users send audio to the agent on any channel\n' +
|
|
876
|
+
'(Telegram voice notes, the TUI /voice command, or direct audio upload\n' +
|
|
877
|
+
'to the gateway). The audio is transcribed to text before the agent\n' +
|
|
878
|
+
'sees it. The agent does not reply with voice.',
|
|
879
|
+
'Voice Messages (Speech-to-Text)'
|
|
880
|
+
);
|
|
881
|
+
|
|
882
|
+
const enableVoice = await p.confirm({
|
|
883
|
+
message: 'Enable voice messages?',
|
|
884
|
+
initialValue: false,
|
|
885
|
+
});
|
|
886
|
+
handleCancel(enableVoice);
|
|
887
|
+
|
|
888
|
+
if (enableVoice) {
|
|
889
|
+
const sttProvider = await p.select({
|
|
890
|
+
message: 'Speech-to-text provider',
|
|
891
|
+
options: [
|
|
892
|
+
{
|
|
893
|
+
value: 'whisper',
|
|
894
|
+
label: 'OpenAI Whisper',
|
|
895
|
+
hint: 'Cloud API, requires an OpenAI key',
|
|
896
|
+
},
|
|
897
|
+
{ value: '__skip__', label: 'Skip', hint: 'configure later' },
|
|
898
|
+
],
|
|
899
|
+
});
|
|
900
|
+
handleCancel(sttProvider);
|
|
901
|
+
|
|
902
|
+
if (sttProvider === 'whisper') {
|
|
903
|
+
const configured = await configureWhisperStt(client, moxxyHome);
|
|
904
|
+
if (configured) {
|
|
905
|
+
p.log.success('Voice messages enabled (OpenAI Whisper).');
|
|
906
|
+
} else {
|
|
907
|
+
p.log.warn('Voice setup skipped. Retry later with: moxxy init');
|
|
908
|
+
}
|
|
909
|
+
}
|
|
910
|
+
}
|
|
911
|
+
|
|
810
912
|
p.outro('Setup complete. Run moxxy to see available commands.');
|
|
811
913
|
}
|
|
812
914
|
|
|
915
|
+
// ---------------------------------------------------------------------------
|
|
916
|
+
// Speech-to-text (voice message) helpers
|
|
917
|
+
// ---------------------------------------------------------------------------
|
|
918
|
+
|
|
919
|
+
const STT_WHISPER_BACKEND_KEY = 'moxxy_stt_whisper';
|
|
920
|
+
const STT_WHISPER_KEY_NAME = 'STT_WHISPER_API_KEY';
|
|
921
|
+
const OPENAI_PROVIDER_BACKEND_KEY = 'moxxy_provider_openai';
|
|
922
|
+
|
|
923
|
+
/**
 * Set up Whisper speech-to-text during `init`.
 *
 * Flow:
 *   1. Look in the vault for the secret backing the OpenAI provider and, if
 *      one exists, offer to reuse it so the user does not type the key twice.
 *   2. Otherwise prompt for an OpenAI API key and store it as a dedicated
 *      STT vault secret.
 *   3. Persist an `stt` block (provider, model, secret_ref) to settings.yaml.
 *
 * @param {object} client - API client exposing `listSecrets` and `createSecret`.
 * @param {string} moxxyHome - Directory that holds settings.yaml.
 * @returns {Promise<boolean>} true once settings are written; false when
 *   storing the key or writing settings.yaml fails.
 */
async function configureWhisperStt(client, moxxyHome) {
  let secretRef = null;

  // Step 1: try to find a reusable OpenAI provider secret.
  try {
    const vaultEntries = (await client.listSecrets()) || [];
    const hasOpenAiSecret = vaultEntries.some(
      (entry) => entry.backend_key === OPENAI_PROVIDER_BACKEND_KEY,
    );
    if (hasOpenAiSecret) {
      const wantsReuse = await p.confirm({
        message: 'Reuse your existing OpenAI API key for Whisper?',
        initialValue: true,
      });
      handleCancel(wantsReuse);
      if (wantsReuse) {
        secretRef = OPENAI_PROVIDER_BACKEND_KEY;
      }
    }
  } catch (err) {
    // A failed vault lookup is not fatal: warn and fall back to prompting
    // for a key below.
    p.log.warn(`Could not check existing vault secrets: ${err.message}`);
  }

  // Step 2: nothing to reuse, so ask for a key and store it in the vault.
  if (!secretRef) {
    const enteredKey = await p.password({
      message: 'Enter your OpenAI API key (used for Whisper transcription)',
      validate: (val) => {
        if (!val || !val.trim()) return 'API key cannot be empty';
      },
    });
    handleCancel(enteredKey);

    try {
      await withSpinner(
        'Storing API key in vault...',
        async () => {
          await client.createSecret({
            key_name: STT_WHISPER_KEY_NAME,
            backend_key: STT_WHISPER_BACKEND_KEY,
            policy_label: 'stt-provider',
            value: enteredKey.trim(),
          });
        },
        'Whisper API key stored.',
      );
    } catch (err) {
      p.log.error(`Failed to store API key: ${err.message}`);
      return false;
    }
    secretRef = STT_WHISPER_BACKEND_KEY;
  }

  // Step 3: record the provider choice in settings.yaml.
  try {
    saveSttSetting(moxxyHome, {
      provider: 'whisper',
      model: 'whisper-1',
      secret_ref: secretRef,
    });
    return true;
  } catch (err) {
    p.log.error(`Failed to write settings.yaml: ${err.message}`);
    return false;
  }
}
|
|
995
|
+
|
|
996
|
+
/**
 * Write (or clear) the `stt` block in `{moxxy_home}/settings.yaml`.
 *
 * Pass `null` to remove the block. Pass an object with at least `provider`,
 * `model`, and `secret_ref` to write a fresh block. Any prior `stt:` block
 * is removed in full — including nested indented child lines and blank lines
 * between those children — before the new block is appended, so repeated
 * runs don't accumulate stale entries.
 *
 * @param {string} moxxyHome - Directory containing settings.yaml (created if missing).
 * @param {?{provider: string, model: string, secret_ref: string, api_base?: string}} config
 *   New STT configuration, or `null` to remove the block.
 * @throws {Error} If settings.yaml exists but cannot be read, or the write fails.
 */
export function saveSttSetting(moxxyHome, config) {
  const settingsFile = join(moxxyHome, 'settings.yaml');

  let existing = '';
  try {
    existing = readFileSync(settingsFile, 'utf-8');
  } catch (err) {
    // Only a missing file means "no existing settings". Any other read
    // failure must not be swallowed, or the rewrite below would replace the
    // user's whole settings file with just the stt block.
    if (!err || err.code !== 'ENOENT') throw err;
  }

  // Strip any previous `stt:` block. A block is the `stt:` line plus all
  // subsequent indented (leading whitespace) lines — block-style YAML.
  // Blank lines are ambiguous while scanning: they belong to the block if
  // more indented children follow, and to the rest of the file otherwise.
  // They are buffered until the next non-blank line decides.
  const kept = [];
  let inSttBlock = false;
  let pendingBlanks = [];
  for (const line of existing.split('\n')) {
    if (inSttBlock) {
      if (line.trim() === '') {
        pendingBlanks.push(line);
        continue;
      }
      if (/^\s+\S/.test(line)) {
        // Indented child: still inside the block, so the buffered blank
        // lines were interior to it and are dropped with it.
        pendingBlanks = [];
        continue;
      }
      // Content at column 0 ends the block; the buffered blank lines were
      // separators that belong to the surrounding file.
      kept.push(...pendingBlanks);
      pendingBlanks = [];
      inSttBlock = false;
    }
    if (/^stt:(\s|$)/.test(line)) {
      inSttBlock = true;
      continue;
    }
    kept.push(line);
  }
  // Blank lines still buffered here trail the file and would be trimmed
  // below anyway, so they are intentionally not flushed.

  // Drop trailing empty lines so we can cleanly append.
  while (kept.length > 0 && kept[kept.length - 1].trim() === '') kept.pop();

  if (config) {
    kept.push('stt:');
    kept.push(` provider: ${config.provider}`);
    kept.push(` model: ${config.model}`);
    kept.push(` secret_ref: ${config.secret_ref}`);
    if (config.api_base) kept.push(` api_base: ${config.api_base}`);
  }

  mkdirSync(moxxyHome, { recursive: true });
  writeFileSync(settingsFile, kept.join('\n') + '\n');
}
|
|
1049
|
+
|
|
813
1050
|
// ---------------------------------------------------------------------------
|
|
814
1051
|
// Browser rendering helpers
|
|
815
1052
|
// ---------------------------------------------------------------------------
|
package/src/commands/settings.js
CHANGED
|
@@ -175,7 +175,103 @@ async function settingsBrowserRendering(flags) {
|
|
|
175
175
|
}
|
|
176
176
|
}
|
|
177
177
|
|
|
178
|
-
|
|
178
|
+
/**
 * Speech-to-text (voice message) settings. Unlike network_mode and
 * browser_rendering, STT is configured through the gateway's
 * `/v1/settings/stt` API so the running bridge picks up the new provider
 * without a restart AND the vault-stored API key is owned by the gateway.
 *
 * Actions (with aliases):
 *   - status | get | show       print the current STT configuration
 *   - enable | configure | set  apply a provider/model plus an API key or
 *                               an existing vault `secret_ref`
 *   - disable | off | clear     turn voice messages off
 *
 * @param {object} client - API client exposing `getSttSettings`,
 *   `updateSttSettings` and `deleteSttSettings`.
 * @param {object} flags - Parsed CLI flags (`json`, `provider`, `model`,
 *   `api-key`/`api_key`, `api-base`/`api_base`, `secret-ref`/`secret_ref`).
 * @param {string} [positional] - Action name; defaults to `status`.
 * @returns {Promise<void>}
 * @throws {Error} When no client is available, when credentials are missing
 *   in a non-interactive session, or when the action is unknown.
 */
async function settingsStt(client, flags, positional) {
  if (!client) {
    throw new Error('STT commands require a running gateway. Start it with: moxxy gateway start');
  }

  const sub = positional || 'status';

  switch (sub) {
    case 'status':
    case 'get':
    case 'show': {
      const resp = await client.getSttSettings();
      if (flags.json) {
        console.log(JSON.stringify(resp, null, 2));
        return;
      }
      if (!resp.enabled) {
        p.log.info('Voice messages: disabled.');
        p.log.info('Enable with: moxxy settings stt enable');
        return;
      }
      p.log.info('Voice messages: enabled');
      p.log.info(` provider: ${resp.provider}`);
      p.log.info(` model: ${resp.model}`);
      p.log.info(` secret_ref: ${resp.secret_ref}`);
      if (resp.api_base) p.log.info(` api_base: ${resp.api_base}`);
      p.log.info(` max_bytes: ${resp.max_bytes}`);
      p.log.info(` max_seconds: ${resp.max_seconds}`);
      return;
    }

    case 'enable':
    case 'configure':
    case 'set': {
      // Non-interactive: `moxxy settings stt enable --api-key sk-... [--provider whisper] [--model whisper-1]`
      const providerName = flags.provider || 'whisper';
      const modelName = flags.model || 'whisper-1';
      const apiBase = flags['api-base'] || flags.api_base || null;
      const secretRef = flags['secret-ref'] || flags.secret_ref || null;

      // Normalise the key BEFORE deciding whether one was supplied: a
      // whitespace-only value, or a flag given without a value, counts as
      // "no key" instead of being sent to the gateway as an empty string
      // (or crashing on `.trim()`).
      const rawKey = flags['api-key'] || flags.api_key;
      let apiKey = typeof rawKey === 'string' ? rawKey.trim() : '';

      if (!apiKey && !secretRef) {
        if (!isInteractive()) {
          throw new Error(
            'Provide --api-key <key>, or --secret-ref <backend_key> to reuse an existing vault entry.',
          );
        }
        const keyInput = await p.password({
          message: 'OpenAI API key for Whisper',
          validate: (v) => {
            if (!v || !v.trim()) return 'API key cannot be empty';
          },
        });
        if (p.isCancel(keyInput)) return;
        apiKey = keyInput.trim();
      }

      // Only include the optional fields that were actually provided.
      const body = { provider: providerName, model: modelName };
      if (apiKey) body.api_key = apiKey;
      if (apiBase) body.api_base = apiBase;
      if (secretRef) body.secret_ref = secretRef;

      const resp = await client.updateSttSettings(body);
      if (flags.json) {
        console.log(JSON.stringify(resp, null, 2));
      } else {
        p.log.success(`Voice messages enabled (${resp.provider}, ${resp.model}).`);
      }
      return;
    }

    case 'disable':
    case 'off':
    case 'clear': {
      const resp = await client.deleteSttSettings();
      if (flags.json) {
        console.log(JSON.stringify(resp, null, 2));
      } else {
        p.log.success('Voice messages disabled.');
      }
      return;
    }

    default:
      throw new Error(
        `Unknown stt action '${sub}'. Use: status | enable [--api-key <key>] | disable`,
      );
  }
}
|
|
273
|
+
|
|
274
|
+
export async function runSettings(client, args) {
|
|
179
275
|
const { action, flags } = parseSettingsCommand(args);
|
|
180
276
|
|
|
181
277
|
// Collect first positional arg after the action for convenience
|
|
@@ -198,6 +294,10 @@ export async function runSettings(_client, args) {
|
|
|
198
294
|
case 'browser-rendering':
|
|
199
295
|
await settingsBrowserRendering(flags);
|
|
200
296
|
break;
|
|
297
|
+
case 'stt':
|
|
298
|
+
case 'voice':
|
|
299
|
+
await settingsStt(client, flags, flags._positional);
|
|
300
|
+
break;
|
|
201
301
|
default:
|
|
202
302
|
if (isInteractive() && !action) {
|
|
203
303
|
// Interactive: show settings menu
|
|
@@ -206,18 +306,20 @@ export async function runSettings(_client, args) {
|
|
|
206
306
|
options: [
|
|
207
307
|
{ value: 'network-mode', label: 'Network mode', hint: 'safe / unsafe domain access' },
|
|
208
308
|
{ value: 'browser-rendering', label: 'Browser rendering', hint: 'headless Chrome for JS-heavy sites' },
|
|
309
|
+
{ value: 'stt', label: 'Voice (STT)', hint: 'speech-to-text provider' },
|
|
209
310
|
{ value: 'get', label: 'View all settings', hint: 'show current configuration' },
|
|
210
311
|
],
|
|
211
312
|
});
|
|
212
313
|
if (p.isCancel(selected)) return;
|
|
213
|
-
await runSettings(
|
|
314
|
+
await runSettings(client, [selected]);
|
|
214
315
|
} else {
|
|
215
316
|
throw new Error(
|
|
216
317
|
'Usage: moxxy settings <action>\n' +
|
|
217
|
-
' network-mode [safe|unsafe]
|
|
218
|
-
' browser-rendering [true|false]
|
|
219
|
-
'
|
|
220
|
-
'
|
|
318
|
+
' network-mode [safe|unsafe] Get or set network mode\n' +
|
|
319
|
+
' browser-rendering [true|false] Enable/disable headless Chrome rendering\n' +
|
|
320
|
+
' stt [status|enable|disable] [--api-key <key>] Configure voice messages (speech-to-text)\n' +
|
|
321
|
+
' get [--key <k>] View settings\n' +
|
|
322
|
+
' set --key <k> --value <v> Set a setting'
|
|
221
323
|
);
|
|
222
324
|
}
|
|
223
325
|
}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import { useReducer, useCallback } from 'react';
|
|
1
|
+
import { useReducer, useCallback, useRef } from 'react';
|
|
2
2
|
import { SLASH_COMMANDS } from '../slash-commands.js';
|
|
3
|
+
import { startRecording } from '../voice-recorder.js';
|
|
3
4
|
|
|
4
5
|
const INITIAL_STATE = { type: 'idle' };
|
|
5
6
|
|
|
@@ -23,6 +24,8 @@ function reducer(state, action) {
|
|
|
23
24
|
return { type: 'mcp_test_id' };
|
|
24
25
|
case 'template_assign_slug':
|
|
25
26
|
return { type: 'template_assign_slug' };
|
|
27
|
+
case 'voice_recording':
|
|
28
|
+
return { type: 'voice_recording' };
|
|
26
29
|
case 'reset':
|
|
27
30
|
return INITIAL_STATE;
|
|
28
31
|
default:
|
|
@@ -54,9 +57,49 @@ export function useCommandHandler({
|
|
|
54
57
|
onOpenTemplateAssignWizard,
|
|
55
58
|
}) {
|
|
56
59
|
const [twoStep, dispatch] = useReducer(reducer, INITIAL_STATE);
|
|
60
|
+
const voiceHandleRef = useRef(null);
|
|
57
61
|
|
|
58
62
|
const handleSubmit = useCallback(async (text) => {
|
|
59
63
|
const task = text.trim().replace(/^\/{2,}/, '/');
|
|
64
|
+
|
|
65
|
+
// While a recording is active, ANY submit (including bare Enter) stops
|
|
66
|
+
// it and ships the clip. This must run before the empty-text early return
|
|
67
|
+
// below so hitting Enter with no text still ends the capture.
|
|
68
|
+
if (twoStep.type === 'voice_recording') {
|
|
69
|
+
const handle = voiceHandleRef.current;
|
|
70
|
+
dispatch({ type: 'reset' });
|
|
71
|
+
voiceHandleRef.current = null;
|
|
72
|
+
if (!handle) {
|
|
73
|
+
eventsHandler.addSystemMessage('No active recording.');
|
|
74
|
+
return;
|
|
75
|
+
}
|
|
76
|
+
try {
|
|
77
|
+
const clip = await handle.stop();
|
|
78
|
+
eventsHandler.addSystemMessage('Transcribing voice message…');
|
|
79
|
+
if (!agent) {
|
|
80
|
+
eventsHandler.addSystemMessage('No agent connected. Cannot run task.');
|
|
81
|
+
return;
|
|
82
|
+
}
|
|
83
|
+
try {
|
|
84
|
+
const result = await client.startRunWithAudio(agent.name, clip);
|
|
85
|
+
const transcript = (result && result.transcript) || '[voice]';
|
|
86
|
+
eventsHandler.addUserMessage(transcript);
|
|
87
|
+
if (onAgentUpdate) onAgentUpdate({ status: 'running' });
|
|
88
|
+
} catch (err) {
|
|
89
|
+
if (err.isGatewayDown) {
|
|
90
|
+
eventsHandler.addSystemMessage(err.message);
|
|
91
|
+
} else {
|
|
92
|
+
eventsHandler.addSystemMessage(`Voice error: ${err.message}`);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
} catch (err) {
|
|
96
|
+
eventsHandler.addSystemMessage(`Recording failed: ${err.message}`);
|
|
97
|
+
} finally {
|
|
98
|
+
handle.cleanup();
|
|
99
|
+
}
|
|
100
|
+
return;
|
|
101
|
+
}
|
|
102
|
+
|
|
60
103
|
if (!task) return;
|
|
61
104
|
|
|
62
105
|
// Pending ask: agent asked for user input
|
|
@@ -419,6 +462,25 @@ export function useCommandHandler({
|
|
|
419
462
|
}
|
|
420
463
|
return;
|
|
421
464
|
}
|
|
465
|
+
if (task === '/voice') {
|
|
466
|
+
if (voiceHandleRef.current) {
|
|
467
|
+
// Defensive: treat a second /voice as a stop even if state drifted.
|
|
468
|
+
dispatch({ type: 'voice_recording' });
|
|
469
|
+
return;
|
|
470
|
+
}
|
|
471
|
+
try {
|
|
472
|
+
const handle = await startRecording();
|
|
473
|
+
voiceHandleRef.current = handle;
|
|
474
|
+
dispatch({ type: 'voice_recording' });
|
|
475
|
+
eventsHandler.addSystemMessage(
|
|
476
|
+
`Recording (${handle.tool})… press Enter or /voice again to stop.`,
|
|
477
|
+
);
|
|
478
|
+
} catch (err) {
|
|
479
|
+
eventsHandler.addSystemMessage(`Cannot record voice: ${err.message}`);
|
|
480
|
+
}
|
|
481
|
+
return;
|
|
482
|
+
}
|
|
483
|
+
|
|
422
484
|
if (task === '/template clear') {
|
|
423
485
|
try {
|
|
424
486
|
await client.setAgentTemplate(agentId, null);
|
|
@@ -9,6 +9,7 @@ export const SLASH_COMMANDS = [
|
|
|
9
9
|
{ name: '/vault', description: 'Open vault actions', aliases: ['/vault delete'] },
|
|
10
10
|
{ name: '/mcp', description: 'Open MCP actions', aliases: [] },
|
|
11
11
|
{ name: '/template', description: 'Open template actions', aliases: [] },
|
|
12
|
+
{ name: '/voice', description: 'Record a voice message (needs sox or ffmpeg)', aliases: [] },
|
|
12
13
|
];
|
|
13
14
|
|
|
14
15
|
export function matchCommands(input) {
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { spawn } from 'node:child_process';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import os from 'node:os';
|
|
4
|
+
import path from 'node:path';
|
|
5
|
+
import { promisify } from 'node:util';
|
|
6
|
+
import { execFile } from 'node:child_process';
|
|
7
|
+
|
|
8
|
+
const execFileP = promisify(execFile);
|
|
9
|
+
|
|
10
|
+
/**
 * Locate an executable by asking the system `which` command.
 *
 * @param {string} name - Binary name to look up.
 * @returns {Promise<string|null>} The trimmed path printed by `which`, or
 *   `null` when the lookup fails or prints nothing.
 */
async function which(name) {
  let located;
  try {
    const result = await execFileP('which', [name]);
    located = result.stdout.trim();
  } catch {
    // A failed `which` invocation is treated as "not installed".
    return null;
  }
  return located === '' ? null : located;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Find the first usable recording tool on this machine.
 *
 * Candidates are probed one at a time in priority order: `rec` (sox's
 * recording front-end), then `sox` itself, then `ffmpeg`.
 *
 * @returns {Promise<{ tool: string, bin: string } | null>} The tool name and
 *   the path reported by `which`, or `null` if none of them is available.
 */
export async function detectRecorder() {
  const candidates = ['rec', 'sox', 'ffmpeg'];
  for (const tool of candidates) {
    const bin = await which(tool);
    if (bin) {
      return { tool, bin };
    }
  }
  return null;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Build the ffmpeg command line that records mono 16 kHz audio to `outPath`.
 *
 * @param {string} outPath - Destination file for the recording.
 * @returns {string[]} Argument vector to pass to the ffmpeg binary.
 */
function ffmpegArgs(outPath) {
  // macOS: `avfoundation` default audio input is `:0`.
  // Anything else: assume ALSA `default` — user can symlink their own if needed.
  const inputArgs = process.platform === 'darwin'
    ? ['-f', 'avfoundation', '-i', ':0']
    : ['-f', 'alsa', '-i', 'default'];

  return ['-loglevel', 'error', ...inputArgs, '-ac', '1', '-ar', '16000', '-y', outPath];
}
|
|
48
|
+
|
|
49
|
+
/**
 * Start a recording. Returns a handle with `stop()` that resolves to
 * `{ path, data, mime, filename }`. The caller owns cleanup of the temp
 * file via `cleanup()`.
 *
 * The audio is written to a platform temp file so that even if the recorder
 * dies mid-stream we never lose the buffer.
 *
 * @returns {Promise<{ tool: string, outPath: string, stop: () => Promise<object>, cleanup: () => void }>}
 *   Handle for the running recorder process.
 * @throws {Error} When no recorder is installed, or when the recorder
 *   process cannot be started (the temp directory is removed in that case).
 */
export async function startRecording() {
  const recorder = await detectRecorder();
  if (!recorder) {
    throw new Error('No recorder found. Install `sox` (recommended) or `ffmpeg`.');
  }

  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'moxxy-voice-'));
  const outPath = path.join(tmpDir, 'voice.wav');

  // Best-effort removal of the temp directory; failures are deliberately ignored.
  const cleanup = () => {
    try {
      fs.rmSync(tmpDir, { recursive: true, force: true });
    } catch {}
  };

  let args;
  if (recorder.tool === 'rec' || recorder.tool === 'sox') {
    // `rec` is sox with sensible defaults; `sox` requires `-d` for default input.
    args = recorder.tool === 'rec'
      ? ['-q', '-c', '1', '-r', '16000', outPath]
      : ['-q', '-d', '-c', '1', '-r', '16000', outPath];
  } else {
    args = ffmpegArgs(outPath);
  }

  let child;
  try {
    child = spawn(recorder.bin, args, { stdio: ['ignore', 'ignore', 'pipe'] });
  } catch (err) {
    cleanup();
    throw new Error(`Failed to start recorder (${recorder.tool}): ${err.message}`, { cause: err });
  }

  let stderr = '';
  // The stderr pipe may be absent if spawning failed before stdio was set up.
  child.stderr?.on('data', (chunk) => {
    stderr += chunk.toString();
  });

  let exited = false;
  let failure = null;
  const exitPromise = new Promise((resolve) => {
    child.on('exit', (code, signal) => {
      exited = true;
      resolve({ code, signal });
    });
    // Without an 'error' listener a failed spawn (ENOENT, EACCES, ...) would
    // surface as an unhandled 'error' event and crash the process. Node also
    // does not emit 'exit' after a failed spawn, so `stop()` would otherwise
    // wait forever.
    child.on('error', (err) => {
      failure = failure ?? err;
      exited = true;
      resolve({ code: null, signal: null });
    });
  });

  // Only hand out a handle once the process is actually running. 'spawn' is
  // emitted before any other child event when the launch succeeds.
  const started = await Promise.race([
    new Promise((resolve) => {
      child.once('spawn', () => resolve(true));
    }),
    exitPromise.then(() => false),
  ]);
  if (!started) {
    cleanup();
    throw new Error(
      `Failed to start recorder (${recorder.tool}): ${failure?.message ?? 'unknown error'}`,
      { cause: failure ?? undefined },
    );
  }

  return {
    tool: recorder.tool,
    outPath,
    async stop() {
      if (!exited) {
        // SIGINT is important: ffmpeg and sox both flush the output file
        // cleanly on SIGINT. SIGTERM/KILL can leave a truncated WAV header.
        try { child.kill('SIGINT'); } catch {}
      }
      await exitPromise;

      if (failure) {
        // A process-level error (e.g. the signal could not be delivered)
        // means the capture cannot be trusted.
        throw new Error(`Recorder failed: ${failure.message}`, { cause: failure });
      }
      if (!fs.existsSync(outPath)) {
        throw new Error(`Recorder produced no output file. stderr: ${stderr.trim() || '<empty>'}`);
      }
      const data = fs.readFileSync(outPath);
      if (data.length < 44) {
        // 44 bytes is the minimum WAV header.
        throw new Error('Recording too short or empty.');
      }
      return { path: outPath, data, mime: 'audio/wav', filename: 'voice.wav' };
    },
    cleanup,
  };
}
|