convoai 1.3.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,709 @@
1
+ import { createServer as createHttpServer } from 'node:http';
2
+ import { WebSocketServer } from 'ws';
3
+ import { readFileSync } from 'node:fs';
4
+ import { fileURLToPath } from 'node:url';
5
+ import { dirname, join } from 'node:path';
6
+ import { execSync } from 'node:child_process';
7
+ import chalk from 'chalk';
8
+ import { resolveConfig, loadConfig, saveConfig } from '../config/manager.js';
9
+ import { getAgentAPI } from './agent/_helpers.js';
10
+ import { runPanel } from './agent/panel.js';
11
+ import { generateRtcToken } from '../utils/token.js';
12
+ import { findChrome } from '../utils/find-chrome.js';
13
+ import { withSpinner } from '../ui/spinner.js';
14
+ import { printSuccess, printError, printHint } from '../ui/output.js';
15
+ import { printKeyValue } from '../ui/table.js';
16
+ import { handleError } from '../utils/errors.js';
17
+ import { track } from '../utils/telemetry.js';
18
+ import { getStrings } from '../ui/i18n.js';
19
+ import { LLM_PROVIDERS, TTS_PROVIDERS, ASR_PROVIDERS, ASR_LANGUAGES, } from '../providers/catalog.js';
20
+ // ─── __dirname for ESM ──────────────────────────────────────────────────────
21
// ES modules expose no __filename/__dirname bindings, so both are derived
// from this module's own URL.
const __filename = fileURLToPath(new URL(import.meta.url));
const __dirname = dirname(__filename);
23
+ // ─── Greeting by Language ───────────────────────────────────────────────────
24
// Greeting spoken by the agent, keyed by ASR language code.
const GREETINGS = {
    'zh-CN': '\u4F60\u597D\uFF0C\u6211\u662F\u58F0\u7F51 ConvoAI \u8BED\u97F3\u52A9\u624B\uFF0C\u6709\u4EC0\u4E48\u53EF\u4EE5\u5E2E\u4F60\u7684\u5417\uFF1F',
    'zh-HK': '\u4F60\u597D\uFF0C\u6211\u662F\u8072\u7DB2 ConvoAI \u8A9E\u97F3\u52A9\u624B\uFF0C\u6709\u4EC0\u9EBC\u53EF\u4EE5\u5E6B\u4F60\u7684\u55CE\uFF1F',
    'zh-TW': '\u4F60\u597D\uFF0C\u6211\u662F\u8072\u7DB2 ConvoAI \u8A9E\u97F3\u52A9\u624B\uFF0C\u6709\u4EC0\u9EBC\u53EF\u4EE5\u5E6B\u4F60\u7684\u55CE\uFF1F',
    'en-US': 'Hi, I\'m your Agora ConvoAI voice assistant. How can I help you?',
    'ja-JP': '\u3053\u3093\u306B\u3061\u306F\u3001Agora ConvoAI \u97F3\u58F0\u30A2\u30B7\u30B9\u30BF\u30F3\u30C8\u3067\u3059\u3002\u4F55\u304B\u304A\u624B\u4F1D\u3044\u3067\u304D\u307E\u3059\u304B\uFF1F',
    'ko-KR': '\uC548\uB155\uD558\uC138\uC694, Agora ConvoAI \uC74C\uC131 \uC5B4\uC2DC\uC2A4\uD134\uD2B8\uC785\uB2C8\uB2E4. \uBB34\uC5C7\uC744 \uB3C4\uC640\uB4DC\uB9B4\uAE4C\uC694?',
};
/**
 * Return the greeting for an ASR language code.
 *
 * Only keys defined directly on GREETINGS count as a match, so inherited
 * Object.prototype members such as "constructor" or "__proto__" can never be
 * returned in place of a greeting string.
 *
 * @param {string} [language] - Language code such as 'zh-CN' or 'en-US'.
 * @returns {string} The matching greeting, or the 'en-US' greeting when the
 *   language is missing or has no entry.
 */
function getGreeting(language) {
    const isKnown = Object.prototype.hasOwnProperty.call(GREETINGS, language);
    return isKnown ? GREETINGS[language] : GREETINGS['en-US'];
}
35
+ // ─── Locate HTML Clients ────────────────────────────────────────────────────
36
/**
 * Search for `src/web/<fileName>`, starting in this module's directory and
 * climbing towards the filesystem root.
 *
 * At most six directories are probed. A candidate counts as found when it can
 * be read; any read error just moves the search one level up.
 *
 * @param {string} fileName - File name expected inside `src/web`.
 * @returns {string | null} Path of the first readable candidate, or null.
 */
function locateWebAsset(fileName) {
    const maxLevels = 6;
    let dir = __dirname;
    for (let level = 0; level < maxLevels; level++) {
        const candidate = join(dir, 'src', 'web', fileName);
        try {
            readFileSync(candidate);
            return candidate;
        }
        catch { /* not at this level, keep climbing */ }
        const parent = dirname(dir);
        if (parent === dir) {
            // Reached the filesystem root; further iterations would re-test the same path.
            break;
        }
        dir = parent;
    }
    return null;
}
/**
 * Locate the browser-mode web client page (`src/web/client.html`).
 *
 * @returns {string} Path to the HTML file.
 * @throws {Error} When the file is not found in the searched directories.
 */
function findClientHtml() {
    const found = locateWebAsset('client.html');
    if (found === null) {
        throw new Error('Could not find web client HTML. Reinstall the package.');
    }
    return found;
}
/**
 * Locate the terminal-mode chat client page (`src/web/chat-client.html`).
 *
 * @returns {string} Path to the HTML file.
 * @throws {Error} When the file is not found in the searched directories.
 */
function findChatClientHtml() {
    const found = locateWebAsset('chat-client.html');
    if (found === null) {
        throw new Error('Could not find chat-client.html. Reinstall the package.');
    }
    return found;
}
62
+ // ─── LLM Provider Ordering ─────────────────────────────────────────────────
63
// Menu order of LLM providers for the 'cn' language.
const LLM_ORDER_CN = [
    'dashscope', 'deepseek', 'openai', 'groq', 'anthropic',
    'gemini', 'azure', 'bedrock', 'dify', 'custom',
];
// Menu order of LLM providers for every other language.
const LLM_ORDER_EN = [
    'openai', 'groq', 'anthropic', 'gemini', 'dashscope',
    'deepseek', 'azure', 'bedrock', 'dify', 'custom',
];
// Display names used instead of the catalog names when the language is 'cn'.
const LLM_CN_NAMES = {
    dashscope: '\u963F\u91CC\u901A\u4E49\u5343\u95EE',
    deepseek: 'DeepSeek',
    openai: 'OpenAI',
    groq: 'Groq',
    anthropic: 'Anthropic',
    gemini: 'Gemini',
    azure: 'Azure',
    bedrock: 'Bedrock',
    dify: 'Dify',
    custom: 'Custom',
};
/**
 * Build the LLM provider menu in the order defined for the given language.
 *
 * Ids from the ordering list that have no entry in LLM_PROVIDERS are skipped.
 *
 * @param {string} lang - 'cn' selects the Chinese ordering and display names;
 *   any other value selects the English ordering and the catalog names.
 * @returns {{name: string, value: string}[]} Choices in menu order.
 */
function getOrderedLlmChoices(lang) {
    const useChinese = lang === 'cn';
    const choices = [];
    for (const id of (useChinese ? LLM_ORDER_CN : LLM_ORDER_EN)) {
        const entry = LLM_PROVIDERS.find((candidate) => candidate.value === id);
        if (!entry) {
            continue;
        }
        let label = entry.name;
        if (useChinese) {
            label = LLM_CN_NAMES[id] ?? entry.name;
        }
        choices.push({ name: label, value: entry.value });
    }
    return choices;
}
95
+ // ─── Command Registration ───────────────────────────────────────────────────
96
/**
 * Register the `go` subcommand and its options on the given CLI program.
 *
 * The action delegates to goAction and routes any rejection to handleError.
 *
 * @param {object} program - CLI program object offering the chainable
 *   command/description/option/action methods used below.
 */
export function registerGo(program) {
    const optionSpecs = [
        ['-c, --channel <name>', 'Override channel name'],
        ['--setup', 'Re-configure ASR/LLM/TTS before starting'],
        ['--model <model>', 'One-time model override'],
        ['--tts <vendor>', 'One-time TTS override'],
        ['--asr <vendor>', 'One-time ASR override'],
        ['--browser', 'Force browser mode'],
        ['--profile <name>', 'Config profile'],
    ];
    const described = program
        .command('go')
        .description('Start a voice conversation (uses last config)');
    // Thread the builder through each call so chaining matches the fluent form.
    const withOptions = optionSpecs.reduce((cmd, [flags, help]) => cmd.option(flags, help), described);
    withOptions.action(async (opts) => {
        try {
            await goAction(opts);
        }
        catch (error) {
            handleError(error);
        }
    });
}
116
+ // ─── Action ─────────────────────────────────────────────────────────────────
117
/**
 * Print a localized error and follow-up hint, then terminate with exit code 1.
 *
 * @param {string} lang - 'cn' selects the Chinese text; anything else selects English.
 * @param {{cn: string, en: string}} error - Text passed to printError.
 * @param {{cn: string, en: string}} hint - Text passed to printHint.
 */
function exitWithHint(lang, error, hint) {
    const pick = (text) => (lang === 'cn' ? text.cn : text.en);
    printError(pick(error));
    printHint(pick(hint));
    process.exit(1);
}
/**
 * Start an HTTP server that answers every request with the given HTML page.
 *
 * @param {string} html - Page content served with a text/html content type.
 * @param {number} port - TCP port to listen on.
 * @returns {Promise<import('node:http').Server>} Resolves with the server once
 *   it is listening; rejects if the server emits 'error' first.
 */
function serveHtml(html, port) {
    const httpServer = createHttpServer((_, res) => {
        res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8' });
        res.end(html);
    });
    return new Promise((resolve, reject) => {
        httpServer.on('error', reject);
        httpServer.listen(port, () => resolve(httpServer));
    });
}
/**
 * Run the `go` command: validate configuration, optionally run the inline
 * setup flow, start an agent, connect a local web client (Chrome driven by
 * puppeteer-core, or the default browser) and hand control to the panel.
 *
 * @param {object} opts - Parsed CLI options. Fields read here: profile, setup,
 *   model, tts, asr, channel, browser.
 * @returns {Promise<void>}
 */
async function goAction(opts) {
    // ═════════════════════════════════════════════════════════════════════════
    // Step 1: Validate credentials
    // ═════════════════════════════════════════════════════════════════════════
    const config = resolveConfig(opts.profile);
    const configObj = loadConfig();
    const profileName = opts.profile ?? configObj.default_profile ?? 'default';
    const profile = configObj.profiles?.[profileName] ?? {};
    const lang = config.region === 'cn' ? 'cn' : 'global';
    const str = getStrings(lang);
    const certificateHint = {
        cn: '\u8FD0\u884C: convoai config set app_certificate <cert>',
        en: 'Run: convoai config set app_certificate <cert>',
    };
    const setupHint = {
        cn: '\u8FD0\u884C: convoai go --setup',
        en: 'Run: convoai go --setup',
    };
    // Check Agora credentials
    if (!config.app_id || !config.customer_id || !config.customer_secret) {
        exitWithHint(lang, { cn: '\u672A\u914D\u7F6E Agora \u51ED\u8BC1\u3002', en: 'No Agora credentials configured.' }, { cn: '\u8FD0\u884C: convoai quickstart', en: 'Run: convoai quickstart' });
    }
    // Check App Certificate
    if (!configObj.app_certificate && !process.env.AGORA_APP_CERTIFICATE) {
        exitWithHint(lang, { cn: '\u672A\u914D\u7F6E App Certificate\u3002', en: 'No App Certificate configured.' }, certificateHint);
    }
    // ═════════════════════════════════════════════════════════════════════════
    // Step 2: --setup mode (optional inline config)
    // ═════════════════════════════════════════════════════════════════════════
    // Setup runs BEFORE the LLM/TTS checks: those checks tell the user to run
    // `convoai go --setup`, which must therefore work on an unconfigured profile.
    if (opts.setup) {
        await runSetupFlow(configObj, profileName, lang, str);
        // Reload config after setup
        const updatedConfig = loadConfig();
        const updatedProfile = updatedConfig.profiles?.[profileName] ?? {};
        Object.assign(profile, updatedProfile);
        Object.assign(config, resolveConfig(opts.profile));
    }
    // Check LLM
    if (!profile.llm?.url && !profile.llm?.api_key && !config.llm?.url && !config.llm?.api_key) {
        exitWithHint(lang, { cn: '\u672A\u914D\u7F6E LLM\u3002', en: 'No LLM configured.' }, setupHint);
    }
    // Check TTS
    if (!profile.tts?.vendor && !config.tts?.vendor) {
        exitWithHint(lang, { cn: '\u672A\u914D\u7F6E TTS\u3002', en: 'No TTS configured.' }, setupHint);
    }
    // ═════════════════════════════════════════════════════════════════════════
    // Step 3: Apply one-time overrides (not saved to config)
    // ═════════════════════════════════════════════════════════════════════════
    const effectiveLlm = { ...config.llm };
    const effectiveTts = { ...config.tts };
    const effectiveAsr = { ...config.asr };
    if (opts.model) {
        // Copy params as well: the spread above is shallow, so assigning into
        // effectiveLlm.params directly would also modify config.llm.params.
        effectiveLlm.params = { ...effectiveLlm.params, model: opts.model };
    }
    if (opts.tts) {
        effectiveTts.vendor = opts.tts;
    }
    if (opts.asr) {
        effectiveAsr.vendor = opts.asr;
    }
    // ═════════════════════════════════════════════════════════════════════════
    // Step 4: Cleanup leftover agents + ports
    // ═════════════════════════════════════════════════════════════════════════
    // Kill leftover processes on ports 3210 and 3211.
    // NOTE(review): this force-kills whatever process holds those ports, not
    // only earlier convoai runs, and relies on lsof/xargs being available.
    try {
        execSync('lsof -ti:3210,3211 | xargs kill -9 2>/dev/null', { stdio: 'ignore' });
        await new Promise((resolve) => setTimeout(resolve, 300));
    }
    catch { /* no leftover processes */ }
    // Stop any running agents (best effort)
    const api = getAgentAPI(opts.profile);
    try {
        const existing = await api.list({ state: 2, limit: 10 });
        if (existing.data.list.length > 0) {
            await withSpinner(lang === 'cn'
                ? `\u6B63\u5728\u6E05\u7406 ${existing.data.list.length} \u4E2A\u6B8B\u7559 Agent...`
                : `Cleaning up ${existing.data.list.length} leftover agent(s)...`, async () => {
                for (const leftover of existing.data.list) {
                    try {
                        await api.stop(leftover.agent_id);
                    }
                    catch { /* best effort */ }
                }
            });
        }
    }
    catch { /* listing failed; continue without cleanup */ }
    // ═════════════════════════════════════════════════════════════════════════
    // Step 5: Generate channel + tokens
    // ═════════════════════════════════════════════════════════════════════════
    const channelName = opts.channel ?? `go-${Date.now().toString(36)}`;
    const agentUid = 0;
    const clientUid = 12345;
    const agentToken = await generateRtcToken(channelName, agentUid);
    const clientToken = await generateRtcToken(channelName, clientUid);
    if (!agentToken) {
        exitWithHint(lang, {
            cn: 'RTC Token \u751F\u6210\u5931\u8D25\u3002\u8BF7\u68C0\u67E5 app_certificate\u3002',
            en: 'RTC token generation failed. Check app_certificate.',
        }, certificateHint);
    }
    // ═════════════════════════════════════════════════════════════════════════
    // Step 6: Build agent request
    // ═════════════════════════════════════════════════════════════════════════
    // Auto-greeting based on the configured ASR language (replaces any
    // greeting_message already present in the LLM config).
    const asrLang = effectiveAsr.language ?? effectiveAsr.params?.language ?? 'en-US';
    const llmForRequest = { ...effectiveLlm, greeting_message: getGreeting(asrLang) };
    // ASR fallback: if no vendor configured, use ares/zh-CN
    const asrForRequest = effectiveAsr.vendor
        ? effectiveAsr
        : { vendor: 'ares', language: 'zh-CN' };
    const request = {
        name: `go-${Date.now()}-${Math.random().toString(36).slice(2, 6)}`,
        properties: {
            channel: channelName,
            token: agentToken,
            agent_rtc_uid: String(agentUid),
            remote_rtc_uids: ['*'],
            idle_timeout: 600,
            llm: llmForRequest,
            tts: effectiveTts,
            asr: asrForRequest,
            turn_detection: {
                silence_duration_ms: 1000,
            },
            parameters: {
                enable_metrics: true,
            },
        },
    };
    // ═════════════════════════════════════════════════════════════════════════
    // Step 7: Start agent
    // ═════════════════════════════════════════════════════════════════════════
    const result = await withSpinner(lang === 'cn' ? '\u6B63\u5728\u542F\u52A8 Agent...' : 'Starting agent...', () => api.start(request));
    printSuccess(lang === 'cn' ? 'Agent \u5DF2\u542F\u52A8\u3002' : 'Agent started.');
    track('go');
    printKeyValue([
        ['Agent ID', result.agent_id],
        ['Channel', channelName],
        ['Status', result.status],
    ]);
    // ═════════════════════════════════════════════════════════════════════════
    // Step 8: Detect Chrome -> terminal or browser mode
    // ═════════════════════════════════════════════════════════════════════════
    const chromePath = findChrome();
    let server;
    let wss;
    let browser = null;
    let released = false;
    // Releases everything acquired from here on plus the agent started above.
    // Used both as the panel's onExit hook and on failure; runs at most once.
    const releaseAll = async () => {
        if (released) {
            return;
        }
        released = true;
        wss?.close();
        server?.close();
        if (browser) {
            try {
                await browser.close();
            }
            catch { /* browser may already be gone */ }
        }
        try {
            await api.stop(result.agent_id);
        }
        catch { /* agent may already be stopped */ }
    };
    try {
        if (chromePath && !opts.browser) {
            // ── Terminal mode: Chrome driven by puppeteer-core ────────────────
            const html = readFileSync(findChatClientHtml(), 'utf-8');
            const httpPort = 3210;
            const wsPort = 3211;
            server = await serveHtml(html, httpPort);
            wss = new WebSocketServer({ port: wsPort });
            // Wait for the socket to bind so a busy port surfaces as a rejection
            // here instead of an unhandled 'error' event.
            await new Promise((resolve, reject) => {
                wss.once('error', reject);
                wss.once('listening', () => {
                    wss.off('error', reject);
                    resolve();
                });
            });
            // Launch puppeteer-core headless=false for audio support
            const puppeteer = await import('puppeteer-core');
            browser = await puppeteer.default.launch({
                executablePath: chromePath,
                headless: false,
                args: [
                    '--use-fake-ui-for-media-stream',
                    '--autoplay-policy=no-user-gesture-required',
                    '--no-sandbox',
                    '--window-size=1,1',
                    '--window-position=-2000,-2000',
                    '--enable-features=WebRtcAecAudioProcessing',
                ],
            });
            const page = await browser.newPage();
            const context = browser.defaultBrowserContext();
            await context.overridePermissions(`http://localhost:${httpPort}`, ['microphone']);
            const params = new URLSearchParams({
                appId: config.app_id,
                channel: channelName,
                token: clientToken ?? '',
                uid: String(clientUid),
                wsPort: String(wsPort),
            });
            await page.goto(`http://localhost:${httpPort}?${params}`);
            // macOS: hide Chrome window and restore terminal focus
            if (process.platform === 'darwin') {
                try {
                    execSync(`osascript -e 'tell application "System Events"
set visible of process "Google Chrome" to false
end tell' 2>/dev/null`, { stdio: 'ignore' });
                    execSync(`osascript -e 'tell application "System Events"
set frontProcess to first process whose frontmost is false and visible is true and name is not "Google Chrome"
set frontmost of frontProcess to true
end tell' 2>/dev/null`, { stdio: 'ignore' });
                }
                catch { /* osascript may fail, not critical */ }
            }
            console.log('');
            printSuccess(lang === 'cn'
                ? '\u8BED\u97F3\u5BF9\u8BDD\u5DF2\u5F00\u542F\uFF01\u76F4\u63A5\u5F00\u53E3\u8BF4\u8BDD\u5373\u53EF\u3002'
                : 'Voice chat is live! Start speaking.');
            console.log('');
        }
        else {
            // ── Browser mode: open client.html in default browser ─────────────
            const html = readFileSync(findClientHtml(), 'utf-8');
            const port = 3210;
            server = await serveHtml(html, port);
            const params = new URLSearchParams({
                appId: config.app_id,
                channel: channelName,
                token: clientToken ?? '',
                uid: String(clientUid),
            });
            const url = `http://localhost:${port}?${params}`;
            console.log('');
            console.log(chalk.cyan('  Voice chat: ') + chalk.bold(url));
            console.log('');
            try {
                let openCommand;
                if (process.platform === 'darwin') {
                    openCommand = `open "${url}"`;
                }
                else if (process.platform === 'win32') {
                    // `start` takes its first quoted argument as the window title,
                    // so an empty title must precede the quoted URL.
                    openCommand = `start "" "${url}"`;
                }
                else {
                    openCommand = `xdg-open "${url}"`;
                }
                execSync(openCommand);
                printSuccess(lang === 'cn'
                    ? '\u6D4F\u89C8\u5668\u5DF2\u6253\u5F00\u3002\u5141\u8BB8\u9EA6\u514B\u98CE\u540E\u5F00\u59CB\u5BF9\u8BDD\uFF01'
                    : 'Browser opened. Allow microphone and start talking!');
            }
            catch {
                console.log(chalk.dim(lang === 'cn'
                    ? '  \u8BF7\u5728\u6D4F\u89C8\u5668\u4E2D\u6253\u5F00\u4E0A\u65B9\u94FE\u63A5\u3002'
                    : '  Open the URL above in your browser.'));
            }
        }
        // ── Enter panel ─────────────────────────────────────────────────────
        await runPanel({
            api,
            agentId: result.agent_id,
            channel: channelName,
            lang,
            config: profile,
            onExit: releaseAll,
        });
    }
    catch (error) {
        // Do not leave the agent, the servers or Chrome running after a failure.
        await releaseAll();
        throw error;
    }
}
422
+ // ═══════════════════════════════════════════════════════════════════════════
423
+ // --setup flow: inline ASR/LLM/TTS configuration
424
+ // ═══════════════════════════════════════════════════════════════════════════
425
/**
 * Build the `llm` profile entry for the given provider style.
 *
 * @param {string | undefined} style - Provider style; 'gemini' and 'anthropic'
 *   get dedicated shapes, everything else gets the default shape.
 * @param {string} url - API URL. For 'gemini' it may contain `{model}` and
 *   `{api_key}` placeholders, which are substituted.
 * @param {string} apiKey - Provider API key.
 * @param {string} model - Model identifier.
 * @returns {object} The LLM configuration object.
 */
function buildLlmConfig(style, url, apiKey, model) {
    const systemPrompt = 'You are a friendly AI voice assistant. Please respond concisely.';
    const params = { model, max_tokens: 512, temperature: 0.7 };
    if (style === 'gemini') {
        // Function replacers insert the values literally; a plain string
        // replacement would expand sequences such as "$&" or "$1".
        const resolvedUrl = url
            .replace('{model}', () => model)
            .replace('{api_key}', () => apiKey);
        return {
            url: resolvedUrl,
            style: 'gemini',
            system_messages: [
                { parts: [{ text: systemPrompt }], role: 'user' },
            ],
            params,
        };
    }
    const systemMessages = [{ role: 'system', content: systemPrompt }];
    if (style === 'anthropic') {
        return {
            url,
            api_key: apiKey,
            style: 'anthropic',
            headers: '{"anthropic-version":"2023-06-01"}',
            system_messages: systemMessages,
            params,
        };
    }
    return {
        url,
        api_key: apiKey,
        system_messages: systemMessages,
        params,
    };
}
// ═══════════════════════════════════════════════════════════════════════════
// --setup flow: inline ASR/LLM/TTS configuration
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Interactively configure ASR, LLM and TTS for one profile.
 *
 * The profile is stored on `configObj.profiles[profileName]` and written with
 * saveConfig after each of the three sections. Free-text answers (keys, URLs,
 * regions, model and voice names) are trimmed before being stored, matching
 * the trimmed value the prompts validate.
 *
 * @param {object} configObj - Loaded configuration object; modified in place.
 * @param {string} profileName - Name of the profile to configure.
 * @param {string} lang - 'cn' or another value; selects provider ordering and
 *   the default ASR language.
 * @param {object} str - Localized UI strings.
 * @returns {Promise<void>}
 */
async function runSetupFlow(configObj, profileName, lang, str) {
    const { default: inquirer } = await import('inquirer');
    const required = (v) => v.trim().length > 0 || 'Required';
    if (!configObj.profiles) {
        configObj.profiles = {};
    }
    const profile = configObj.profiles[profileName] ?? {};
    // ── ASR ──────────────────────────────────────────────────────────────
    console.log('');
    console.log(chalk.bold(`  ${str.step2.emoji}  ${str.step2.title}`));
    console.log(chalk.dim(`  ${str.step2.subtitle}`));
    console.log('');
    const asrChoices = ASR_PROVIDERS.map((p) => {
        let label = p.name;
        if (p.vendor === 'ares') {
            label += chalk.dim(` \u2014 ${str.asrRecommend}`);
        }
        else if (p.note) {
            label += chalk.dim(` \u2014 ${p.note}`);
        }
        if (p.beta) {
            label += chalk.dim(' (Beta)');
        }
        return { name: label, value: p.vendor };
    });
    const { vendor: asrVendor } = await inquirer.prompt([
        {
            type: 'list',
            name: 'vendor',
            message: `${str.asrProvider}:`,
            choices: asrChoices,
            default: profile.asr?.vendor ?? 'ares',
        },
    ]);
    const selectedAsr = ASR_PROVIDERS.find((p) => p.vendor === asrVendor);
    // API Key (not needed for ARES)
    let asrKey;
    if (asrVendor !== 'ares') {
        const { key } = await inquirer.prompt([
            {
                type: 'password',
                name: 'key',
                message: `${str.apiKey}:`,
                mask: '*',
                validate: required,
            },
        ]);
        asrKey = key.trim();
    }
    // Region, for providers that require one
    let asrRegion;
    if (selectedAsr.requiresRegion) {
        const { region } = await inquirer.prompt([
            {
                type: 'input',
                name: 'region',
                message: 'Azure ASR Region:',
                default: 'eastus',
                validate: required,
            },
        ]);
        asrRegion = region.trim();
    }
    // Language selection
    const defaultLang = lang === 'cn' ? 'zh-CN' : 'en-US';
    const langChoices = ASR_LANGUAGES.map((l) => ({ name: l.name, value: l.value }));
    const { language: asrLanguage } = await inquirer.prompt([
        {
            type: 'list',
            name: 'language',
            message: `${str.language}:`,
            choices: langChoices,
            default: profile.asr?.language ?? defaultLang,
        },
    ]);
    // Build ASR config
    if (asrVendor === 'ares') {
        profile.asr = { vendor: 'ares', language: asrLanguage };
    }
    else {
        const asrParams = { key: asrKey };
        if (asrRegion) {
            asrParams.region = asrRegion;
        }
        if (asrLanguage) {
            asrParams.language = asrLanguage;
        }
        // Vendor defaults only fill in parameters not already set above.
        if (selectedAsr.defaultParams) {
            for (const [k, v] of Object.entries(selectedAsr.defaultParams)) {
                if (!(k in asrParams)) {
                    asrParams[k] = v;
                }
            }
        }
        profile.asr = { vendor: asrVendor, language: asrLanguage, params: asrParams };
    }
    configObj.profiles[profileName] = profile;
    saveConfig(configObj);
    printSuccess(`${str.asrConfigured}: ${asrVendor} (${asrLanguage})`);
    // ── LLM ─────────────────────────────────────────────────────────────
    console.log('');
    console.log(chalk.bold(`  ${str.step3.emoji}  ${str.step3.title}`));
    console.log(chalk.dim(`  ${str.step3.subtitle}`));
    console.log('');
    const llmChoices = getOrderedLlmChoices(lang);
    const { provider: llmProvider } = await inquirer.prompt([
        {
            type: 'list',
            name: 'provider',
            message: `${str.llmProvider}:`,
            choices: llmChoices,
        },
    ]);
    const selectedLlm = LLM_PROVIDERS.find((p) => p.value === llmProvider);
    // API Key
    const { apiKey: rawLlmApiKey } = await inquirer.prompt([
        {
            type: 'password',
            name: 'apiKey',
            message: `${str.apiKey}:`,
            mask: '*',
            validate: required,
        },
    ]);
    const llmApiKey = rawLlmApiKey.trim();
    // Model: pick from the catalog list when there is one, otherwise free text
    const modelQuestion = selectedLlm.models.length > 0
        ? {
            type: 'list',
            name: 'model',
            message: `${str.model}:`,
            choices: selectedLlm.models,
            default: selectedLlm.defaultModel,
        }
        : {
            type: 'input',
            name: 'model',
            message: `${str.model}:`,
            default: selectedLlm.defaultModel || undefined,
            validate: required,
        };
    const { model: rawModel } = await inquirer.prompt([modelQuestion]);
    const llmModel = rawModel.trim();
    // URL: pre-filled with the catalog URL when the provider has one
    const { url: rawUrl } = await inquirer.prompt([
        {
            type: 'input',
            name: 'url',
            message: 'API URL:',
            default: selectedLlm.url || undefined,
            validate: required,
        },
    ]);
    const llmUrl = rawUrl.trim();
    // Build LLM config based on provider style
    profile.llm = buildLlmConfig(selectedLlm.style, llmUrl, llmApiKey, llmModel);
    configObj.profiles[profileName] = profile;
    saveConfig(configObj);
    printSuccess(`${str.llmConfigured}: ${llmModel} via ${selectedLlm.name}`);
    // ── TTS ─────────────────────────────────────────────────────────────
    console.log('');
    console.log(chalk.bold(`  ${str.step4.emoji}  ${str.step4.title}`));
    console.log(chalk.dim(`  ${str.step4.subtitle}`));
    console.log('');
    const ttsChoices = TTS_PROVIDERS.map((p) => {
        let label = p.name;
        if (p.beta) {
            label += chalk.dim(' (Beta)');
        }
        return { name: label, value: p.vendor };
    });
    const { vendor: ttsVendor } = await inquirer.prompt([
        {
            type: 'list',
            name: 'vendor',
            message: `${str.ttsProvider}:`,
            choices: ttsChoices,
            default: profile.tts?.vendor,
        },
    ]);
    const selectedTts = TTS_PROVIDERS.find((p) => p.vendor === ttsVendor);
    // API Key
    const { key: ttsKey } = await inquirer.prompt([
        {
            type: 'password',
            name: 'key',
            message: `${str.ttsApiKey}:`,
            mask: '*',
            validate: required,
        },
    ]);
    const ttsParams = { key: ttsKey.trim() };
    // Microsoft-specific: region + voice name
    if (ttsVendor === 'microsoft') {
        const msAnswers = await inquirer.prompt([
            {
                type: 'input',
                name: 'region',
                message: 'Azure TTS Region:',
                default: 'eastus',
                validate: required,
            },
            {
                type: 'input',
                name: 'voiceName',
                message: 'Voice Name:',
                default: 'en-US-AndrewMultilingualNeural',
                validate: required,
            },
        ]);
        ttsParams.region = msAnswers.region.trim();
        ttsParams.voice_name = msAnswers.voiceName.trim();
    }
    // Providers flagged with requiresGroupId need a group_id
    if (selectedTts.requiresGroupId) {
        const { groupId } = await inquirer.prompt([
            {
                type: 'input',
                name: 'groupId',
                message: `${str.groupId}:`,
                validate: required,
            },
        ]);
        ttsParams.group_id = groupId.trim();
    }
    // OpenAI TTS and Cartesia use api_key instead of key
    if (ttsVendor === 'openai' || ttsVendor === 'cartesia') {
        ttsParams.api_key = ttsParams.key;
        delete ttsParams.key;
    }
    // Auto-fill vendor defaults without overriding anything entered above
    if (selectedTts.defaultParams) {
        for (const [k, v] of Object.entries(selectedTts.defaultParams)) {
            if (!(k in ttsParams)) {
                ttsParams[k] = v;
            }
        }
    }
    profile.tts = { vendor: ttsVendor, params: ttsParams };
    configObj.profiles[profileName] = profile;
    saveConfig(configObj);
    printSuccess(`${str.ttsConfigured}: ${selectedTts.name}`);
}
709
+ //# sourceMappingURL=go.js.map