uwonbot 1.1.6 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/uwonbot.js CHANGED
@@ -14,7 +14,7 @@ showBanner();
14
14
  program
15
15
  .name('uwonbot')
16
16
  .description('Uwonbot AI Assistant — Your AI controls your computer')
17
- .version('1.1.6');
17
+ .version('1.1.7');
18
18
 
19
19
  program
20
20
  .command('login')
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "uwonbot",
3
- "version": "1.1.6",
3
+ "version": "1.1.9",
4
4
  "description": "Uwonbot AI Assistant CLI — Your AI controls your computer",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/agent.js CHANGED
@@ -221,6 +221,118 @@ async function takeScreenshot() {
221
221
  }
222
222
  }
223
223
 
224
+ // ─── Window Management ───
225
+
226
/**
 * Report the application name and geometry of the frontmost window.
 *
 * - macOS: asks System Events (via `osascript`) for the frontmost process and
 *   the position/size of its front window.
 * - Windows: calls user32 `GetForegroundWindow` / `GetWindowRect` through
 *   PowerShell; the app name is always reported as `'foreground'`.
 * - Any other platform, or any failure: a fixed fallback object.
 *
 * @returns {Promise<{app: string, x: number, y: number, width: number, height: number}>}
 *   Never rejects; numeric fields are always finite numbers.
 */
async function windowFrontmost() {
  const fallback = { app: 'unknown', x: 0, y: 0, width: 800, height: 600 };

  // Parse "a,b" into two numbers, substituting the matching default for any
  // component that is missing or not numeric (so callers never receive NaN).
  const parsePair = (text, defaults) => {
    const fields = text ? text.split(',') : [];
    return defaults.map((defaultValue, i) => {
      const raw = (fields[i] ?? '').trim();
      const value = raw === '' ? NaN : Number(raw);
      return Number.isFinite(value) ? value : defaultValue;
    });
  };

  try {
    if (platform === 'darwin') {
      const { stdout } = await execAsync(`osascript -e '
tell application "System Events"
  set fp to first application process whose frontmost is true
  set appName to name of fp
  try
    set w to front window of fp
    set {px, py} to position of w
    set {sw, sh} to size of w
    return appName & "||" & px & "," & py & "||" & sw & "," & sh
  end try
  return appName & "||0,0||800,600"
end tell'`);
      const parts = stdout.trim().split('||');
      const [x, y] = parsePair(parts[1], [0, 0]);
      const [width, height] = parsePair(parts[2], [800, 600]);
      return { app: parts[0], x, y, width, height };
    }

    if (platform === 'win32') {
      const { stdout } = await execAsync(`powershell -Command "[Console]::OutputEncoding = [Text.Encoding]::UTF8; Add-Type -TypeDefinition 'using System; using System.Runtime.InteropServices; public class Win { [DllImport(\\\"user32.dll\\\")] public static extern IntPtr GetForegroundWindow(); [DllImport(\\\"user32.dll\\\")] public static extern bool GetWindowRect(IntPtr h, out RECT r); [StructLayout(LayoutKind.Sequential)] public struct RECT { public int L,T,R,B; } }'; $h=[Win]::GetForegroundWindow(); $r=New-Object Win+RECT; [Win]::GetWindowRect($h,[ref]$r); Write-Output ($r.L.ToString()+','+$r.T.ToString()+'||'+($r.R-$r.L).ToString()+','+($r.B-$r.T).ToString())"`);
      // The un-captured GetWindowRect call makes PowerShell print its boolean
      // result ("True") before the geometry line, so only the last line of
      // output is the "x,y||w,h" record we want.
      const lastLine = stdout.trim().split(/\r?\n/).pop();
      const parts = lastLine.split('||');
      const [x, y] = parsePair(parts[0], [0, 0]);
      const [width, height] = parsePair(parts[1], [800, 600]);
      return { app: 'foreground', x, y, width, height };
    }

    return fallback;
  } catch {
    // Best-effort query: any failure degrades to the fixed fallback geometry.
    return fallback;
  }
}
255
+
256
/**
 * Move a window so its top-left corner is at (x, y).
 *
 * - macOS: moves the front window of process `app`, or of the frontmost
 *   process when `app` is falsy.
 * - Windows: always moves the foreground window (`app` is ignored), keeping
 *   its current size.
 * - Other platforms: no-op that still reports success.
 *
 * @param {string} [app] Process name (macOS only).
 * @param {number} x Target left coordinate; rounded to an integer.
 * @param {number} y Target top coordinate; rounded to an integer.
 * @returns {Promise<boolean>} false when the coordinates are not finite
 *   numbers or the underlying command fails; true otherwise.
 */
async function windowMove(app, x, y) {
  const left = Math.round(Number(x));
  const top = Math.round(Number(y));
  // Refuse to splice "NaN"/"Infinity" into a script; the command would only fail.
  if (!Number.isFinite(left) || !Number.isFinite(top)) return false;

  try {
    if (platform === 'darwin') {
      // `app` is caller-supplied: escape it for the AppleScript string literal.
      const safeApp = String(app).replace(/[\\"]/g, '\\$&');
      const script = app
        ? `tell application "System Events" to tell process "${safeApp}" to set position of front window to {${left}, ${top}}`
        : `tell application "System Events"
set fp to first application process whose frontmost is true
set position of front window of fp to {${left}, ${top}}
end tell`;
      // The script is passed inside single quotes, so close/reopen the quoting
      // around every embedded single quote to keep it a single shell argument.
      const shellSafeScript = script.replace(/'/g, "'\\''");
      await execAsync(`osascript -e '${shellSafeScript}'`);
    } else if (platform === 'win32') {
      await execAsync(`powershell -Command "Add-Type -TypeDefinition 'using System; using System.Runtime.InteropServices; public class WM { [DllImport(\\\"user32.dll\\\")] public static extern IntPtr GetForegroundWindow(); [DllImport(\\\"user32.dll\\\")] public static extern bool MoveWindow(IntPtr h,int x,int y,int w,int h2,bool r); [DllImport(\\\"user32.dll\\\")] public static extern bool GetWindowRect(IntPtr h,out RECT r); [StructLayout(LayoutKind.Sequential)] public struct RECT{public int L,T,R,B;} }'; $h=[WM]::GetForegroundWindow(); $r=New-Object WM+RECT; [WM]::GetWindowRect($h,[ref]$r); [WM]::MoveWindow($h,${left},${top},$r.R-$r.L,$r.B-$r.T,$true)"`);
    }
    return true;
  } catch {
    return false;
  }
}
272
+
273
/**
 * Resize a window to w × h.
 *
 * - macOS: resizes the front window of process `app`, or of the frontmost
 *   process when `app` is falsy.
 * - Windows: always resizes the foreground window (`app` is ignored), keeping
 *   its current top-left position.
 * - Other platforms: no-op that still reports success.
 *
 * @param {string} [app] Process name (macOS only).
 * @param {number} w Target width; rounded to an integer.
 * @param {number} h Target height; rounded to an integer.
 * @returns {Promise<boolean>} false when the dimensions are not finite
 *   numbers or the underlying command fails; true otherwise.
 */
async function windowResize(app, w, h) {
  const width = Math.round(Number(w));
  const height = Math.round(Number(h));
  // Refuse to splice "NaN"/"Infinity" into a script; the command would only fail.
  if (!Number.isFinite(width) || !Number.isFinite(height)) return false;

  try {
    if (platform === 'darwin') {
      // `app` is caller-supplied: escape it for the AppleScript string literal.
      const safeApp = String(app).replace(/[\\"]/g, '\\$&');
      const script = app
        ? `tell application "System Events" to tell process "${safeApp}" to set size of front window to {${width}, ${height}}`
        : `tell application "System Events"
set fp to first application process whose frontmost is true
set size of front window of fp to {${width}, ${height}}
end tell`;
      // The script is passed inside single quotes, so close/reopen the quoting
      // around every embedded single quote to keep it a single shell argument.
      const shellSafeScript = script.replace(/'/g, "'\\''");
      await execAsync(`osascript -e '${shellSafeScript}'`);
    } else if (platform === 'win32') {
      await execAsync(`powershell -Command "Add-Type -TypeDefinition 'using System; using System.Runtime.InteropServices; public class WR { [DllImport(\\\"user32.dll\\\")] public static extern IntPtr GetForegroundWindow(); [DllImport(\\\"user32.dll\\\")] public static extern bool MoveWindow(IntPtr h,int x,int y,int w,int h2,bool r); [DllImport(\\\"user32.dll\\\")] public static extern bool GetWindowRect(IntPtr h,out RECT r); [StructLayout(LayoutKind.Sequential)] public struct RECT{public int L,T,R,B;} }'; $h=[WR]::GetForegroundWindow(); $r=New-Object WR+RECT; [WR]::GetWindowRect($h,[ref]$r); [WR]::MoveWindow($h,$r.L,$r.T,${width},${height},$true)"`);
    }
    return true;
  } catch {
    return false;
  }
}
289
+
290
/**
 * Close a window.
 *
 * - macOS: tells application `app` to close its front window; when `app` is
 *   falsy, clicks the AXCloseButton of the frontmost process's front window.
 * - Windows: posts message 0x0010 (WM_CLOSE) to the foreground window
 *   (`app` is ignored).
 * - Other platforms: no-op that still reports success.
 *
 * @param {string} [app] Application name (macOS only).
 * @returns {Promise<boolean>} false when the underlying command fails.
 */
async function windowClose(app) {
  try {
    if (platform === 'darwin') {
      // `app` is caller-supplied: escape it for the AppleScript string literal.
      const safeApp = String(app).replace(/[\\"]/g, '\\$&');
      const script = app
        ? `tell application "${safeApp}" to close front window`
        : `tell application "System Events"
set fp to first application process whose frontmost is true
tell fp to click (first button of front window whose subrole is "AXCloseButton")
end tell`;
      // The script is passed inside single quotes, so close/reopen the quoting
      // around every embedded single quote to keep it a single shell argument.
      const shellSafeScript = script.replace(/'/g, "'\\''");
      await execAsync(`osascript -e '${shellSafeScript}'`);
    } else if (platform === 'win32') {
      await execAsync(`powershell -Command "Add-Type -TypeDefinition 'using System; using System.Runtime.InteropServices; public class WC { [DllImport(\\\"user32.dll\\\")] public static extern IntPtr GetForegroundWindow(); [DllImport(\\\"user32.dll\\\")] public static extern bool PostMessage(IntPtr h,uint m,IntPtr w,IntPtr l); }'; [WC]::PostMessage([WC]::GetForegroundWindow(),0x0010,[IntPtr]::Zero,[IntPtr]::Zero)"`);
    }
    return true;
  } catch {
    return false;
  }
}
306
+
307
/**
 * List the windows of all visible application processes (macOS only).
 *
 * System Events is asked, via `osascript`, to emit one line per window in the
 * form `app||x,y||w,h`; each line is parsed into an object. Processes whose
 * windows cannot be queried are skipped by the script's own `try`.
 *
 * @returns {Promise<Array<{app: string, x: number, y: number, width: number, height: number}>>}
 *   Empty array on non-macOS platforms or on any failure. Numeric fields that
 *   are missing or not numeric fall back to 0,0 / 800,600 instead of NaN.
 */
async function windowList() {
  // Parse "a,b" into two numbers, substituting the matching default for any
  // component that is missing or not numeric.
  const parsePair = (text, defaults) => {
    const fields = text ? text.split(',') : [];
    return defaults.map((defaultValue, i) => {
      const raw = (fields[i] ?? '').trim();
      const value = raw === '' ? NaN : Number(raw);
      return Number.isFinite(value) ? value : defaultValue;
    });
  };

  try {
    if (platform !== 'darwin') return [];

    const { stdout } = await execAsync(`osascript -e '
set output to ""
tell application "System Events"
  repeat with p in (every application process whose visible is true)
    set pName to name of p
    try
      repeat with w in (every window of p)
        set {px, py} to position of w
        set {sw, sh} to size of w
        set output to output & pName & "||" & px & "," & py & "||" & sw & "," & sh & "\\n"
      end repeat
    end try
  end repeat
end tell
return output'`);

    return stdout
      .trim()
      .split('\n')
      .filter(Boolean)
      .map((line) => {
        const [app, pos, sz] = line.split('||');
        const [x, y] = parsePair(pos, [0, 0]);
        const [width, height] = parsePair(sz, [800, 600]);
        return { app, x, y, width, height };
      });
  } catch {
    // Best-effort listing: any failure is reported as "no windows".
    return [];
  }
}
335
+
224
336
  async function openApp(appName) {
225
337
  const name = appName.toLowerCase();
226
338
  try {
@@ -312,6 +424,26 @@ async function handleCommand(msg) {
312
424
  case 'open_terminal':
313
425
  const termOk = await openTerminalWithChat(cmd.assistantName || cmd.name);
314
426
  return { ok: termOk };
427
+ case 'window_frontmost': {
428
+ const wf = await windowFrontmost();
429
+ return { ok: true, ...wf };
430
+ }
431
+ case 'window_move': {
432
+ const wm = await windowMove(cmd.app, cmd.x, cmd.y);
433
+ return { ok: wm };
434
+ }
435
+ case 'window_resize': {
436
+ const wr = await windowResize(cmd.app, cmd.w, cmd.h);
437
+ return { ok: wr };
438
+ }
439
+ case 'window_close': {
440
+ const wc = await windowClose(cmd.app);
441
+ return { ok: wc };
442
+ }
443
+ case 'window_list': {
444
+ const wl = await windowList();
445
+ return { ok: true, windows: wl };
446
+ }
315
447
  case 'ping':
316
448
  return { ok: true, pong: true };
317
449
  default:
@@ -323,36 +455,26 @@ async function handleCommand(msg) {
323
455
  }
324
456
 
325
457
  async function openWebAssistant(assistantId) {
326
- const url = `${WEB_APP_URL}/assistant/live?id=${assistantId}`;
458
+ const chatUrl = `${WEB_APP_URL}/assistant/live?id=${assistantId}`;
327
459
  try {
328
460
  if (platform === 'darwin') {
329
- await execAsync(`open -na "Google Chrome" --args --new-window "${url}" 2>/dev/null || open -na "Safari" --args "${url}" 2>/dev/null || open "${url}"`);
461
+ await execAsync(`open -na "Google Chrome" --args --new-window --window-size=380,520 --window-position=1000,200 "${chatUrl}" 2>/dev/null || open "${chatUrl}"`);
330
462
  } else if (platform === 'win32') {
331
- await execAsync(`start chrome --new-window "${url}" 2>nul || start "" "${url}"`);
463
+ await execAsync(`start chrome --new-window --window-size=380,520 --window-position=1000,200 "${chatUrl}" 2>nul || start "" "${chatUrl}"`);
332
464
  } else {
333
- await execAsync(`google-chrome --new-window "${url}" 2>/dev/null || xdg-open "${url}"`);
465
+ await execAsync(`google-chrome --new-window "${chatUrl}" 2>/dev/null || xdg-open "${chatUrl}"`);
334
466
  }
335
467
  return true;
336
468
  } catch {
337
- try { await execAsync(`open "${url}"`); return true; } catch { return false; }
469
+ try { await execAsync(`open "${chatUrl}"`); return true; } catch { return false; }
338
470
  }
339
471
  }
340
472
 
341
473
  async function activateAllAssistants(assistants) {
342
474
  const opened = [];
343
475
  for (const a of assistants) {
344
- const mode = a.activationMode || 'web';
345
- if (mode === 'terminal') {
346
- await openTerminalWithChat(a.name);
347
- opened.push({ name: a.name, mode: 'terminal' });
348
- } else if (mode === 'web') {
349
- await openWebAssistant(a.id);
350
- opened.push({ name: a.name, mode: 'web' });
351
- } else {
352
- await openTerminalWithChat(a.name);
353
- await openWebAssistant(a.id);
354
- opened.push({ name: a.name, mode: 'both' });
355
- }
476
+ await openWebAssistant(a.id);
477
+ opened.push({ name: a.name, mode: 'web' });
356
478
  await new Promise(r => setTimeout(r, 500));
357
479
  }
358
480
  return opened;
@@ -449,17 +571,23 @@ export async function startAgent(port = 9876, options = {}) {
449
571
  console.log(chalk.bold.cyan(' 👏 박수 감지! 비서 활성화 중...'));
450
572
 
451
573
  if (userAssistants.length === 0) {
452
- console.log(chalk.gray(' → 기본 Uwonbot 실행'));
453
- openTerminalWithChat('Uwonbot');
574
+ console.log(chalk.gray(' → 기본 Uwonbot 실행'));
575
+ try {
576
+ if (platform === 'darwin') {
577
+ await execAsync(`open "${WEB_APP_URL}/assistant"`);
578
+ } else if (platform === 'win32') {
579
+ await execAsync(`start "" "${WEB_APP_URL}/assistant"`);
580
+ } else {
581
+ await execAsync(`xdg-open "${WEB_APP_URL}/assistant"`);
582
+ }
583
+ } catch {}
454
584
  return;
455
585
  }
456
586
 
457
587
  if (userAssistants.length === 1) {
458
588
  const a = userAssistants[0];
459
- const mode = a.activationMode || 'web';
460
- console.log(chalk.green(` → ${a.name} 실행 (${mode})`));
461
- if (mode === 'terminal' || mode === 'both') await openTerminalWithChat(a.name);
462
- if (mode === 'web' || mode === 'both') await openWebAssistant(a.id);
589
+ console.log(chalk.green(` → ${a.name} 실행`));
590
+ await openWebAssistant(a.id);
463
591
  return;
464
592
  }
465
593
 
@@ -488,7 +616,11 @@ export async function startAgent(port = 9876, options = {}) {
488
616
  console.log(chalk.green(` ✓ Client connected from ${origin}`));
489
617
 
490
618
  ws.on('message', async (data) => {
491
- const result = await handleCommand(data.toString());
619
+ const raw = data.toString();
620
+ let _reqId;
621
+ try { _reqId = JSON.parse(raw)._reqId; } catch {}
622
+ const result = await handleCommand(raw);
623
+ if (_reqId) result._reqId = _reqId;
492
624
  ws.send(JSON.stringify(result));
493
625
  });
494
626
 
@@ -508,10 +640,14 @@ export async function startAgent(port = 9876, options = {}) {
508
640
  const retry = new WebSocketServer({ port });
509
641
  retry.on('connection', (ws, req) => {
510
642
  ws.on('message', async (data) => {
511
- const result = await handleCommand(data.toString());
643
+ const raw = data.toString();
644
+ let _reqId;
645
+ try { _reqId = JSON.parse(raw)._reqId; } catch {}
646
+ const result = await handleCommand(raw);
647
+ if (_reqId) result._reqId = _reqId;
512
648
  ws.send(JSON.stringify(result));
513
649
  });
514
- ws.send(JSON.stringify({ type: 'welcome', agent: 'uwonbot', version: '1.1.4', uid }));
650
+ ws.send(JSON.stringify({ type: 'welcome', agent: 'uwonbot', version: '1.1.8', uid }));
515
651
  });
516
652
  console.log(chalk.green(` ✓ 재시도 성공 — ws://localhost:${port}`));
517
653
  } catch {
package/src/chat.js CHANGED
@@ -7,7 +7,8 @@ import open from 'open';
7
7
  import { getConfig } from './config.js';
8
8
  import { sendToBrain } from './brain.js';
9
9
  import { showMiniBar } from './banner.js';
10
- import { printOrb, animateOrb } from './terminalOrb.js';
10
+ // terminalOrb is used by agent.js for display; chat.js shows text-only
11
+ import { speak } from './terminalTTS.js';
11
12
  import VoiceInput from './voiceInput.js';
12
13
  import {
13
14
  fetchAssistants,
@@ -177,19 +178,19 @@ export async function startChat(assistantName, assistant, initialCommand, option
177
178
 
178
179
  const colorsArr = assistant.colors;
179
180
  const orbColorHex = (Array.isArray(colorsArr) && colorsArr[0]) || assistant.orbColor || brainColor;
180
- const orbRgb = hexToRgb(orbColorHex);
181
181
 
182
182
  console.clear();
183
- printOrb({
184
- radius: 10,
185
- color: orbRgb,
186
- label: assistant.name.toUpperCase(),
187
- status: '준비됨',
188
- });
183
+
184
+ const c = chalk.hex(brainColor);
185
+ console.log('');
186
+ console.log(c(' ●') + chalk.bold.white(` ${assistant.name}`));
187
+ console.log(chalk.gray(` ${brainLabel} | ${assistant.voiceLang || 'ko-KR'}`));
188
+ console.log('');
189
189
 
190
190
  await bootSequence(assistant, brainLabel, brainColor);
191
191
 
192
- const voiceMode = options.voice || false;
192
+ const hasApiKey = !!(assistant.apiKey || process.env.GEMINI_API_KEY);
193
+ const voiceMode = options.voice !== undefined ? options.voice : hasApiKey;
193
194
 
194
195
  console.log('');
195
196
  console.log(chalk.gray(' ─────────────────────────────────────────'));
@@ -221,22 +222,27 @@ export async function startChat(assistantName, assistant, initialCommand, option
221
222
  const ok = await voiceInput.start({
222
223
  onListening: () => {
223
224
  if (!processingVoice) {
224
- process.stdout.write(chalk.gray(' 🎙 듣고 있습니다...') + '\r');
225
+ process.stdout.write('\x1b[2K\r' + chalk.gray(' 🎙 듣고 있습니다... (말하세요)'));
225
226
  }
226
227
  },
227
228
  onSpeechStart: () => {
228
229
  process.stdout.write('\x1b[2K\r');
229
- process.stdout.write(chalk.cyan(' 🔴 음성 감지 중...') + '\r');
230
230
  },
231
231
  onSpeechEnd: () => {
232
- process.stdout.write('\x1b[2K\r');
233
- process.stdout.write(chalk.gray(' ⏳ 음성 인식 중...') + '\r');
232
+ process.stdout.write('\x1b[2K\r' + chalk.yellow(' ⏳ 음성 인식 중...'));
233
+ },
234
+ onAmplitude: (amp, isSpeaking) => {
235
+ if (processingVoice) return;
236
+ if (isSpeaking) {
237
+ const bar = renderVoiceBar(amp);
238
+ process.stdout.write('\x1b[2K\r' + chalk.cyan(' 🔴 ') + chalk.hex(brainColor)(bar) + chalk.gray(' 말하는 중...'));
239
+ }
234
240
  },
235
241
  onTranscript: async (text) => {
236
242
  if (processingVoice) return;
237
243
  processingVoice = true;
238
244
  process.stdout.write('\x1b[2K\r');
239
- console.log(chalk.hex(brainColor)(` You (voice) > `) + chalk.white(text));
245
+ console.log(chalk.hex(brainColor)(` 🗣 You > `) + chalk.white(text));
240
246
  rl.pause();
241
247
  await processMessage(text, messages, assistant, brainColor);
242
248
  processingVoice = false;
@@ -285,13 +291,18 @@ export async function startChat(assistantName, assistant, initialCommand, option
285
291
  voiceInput = new VoiceInput(apiKey);
286
292
  await voiceInput.start({
287
293
  onListening: () => {},
288
- onSpeechStart: () => process.stdout.write(chalk.cyan('\r 🔴 음성 감지 중...') + '\r'),
289
- onSpeechEnd: () => process.stdout.write(chalk.gray('\r ⏳ 인식 중...') + '\r'),
294
+ onSpeechStart: () => {},
295
+ onSpeechEnd: () => process.stdout.write('\x1b[2K\r' + chalk.yellow(' ⏳ 인식 중...')),
296
+ onAmplitude: (amp, isSpeaking) => {
297
+ if (processingVoice || !isSpeaking) return;
298
+ const bar = renderVoiceBar(amp);
299
+ process.stdout.write('\x1b[2K\r' + chalk.cyan(' 🔴 ') + chalk.hex(brainColor)(bar));
300
+ },
290
301
  onTranscript: async (text) => {
291
302
  if (processingVoice) return;
292
303
  processingVoice = true;
293
304
  process.stdout.write('\x1b[2K\r');
294
- console.log(chalk.hex(brainColor)(` You (voice) > `) + chalk.white(text));
305
+ console.log(chalk.hex(brainColor)(` 🗣 You > `) + chalk.white(text));
295
306
  rl.pause();
296
307
  await processMessage(text, messages, assistant, brainColor);
297
308
  processingVoice = false;
@@ -330,6 +341,13 @@ export async function startChat(assistantName, assistant, initialCommand, option
330
341
  });
331
342
  }
332
343
 
344
/**
 * Render a fixed-width (40 cell) level meter for a microphone amplitude.
 *
 * The amplitude is scaled by the bar width, rounded, and clamped to
 * [0, 40], so out-of-range or non-numeric readings can neither throw nor
 * change the bar's width.
 *
 * @param {number} amplitude Level, where 0 is an empty bar and 1 (or more) a full bar.
 * @returns {string} Exactly 40 characters: filled cells (█) followed by empty cells (░).
 */
function renderVoiceBar(amplitude) {
  const WIDTH = 40;
  const scaled = Math.round(amplitude * WIDTH);
  // NaN (e.g. undefined amplitude) is drawn as silence; negatives clamp to 0
  // because String.prototype.repeat throws on a negative count.
  const filled = Number.isNaN(scaled) ? 0 : Math.min(WIDTH, Math.max(0, scaled));
  return '█'.repeat(filled) + '░'.repeat(WIDTH - filled);
}
350
+
333
351
  async function processMessage(input, messages, assistant, brainColor) {
334
352
  const spinner = ora({
335
353
  text: chalk.gray('Thinking...'),
@@ -337,6 +355,9 @@ async function processMessage(input, messages, assistant, brainColor) {
337
355
  color: 'blue',
338
356
  }).start();
339
357
 
358
+ const vLang = assistant.voiceLang || 'ko-KR';
359
+ const vGender = assistant.voiceGender || assistant.voiceStyle || 'male';
360
+
340
361
  try {
341
362
  const reply = await sendToBrain(assistant, messages, input);
342
363
  spinner.stop();
@@ -348,12 +369,20 @@ async function processMessage(input, messages, assistant, brainColor) {
348
369
  console.log(chalk.white(` ${line}`));
349
370
  }
350
371
  console.log('');
372
+
373
+ speak(reply, { lang: vLang, gender: vGender }).catch(() => {});
351
374
  } catch (err) {
352
375
  spinner.stop();
353
- console.log(chalk.red(`\n ❌ Error: ${err.message}\n`));
354
- if (err.message.includes('API key') || err.message.includes('api key')) {
355
- console.log(chalk.yellow(' Make sure your assistant has a valid API key configured.'));
356
- console.log(chalk.gray(' Update it at: https://chartapp-653e1.web.app/assistant/create\n'));
376
+ const rawMsg = err.message || '';
377
+ let friendlyMsg = rawMsg;
378
+ if (rawMsg.includes('API_KEY_INVALID') || rawMsg.includes('API key not valid')) {
379
+ friendlyMsg = 'API 키가 유효하지 않습니다. 비서 설정에서 올바른 API 키를 확인해주세요.';
380
+ } else if (rawMsg.includes('No API key')) {
381
+ friendlyMsg = 'API 키가 설정되지 않았습니다. 웹에서 비서의 API 키를 설정해주세요.';
382
+ }
383
+ console.log(chalk.red(`\n ❌ ${friendlyMsg}\n`));
384
+ if (rawMsg.includes('API key') || rawMsg.includes('api key') || rawMsg.includes('No API key')) {
385
+ console.log(chalk.gray(' API 키 설정: https://chartapp-653e1.web.app/assistant/create\n'));
357
386
  }
358
387
  }
359
388
  }
@@ -0,0 +1,99 @@
1
+ import { exec } from 'child_process';
2
+ import { promisify } from 'util';
3
+ import { writeFileSync, unlinkSync, existsSync } from 'fs';
4
+ import { join } from 'path';
5
+ import { tmpdir } from 'os';
6
+ import fetch from 'node-fetch';
7
+
8
+ const execAsync = promisify(exec);
9
+ const platform = process.platform;
10
+
11
+ const GOOGLE_TTS_FUNCTION = 'https://us-central1-chartapp-653e1.cloudfunctions.net/googleTTS';
12
+
13
/**
 * Synthesize `text` through the hosted Google TTS cloud function and play the
 * returned audio with a platform-specific command-line player.
 *
 * The base64 audio from the response's `audio` field is written to a
 * temporary .mp3 file, played, and the file is always removed afterwards —
 * including when playback fails.
 *
 * @param {string} text Text to speak.
 * @param {string} [lang='ko-KR'] Language tag forwarded to the TTS function.
 * @param {string} [gender='male'] Voice gender forwarded to the TTS function.
 * @returns {Promise<boolean>} true when audio was fetched and a player exited
 *   successfully; false on any HTTP, decoding, or playback failure (so the
 *   caller can fall back to a local engine). Never rejects.
 */
async function speakWithGoogleTTS(text, lang = 'ko-KR', gender = 'male') {
  let tmpFile = null;
  try {
    const res = await fetch(GOOGLE_TTS_FUNCTION, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text, lang, gender }),
    });
    if (!res.ok) throw new Error(`TTS API ${res.status}`);
    const data = await res.json();
    if (!data.audio) throw new Error('No audio');

    // Timestamp alone can collide when two replies are spoken in the same
    // millisecond or from two processes; add pid and a random suffix.
    const uniqueSuffix = `${Date.now()}_${process.pid}_${Math.random().toString(36).slice(2, 8)}`;
    tmpFile = join(tmpdir(), `uwonbot_tts_${uniqueSuffix}.mp3`);
    writeFileSync(tmpFile, Buffer.from(data.audio, 'base64'));

    if (platform === 'darwin') {
      await execAsync(`afplay "${tmpFile}"`);
    } else if (platform === 'win32') {
      // NOTE(review): System.Media.SoundPlayer is documented as a .wav player,
      // so MP3 data will likely fail here and fall through to `return false`.
      // Confirm the audio format returned by the TTS function or switch player.
      await execAsync(`powershell -c "(New-Object Media.SoundPlayer '${tmpFile}').PlaySync()"`);
    } else {
      // If every player in the chain is missing/fails, let the error propagate
      // so the function reports false and the caller can try its fallback.
      await execAsync(`mpg123 "${tmpFile}" 2>/dev/null || ffplay -nodisp -autoexit "${tmpFile}" 2>/dev/null || aplay "${tmpFile}" 2>/dev/null`);
    }

    return true;
  } catch {
    return false;
  } finally {
    // Always clean up, even when playback threw.
    if (tmpFile) {
      try { unlinkSync(tmpFile); } catch {}
    }
  }
}
43
+
44
/**
 * Speak `text` with the macOS `say` command.
 *
 * A voice is picked from the language prefix and gender (Korean: Yuna/Jian,
 * otherwise Samantha/Daniel); if `say` fails with that voice, it is retried
 * once with the system default voice.
 *
 * @param {string} text Text to speak.
 * @param {string} [lang='ko-KR'] Language tag; only the `ko` prefix is inspected.
 * @param {string} [gender='male'] `'female'` selects the female voice; anything else the male one.
 * @returns {Promise<boolean>} false on non-macOS platforms or when both
 *   attempts fail; true otherwise. Never rejects for command failures.
 */
async function speakWithSay(text, lang = 'ko-KR', gender = 'male') {
  if (platform !== 'darwin') return false;

  const isFemale = gender === 'female';
  const voice = lang.startsWith('ko')
    ? (isFemale ? 'Yuna' : 'Jian')
    : (isFemale ? 'Samantha' : 'Daniel');

  // The text is model-generated and must never be interpreted by the shell.
  // Inside double quotes `$(...)`, backticks and `\` are still expanded, so
  // pass it single-quoted, closing/reopening the quote around each `'`.
  const quotedText = "'" + String(text).replace(/'/g, "'\\''") + "'";

  try {
    await execAsync(`say -v "${voice}" ${quotedText}`);
    return true;
  } catch {
    // The chosen voice may not be installed; retry with the default voice.
    try {
      await execAsync(`say ${quotedText}`);
      return true;
    } catch {
      return false;
    }
  }
}
64
+
65
/**
 * Speak `text` with the `espeak` command (used on non-macOS platforms).
 *
 * @param {string} text Text to speak.
 * @param {string} [lang='ko-KR'] Language tag; a `ko` prefix selects the
 *   Korean espeak voice, anything else selects English.
 * @returns {Promise<boolean>} false on macOS or when the command fails;
 *   true when espeak exits successfully. Never rejects for command failures.
 */
async function speakWithEspeak(text, lang = 'ko-KR') {
  if (platform === 'darwin') return false;

  const espeakLang = lang.startsWith('ko') ? 'ko' : 'en';
  // The text is model-generated and must never be interpreted by the shell.
  // Inside double quotes `$(...)`, backticks and `\` are still expanded, so
  // pass it single-quoted, closing/reopening the quote around each `'`.
  const quotedText = "'" + String(text).replace(/'/g, "'\\''") + "'";

  try {
    await execAsync(`espeak -v ${espeakLang} ${quotedText} 2>/dev/null`);
    return true;
  } catch {
    return false;
  }
}
73
+
74
/**
 * Speak `text` aloud using the first engine that works.
 *
 * Order: the hosted Google TTS function first; if that reports failure,
 * `say` on macOS, otherwise `espeak`.
 *
 * @param {string} text Text to speak. Non-string or blank input is ignored.
 * @param {{lang?: string, gender?: string}} [options]
 *   `lang` defaults to `'ko-KR'`, `gender` to `'male'`.
 * @returns {Promise<void>} Resolves once an engine finished or all were tried.
 */
export async function speak(text, options = {}) {
  const lang = options.lang || 'ko-KR';
  const gender = options.gender || 'male';

  // Guard the type as well as emptiness: calling .trim() on a non-string
  // (e.g. an object reply) would throw instead of quietly skipping speech.
  if (typeof text !== 'string' || !text.trim()) return;

  const spokenByCloud = await speakWithGoogleTTS(text, lang, gender);
  if (spokenByCloud) return;

  if (platform === 'darwin') {
    await speakWithSay(text, lang, gender);
    return;
  }

  await speakWithEspeak(text, lang);
}
90
+
91
/**
 * Speak `text` one sentence at a time, in order.
 *
 * The text is split after sentence terminators (`.`, `!`, `?`, the CJK full
 * stop `。`, full-width `！` / `？`, or a newline); each non-blank piece is
 * passed to `speak` and awaited before the next one starts.
 *
 * @param {string} text Text to speak. Non-string or blank input is ignored.
 * @param {{lang?: string, gender?: string}} [options] Forwarded unchanged to `speak`.
 * @returns {Promise<void>}
 */
export async function speakStreaming(text, options = {}) {
  // .match() on null/undefined/non-strings would throw; treat them as nothing to say.
  if (typeof text !== 'string' || !text.trim()) return;

  // Full-width ！？ are listed alongside 。 so CJK sentences split as well.
  const sentences = text.match(/[^.!?。！？\n]+[.!?。！？\n]?/g) || [text];
  for (const sentence of sentences) {
    const trimmed = sentence.trim();
    if (trimmed) await speak(trimmed, options);
  }
}
98
+
99
+ export default { speak, speakStreaming };
package/src/voiceInput.js CHANGED
@@ -78,11 +78,12 @@ export default class VoiceInput {
78
78
  this.onSpeechEnd = null;
79
79
  }
80
80
 
81
- async start({ onTranscript, onListening, onSpeechStart, onSpeechEnd }) {
81
+ async start({ onTranscript, onListening, onSpeechStart, onSpeechEnd, onAmplitude }) {
82
82
  this.onTranscript = onTranscript;
83
83
  this.onListening = onListening;
84
84
  this.onSpeechStart = onSpeechStart;
85
85
  this.onSpeechEnd = onSpeechEnd;
86
+ this.onAmplitude = onAmplitude;
86
87
 
87
88
  let micModule;
88
89
  try {
@@ -120,10 +121,14 @@ export default class VoiceInput {
120
121
 
121
122
  this.onListening?.();
122
123
 
124
+ let speechDuration = 0;
125
+
123
126
  stream.on('data', (buf) => {
124
127
  if (!this.running) return;
125
128
  const amp = getAmplitude(buf);
126
129
 
130
+ this.onAmplitude?.(amp, isSpeaking);
131
+
127
132
  if (amp > SILENCE_THRESHOLD) {
128
133
  if (!isSpeaking) {
129
134
  isSpeaking = true;
@@ -133,6 +138,7 @@ export default class VoiceInput {
133
138
  }
134
139
  silenceStart = null;
135
140
  speechChunks.push(Buffer.from(buf));
141
+ speechDuration = Date.now() - speechStart;
136
142
  } else if (isSpeaking) {
137
143
  speechChunks.push(Buffer.from(buf));
138
144
  if (!silenceStart) silenceStart = Date.now();
@@ -141,6 +147,7 @@ export default class VoiceInput {
141
147
  const duration = Date.now() - speechStart;
142
148
  isSpeaking = false;
143
149
  silenceStart = null;
150
+ speechDuration = 0;
144
151
  this.onSpeechEnd?.();
145
152
 
146
153
  if (duration >= MIN_SPEECH_MS && speechChunks.length > 0) {