uwonbot 1.0.9 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/uwonbot.js CHANGED
@@ -13,7 +13,7 @@ showBanner();
13
13
  program
14
14
  .name('uwonbot')
15
15
  .description('Uwonbot AI Assistant — Your AI controls your computer')
16
- .version('1.0.9');
16
+ .version('1.1.1');
17
17
 
18
18
  program
19
19
  .command('login')
@@ -44,19 +44,21 @@ program
44
44
  .command('chat [assistantName]')
45
45
  .description('Start chatting with an AI assistant')
46
46
  .option('-n, --name <name>', 'Assistant name to launch directly')
47
+ .option('-v, --voice', 'Enable hands-free voice input mode')
47
48
  .action(async (assistantName, opts) => {
48
49
  const config = getConfig();
49
50
  if (!config.get('uid')) {
50
51
  console.log('\n ⚠️ Please log in first: uwonbot login\n');
51
52
  process.exit(1);
52
53
  }
54
+ const chatOpts = { voice: opts.voice || false };
53
55
  const targetName = opts.name || assistantName;
54
56
  if (targetName) {
55
- await startChat(targetName);
57
+ await startChat(targetName, null, null, chatOpts);
56
58
  } else {
57
59
  const assistant = await selectAssistant();
58
60
  if (assistant) {
59
- await startChat(null, assistant);
61
+ await startChat(null, assistant, null, chatOpts);
60
62
  } else {
61
63
  const defaultBot = {
62
64
  name: 'Uwonbot',
@@ -66,7 +68,7 @@ program
66
68
  voiceStyle: 'male',
67
69
  isDefaultBot: true,
68
70
  };
69
- await startChat(null, defaultBot);
71
+ await startChat(null, defaultBot, null, chatOpts);
70
72
  }
71
73
  }
72
74
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "uwonbot",
3
- "version": "1.0.9",
3
+ "version": "1.1.1",
4
4
  "description": "Uwonbot AI Assistant CLI — Your AI controls your computer",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/chat.js CHANGED
@@ -7,6 +7,8 @@ import open from 'open';
7
7
  import { getConfig } from './config.js';
8
8
  import { sendToBrain } from './brain.js';
9
9
  import { showMiniBar } from './banner.js';
10
+ import { printOrb, animateOrb } from './terminalOrb.js';
11
+ import VoiceInput from './voiceInput.js';
10
12
  import {
11
13
  hasRegisteredDevices,
12
14
  createCLISession,
@@ -16,6 +18,40 @@ import {
16
18
 
17
19
  const WEB_APP_URL = 'https://chartapp-653e1.web.app';
18
20
 
21
/**
 * Parses a "#rrggbb" (or "rrggbb") hex color into an [r, g, b] triple.
 * Channels that fail to parse fall back to the brand blue (37, 99, 235).
 * @param {string} hex - Hex color string, with or without a leading '#'
 * @returns {number[]} [r, g, b] with each channel in 0-255
 */
function hexToRgb(hex) {
  const FALLBACK = [37, 99, 235];
  const h = hex.replace('#', '');
  return FALLBACK.map((fallback, i) => {
    // Use an explicit NaN check rather than `parseInt(...) || fallback`:
    // `||` would also replace a validly-parsed 0x00 channel (e.g. '#000000')
    // with the fallback color.
    const channel = Number.parseInt(h.substring(i * 2, i * 2 + 2), 16);
    return Number.isNaN(channel) ? fallback : channel;
  });
}
29
+
30
/** Resolves after `ms` milliseconds; awaitable pause between boot-log lines. */
async function sleep(ms) {
  await new Promise((resolve) => setTimeout(resolve, ms));
}
33
+
34
/**
 * Plays a retro terminal "boot" animation: prints a fixed sequence of [SYS]
 * status lines with short pauses between them, accented in the brain's color.
 * @param {object} assistant - Assistant profile (reads .name)
 * @param {string} brainLabel - Display label of the configured brain/model
 * @param {string} brainColor - Hex color used for brain-specific lines
 */
async function bootSequence(assistant, brainLabel, brainColor) {
  const accent = chalk.hex(brainColor);
  const dim = chalk.gray;
  const plain = chalk.white;

  // Each entry: how long to pause before printing, then the styled line.
  const script = [
    { pause: 300, text: `${dim(' [SYS]')} ${plain('INITIATING SYSTEM I...')}` },
    { pause: 300, text: `${dim(' [SYS]')} ${accent(`NETWARE ${assistant.name.toUpperCase()} v1.0`)}` },
    { pause: 400, text: `${dim(' [SYS]')} ${plain('PROTOCOL: SCANNING...')}` },
    { pause: 300, text: `${dim(' [SYS]')} ${plain('RELEASING CONFIGURATION')}` },
    { pause: 200, text: `${dim(' [SYS]')} ${plain(`Brain: ${brainLabel} | API: connected`)}` },
    { pause: 200, text: `${dim(' [SYS]')} ${plain('Authentication verified.')}` },
    { pause: 500, text: `${dim(' [SYS]')} ${chalk.green('All systems operational. Standing by.')}` },
  ];

  for (const { pause, text } of script) {
    await sleep(pause);
    console.log(text);
  }
}
54
+
19
55
  async function requireBiometricAuth(uid) {
20
56
  try {
21
57
  const hasDevices = await hasRegisteredDevices(uid);
@@ -75,7 +111,7 @@ async function requireBiometricAuth(uid) {
75
111
  return false;
76
112
  }
77
113
 
78
- export async function startChat(assistantName, assistant, initialCommand) {
114
+ export async function startChat(assistantName, assistant, initialCommand, options = {}) {
79
115
  const config = getConfig();
80
116
  const uid = config.get('uid');
81
117
  if (!uid) {
@@ -107,26 +143,85 @@ export async function startChat(assistantName, assistant, initialCommand) {
107
143
  : assistant.brain === 'gemini' ? '#8b5cf6'
108
144
  : '#2563eb';
109
145
 
146
+ const orbColorHex = assistant.orbColor || brainColor;
147
+ const orbRgb = hexToRgb(orbColorHex);
148
+
149
+ console.clear();
150
+ printOrb({
151
+ radius: 10,
152
+ color: orbRgb,
153
+ label: assistant.name.toUpperCase(),
154
+ status: '준비됨',
155
+ });
156
+
157
+ await bootSequence(assistant, brainLabel, brainColor);
158
+
159
+ const voiceMode = options.voice || false;
160
+
110
161
  console.log('');
111
- console.log(chalk.hex('#2563eb')(' ╔══════════════════════════════════════════╗'));
112
- console.log(chalk.hex('#2563eb')(' ║') + chalk.white.bold(` ${assistant.avatar || '🤖'} ${assistant.name}`) + ' '.repeat(Math.max(1, 38 - assistant.name.length)) + chalk.hex('#2563eb')('║'));
113
- console.log(chalk.hex('#2563eb')(' ║') + chalk.gray(` Brain: `) + chalk.hex(brainColor)(brainLabel) + ' '.repeat(Math.max(1, 30 - brainLabel.length)) + chalk.hex('#2563eb')('║'));
114
- console.log(chalk.hex('#2563eb')(' ║') + chalk.gray(' Type "exit" to quit, "clear" to reset ') + chalk.hex('#2563eb')('║'));
115
- console.log(chalk.hex('#2563eb')(' ╚══════════════════════════════════════════╝'));
162
+ console.log(chalk.gray(' ─────────────────────────────────────────'));
163
+ if (voiceMode) {
164
+ console.log(chalk.cyan(' 🎙 음성 모드 활성화 말하면 자동 인식됩니다'));
165
+ console.log(chalk.gray(' 텍스트 입력도 가능합니다'));
166
+ }
167
+ console.log(chalk.gray(' "exit" 종료 | "clear" 대화 초기화'));
168
+ console.log(chalk.gray(' ─────────────────────────────────────────'));
116
169
  console.log('');
117
170
 
118
171
  const messages = [];
119
-
120
- if (initialCommand) {
121
- await processMessage(initialCommand, messages, assistant, brainColor);
122
- }
172
+ let voiceInput = null;
173
+ let processingVoice = false;
123
174
 
124
175
  const rl = readline.createInterface({
125
176
  input: process.stdin,
126
177
  output: process.stdout,
127
- prompt: chalk.hex('#2563eb')(' You > '),
178
+ prompt: chalk.hex(brainColor)(' You > '),
128
179
  });
129
180
 
181
+ if (voiceMode) {
182
+ const apiKey = assistant.apiKey || process.env.GEMINI_API_KEY || '';
183
+ if (!apiKey) {
184
+ console.log(chalk.yellow(' ⚠ 음성 모드에는 API 키가 필요합니다 (음성→텍스트 변환에 Gemini 사용)'));
185
+ console.log(chalk.gray(' 텍스트 입력으로 진행합니다.\n'));
186
+ } else {
187
+ voiceInput = new VoiceInput(apiKey);
188
+ const ok = await voiceInput.start({
189
+ onListening: () => {
190
+ if (!processingVoice) {
191
+ process.stdout.write(chalk.gray(' 🎙 듣고 있습니다...') + '\r');
192
+ }
193
+ },
194
+ onSpeechStart: () => {
195
+ process.stdout.write('\x1b[2K\r');
196
+ process.stdout.write(chalk.cyan(' 🔴 음성 감지 중...') + '\r');
197
+ },
198
+ onSpeechEnd: () => {
199
+ process.stdout.write('\x1b[2K\r');
200
+ process.stdout.write(chalk.gray(' ⏳ 음성 인식 중...') + '\r');
201
+ },
202
+ onTranscript: async (text) => {
203
+ if (processingVoice) return;
204
+ processingVoice = true;
205
+ process.stdout.write('\x1b[2K\r');
206
+ console.log(chalk.hex(brainColor)(` You (voice) > `) + chalk.white(text));
207
+ rl.pause();
208
+ await processMessage(text, messages, assistant, brainColor);
209
+ processingVoice = false;
210
+ rl.resume();
211
+ rl.prompt();
212
+ },
213
+ });
214
+ if (!ok) {
215
+ voiceInput = null;
216
+ console.log(chalk.gray(' 텍스트 입력으로 진행합니다.\n'));
217
+ }
218
+ }
219
+ }
220
+
221
+ if (initialCommand) {
222
+ await processMessage(initialCommand, messages, assistant, brainColor);
223
+ }
224
+
130
225
  rl.prompt();
131
226
 
132
227
  rl.on('line', async (line) => {
@@ -137,7 +232,8 @@ export async function startChat(assistantName, assistant, initialCommand) {
137
232
  }
138
233
 
139
234
  if (input.toLowerCase() === 'exit' || input.toLowerCase() === 'quit') {
140
- console.log(chalk.gray('\n Goodbye! 👋\n'));
235
+ if (voiceInput) voiceInput.stop();
236
+ console.log(chalk.gray('\n Goodbye!\n'));
141
237
  rl.close();
142
238
  process.exit(0);
143
239
  }
@@ -149,17 +245,42 @@ export async function startChat(assistantName, assistant, initialCommand) {
149
245
  return;
150
246
  }
151
247
 
152
- if (input.toLowerCase() === 'tools') {
153
- console.log('');
154
- console.log(chalk.white.bold(' Available Tools:'));
155
- console.log(chalk.gray(' ────────────────'));
156
- console.log(' 📂 read_file, write_file, list_directory, create_directory');
157
- console.log(' 🗑️ delete_file, move_file, search_files');
158
- console.log(' ⚙️ run_shell, install_package');
159
- console.log(' 🌐 open_url, open_application');
160
- console.log(' 📋 get_clipboard, set_clipboard');
161
- console.log(' 💻 system_info');
162
- console.log('');
248
+ if (input.toLowerCase() === 'voice on') {
249
+ if (!voiceInput) {
250
+ const apiKey = assistant.apiKey || process.env.GEMINI_API_KEY || '';
251
+ if (apiKey) {
252
+ voiceInput = new VoiceInput(apiKey);
253
+ await voiceInput.start({
254
+ onListening: () => {},
255
+ onSpeechStart: () => process.stdout.write(chalk.cyan('\r 🔴 음성 감지 중...') + '\r'),
256
+ onSpeechEnd: () => process.stdout.write(chalk.gray('\r 인식 중...') + '\r'),
257
+ onTranscript: async (text) => {
258
+ if (processingVoice) return;
259
+ processingVoice = true;
260
+ process.stdout.write('\x1b[2K\r');
261
+ console.log(chalk.hex(brainColor)(` You (voice) > `) + chalk.white(text));
262
+ rl.pause();
263
+ await processMessage(text, messages, assistant, brainColor);
264
+ processingVoice = false;
265
+ rl.resume();
266
+ rl.prompt();
267
+ },
268
+ });
269
+ console.log(chalk.green(' 🎙 음성 모드 활성화'));
270
+ } else {
271
+ console.log(chalk.yellow(' ⚠ 음성 모드에는 API 키가 필요합니다.'));
272
+ }
273
+ }
274
+ rl.prompt();
275
+ return;
276
+ }
277
+
278
+ if (input.toLowerCase() === 'voice off') {
279
+ if (voiceInput) {
280
+ voiceInput.stop();
281
+ voiceInput = null;
282
+ console.log(chalk.gray(' 🔇 음성 모드 비활성화'));
283
+ }
163
284
  rl.prompt();
164
285
  return;
165
286
  }
@@ -171,6 +292,7 @@ export async function startChat(assistantName, assistant, initialCommand) {
171
292
  });
172
293
 
173
294
  rl.on('close', () => {
295
+ if (voiceInput) voiceInput.stop();
174
296
  process.exit(0);
175
297
  });
176
298
  }
@@ -0,0 +1,232 @@
1
+ import chalk from 'chalk';
2
+
3
+ const SHADING_CHARS = ' .:-=+*#%@';
4
+
5
/**
 * Renders a 3D sphere in the terminal using diffuse + specular shading with
 * 24-bit ANSI color.
 * @param {object} opts
 * @param {number} [opts.radius=10] - Sphere radius in character cells
 * @param {number[]} [opts.color=[37,99,235]] - RGB base color
 * @param {string} [opts.label] - Text label below the orb
 * @param {string} [opts.status] - Status text below label
 * @param {boolean} [opts.glow=true] - Enable glow effect
 * @returns {string} The rendered orb as a multi-line string
 */
export function renderOrb(opts = {}) {
  const radius = opts.radius || 10;
  const [baseR, baseG, baseB] = opts.color || [37, 99, 235];
  const label = opts.label || '';
  const status = opts.status || '';
  const glow = opts.glow !== false;

  // Each rendered row spans x in [-width/2, width/2], i.e. width + 1 cells.
  const width = radius * 2 + 4;
  const lines = [];

  // Fixed light direction: upper-left, leaning toward the viewer (normalized).
  const lightX = -0.4;
  const lightY = -0.5;
  const lightZ = 0.8;
  const lightLen = Math.sqrt(lightX * lightX + lightY * lightY + lightZ * lightZ);
  const lx = lightX / lightLen;
  const ly = lightY / lightLen;
  const lz = lightZ / lightLen;

  for (let y = -radius; y <= radius; y++) {
    let line = '';
    const rowY = y / radius;

    for (let x = -width / 2; x <= width / 2; x++) {
      const rowX = x / (width / 2);
      const distSq = rowX * rowX + rowY * rowY;

      if (distSq > 1.0) {
        // Outside the sphere: dimmed glow halo first, then blank space.
        if (glow && distSq < 1.5) {
          const glowIntensity = 1.0 - (distSq - 1.0) / 0.5;
          const gr = Math.round(baseR * glowIntensity * 0.3);
          const gg = Math.round(baseG * glowIntensity * 0.3);
          const gb = Math.round(baseB * glowIntensity * 0.3);
          line += chalk.rgb(gr, gg, gb)('·');
        } else {
          line += ' ';
        }
        continue;
      }

      // Unit-sphere surface normal at this cell; nz faces the viewer.
      const nz = Math.sqrt(1 - distSq);
      const nx = rowX;
      const ny = rowY;

      // Lambertian term. `dot` (N·L) is hoisted so the reflection vector
      // below reuses it instead of recomputing the product three times.
      const dot = nx * lx + ny * ly + nz * lz;
      const diffuse = Math.max(0, dot);

      // Reflection vector R = 2(N·L)N - L; specular from its view-axis share.
      const rx = 2 * dot * nx - lx;
      const ry = 2 * dot * ny - ly;
      const rz = 2 * dot * nz - lz;
      const viewZ = 1.0;
      const specular = Math.pow(Math.max(0, rz / Math.sqrt(rx * rx + ry * ry + rz * rz) * viewZ), 32);

      const ambient = 0.12;
      const intensity = Math.min(1.0, ambient + diffuse * 0.75 + specular * 0.5);

      // Tint toward white on highlights; clamp each channel to 255.
      const r = Math.min(255, Math.round(baseR * intensity + specular * 180));
      const g = Math.min(255, Math.round(baseG * intensity + specular * 180));
      const b = Math.min(255, Math.round(baseB * intensity + specular * 180));

      const charIdx = Math.min(SHADING_CHARS.length - 1, Math.round(intensity * (SHADING_CHARS.length - 1)));
      const ch = SHADING_CHARS[charIdx];

      line += chalk.rgb(r, g, b)(ch || ' ');
    }
    lines.push(line);
  }

  // Center a caption under the orb. The previous padding expression,
  // `Math.floor((width - text.length / 2))`, dropped the parentheses around
  // the subtraction and pushed captions far right of center; this matches the
  // centering formula used by animateOrb().
  const center = (text) =>
    ' '.repeat(Math.max(0, Math.floor((width + 1 - text.length) / 2))) + text;

  if (label) {
    lines.push('');
    lines.push(chalk.bold.white(center(label)));
  }

  if (status) {
    lines.push(chalk.gray(center(status)));
  }

  return lines.join('\n');
}
99
+
100
/**
 * Animated spinning orb: repaints a shaded sphere at `fps` frames/sec with a
 * light source that orbits the sphere and a pulsing glow halo.
 * @param {object} opts
 * @param {number[]} [opts.color=[37,99,235]] - RGB base color (changeable via update())
 * @param {string} [opts.label] - Caption under the orb
 * @param {string} [opts.status] - Status line under the caption
 * @param {number} [opts.radius=10] - Sphere radius in character cells
 * @param {number} [opts.fps=12] - Repaint rate
 * @returns {{ update(opts), stop() }} Handle to retint/relabel or stop the loop
 */
export function animateOrb(opts = {}) {
  const radius = opts.radius || 10;
  const fps = opts.fps || 12;
  let color = opts.color || [37, 99, 235];
  let label = opts.label || '';
  let statusText = opts.status || '';
  let frame = 0;
  let running = true;
  // NOTE(review): totalLines is never read — repainting tracks prevLineCount
  // below instead. Candidate for removal.
  const totalLines = (radius * 2 + 3) + (label ? 1 : 0) + (statusText ? 1 : 0) + 1;

  // Renders one frame as a string; the light direction rotates with `frame`.
  function draw() {
    const angle = (frame * 0.05);
    const lightX = Math.cos(angle) * 0.6;
    const lightY = -0.5;
    const lightZ = Math.sin(angle) * 0.4 + 0.6;
    const lightLen = Math.sqrt(lightX * lightX + lightY * lightY + lightZ * lightZ);

    const width = radius * 2 + 4;
    const lines = [];

    for (let y = -radius; y <= radius; y++) {
      let line = ' ';
      const rowY = y / radius;

      for (let x = -width / 2; x <= width / 2; x++) {
        const rowX = x / (width / 2);
        const distSq = rowX * rowX + rowY * rowY;

        if (distSq > 1.0) {
          // Outside the sphere: glow halo whose brightness pulses with frame.
          if (distSq < 1.4) {
            const glowIntensity = 1.0 - (distSq - 1.0) / 0.4;
            const pulse = 0.3 + Math.sin(frame * 0.15) * 0.1;
            const gr = Math.round(color[0] * glowIntensity * pulse);
            const gg = Math.round(color[1] * glowIntensity * pulse);
            const gb = Math.round(color[2] * glowIntensity * pulse);
            line += chalk.rgb(gr, gg, gb)('·');
          } else {
            line += ' ';
          }
          continue;
        }

        // Unit-sphere surface normal at this cell.
        const nz = Math.sqrt(1 - distSq);
        const nx = rowX;
        const ny = rowY;

        const lx = lightX / lightLen;
        const ly = lightY / lightLen;
        const lz = lightZ / lightLen;

        // Diffuse (Lambert) term plus a tight specular highlight along the
        // view axis (exponent 40).
        const diffuse = Math.max(0, nx * lx + ny * ly + nz * lz);
        const dot = nx * lx + ny * ly + nz * lz;
        const rz = 2 * dot * nz - lz;
        const specular = Math.pow(Math.max(0, rz), 40);

        const ambient = 0.08;
        const intensity = Math.min(1.0, ambient + diffuse * 0.7 + specular * 0.6);

        // Tint toward white on highlights; clamp each channel at 255.
        const r = Math.min(255, Math.round(color[0] * intensity + specular * 200));
        const g = Math.min(255, Math.round(color[1] * intensity + specular * 200));
        const b = Math.min(255, Math.round(color[2] * intensity + specular * 200));

        const charIdx = Math.min(SHADING_CHARS.length - 1, Math.round(intensity * (SHADING_CHARS.length - 1)));
        line += chalk.rgb(r, g, b)(SHADING_CHARS[charIdx] || ' ');
      }
      lines.push(line);
    }

    // Captions centered under the orb (each row starts with one leading space,
    // hence width + 2 in the centering math).
    if (label) {
      lines.push('');
      const pad = Math.max(0, Math.floor((width + 2 - label.length) / 2));
      lines.push(' '.repeat(pad) + chalk.bold.white(label));
    }
    if (statusText) {
      const pad = Math.max(0, Math.floor((width + 2 - statusText.length) / 2));
      lines.push(' '.repeat(pad) + chalk.gray(statusText));
    }

    return lines.join('\n');
  }

  let prevLineCount = 0;

  const interval = setInterval(() => {
    if (!running) return;
    frame++;

    // Move the cursor up over the previous frame and clear to end of screen
    // (CSI nA + CSI J) before drawing the next frame in place.
    if (prevLineCount > 0) {
      process.stdout.write(`\x1b[${prevLineCount}A\x1b[J`);
    }

    const output = draw();
    process.stdout.write(output + '\n');
    prevLineCount = output.split('\n').length;
  }, 1000 / fps);

  // Paint the first frame immediately instead of waiting one interval tick.
  const output = draw();
  process.stdout.write(output + '\n');
  prevLineCount = output.split('\n').length;

  return {
    // Retint/relabel the orb mid-animation; takes effect on the next frame.
    update(newOpts) {
      if (newOpts.color) color = newOpts.color;
      if (newOpts.label !== undefined) label = newOpts.label;
      if (newOpts.status !== undefined) statusText = newOpts.status;
    },
    // Halts the repaint loop; the last frame stays on screen.
    stop() {
      running = false;
      clearInterval(interval);
    },
  };
}
+
223
+ /**
224
+ * Prints a static orb once.
225
+ */
226
+ export function printOrb(opts = {}) {
227
+ console.log('');
228
+ console.log(renderOrb(opts));
229
+ console.log('');
230
+ }
231
+
232
+ export default { renderOrb, animateOrb, printOrb };
@@ -0,0 +1,185 @@
1
+ import chalk from 'chalk';
2
+ import fetch from 'node-fetch';
3
+
4
// Voice-activity detection and audio-format tuning constants.
const SILENCE_THRESHOLD = 0.02;    // peak amplitude (0..1) below which a chunk counts as silence
const SILENCE_DURATION_MS = 1200;  // silence length that ends an utterance
const MIN_SPEECH_MS = 400;         // utterances shorter than this are discarded
const SAMPLE_RATE = 16000;         // PCM sample rate in Hz
const BITS_PER_SAMPLE = 16;
const CHANNELS = 1;

/**
 * Builds the 44-byte canonical RIFF/WAVE header for a mono, 16 kHz,
 * 16-bit little-endian PCM payload of `dataLength` bytes.
 * @param {number} dataLength - Size of the PCM "data" chunk in bytes
 * @returns {Buffer} The 44-byte WAV header
 */
function createWavHeader(dataLength) {
  const bytesPerSample = BITS_PER_SAMPLE / 8;
  const blockAlign = CHANNELS * bytesPerSample;
  const byteRate = SAMPLE_RATE * blockAlign;

  const header = Buffer.alloc(44);
  header.write('RIFF', 0);
  header.writeUInt32LE(36 + dataLength, 4); // RIFF chunk size = file size - 8
  header.write('WAVE', 8);
  header.write('fmt ', 12);
  header.writeUInt32LE(16, 16);             // fmt chunk size (16 for PCM)
  header.writeUInt16LE(1, 20);              // audio format 1 = uncompressed PCM
  header.writeUInt16LE(CHANNELS, 22);
  header.writeUInt32LE(SAMPLE_RATE, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(BITS_PER_SAMPLE, 34);
  header.write('data', 36);
  header.writeUInt32LE(dataLength, 40);
  return header;
}
28
+
29
/**
 * Returns the peak absolute amplitude of a buffer of signed 16-bit
 * little-endian PCM samples, normalized to the range [0, 1].
 * @param {Buffer} buf - Raw PCM bytes
 * @returns {number} Peak amplitude (0 for an empty buffer)
 */
function getAmplitude(buf) {
  let peak = 0;
  // Step two bytes per sample; a trailing odd byte is ignored.
  for (let offset = 0; offset + 2 <= buf.length; offset += 2) {
    const normalized = Math.abs(buf.readInt16LE(offset)) / 32768;
    if (normalized > peak) {
      peak = normalized;
    }
  }
  return peak;
}
38
+
39
/**
 * Sends a complete WAV recording to the Gemini generateContent endpoint and
 * returns the transcribed text.
 * @param {Buffer} wavBuffer - Full WAV file (44-byte header + PCM data)
 * @param {string} apiKey - Gemini API key (sent as a URL query parameter)
 * @returns {Promise<string>} Trimmed transcript; '' when the model returns nothing
 * @throws {Error} When the HTTP response is not OK; uses the API's structured
 *   error message when one is available
 */
async function transcribeWithGemini(wavBuffer, apiKey) {
  const base64Audio = wavBuffer.toString('base64');
  const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=${encodeURIComponent(apiKey)}`;

  // One text part (the transcription instruction) plus the audio inline;
  // low temperature to keep the transcript literal rather than paraphrased.
  const body = {
    contents: [{
      parts: [
        { text: 'Transcribe the following audio exactly as spoken. Output ONLY the transcribed text, nothing else. If the audio is in Korean, output Korean. If in English, output English. If mixed, output mixed.' },
        { inlineData: { mimeType: 'audio/wav', data: base64Audio } },
      ],
    }],
    generationConfig: { maxOutputTokens: 512, temperature: 0.1 },
  };

  const res = await fetch(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  });

  if (!res.ok) {
    // Prefer the API's structured error message; fall back to the HTTP status.
    const err = await res.json().catch(() => ({}));
    throw new Error(err?.error?.message || `Transcription failed (${res.status})`);
  }

  // Take the first candidate's first text part; any other shape yields ''.
  const data = await res.json();
  const text = data?.candidates?.[0]?.content?.parts?.[0]?.text;
  return text?.trim() || '';
}
68
+
69
/**
 * Hands-free voice input: captures microphone audio via the optional `mic`
 * package, segments utterances with simple peak-amplitude voice-activity
 * detection, and transcribes each utterance with Gemini.
 *
 * Lifecycle: `await start({...callbacks})` (returns false if `mic` is not
 * installed), then `stop()` to tear the microphone down.
 */
export default class VoiceInput {
  constructor(apiKey) {
    this.apiKey = apiKey;          // Gemini API key used for transcription
    this.mic = null;               // lazily-imported `mic` factory function
    this.micInstance = null;       // active mic handle, set by _listen()
    this.running = false;          // gates the data/error stream handlers
    this.onTranscript = null;      // callback(text) per recognized utterance
    this.onListening = null;       // callback() once capture begins
    this.onSpeechStart = null;     // callback() when speech is first detected
    this.onSpeechEnd = null;       // callback() when trailing silence ends it
  }

  /**
   * Starts capturing audio. Returns false (after printing install hints) when
   * the optional `mic` module is missing; true once listening has begun.
   */
  async start({ onTranscript, onListening, onSpeechStart, onSpeechEnd }) {
    this.onTranscript = onTranscript;
    this.onListening = onListening;
    this.onSpeechStart = onSpeechStart;
    this.onSpeechEnd = onSpeechEnd;

    // `mic` is an optional dependency — import it dynamically so the CLI
    // still works in text-only mode when it isn't installed.
    let micModule;
    try {
      micModule = await import('mic');
      this.mic = micModule.default || micModule;
    } catch {
      console.log(chalk.yellow('\n ⚠ mic 모듈이 없습니다. 음성 모드를 사용하려면:'));
      console.log(chalk.gray(' npm install -g mic'));
      console.log(chalk.gray(' brew install sox (macOS)\n'));
      return false;
    }

    this.running = true;
    this._listen();
    return true;
  }

  /**
   * Opens the microphone stream and runs the VAD state machine:
   * amplitude above SILENCE_THRESHOLD starts/continues an utterance;
   * SILENCE_DURATION_MS of quiet ends it and hands the chunks to
   * _processAudio() (if the utterance lasted at least MIN_SPEECH_MS).
   */
  _listen() {
    const micInstance = this.mic({
      rate: String(SAMPLE_RATE),
      channels: String(CHANNELS),
      bitwidth: String(BITS_PER_SAMPLE),
      encoding: 'signed-integer',
      endian: 'little',
      device: 'default',
    });

    this.micInstance = micInstance;
    const stream = micInstance.getAudioStream();

    let speechChunks = [];    // PCM chunks of the utterance in progress
    let isSpeaking = false;   // inside an utterance?
    let silenceStart = null;  // timestamp when the current quiet run began
    let speechStart = null;   // timestamp when the utterance began

    this.onListening?.();

    stream.on('data', (buf) => {
      if (!this.running) return;
      const amp = getAmplitude(buf);

      if (amp > SILENCE_THRESHOLD) {
        if (!isSpeaking) {
          // Transition silence -> speech: start a fresh utterance.
          isSpeaking = true;
          speechStart = Date.now();
          speechChunks = [];
          this.onSpeechStart?.();
        }
        silenceStart = null;
        // Copy the chunk — the mic stream may reuse its buffer.
        speechChunks.push(Buffer.from(buf));
      } else if (isSpeaking) {
        // Quiet chunk inside an utterance: keep it (natural pauses) and
        // start/extend the silence timer.
        speechChunks.push(Buffer.from(buf));
        if (!silenceStart) silenceStart = Date.now();

        if (Date.now() - silenceStart >= SILENCE_DURATION_MS) {
          // Sustained silence: the utterance is over.
          const duration = Date.now() - speechStart;
          isSpeaking = false;
          silenceStart = null;
          this.onSpeechEnd?.();

          // Drop too-short blips (door slams, key clicks).
          if (duration >= MIN_SPEECH_MS && speechChunks.length > 0) {
            // Fire-and-forget; _processAudio catches its own errors.
            this._processAudio(speechChunks);
          }
          speechChunks = [];
        }
      }
    });

    stream.on('error', (err) => {
      if (this.running) {
        console.log(chalk.red(` 마이크 오류: ${err.message}`));
      }
    });

    micInstance.start();
  }

  /**
   * Wraps the captured PCM chunks into a WAV file, transcribes them, and
   * forwards non-empty transcripts to onTranscript. Transcription errors are
   * reported to the console rather than thrown (callers don't await this).
   */
  async _processAudio(chunks) {
    const pcmData = Buffer.concat(chunks);
    const wavHeader = createWavHeader(pcmData.length);
    const wavBuffer = Buffer.concat([wavHeader, pcmData]);

    try {
      const text = await transcribeWithGemini(wavBuffer, this.apiKey);
      if (text) {
        this.onTranscript?.(text);
      }
    } catch (err) {
      console.log(chalk.red(` 음성 인식 오류: ${err.message}`));
    }
  }

  /** Stops capture and releases the microphone; safe to call repeatedly. */
  stop() {
    this.running = false;
    if (this.micInstance) {
      // mic.stop() can throw if the underlying process already exited.
      try { this.micInstance.stop(); } catch {}
      this.micInstance = null;
    }
  }
}