uwonbot 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/uwonbot.js CHANGED
@@ -13,7 +13,7 @@ showBanner();
 program
   .name('uwonbot')
   .description('Uwonbot AI Assistant — Your AI controls your computer')
-  .version('1.1.0');
+  .version('1.1.1');
 
 program
   .command('login')
@@ -44,19 +44,21 @@ program
   .command('chat [assistantName]')
   .description('Start chatting with an AI assistant')
   .option('-n, --name <name>', 'Assistant name to launch directly')
+  .option('-v, --voice', 'Enable hands-free voice input mode')
   .action(async (assistantName, opts) => {
     const config = getConfig();
     if (!config.get('uid')) {
       console.log('\n ⚠️ Please log in first: uwonbot login\n');
       process.exit(1);
     }
+    const chatOpts = { voice: opts.voice || false };
     const targetName = opts.name || assistantName;
     if (targetName) {
-      await startChat(targetName);
+      await startChat(targetName, null, null, chatOpts);
     } else {
       const assistant = await selectAssistant();
       if (assistant) {
-        await startChat(null, assistant);
+        await startChat(null, assistant, null, chatOpts);
       } else {
         const defaultBot = {
           name: 'Uwonbot',
@@ -66,7 +68,7 @@ program
           voiceStyle: 'male',
           isDefaultBot: true,
         };
-        await startChat(null, defaultBot);
+        await startChat(null, defaultBot, null, chatOpts);
       }
     }
   });
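Note on the flag added above: with commander, a boolean option declared as '-v, --voice' surfaces as opts.voice inside the action handler, which is why the new chatOpts object falls back to false when the flag is omitted. A minimal sketch of that flow, separate from the package's own code (the snippet and names below are illustrative, not part of uwonbot):

import { Command } from 'commander';

const program = new Command();

program
  .command('chat [assistantName]')
  .option('-v, --voice', 'Enable hands-free voice input mode')
  .action((assistantName, opts) => {
    // `chat MyBot --voice` -> opts.voice === true
    // `chat MyBot`         -> opts.voice === undefined, so fall back to false
    const chatOpts = { voice: opts.voice || false };
    console.log(assistantName, chatOpts);
  });

program.parse(process.argv);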
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "uwonbot",
-  "version": "1.1.0",
+  "version": "1.1.1",
   "description": "Uwonbot AI Assistant CLI — Your AI controls your computer",
   "main": "src/index.js",
   "bin": {
package/src/chat.js CHANGED
@@ -8,6 +8,7 @@ import { getConfig } from './config.js';
 import { sendToBrain } from './brain.js';
 import { showMiniBar } from './banner.js';
 import { printOrb, animateOrb } from './terminalOrb.js';
+import VoiceInput from './voiceInput.js';
 import {
   hasRegisteredDevices,
   createCLISession,
@@ -110,7 +111,7 @@ async function requireBiometricAuth(uid) {
   return false;
 }
 
-export async function startChat(assistantName, assistant, initialCommand) {
+export async function startChat(assistantName, assistant, initialCommand, options = {}) {
   const config = getConfig();
   const uid = config.get('uid');
   if (!uid) {
@@ -155,24 +156,72 @@ export async function startChat(assistantName, assistant, initialCommand) {
 
   await bootSequence(assistant, brainLabel, brainColor);
 
+  const voiceMode = options.voice || false;
+
   console.log('');
   console.log(chalk.gray(' ─────────────────────────────────────────'));
+  if (voiceMode) {
+    console.log(chalk.cyan(' 🎙 음성 모드 활성화 — 말하면 자동 인식됩니다'));
+    console.log(chalk.gray(' 텍스트 입력도 가능합니다'));
+  }
   console.log(chalk.gray(' "exit" 종료 | "clear" 대화 초기화'));
   console.log(chalk.gray(' ─────────────────────────────────────────'));
   console.log('');
 
   const messages = [];
-
-  if (initialCommand) {
-    await processMessage(initialCommand, messages, assistant, brainColor);
-  }
+  let voiceInput = null;
+  let processingVoice = false;
 
   const rl = readline.createInterface({
     input: process.stdin,
     output: process.stdout,
-    prompt: chalk.hex('#2563eb')(' You > '),
+    prompt: chalk.hex(brainColor)(' You > '),
   });
 
+  if (voiceMode) {
+    const apiKey = assistant.apiKey || process.env.GEMINI_API_KEY || '';
+    if (!apiKey) {
+      console.log(chalk.yellow(' ⚠ 음성 모드에는 API 키가 필요합니다 (음성→텍스트 변환에 Gemini 사용)'));
+      console.log(chalk.gray(' 텍스트 입력으로 진행합니다.\n'));
+    } else {
+      voiceInput = new VoiceInput(apiKey);
+      const ok = await voiceInput.start({
+        onListening: () => {
+          if (!processingVoice) {
+            process.stdout.write(chalk.gray(' 🎙 듣고 있습니다...') + '\r');
+          }
+        },
+        onSpeechStart: () => {
+          process.stdout.write('\x1b[2K\r');
+          process.stdout.write(chalk.cyan(' 🔴 음성 감지 중...') + '\r');
+        },
+        onSpeechEnd: () => {
+          process.stdout.write('\x1b[2K\r');
+          process.stdout.write(chalk.gray(' ⏳ 음성 인식 중...') + '\r');
+        },
+        onTranscript: async (text) => {
+          if (processingVoice) return;
+          processingVoice = true;
+          process.stdout.write('\x1b[2K\r');
+          console.log(chalk.hex(brainColor)(` You (voice) > `) + chalk.white(text));
+          rl.pause();
+          await processMessage(text, messages, assistant, brainColor);
+          processingVoice = false;
+          rl.resume();
+          rl.prompt();
+        },
+      });
+      if (!ok) {
+        voiceInput = null;
+        console.log(chalk.gray(' 텍스트 입력으로 진행합니다.\n'));
+      }
+    }
+  }
+
+  if (initialCommand) {
+    await processMessage(initialCommand, messages, assistant, brainColor);
+  }
+
   rl.prompt();
 
   rl.on('line', async (line) => {
@@ -183,7 +232,8 @@ export async function startChat(assistantName, assistant, initialCommand) {
     }
 
     if (input.toLowerCase() === 'exit' || input.toLowerCase() === 'quit') {
-      console.log(chalk.gray('\n Goodbye! 👋\n'));
+      if (voiceInput) voiceInput.stop();
+      console.log(chalk.gray('\n Goodbye!\n'));
       rl.close();
       process.exit(0);
     }
@@ -195,17 +245,42 @@ export async function startChat(assistantName, assistant, initialCommand) {
       return;
     }
 
-    if (input.toLowerCase() === 'tools') {
-      console.log('');
-      console.log(chalk.white.bold(' Available Tools:'));
-      console.log(chalk.gray(' ────────────────'));
-      console.log(' 📂 read_file, write_file, list_directory, create_directory');
-      console.log(' 🗑️ delete_file, move_file, search_files');
-      console.log(' ⚙️ run_shell, install_package');
-      console.log(' 🌐 open_url, open_application');
-      console.log(' 📋 get_clipboard, set_clipboard');
-      console.log(' 💻 system_info');
-      console.log('');
+    if (input.toLowerCase() === 'voice on') {
+      if (!voiceInput) {
+        const apiKey = assistant.apiKey || process.env.GEMINI_API_KEY || '';
+        if (apiKey) {
+          voiceInput = new VoiceInput(apiKey);
+          await voiceInput.start({
+            onListening: () => {},
+            onSpeechStart: () => process.stdout.write(chalk.cyan('\r 🔴 음성 감지 중...') + '\r'),
+            onSpeechEnd: () => process.stdout.write(chalk.gray('\r 인식 중...') + '\r'),
+            onTranscript: async (text) => {
+              if (processingVoice) return;
+              processingVoice = true;
+              process.stdout.write('\x1b[2K\r');
+              console.log(chalk.hex(brainColor)(` You (voice) > `) + chalk.white(text));
+              rl.pause();
+              await processMessage(text, messages, assistant, brainColor);
+              processingVoice = false;
+              rl.resume();
+              rl.prompt();
+            },
+          });
+          console.log(chalk.green(' 🎙 음성 모드 활성화'));
+        } else {
+          console.log(chalk.yellow(' ⚠ 음성 모드에는 API 키가 필요합니다.'));
+        }
+      }
+      rl.prompt();
+      return;
+    }
+
+    if (input.toLowerCase() === 'voice off') {
+      if (voiceInput) {
+        voiceInput.stop();
+        voiceInput = null;
+        console.log(chalk.gray(' 🔇 음성 모드 비활성화'));
+      }
       rl.prompt();
       return;
     }
@@ -217,6 +292,7 @@ export async function startChat(assistantName, assistant, initialCommand) {
   });
 
   rl.on('close', () => {
+    if (voiceInput) voiceInput.stop();
     process.exit(0);
   });
 }
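The voice callbacks added to chat.js pause the readline interface before awaiting processMessage and resume it afterwards, so a transcript being handled does not interleave with typed input. A minimal sketch of that pause/resume pattern with Node's readline (handleTranscript below is an illustrative stand-in, not part of the package):

import readline from 'node:readline';

const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout,
  prompt: 'You > ',
});

// Illustrative stand-in for processMessage() in chat.js.
async function handleTranscript(text) {
  rl.pause();                                               // stop consuming stdin while the reply is produced
  await new Promise((resolve) => setTimeout(resolve, 500)); // pretend to call the assistant
  console.log(`reply to: ${text}`);
  rl.resume();                                              // hand input back to the user
  rl.prompt();
}

rl.prompt();
rl.on('line', (line) => handleTranscript(line.trim()));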
package/src/voiceInput.js ADDED
@@ -0,0 +1,185 @@
+import chalk from 'chalk';
+import fetch from 'node-fetch';
+
+const SILENCE_THRESHOLD = 0.02;
+const SILENCE_DURATION_MS = 1200;
+const MIN_SPEECH_MS = 400;
+const SAMPLE_RATE = 16000;
+const BITS_PER_SAMPLE = 16;
+const CHANNELS = 1;
+
+function createWavHeader(dataLength) {
+  const header = Buffer.alloc(44);
+  header.write('RIFF', 0);
+  header.writeUInt32LE(36 + dataLength, 4);
+  header.write('WAVE', 8);
+  header.write('fmt ', 12);
+  header.writeUInt32LE(16, 16);
+  header.writeUInt16LE(1, 20);
+  header.writeUInt16LE(CHANNELS, 22);
+  header.writeUInt32LE(SAMPLE_RATE, 24);
+  header.writeUInt32LE(SAMPLE_RATE * CHANNELS * (BITS_PER_SAMPLE / 8), 28);
+  header.writeUInt16LE(CHANNELS * (BITS_PER_SAMPLE / 8), 32);
+  header.writeUInt16LE(BITS_PER_SAMPLE, 34);
+  header.write('data', 36);
+  header.writeUInt32LE(dataLength, 40);
+  return header;
+}
+
+function getAmplitude(buf) {
+  let max = 0;
+  for (let i = 0; i < buf.length - 1; i += 2) {
+    const sample = buf.readInt16LE(i);
+    const abs = Math.abs(sample) / 32768;
+    if (abs > max) max = abs;
+  }
+  return max;
+}
+
+async function transcribeWithGemini(wavBuffer, apiKey) {
+  const base64Audio = wavBuffer.toString('base64');
+  const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=${encodeURIComponent(apiKey)}`;
+
+  const body = {
+    contents: [{
+      parts: [
+        { text: 'Transcribe the following audio exactly as spoken. Output ONLY the transcribed text, nothing else. If the audio is in Korean, output Korean. If in English, output English. If mixed, output mixed.' },
+        { inlineData: { mimeType: 'audio/wav', data: base64Audio } },
+      ],
+    }],
+    generationConfig: { maxOutputTokens: 512, temperature: 0.1 },
+  };
+
+  const res = await fetch(url, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(body),
+  });
+
+  if (!res.ok) {
+    const err = await res.json().catch(() => ({}));
+    throw new Error(err?.error?.message || `Transcription failed (${res.status})`);
+  }
+
+  const data = await res.json();
+  const text = data?.candidates?.[0]?.content?.parts?.[0]?.text;
+  return text?.trim() || '';
+}
+
+export default class VoiceInput {
+  constructor(apiKey) {
+    this.apiKey = apiKey;
+    this.mic = null;
+    this.micInstance = null;
+    this.running = false;
+    this.onTranscript = null;
+    this.onListening = null;
+    this.onSpeechStart = null;
+    this.onSpeechEnd = null;
+  }
+
+  async start({ onTranscript, onListening, onSpeechStart, onSpeechEnd }) {
+    this.onTranscript = onTranscript;
+    this.onListening = onListening;
+    this.onSpeechStart = onSpeechStart;
+    this.onSpeechEnd = onSpeechEnd;
+
+    let micModule;
+    try {
+      micModule = await import('mic');
+      this.mic = micModule.default || micModule;
+    } catch {
+      console.log(chalk.yellow('\n ⚠ mic 모듈이 없습니다. 음성 모드를 사용하려면:'));
+      console.log(chalk.gray(' npm install -g mic'));
+      console.log(chalk.gray(' brew install sox (macOS)\n'));
+      return false;
+    }
+
+    this.running = true;
+    this._listen();
+    return true;
+  }
+
+  _listen() {
+    const micInstance = this.mic({
+      rate: String(SAMPLE_RATE),
+      channels: String(CHANNELS),
+      bitwidth: String(BITS_PER_SAMPLE),
+      encoding: 'signed-integer',
+      endian: 'little',
+      device: 'default',
+    });
+
+    this.micInstance = micInstance;
+    const stream = micInstance.getAudioStream();
+
+    let speechChunks = [];
+    let isSpeaking = false;
+    let silenceStart = null;
+    let speechStart = null;
+
+    this.onListening?.();
+
+    stream.on('data', (buf) => {
+      if (!this.running) return;
+      const amp = getAmplitude(buf);
+
+      if (amp > SILENCE_THRESHOLD) {
+        if (!isSpeaking) {
+          isSpeaking = true;
+          speechStart = Date.now();
+          speechChunks = [];
+          this.onSpeechStart?.();
+        }
+        silenceStart = null;
+        speechChunks.push(Buffer.from(buf));
+      } else if (isSpeaking) {
+        speechChunks.push(Buffer.from(buf));
+        if (!silenceStart) silenceStart = Date.now();
+
+        if (Date.now() - silenceStart >= SILENCE_DURATION_MS) {
+          const duration = Date.now() - speechStart;
+          isSpeaking = false;
+          silenceStart = null;
+          this.onSpeechEnd?.();
+
+          if (duration >= MIN_SPEECH_MS && speechChunks.length > 0) {
+            this._processAudio(speechChunks);
+          }
+          speechChunks = [];
+        }
+      }
+    });
+
+    stream.on('error', (err) => {
+      if (this.running) {
+        console.log(chalk.red(` 마이크 오류: ${err.message}`));
+      }
+    });
+
+    micInstance.start();
+  }
+
+  async _processAudio(chunks) {
+    const pcmData = Buffer.concat(chunks);
+    const wavHeader = createWavHeader(pcmData.length);
+    const wavBuffer = Buffer.concat([wavHeader, pcmData]);
+
+    try {
+      const text = await transcribeWithGemini(wavBuffer, this.apiKey);
+      if (text) {
+        this.onTranscript?.(text);
+      }
+    } catch (err) {
+      console.log(chalk.red(` 음성 인식 오류: ${err.message}`));
+    }
+  }
+
+  stop() {
+    this.running = false;
+    if (this.micInstance) {
+      try { this.micInstance.stop(); } catch {}
+      this.micInstance = null;
+    }
+  }
+}
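A minimal sketch of driving the new VoiceInput class on its own, based only on the API shown above (constructor(apiKey), start({ onListening, onSpeechStart, onSpeechEnd, onTranscript }) returning a boolean, and stop()). It assumes the optional mic module, sox, and a Gemini API key are available; the import path and callbacks here are illustrative, and chat.js above is the package's actual integration:

import VoiceInput from './src/voiceInput.js';

const voice = new VoiceInput(process.env.GEMINI_API_KEY || '');

// start() resolves to false when the optional `mic` module cannot be loaded.
const ok = await voice.start({
  onListening: () => console.log('listening...'),
  onSpeechStart: () => console.log('speech detected'),
  onSpeechEnd: () => console.log('transcribing...'),
  onTranscript: (text) => console.log('heard:', text),
});

if (!ok) process.exit(1);

// Capture speech for 30 seconds, then release the microphone.
setTimeout(() => voice.stop(), 30_000);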