uwonbot 1.2.9 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "uwonbot",
3
- "version": "1.2.9",
3
+ "version": "1.4.0",
4
4
  "description": "Uwonbot AI Assistant CLI — Your AI controls your computer",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -15,7 +15,13 @@
15
15
  "start": "node bin/uwonbot.js",
16
16
  "dev": "node bin/uwonbot.js"
17
17
  },
18
- "keywords": ["ai", "assistant", "automation", "cli", "computer-use"],
18
+ "keywords": [
19
+ "ai",
20
+ "assistant",
21
+ "automation",
22
+ "cli",
23
+ "computer-use"
24
+ ],
19
25
  "author": "Uwonbot",
20
26
  "license": "MIT",
21
27
  "repository": {
package/src/agent.js CHANGED
@@ -5,7 +5,7 @@ import { promisify } from 'util';
5
5
  import chalk from 'chalk';
6
6
  import { getConfig } from './config.js';
7
7
  import ClapListener from './clapListener.js';
8
- import { fetchAssistants, setIdToken } from './firebase-client.js';
8
+ import { fetchAssistants, setIdToken, setRefreshToken } from './firebase-client.js';
9
9
 
10
10
  const execAsync = promisify(exec);
11
11
  const WEB_APP_URL = 'https://chartapp-653e1.web.app';
@@ -193,16 +193,20 @@ async function mouseClick(button = 'left', double = false) {
193
193
  const { x, y } = _lastKnownPos;
194
194
  if (button === 'right') {
195
195
  await cgMouseEvent('$.kCGEventRightMouseDown', x, y, '$.kCGMouseButtonRight');
196
+ await new Promise(r => setTimeout(r, 20));
196
197
  await cgMouseEvent('$.kCGEventRightMouseUp', x, y, '$.kCGMouseButtonRight');
197
198
  } else {
198
199
  await cgMouseEvent('$.kCGEventLeftMouseDown', x, y, 0);
200
+ await new Promise(r => setTimeout(r, 20));
199
201
  await cgMouseEvent('$.kCGEventLeftMouseUp', x, y, 0);
200
202
  if (double) {
201
- await new Promise(r => setTimeout(r, 50));
203
+ await new Promise(r => setTimeout(r, 60));
202
204
  await cgMouseEvent('$.kCGEventLeftMouseDown', x, y, 0);
205
+ await new Promise(r => setTimeout(r, 20));
203
206
  await cgMouseEvent('$.kCGEventLeftMouseUp', x, y, 0);
204
207
  }
205
208
  }
209
+ console.log(chalk.green(` [click] CG ${button}${double ? ' (double)' : ''} at (${x},${y})`));
206
210
  } catch {}
207
211
  }
208
212
  }
@@ -505,6 +509,8 @@ async function openTerminalWithChat(assistantName) {
505
509
  }
506
510
 
507
511
  let _logThrottle = 0;
512
+ let _clapListener = null;
513
+
508
514
  async function handleCommand(msg) {
509
515
  try {
510
516
  const cmd = JSON.parse(msg);
@@ -517,6 +523,10 @@ async function handleCommand(msg) {
517
523
  await moveMouse(cmd.x, cmd.y);
518
524
  return { ok: true };
519
525
  case 'mouse_click':
526
+ if (cmd.x !== undefined && cmd.y !== undefined) {
527
+ await moveMouse(cmd.x, cmd.y);
528
+ await new Promise(r => setTimeout(r, 30));
529
+ }
520
530
  await mouseClick(cmd.button || 'left', cmd.double || false);
521
531
  return { ok: true };
522
532
  case 'mouse_down':
@@ -579,6 +589,71 @@ async function handleCommand(msg) {
579
589
  return { ok: true };
580
590
  } catch (e) { return { ok: false, error: e.message }; }
581
591
  }
592
+ case 'open_chrome_window': {
593
+ const { url, w, h, x, y } = cmd;
594
+ if (!url) return { ok: false, error: 'No URL' };
595
+ const width = w || 380, height = h || 280;
596
+ const left = x ?? 16, top = y ?? 200;
597
+ try {
598
+ if (platform === 'darwin') {
599
+ const script = `
600
+ tell application "Google Chrome"
601
+ activate
602
+ set newWin to make new window
603
+ set URL of active tab of newWin to "${url.replace(/"/g, '\\"')}"
604
+ set bounds of newWin to {${left}, ${top}, ${left + width}, ${top + height}}
605
+ end tell`;
606
+ await execAsync(`osascript -e '${script.replace(/\n/g, ' ')}'`);
607
+ } else {
608
+ await execAsync(`open -na "Google Chrome" --args --new-window --window-size=${width},${height} --window-position=${left},${top} "${url}"`);
609
+ }
610
+ return { ok: true };
611
+ } catch (e) { return { ok: false, error: e.message }; }
612
+ }
613
+ case 'resize_chrome_by_url': {
614
+ const { urlPattern } = cmd;
615
+ const rw = cmd.w || 300, rh = cmd.h || 300;
616
+ const rx = cmd.x ?? 0, ry = cmd.y ?? 0;
617
+ if (!urlPattern) return { ok: false, error: 'No urlPattern' };
618
+ try {
619
+ if (platform === 'darwin') {
620
+ const escaped = urlPattern.replace(/"/g, '\\"');
621
+ const lines = [
622
+ 'tell application "Google Chrome"',
623
+ `set targetUrl to "${escaped}"`,
624
+ 'set found to "not_found"',
625
+ 'repeat with cw in windows',
626
+ 'repeat with t in tabs of cw',
627
+ `if URL of t contains targetUrl then`,
628
+ `set bounds of cw to {${rx}, ${ry}, ${rx + rw}, ${ry + rh}}`,
629
+ 'set found to "done"',
630
+ 'end if',
631
+ 'end repeat',
632
+ 'end repeat',
633
+ 'end tell',
634
+ 'found',
635
+ ];
636
+ const script = lines.join('\n');
637
+ const { stdout } = await execAsync(`osascript -e '${script.replace(/'/g, "'\\''")}'`);
638
+ const result = stdout.trim();
639
+ console.log(chalk.gray(` resize "${urlPattern}" → ${result} (${rw}x${rh} at ${rx},${ry})`));
640
+ return { ok: result === 'done' };
641
+ }
642
+ return { ok: false, error: 'Not supported on this platform yet' };
643
+ } catch (e) {
644
+ console.log(chalk.red(` resize error: ${e.message}`));
645
+ return { ok: false, error: e.message };
646
+ }
647
+ }
648
+ case 'mute_clap': {
649
+ const ms = cmd.duration || 10000;
650
+ if (_clapListener) _clapListener.mute(ms);
651
+ return { ok: true };
652
+ }
653
+ case 'unmute_clap': {
654
+ if (_clapListener) _clapListener.unmute();
655
+ return { ok: true };
656
+ }
582
657
  case 'ping':
583
658
  return { ok: true, pong: true };
584
659
  default:
@@ -590,18 +665,57 @@ async function handleCommand(msg) {
590
665
  }
591
666
 
592
667
  async function openWebAssistant(assistantId) {
593
- const chatUrl = `${WEB_APP_URL}/assistant/live?id=${assistantId}`;
594
- const camUrl = `${WEB_APP_URL}/camera-control`;
668
+ const config = getConfig();
669
+ const rt = encodeURIComponent(config.get('refreshToken') || '');
670
+ const rtParam = rt ? `&rt=${rt}` : '';
671
+ const orbUrl = `${WEB_APP_URL}/assistant/orb?id=${assistantId}${rtParam}`;
672
+ const chatUrl = `${WEB_APP_URL}/assistant/live?id=${assistantId}&autostart=1${rtParam}`;
673
+ const camUrl = `${WEB_APP_URL}/camera-control?auto=1${rtParam}`;
674
+ const screen = await getScreenSize();
675
+
676
+ const orbW = 440, orbH = 440;
677
+ const chatW = 320, chatH = 420;
678
+ const camW = 400, camH = 340;
679
+
680
+ const orbX = 20, orbY = 60;
681
+ const chatX = screen.width - chatW - 20, chatY = 40;
682
+ const camX = Math.round((screen.width - camW) / 2), camY = 40;
683
+
595
684
  try {
596
685
  if (platform === 'darwin') {
597
- await execAsync(`open -na "Google Chrome" --args --new-window --window-size=320,520 --window-position=1000,100 "${chatUrl}" 2>/dev/null || open "${chatUrl}"`);
598
- await new Promise(r => setTimeout(r, 800));
599
- await execAsync(`open -na "Google Chrome" --args --new-window --window-size=480,400 --window-position=480,150 "${camUrl}" 2>/dev/null || open "${camUrl}"`);
686
+ console.log(chalk.cyan(` Opening 3 windows (screen: ${screen.width}x${screen.height})`));
687
+ console.log(chalk.gray(` Orb: ${orbW}x${orbH} at (${orbX},${orbY})`));
688
+ console.log(chalk.gray(` Chat: ${chatW}x${chatH} at (${chatX},${chatY})`));
689
+ console.log(chalk.gray(` Cam: ${camW}x${camH} at (${camX},${camY})`));
690
+
691
+ const script = [
692
+ 'tell application "Google Chrome"',
693
+ 'activate',
694
+ 'set orbWin to make new window',
695
+ `set URL of active tab of orbWin to "${orbUrl.replace(/"/g, '\\"')}"`,
696
+ `set bounds of orbWin to {${orbX}, ${orbY}, ${orbX + orbW}, ${orbY + orbH}}`,
697
+ 'delay 0.2',
698
+ 'set chatWin to make new window',
699
+ `set URL of active tab of chatWin to "${chatUrl.replace(/"/g, '\\"')}"`,
700
+ `set bounds of chatWin to {${chatX}, ${chatY}, ${chatX + chatW}, ${chatY + chatH}}`,
701
+ 'delay 0.2',
702
+ 'set camWin to make new window',
703
+ `set URL of active tab of camWin to "${camUrl.replace(/"/g, '\\"')}"`,
704
+ `set bounds of camWin to {${camX}, ${camY}, ${camX + camW}, ${camY + camH}}`,
705
+ 'end tell',
706
+ ].join('\n');
707
+
708
+ await execAsync(`osascript -e '${script.replace(/'/g, "'\\''")}'`, { timeout: 15000 });
709
+ console.log(chalk.green(' ✓ All 3 windows opened'));
600
710
  } else if (platform === 'win32') {
601
- await execAsync(`start chrome --new-window --window-size=320,520 --window-position=1000,100 "${chatUrl}" 2>nul || start "" "${chatUrl}"`);
711
+ await execAsync(`start chrome --new-window --window-size=${orbW},${orbH} --window-position=${orbX},${orbY} "${orbUrl}" 2>nul || start "" "${orbUrl}"`);
712
+ await new Promise(r => setTimeout(r, 800));
713
+ await execAsync(`start chrome --new-window --window-size=${chatW},${chatH} --window-position=${chatX},${chatY} "${chatUrl}" 2>nul || start "" "${chatUrl}"`);
602
714
  await new Promise(r => setTimeout(r, 800));
603
- await execAsync(`start chrome --new-window --window-size=480,400 --window-position=480,150 "${camUrl}" 2>nul || start "" "${camUrl}"`);
715
+ await execAsync(`start chrome --new-window --window-size=${camW},${camH} --window-position=${camX},${camY} "${camUrl}" 2>nul || start "" "${camUrl}"`);
604
716
  } else {
717
+ await execAsync(`google-chrome --new-window "${orbUrl}" 2>/dev/null || xdg-open "${orbUrl}"`);
718
+ await new Promise(r => setTimeout(r, 800));
605
719
  await execAsync(`google-chrome --new-window "${chatUrl}" 2>/dev/null || xdg-open "${chatUrl}"`);
606
720
  await new Promise(r => setTimeout(r, 800));
607
721
  await execAsync(`google-chrome --new-window "${camUrl}" 2>/dev/null || xdg-open "${camUrl}"`);
@@ -692,6 +806,8 @@ export async function startAgent(port = 9876, options = {}) {
692
806
  }
693
807
 
694
808
  if (idToken) setIdToken(idToken);
809
+ const refreshToken = config.get('refreshToken');
810
+ if (refreshToken) setRefreshToken(refreshToken);
695
811
 
696
812
  console.log(chalk.gray(` User: ${email}`));
697
813
  console.log(chalk.gray(` Port: ${port}`));
@@ -708,27 +824,57 @@ export async function startAgent(port = 9876, options = {}) {
708
824
  console.log('');
709
825
 
710
826
  let _assistantOpen = false;
827
+ let _assistantOpenTimer = null;
711
828
  let _clapCooldown = false;
712
829
 
830
+ async function isAssistantWindowOpen() {
831
+ if (platform !== 'darwin') return false;
832
+ try {
833
+ const { stdout } = await execAsync(`osascript -e '
834
+ tell application "Google Chrome"
835
+ repeat with cw in windows
836
+ repeat with t in tabs of cw
837
+ if URL of t contains "assistant/live" or URL of t contains "assistant/orb" then
838
+ return "yes"
839
+ end if
840
+ end repeat
841
+ end repeat
842
+ return "no"
843
+ end tell'`, { timeout: 3000 });
844
+ return stdout.trim() === 'yes';
845
+ } catch { return false; }
846
+ }
847
+
713
848
  if (!options.noMic) {
714
849
  await ensureSox();
715
- const clapListener = new ClapListener(async () => {
716
- if (_assistantOpen) {
850
+ _clapListener = new ClapListener(async () => {
851
+ if (_assistantOpen || await isAssistantWindowOpen()) {
717
852
  console.log(chalk.gray(' 👏 박수 감지 — 이미 비서가 활성 상태입니다.'));
853
+ _assistantOpen = true;
718
854
  return;
719
855
  }
720
856
  if (_clapCooldown) return;
721
857
  _clapCooldown = true;
722
- setTimeout(() => { _clapCooldown = false; }, 5000);
858
+ setTimeout(() => { _clapCooldown = false; }, 15000);
723
859
 
724
860
  console.log(chalk.bold.cyan(' 👏 박수 감지! 비서 활성화 중...'));
725
861
  _assistantOpen = true;
862
+ clearTimeout(_assistantOpenTimer);
863
+ _assistantOpenTimer = setTimeout(() => { _assistantOpen = false; }, 30000);
726
864
 
727
865
  if (userAssistants.length === 0) {
728
- console.log(chalk.gray(' → 기본 Uwonbot 웹 실행'));
866
+ console.log(chalk.gray(' → 기본 Uwonbot 웹 실행 (작은 창)'));
867
+ const screen = await getScreenSize();
868
+ const W = 320, H = 220;
729
869
  try {
730
870
  if (platform === 'darwin') {
731
- await execAsync(`open "${WEB_APP_URL}/assistant"`);
871
+ const script = `tell application "Google Chrome" to activate
872
+ tell application "Google Chrome"
873
+ set newWin to make new window
874
+ set URL of active tab of newWin to "${WEB_APP_URL}/assistant"
875
+ set bounds of newWin to {20, 40, ${20 + W}, ${40 + H}}
876
+ end tell`;
877
+ await execAsync(`osascript -e '${script.replace(/\n/g, ' ')}'`);
732
878
  } else if (platform === 'win32') {
733
879
  await execAsync(`start "" "${WEB_APP_URL}/assistant"`);
734
880
  } else {
@@ -742,7 +888,7 @@ export async function startAgent(port = 9876, options = {}) {
742
888
  console.log(chalk.green(` → ${target.name} 웹 실행`));
743
889
  await openWebAssistant(target.id);
744
890
  });
745
- await clapListener.start();
891
+ await _clapListener.start();
746
892
  console.log('');
747
893
  }
748
894
 
@@ -764,6 +910,7 @@ export async function startAgent(port = 9876, options = {}) {
764
910
  ws.on('close', () => {
765
911
  console.log(chalk.yellow(' ○ Client disconnected'));
766
912
  _assistantOpen = false;
913
+ clearTimeout(_assistantOpenTimer);
767
914
  });
768
915
 
769
916
  ws.send(JSON.stringify({ type: 'welcome', agent: 'uwonbot', version: '1.1.2', uid }));
package/src/brain.js CHANGED
@@ -31,7 +31,7 @@ IMPORTANT RULES:
31
31
  }
32
32
 
33
33
  async function callGemini(apiKey, model, messages, systemPrompt) {
34
- const modelName = model || 'gemini-2.0-flash';
34
+ const modelName = model || 'gemini-2.5-flash-lite';
35
35
  const tools = getToolsForProvider('gemini');
36
36
 
37
37
  const contents = messages.map(m => ({
package/src/clapListener.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import chalk from 'chalk';
2
2
 
3
- const DEFAULT_THRESHOLD = 0.4;
3
+ const DEFAULT_THRESHOLD = 0.35;
4
4
  const CLAP_MIN_INTERVAL = 100;
5
5
  const CLAP_MAX_INTERVAL = 800;
6
6
  const REQUIRED_CLAPS = 2;
@@ -17,6 +17,19 @@ export default class ClapListener {
17
17
  this.resetTimer = null;
18
18
  this.mic = null;
19
19
  this.running = false;
20
+ this._muted = false;
21
+ this._muteTimer = null;
22
+ }
23
+
24
+ mute(durationMs = 10000) {
25
+ this._muted = true;
26
+ clearTimeout(this._muteTimer);
27
+ this._muteTimer = setTimeout(() => { this._muted = false; }, durationMs);
28
+ }
29
+
30
+ unmute() {
31
+ this._muted = false;
32
+ clearTimeout(this._muteTimer);
20
33
  }
21
34
 
22
35
  async start() {
@@ -80,6 +93,8 @@ export default class ClapListener {
80
93
  }
81
94
 
82
95
  _processBuffer(buf) {
96
+ if (this._muted) return;
97
+
83
98
  let peak = 0;
84
99
  for (let i = 0; i < buf.length - 1; i += 2) {
85
100
  const sample = buf.readInt16LE(i) / 32768;
package/src/firebase-client.js CHANGED
@@ -47,10 +47,19 @@ function parseFirestoreDoc(doc) {
47
47
  export async function fetchAssistants(uid) {
48
48
  if (!cachedIdToken) throw new Error('Not logged in');
49
49
  const url = `${FIRESTORE_URL}/users/${uid}/assistants?orderBy=createdAt%20desc`;
50
- const res = await fetch(url, {
50
+ let res = await fetch(url, {
51
51
  headers: { 'Authorization': `Bearer ${cachedIdToken}` },
52
52
  });
53
- const data = await res.json();
53
+ let data = await res.json();
54
+ if (data.error && data.error.code === 401 || data.error?.status === 'UNAUTHENTICATED') {
55
+ const refreshed = await refreshIdToken();
56
+ if (refreshed) {
57
+ res = await fetch(url, {
58
+ headers: { 'Authorization': `Bearer ${cachedIdToken}` },
59
+ });
60
+ data = await res.json();
61
+ }
62
+ }
54
63
  if (data.error) throw new Error(data.error.message);
55
64
  return (data.documents || []).map(doc => {
56
65
  const parts = doc.name.split('/');
@@ -84,6 +93,27 @@ export async function sendPasswordReset(email) {
84
93
 
85
94
  export function setIdToken(token) { cachedIdToken = token; }
86
95
 
96
+ let cachedRefreshToken = null;
97
+ export function setRefreshToken(token) { cachedRefreshToken = token; }
98
+
99
+ async function refreshIdToken() {
100
+ if (!cachedRefreshToken) return false;
101
+ try {
102
+ const res = await fetch(`https://securetoken.googleapis.com/v1/token?key=${API_KEY}`, {
103
+ method: 'POST',
104
+ headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
105
+ body: `grant_type=refresh_token&refresh_token=${encodeURIComponent(cachedRefreshToken)}`,
106
+ });
107
+ const data = await res.json();
108
+ if (data.id_token) {
109
+ cachedIdToken = data.id_token;
110
+ if (data.refresh_token) cachedRefreshToken = data.refresh_token;
111
+ return true;
112
+ }
113
+ } catch {}
114
+ return false;
115
+ }
116
+
87
117
  export async function createCLISession(uid, token) {
88
118
  if (!cachedIdToken) throw new Error('Not logged in');
89
119
  const url = `${FIRESTORE_URL}/users/${uid}/cliSessions/${token}`;
package/src/voiceInput.js CHANGED
@@ -38,7 +38,7 @@ function getAmplitude(buf) {
38
38
 
39
39
  async function transcribeWithGemini(wavBuffer, apiKey) {
40
40
  const base64Audio = wavBuffer.toString('base64');
41
- const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=${encodeURIComponent(apiKey)}`;
41
+ const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=${encodeURIComponent(apiKey)}`;
42
42
 
43
43
  const body = {
44
44
  contents: [{