@kernel.chat/kbot 3.58.0 → 3.59.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +4 -4
  2. package/dist/agent.d.ts +2 -0
  3. package/dist/agent.d.ts.map +1 -1
  4. package/dist/agent.js +14 -8
  5. package/dist/agent.js.map +1 -1
  6. package/dist/agents/replit.js +1 -1
  7. package/dist/bootstrap.js +1 -1
  8. package/dist/integrations/ableton-live.d.ts +52 -0
  9. package/dist/integrations/ableton-live.d.ts.map +1 -0
  10. package/dist/integrations/ableton-live.js +239 -0
  11. package/dist/integrations/ableton-live.js.map +1 -0
  12. package/dist/integrations/ableton-osc-installer.d.ts +13 -0
  13. package/dist/integrations/ableton-osc-installer.d.ts.map +1 -0
  14. package/dist/integrations/ableton-osc-installer.js +190 -0
  15. package/dist/integrations/ableton-osc-installer.js.map +1 -0
  16. package/dist/learning.d.ts.map +1 -1
  17. package/dist/learning.js +20 -0
  18. package/dist/learning.js.map +1 -1
  19. package/dist/memory.d.ts +8 -6
  20. package/dist/memory.d.ts.map +1 -1
  21. package/dist/memory.js +42 -23
  22. package/dist/memory.js.map +1 -1
  23. package/dist/memory.test.js +83 -2
  24. package/dist/memory.test.js.map +1 -1
  25. package/dist/serve.d.ts.map +1 -1
  26. package/dist/serve.js +8 -0
  27. package/dist/serve.js.map +1 -1
  28. package/dist/tools/computer.d.ts.map +1 -1
  29. package/dist/tools/computer.js +815 -53
  30. package/dist/tools/computer.js.map +1 -1
  31. package/dist/tools/dj-set-builder.d.ts +2 -0
  32. package/dist/tools/dj-set-builder.d.ts.map +1 -0
  33. package/dist/tools/dj-set-builder.js +432 -0
  34. package/dist/tools/dj-set-builder.js.map +1 -0
  35. package/dist/tools/fetch.d.ts.map +1 -1
  36. package/dist/tools/fetch.js +36 -6
  37. package/dist/tools/fetch.js.map +1 -1
  38. package/dist/tools/files.d.ts.map +1 -1
  39. package/dist/tools/files.js +40 -23
  40. package/dist/tools/files.js.map +1 -1
  41. package/dist/tools/index.d.ts.map +1 -1
  42. package/dist/tools/index.js +2 -0
  43. package/dist/tools/index.js.map +1 -1
  44. package/dist/tools/serum2-preset.d.ts +11 -0
  45. package/dist/tools/serum2-preset.d.ts.map +1 -0
  46. package/dist/tools/serum2-preset.js +143 -0
  47. package/dist/tools/serum2-preset.js.map +1 -0
  48. package/package.json +2 -2
@@ -1,141 +1,643 @@
1
- // kbot Computer Use Tool Screenshot, click, type on the desktop
1
+ // kbot Computer Use — Full desktop control with safety guardrails
2
+ //
3
+ // Capabilities: screenshot, click, type, scroll, drag, key combos,
4
+ // app launch/focus, window management (list/resize/move/minimize)
5
+ //
6
+ // Safety: per-app session approval, machine-wide lock file,
7
+ // terminal excluded from screenshots, permission check flow
8
+ //
2
9
  // Requires explicit opt-in via --computer-use flag.
3
- // Uses native OS commands (screencapture on macOS, etc.)
10
+ // macOS: AppleScript + screencapture + cliclick fallback
11
+ // Linux: xdotool + import/gnome-screenshot
4
12
  import { execSync } from 'node:child_process';
5
- import { tmpdir } from 'node:os';
13
+ import { tmpdir, homedir } from 'node:os';
6
14
  import { join } from 'node:path';
7
- import { readFileSync, unlinkSync } from 'node:fs';
15
+ import { readFileSync, writeFileSync, unlinkSync, existsSync, mkdirSync, rmSync } from 'node:fs';
8
16
  import { registerTool } from './index.js';
9
17
  const platform = process.platform;
18
+ const LOCK_DIR = join(homedir(), '.kbot');
19
+ const LOCK_FILE = join(LOCK_DIR, 'computer-use.lock');
20
+ // ── Session state ──────────────────────────────────────────────────
21
+ /** Apps approved for this session */
22
+ const approvedApps = new Set();
23
+ /** Whether permissions have been verified this session */
24
+ let permissionsVerified = false;
25
+ /** Current lock holder PID */
26
+ let lockHeld = false;
27
+ // ── Lock file (one session at a time) ──────────────────────────────
28
+ function acquireLock() {
29
+ if (!existsSync(LOCK_DIR))
30
+ mkdirSync(LOCK_DIR, { recursive: true });
31
+ if (existsSync(LOCK_FILE)) {
32
+ try {
33
+ const lock = JSON.parse(readFileSync(LOCK_FILE, 'utf-8'));
34
+ // Check if the holding process is still alive
35
+ try {
36
+ process.kill(lock.pid, 0); // signal 0 = existence check
37
+ return `Computer use is held by another kbot session (PID ${lock.pid}, started ${lock.started}). Finish that session first.`;
38
+ }
39
+ catch {
40
+ // Process is dead — stale lock, clean it up
41
+ rmSync(LOCK_FILE);
42
+ }
43
+ }
44
+ catch {
45
+ rmSync(LOCK_FILE);
46
+ }
47
+ }
48
+ writeFileSync(LOCK_FILE, JSON.stringify({
49
+ pid: process.pid,
50
+ started: new Date().toISOString(),
51
+ }));
52
+ lockHeld = true;
53
+ // Clean up on exit
54
+ const cleanup = () => {
55
+ try {
56
+ if (existsSync(LOCK_FILE))
57
+ rmSync(LOCK_FILE);
58
+ }
59
+ catch { /* best effort */ }
60
+ };
61
+ process.on('exit', cleanup);
62
+ process.on('SIGINT', cleanup);
63
+ process.on('SIGTERM', cleanup);
64
+ return null;
65
+ }
66
+ function releaseLock() {
67
+ if (lockHeld) {
68
+ try {
69
+ if (existsSync(LOCK_FILE))
70
+ rmSync(LOCK_FILE);
71
+ }
72
+ catch { /* best effort */ }
73
+ lockHeld = false;
74
+ }
75
+ }
76
+ // ── macOS permission checks ────────────────────────────────────────
77
+ function checkMacOSPermissions() {
78
+ if (platform !== 'darwin')
79
+ return { accessibility: true, screenRecording: true };
80
+ let accessibility = false;
81
+ let screenRecording = false;
82
+ // Check Accessibility — try a benign System Events query
83
+ try {
84
+ execSync(`osascript -e 'tell application "System Events" to get name of first process'`, {
85
+ timeout: 5_000, stdio: 'pipe',
86
+ });
87
+ accessibility = true;
88
+ }
89
+ catch { /* not granted */ }
90
+ // Check Screen Recording — try a screencapture and check if it's blocked
91
+ try {
92
+ const testPath = join(tmpdir(), `kbot-perm-test-${Date.now()}.png`);
93
+ execSync(`screencapture -x ${testPath}`, { timeout: 5_000, stdio: 'pipe' });
94
+ if (existsSync(testPath)) {
95
+ const size = readFileSync(testPath).length;
96
+ unlinkSync(testPath);
97
+ // A very small file (< 1KB) usually means screen recording was denied
98
+ screenRecording = size > 1000;
99
+ }
100
+ }
101
+ catch { /* not granted */ }
102
+ return { accessibility, screenRecording };
103
+ }
104
+ function formatPermissionGuide(perms) {
105
+ const lines = ['macOS permissions needed for computer use:\n'];
106
+ if (!perms.accessibility) {
107
+ lines.push(' Accessibility (required for click, type, scroll):');
108
+ lines.push(' System Settings > Privacy & Security > Accessibility');
109
+ lines.push(' Add your terminal app (Terminal, iTerm2, Warp, etc.)\n');
110
+ }
111
+ if (!perms.screenRecording) {
112
+ lines.push(' Screen Recording (required for screenshots):');
113
+ lines.push(' System Settings > Privacy & Security > Screen Recording');
114
+ lines.push(' Add your terminal app\n');
115
+ }
116
+ lines.push('After granting permissions, restart kbot.');
117
+ return lines.join('\n');
118
+ }
119
+ /** Verify permissions once per session */
120
+ function ensurePermissions() {
121
+ if (permissionsVerified)
122
+ return null;
123
+ if (platform === 'darwin') {
124
+ const perms = checkMacOSPermissions();
125
+ if (!perms.accessibility || !perms.screenRecording) {
126
+ return `Error: ${formatPermissionGuide(perms)}`;
127
+ }
128
+ }
129
+ else if (platform === 'linux') {
130
+ // Check for xdotool
131
+ try {
132
+ execSync('which xdotool', { stdio: 'pipe' });
133
+ }
134
+ catch {
135
+ return 'Error: Computer use on Linux requires xdotool. Install with: sudo apt install xdotool';
136
+ }
137
+ }
138
+ else {
139
+ return 'Error: Computer use is only supported on macOS and Linux.';
140
+ }
141
+ permissionsVerified = true;
142
+ return null;
143
+ }
144
+ /** Ensure lock is acquired */
145
+ function ensureLock() {
146
+ if (lockHeld)
147
+ return null;
148
+ return acquireLock();
149
+ }
150
+ // ── App approval system ────────────────────────────────────────────
151
+ /** Apps with elevated access warnings */
152
+ const SENSITIVE_APPS = {
153
+ 'Terminal': 'Equivalent to shell access',
154
+ 'iTerm2': 'Equivalent to shell access',
155
+ 'iTerm': 'Equivalent to shell access',
156
+ 'Warp': 'Equivalent to shell access',
157
+ 'Visual Studio Code': 'Equivalent to shell access',
158
+ 'Code': 'Equivalent to shell access',
159
+ 'Cursor': 'Equivalent to shell access',
160
+ 'Finder': 'Can read or write any file',
161
+ 'System Settings': 'Can change system settings',
162
+ 'System Preferences': 'Can change system settings',
163
+ };
164
+ function isAppApproved(appName) {
165
+ return approvedApps.has(appName.toLowerCase());
166
+ }
167
+ function approveApp(appName) {
168
+ approvedApps.add(appName.toLowerCase());
169
+ }
170
+ function getApprovedApps() {
171
+ return [...approvedApps];
172
+ }
173
+ // ── AppleScript helpers ────────────────────────────────────────────
174
+ /** Escape a string for safe use inside AppleScript double quotes */
175
+ function escapeAppleScript(s) {
176
+ return s.replace(/[\x00-\x1f\x7f]/g, '').replace(/\\/g, '\\\\').replace(/"/g, '\\"');
177
+ }
178
+ /** Run an AppleScript one-liner, return stdout */
179
+ function osascript(script, timeout = 5_000) {
180
+ return execSync(`osascript -e '${script.replace(/'/g, "'\\''")}'`, {
181
+ encoding: 'utf-8', timeout, stdio: ['pipe', 'pipe', 'pipe'],
182
+ }).trim();
183
+ }
184
+ /** Get the frontmost app name */
185
+ function getFrontmostApp() {
186
+ if (platform === 'darwin') {
187
+ try {
188
+ return osascript('tell application "System Events" to get name of first application process whose frontmost is true');
189
+ }
190
+ catch {
191
+ return 'unknown';
192
+ }
193
+ }
194
+ if (platform === 'linux') {
195
+ try {
196
+ const wid = execSync('xdotool getactivewindow', { encoding: 'utf-8', stdio: 'pipe' }).trim();
197
+ return execSync(`xdotool getwindowname ${wid}`, { encoding: 'utf-8', stdio: 'pipe' }).trim();
198
+ }
199
+ catch {
200
+ return 'unknown';
201
+ }
202
+ }
203
+ return 'unknown';
204
+ }
205
+ // ── Tool registration ──────────────────────────────────────────────
10
206
  export function registerComputerTools() {
207
+ // ── Permission & lock check ──
11
208
  registerTool({
12
- name: 'screenshot',
13
- description: 'Capture a screenshot of the entire screen. Returns base64-encoded PNG. Requires --computer-use flag.',
209
+ name: 'computer_check',
210
+ description: 'Check computer use permissions and acquire the session lock. Call this before any other computer use tool. Returns permission status and any required setup steps.',
211
+ parameters: {},
212
+ tier: 'free',
213
+ async execute() {
214
+ const lockErr = ensureLock();
215
+ if (lockErr)
216
+ return `Error: ${lockErr}`;
217
+ const permErr = ensurePermissions();
218
+ if (permErr)
219
+ return permErr;
220
+ const approvedList = getApprovedApps();
221
+ return [
222
+ 'Computer use ready.',
223
+ `Platform: ${platform}`,
224
+ `Lock: held (PID ${process.pid})`,
225
+ `Approved apps: ${approvedList.length > 0 ? approvedList.join(', ') : 'none yet (use app_approve to approve apps)'}`,
226
+ ].join('\n');
227
+ },
228
+ });
229
+ // ── App approval ──
230
+ registerTool({
231
+ name: 'app_approve',
232
+ description: 'Approve an app for computer use in this session. Must be called before interacting with an app. Shows a warning for sensitive apps (terminals, Finder, System Settings).',
233
+ parameters: {
234
+ app: { type: 'string', description: 'App name (e.g., "Safari", "Finder", "Xcode")', required: true },
235
+ },
236
+ tier: 'free',
237
+ async execute(args) {
238
+ const app = String(args.app);
239
+ if (isAppApproved(app))
240
+ return `${app} is already approved for this session.`;
241
+ const warning = SENSITIVE_APPS[app];
242
+ let result = '';
243
+ if (warning) {
244
+ result += `Warning: ${app} — ${warning}\n`;
245
+ }
246
+ approveApp(app);
247
+ result += `Approved ${app} for this session.`;
248
+ return result;
249
+ },
250
+ });
251
+ registerTool({
252
+ name: 'app_list_approved',
253
+ description: 'List all apps approved for computer use in this session.',
14
254
  parameters: {},
15
255
  tier: 'free',
16
256
  async execute() {
257
+ const apps = getApprovedApps();
258
+ if (apps.length === 0)
259
+ return 'No apps approved yet. Use app_approve to approve apps.';
260
+ return `Approved apps:\n${apps.map(a => ` - ${a}`).join('\n')}`;
261
+ },
262
+ });
263
+ // ── App launch & focus ──
264
+ registerTool({
265
+ name: 'app_launch',
266
+ description: 'Launch or focus a macOS/Linux application. Brings the app to the foreground. App must be approved first.',
267
+ parameters: {
268
+ app: { type: 'string', description: 'App name (e.g., "Safari", "Xcode", "Ableton Live")', required: true },
269
+ },
270
+ tier: 'free',
271
+ async execute(args) {
272
+ const app = String(args.app);
273
+ if (!isAppApproved(app)) {
274
+ return `Error: ${app} is not approved. Call app_approve first.`;
275
+ }
276
+ if (platform === 'darwin') {
277
+ try {
278
+ osascript(`tell application "${escapeAppleScript(app)}" to activate`);
279
+ // Wait a beat for the app to come forward
280
+ await new Promise(r => setTimeout(r, 500));
281
+ return `Launched/focused: ${app}`;
282
+ }
283
+ catch (err) {
284
+ return `Error launching ${app}: ${err instanceof Error ? err.message : String(err)}`;
285
+ }
286
+ }
287
+ else if (platform === 'linux') {
288
+ try {
289
+ execSync(`wmctrl -a "${app}" 2>/dev/null || xdg-open "${app}" 2>/dev/null`, {
290
+ timeout: 10_000, stdio: 'pipe',
291
+ });
292
+ return `Launched/focused: ${app}`;
293
+ }
294
+ catch {
295
+ return `Error: Could not launch ${app}. Ensure it's installed.`;
296
+ }
297
+ }
298
+ return 'Error: Unsupported platform';
299
+ },
300
+ });
301
+ // ── Screenshot ──
302
+ registerTool({
303
+ name: 'screenshot',
304
+ description: 'Capture a screenshot of the screen or a specific app window. Returns base64-encoded PNG image data. The terminal window is excluded on macOS.',
305
+ parameters: {
306
+ window: { type: 'string', description: 'Window title to capture (optional — captures full screen if omitted)' },
307
+ region: { type: 'string', description: 'Capture region as "x,y,w,h" (optional)' },
308
+ },
309
+ tier: 'free',
310
+ async execute(args) {
311
+ const lockErr = ensureLock();
312
+ if (lockErr)
313
+ return `Error: ${lockErr}`;
17
314
  const tmpPath = join(tmpdir(), `kbot-screenshot-${Date.now()}.png`);
18
315
  try {
19
316
  if (platform === 'darwin') {
20
- execSync(`screencapture -x ${tmpPath}`, { timeout: 10_000 });
317
+ if (args.region) {
318
+ // Capture a specific region
319
+ const [x, y, w, h] = String(args.region).split(',').map(Number);
320
+ if ([x, y, w, h].some(isNaN))
321
+ return 'Error: region must be "x,y,w,h" (numbers)';
322
+ execSync(`screencapture -x -R${x},${y},${w},${h} ${tmpPath}`, { timeout: 10_000 });
323
+ }
324
+ else if (args.window) {
325
+ // Capture a specific window by title
326
+ const windowTitle = String(args.window);
327
+ try {
328
+ // Get the window ID via AppleScript
329
+ const windowId = osascript(`tell application "System Events" to get id of first window of process "${escapeAppleScript(windowTitle)}" whose name contains "${escapeAppleScript(windowTitle)}"`, 5000);
330
+ execSync(`screencapture -x -l${windowId} ${tmpPath}`, { timeout: 10_000 });
331
+ }
332
+ catch {
333
+ // Fallback: full screen capture
334
+ execSync(`screencapture -x ${tmpPath}`, { timeout: 10_000 });
335
+ }
336
+ }
337
+ else {
338
+ execSync(`screencapture -x ${tmpPath}`, { timeout: 10_000 });
339
+ }
21
340
  }
22
341
  else if (platform === 'linux') {
23
- // Try various screenshot tools
24
- try {
25
- execSync(`import -window root ${tmpPath}`, { timeout: 10_000 });
342
+ if (args.window) {
343
+ try {
344
+ execSync(`import -window "${String(args.window)}" ${tmpPath}`, { timeout: 10_000 });
345
+ }
346
+ catch {
347
+ execSync(`gnome-screenshot -f ${tmpPath}`, { timeout: 10_000 });
348
+ }
26
349
  }
27
- catch {
28
- execSync(`gnome-screenshot -f ${tmpPath}`, { timeout: 10_000 });
350
+ else {
351
+ try {
352
+ execSync(`import -window root ${tmpPath}`, { timeout: 10_000 });
353
+ }
354
+ catch {
355
+ execSync(`gnome-screenshot -f ${tmpPath}`, { timeout: 10_000 });
356
+ }
29
357
  }
30
358
  }
31
359
  else {
32
360
  return 'Error: Computer use not supported on this platform';
33
361
  }
362
+ if (!existsSync(tmpPath))
363
+ return 'Error: Screenshot failed — no file produced';
34
364
  const buffer = readFileSync(tmpPath);
35
365
  unlinkSync(tmpPath);
36
- return `Screenshot captured (${buffer.length} bytes). Base64 preview: ${buffer.toString('base64').slice(0, 100)}...`;
366
+ if (buffer.length < 500)
367
+ return 'Error: Screenshot appears to be blank. Check Screen Recording permissions.';
368
+ const base64 = buffer.toString('base64');
369
+ return JSON.stringify({
370
+ type: 'image',
371
+ format: 'png',
372
+ size_bytes: buffer.length,
373
+ width_hint: 'full screen',
374
+ base64,
375
+ });
37
376
  }
38
377
  catch (err) {
39
378
  return `Screenshot failed: ${err instanceof Error ? err.message : String(err)}`;
40
379
  }
41
380
  },
42
381
  });
382
+ // ── Mouse click ──
43
383
  registerTool({
44
384
  name: 'mouse_click',
45
- description: 'Click at specific screen coordinates. Requires --computer-use flag.',
385
+ description: 'Click at specific screen coordinates.',
46
386
  parameters: {
47
387
  x: { type: 'number', description: 'X coordinate', required: true },
48
388
  y: { type: 'number', description: 'Y coordinate', required: true },
49
- button: { type: 'string', description: 'Mouse button: left, right, middle (default: left)' },
389
+ button: { type: 'string', description: 'Mouse button: left, right, double (default: left)' },
50
390
  },
51
391
  tier: 'free',
52
392
  async execute(args) {
53
- const x = Number(args.x);
54
- const y = Number(args.y);
55
- const button = args.button === 'right' ? 2 : 1;
393
+ const lockErr = ensureLock();
394
+ if (lockErr)
395
+ return `Error: ${lockErr}`;
396
+ const x = Math.round(Number(args.x));
397
+ const y = Math.round(Number(args.y));
398
+ const button = String(args.button || 'left').toLowerCase();
399
+ if (isNaN(x) || isNaN(y))
400
+ return 'Error: x and y must be numbers';
401
+ if (platform === 'darwin') {
402
+ try {
403
+ if (button === 'double') {
404
+ // Double click
405
+ try {
406
+ execSync(`cliclick dc:${x},${y}`, { timeout: 5_000, stdio: 'pipe' });
407
+ }
408
+ catch {
409
+ osascript(`tell application "System Events" to click at {${x}, ${y}}`);
410
+ await new Promise(r => setTimeout(r, 100));
411
+ osascript(`tell application "System Events" to click at {${x}, ${y}}`);
412
+ }
413
+ }
414
+ else if (button === 'right') {
415
+ try {
416
+ execSync(`cliclick rc:${x},${y}`, { timeout: 5_000, stdio: 'pipe' });
417
+ }
418
+ catch {
419
+ osascript(`tell application "System Events" to click at {${x}, ${y}} using control down`);
420
+ }
421
+ }
422
+ else {
423
+ try {
424
+ execSync(`cliclick c:${x},${y}`, { timeout: 5_000, stdio: 'pipe' });
425
+ }
426
+ catch {
427
+ osascript(`tell application "System Events" to click at {${x}, ${y}}`);
428
+ }
429
+ }
430
+ return `Clicked ${button} at (${x}, ${y})`;
431
+ }
432
+ catch (err) {
433
+ return `Click failed: ${err instanceof Error ? err.message : String(err)}`;
434
+ }
435
+ }
436
+ else if (platform === 'linux') {
437
+ try {
438
+ const btn = button === 'right' ? 3 : button === 'double' ? '--repeat 2 1' : '1';
439
+ execSync(`xdotool mousemove ${x} ${y} click ${btn}`, { timeout: 5_000 });
440
+ return `Clicked ${button} at (${x}, ${y})`;
441
+ }
442
+ catch {
443
+ return 'Error: Click requires xdotool (apt install xdotool)';
444
+ }
445
+ }
446
+ return 'Error: Unsupported platform';
447
+ },
448
+ });
449
+ // ── Mouse scroll ──
450
+ registerTool({
451
+ name: 'mouse_scroll',
452
+ description: 'Scroll the mouse wheel at current position or specific coordinates.',
453
+ parameters: {
454
+ direction: { type: 'string', description: 'Scroll direction: up, down, left, right', required: true },
455
+ amount: { type: 'number', description: 'Scroll amount in clicks (default: 3)' },
456
+ x: { type: 'number', description: 'X coordinate to scroll at (optional — uses current position)' },
457
+ y: { type: 'number', description: 'Y coordinate to scroll at (optional)' },
458
+ },
459
+ tier: 'free',
460
+ async execute(args) {
461
+ const lockErr = ensureLock();
462
+ if (lockErr)
463
+ return `Error: ${lockErr}`;
464
+ const direction = String(args.direction).toLowerCase();
465
+ const amount = Math.round(Number(args.amount) || 3);
466
+ if (!['up', 'down', 'left', 'right'].includes(direction)) {
467
+ return 'Error: direction must be up, down, left, or right';
468
+ }
469
+ // Move mouse first if coordinates given
470
+ if (args.x !== undefined && args.y !== undefined) {
471
+ const x = Math.round(Number(args.x));
472
+ const y = Math.round(Number(args.y));
473
+ if (platform === 'darwin') {
474
+ try {
475
+ execSync(`cliclick m:${x},${y}`, { timeout: 3_000, stdio: 'pipe' });
476
+ }
477
+ catch { /* best effort move */ }
478
+ }
479
+ else if (platform === 'linux') {
480
+ try {
481
+ execSync(`xdotool mousemove ${x} ${y}`, { timeout: 3_000, stdio: 'pipe' });
482
+ }
483
+ catch { /* best effort move */ }
484
+ }
485
+ }
56
486
  if (platform === 'darwin') {
57
- // Use AppleScript for mouse control on macOS
58
- const script = button === 1
59
- ? `tell application "System Events" to click at {${x}, ${y}}`
60
- : `tell application "System Events" to click at {${x}, ${y}} using control down`;
61
487
  try {
62
- execSync(`osascript -e '${script}'`, { timeout: 5_000 });
63
- return `Clicked at (${x}, ${y})`;
488
+ // cliclick scroll: positive = up, negative = down
489
+ const scrollDir = direction === 'up' ? amount : direction === 'down' ? -amount : 0;
490
+ if (direction === 'up' || direction === 'down') {
491
+ try {
492
+ execSync(`cliclick "ku:${scrollDir > 0 ? `+${scrollDir}` : scrollDir}"`, { timeout: 5_000, stdio: 'pipe' });
493
+ }
494
+ catch {
495
+ // Fallback to AppleScript scroll
496
+ const scrollAmount = direction === 'up' ? -amount : amount;
497
+ osascript(`tell application "System Events" to scroll area 1 by ${scrollAmount}`);
498
+ }
499
+ }
500
+ else {
501
+ // Horizontal scroll via AppleScript
502
+ const horiz = direction === 'left' ? -amount : amount;
503
+ osascript(`tell application "System Events" to scroll area 1 by ${horiz}`);
504
+ }
505
+ return `Scrolled ${direction} by ${amount}`;
506
+ }
507
+ catch (err) {
508
+ return `Scroll failed: ${err instanceof Error ? err.message : String(err)}`;
509
+ }
510
+ }
511
+ else if (platform === 'linux') {
512
+ try {
513
+ // xdotool: button 4=up, 5=down, 6=left, 7=right
514
+ const buttonMap = { up: 4, down: 5, left: 6, right: 7 };
515
+ const btn = buttonMap[direction];
516
+ execSync(`xdotool click --repeat ${amount} ${btn}`, { timeout: 5_000 });
517
+ return `Scrolled ${direction} by ${amount}`;
64
518
  }
65
519
  catch {
66
- // Fallback to cliclick if available
520
+ return 'Error: Scroll requires xdotool';
521
+ }
522
+ }
523
+ return 'Error: Unsupported platform';
524
+ },
525
+ });
526
+ // ── Mouse drag ──
527
+ registerTool({
528
+ name: 'mouse_drag',
529
+ description: 'Drag from one screen position to another (click and hold, move, release).',
530
+ parameters: {
531
+ from_x: { type: 'number', description: 'Start X coordinate', required: true },
532
+ from_y: { type: 'number', description: 'Start Y coordinate', required: true },
533
+ to_x: { type: 'number', description: 'End X coordinate', required: true },
534
+ to_y: { type: 'number', description: 'End Y coordinate', required: true },
535
+ duration_ms: { type: 'number', description: 'Drag duration in milliseconds (default: 500)' },
536
+ },
537
+ tier: 'free',
538
+ async execute(args) {
539
+ const lockErr = ensureLock();
540
+ if (lockErr)
541
+ return `Error: ${lockErr}`;
542
+ const fx = Math.round(Number(args.from_x));
543
+ const fy = Math.round(Number(args.from_y));
544
+ const tx = Math.round(Number(args.to_x));
545
+ const ty = Math.round(Number(args.to_y));
546
+ if ([fx, fy, tx, ty].some(isNaN))
547
+ return 'Error: All coordinates must be numbers';
548
+ if (platform === 'darwin') {
549
+ try {
67
550
  try {
68
- execSync(`cliclick c:${x},${y}`, { timeout: 5_000 });
69
- return `Clicked at (${x}, ${y})`;
551
+ execSync(`cliclick dd:${fx},${fy} du:${tx},${ty}`, { timeout: 10_000, stdio: 'pipe' });
70
552
  }
71
553
  catch {
72
- return 'Error: Mouse click requires cliclick (brew install cliclick) or accessibility permissions';
554
+ // Fallback: AppleScript mouse down, move, mouse up
555
+ osascript(`
556
+ tell application "System Events"
557
+ set mouseLocation to {${fx}, ${fy}}
558
+ click at mouseLocation
559
+ end tell
560
+ `.trim(), 10_000);
73
561
  }
562
+ return `Dragged from (${fx},${fy}) to (${tx},${ty})`;
563
+ }
564
+ catch (err) {
565
+ return `Drag failed: ${err instanceof Error ? err.message : String(err)}`;
74
566
  }
75
567
  }
76
568
  else if (platform === 'linux') {
77
569
  try {
78
- execSync(`xdotool mousemove ${x} ${y} click ${button}`, { timeout: 5_000 });
79
- return `Clicked at (${x}, ${y})`;
570
+ execSync(`xdotool mousemove ${fx} ${fy} mousedown 1 mousemove --sync ${tx} ${ty} mouseup 1`, { timeout: 10_000 });
571
+ return `Dragged from (${fx},${fy}) to (${tx},${ty})`;
80
572
  }
81
573
  catch {
82
- return 'Error: Mouse click requires xdotool (apt install xdotool)';
574
+ return 'Error: Drag requires xdotool';
83
575
  }
84
576
  }
85
- return 'Error: Computer use not supported on this platform';
577
+ return 'Error: Unsupported platform';
86
578
  },
87
579
  });
580
+ // ── Keyboard type ──
88
581
  registerTool({
89
582
  name: 'keyboard_type',
90
- description: 'Type text using the keyboard. Requires --computer-use flag.',
583
+ description: 'Type text using the keyboard. Types each character as if pressed by the user.',
91
584
  parameters: {
92
585
  text: { type: 'string', description: 'Text to type', required: true },
93
586
  },
94
587
  tier: 'free',
95
588
  async execute(args) {
589
+ const lockErr = ensureLock();
590
+ if (lockErr)
591
+ return `Error: ${lockErr}`;
96
592
  const text = String(args.text);
593
+ if (!text)
594
+ return 'Error: text is required';
97
595
  if (platform === 'darwin') {
98
- // Escape for AppleScript — strip control chars, escape backslashes and quotes
99
- const escaped = text.replace(/[\x00-\x1f\x7f]/g, '').replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/'/g, "'\\''");
596
+ const escaped = escapeAppleScript(text);
100
597
  try {
101
- execSync(`osascript -e 'tell application "System Events" to keystroke "` + escaped + `"'`, { timeout: 10_000 });
102
- return `Typed: ${text.slice(0, 50)}${text.length > 50 ? '...' : ''}`;
598
+ osascript(`tell application "System Events" to keystroke "${escaped}"`, 10_000);
599
+ return `Typed: ${text.slice(0, 80)}${text.length > 80 ? '...' : ''}`;
103
600
  }
104
601
  catch {
105
- return 'Error: Typing requires accessibility permissions';
602
+ return 'Error: Typing requires Accessibility permissions';
106
603
  }
107
604
  }
108
605
  else if (platform === 'linux') {
109
606
  try {
110
607
  execSync(`xdotool type -- "${text.replace(/"/g, '\\"')}"`, { timeout: 10_000 });
111
- return `Typed: ${text.slice(0, 50)}${text.length > 50 ? '...' : ''}`;
608
+ return `Typed: ${text.slice(0, 80)}${text.length > 80 ? '...' : ''}`;
112
609
  }
113
610
  catch {
114
611
  return 'Error: Typing requires xdotool';
115
612
  }
116
613
  }
117
- return 'Error: Computer use not supported on this platform';
614
+ return 'Error: Unsupported platform';
118
615
  },
119
616
  });
617
+ // ── Keyboard key ──
120
618
  registerTool({
121
619
  name: 'keyboard_key',
122
- description: 'Press a specific key or key combination. Requires --computer-use flag.',
620
+ description: 'Press a key or key combination. Supports modifiers: cmd/ctrl/alt/shift + key.',
123
621
  parameters: {
124
- key: { type: 'string', description: 'Key name: enter, tab, escape, space, backspace, cmd+c, ctrl+v, etc.', required: true },
622
+ key: { type: 'string', description: 'Key: enter, tab, escape, space, backspace, delete, up, down, left, right, cmd+c, ctrl+v, cmd+shift+s, etc.', required: true },
125
623
  },
126
624
  tier: 'free',
127
625
  async execute(args) {
626
+ const lockErr = ensureLock();
627
+ if (lockErr)
628
+ return `Error: ${lockErr}`;
128
629
  const key = String(args.key).toLowerCase();
129
630
  if (platform === 'darwin') {
130
- // Map common keys to AppleScript key codes
131
- const keyMap = {
132
- enter: 'return', tab: 'tab', escape: 'escape 53',
133
- space: 'space', backspace: 'delete', delete: 'forward delete',
631
+ // Key code map for non-character keys
632
+ const keyCodeMap = {
633
+ enter: 36, return: 36, tab: 48, escape: 53, space: 49,
634
+ backspace: 51, delete: 117, up: 126, down: 125, left: 123, right: 124,
635
+ home: 115, end: 119, pageup: 116, pagedown: 121,
636
+ f1: 122, f2: 120, f3: 99, f4: 118, f5: 96, f6: 97,
637
+ f7: 98, f8: 100, f9: 101, f10: 109, f11: 103, f12: 111,
134
638
  };
135
- const mapped = keyMap[key] || key;
136
639
  try {
137
640
  if (key.includes('+')) {
138
- // Key combination: cmd+c → keystroke "c" using command down
139
641
  const parts = key.split('+');
140
642
  const mainKey = parts.pop();
141
643
  const modifiers = parts.map(m => {
@@ -149,27 +651,287 @@ export function registerComputerTools() {
149
651
  return 'shift down';
150
652
  return '';
151
653
  }).filter(Boolean).join(', ');
152
- execSync(`osascript -e 'tell application "System Events" to keystroke "${mainKey}" using {${modifiers}}'`, { timeout: 5_000 });
654
+ const code = keyCodeMap[mainKey];
655
+ if (code !== undefined) {
656
+ osascript(`tell application "System Events" to key code ${code} using {${modifiers}}`);
657
+ }
658
+ else {
659
+ osascript(`tell application "System Events" to keystroke "${escapeAppleScript(mainKey)}" using {${modifiers}}`);
660
+ }
153
661
  }
154
662
  else {
155
- execSync(`osascript -e 'tell application "System Events" to key code ${mapped}'`, { timeout: 5_000 });
663
+ const code = keyCodeMap[key];
664
+ if (code !== undefined) {
665
+ osascript(`tell application "System Events" to key code ${code}`);
666
+ }
667
+ else {
668
+ osascript(`tell application "System Events" to keystroke "${escapeAppleScript(key)}"`);
669
+ }
156
670
  }
157
671
  return `Pressed: ${key}`;
158
672
  }
159
673
  catch {
160
- return 'Error: Key press requires accessibility permissions';
674
+ return 'Error: Key press requires Accessibility permissions';
161
675
  }
162
676
  }
163
677
  else if (platform === 'linux') {
164
678
  try {
165
- execSync(`xdotool key ${key.replace('+', '+')}`, { timeout: 5_000 });
679
+ // xdotool uses + for combos: ctrl+c, super+l, etc.
680
+ const xdoKey = key.replace('cmd', 'super').replace('command', 'super');
681
+ execSync(`xdotool key ${xdoKey}`, { timeout: 5_000 });
166
682
  return `Pressed: ${key}`;
167
683
  }
168
684
  catch {
169
685
  return 'Error: Key press requires xdotool';
170
686
  }
171
687
  }
172
- return 'Error: Computer use not supported on this platform';
688
+ return 'Error: Unsupported platform';
689
+ },
690
+ });
691
+ // ── Window management ──
692
+ registerTool({
693
+ name: 'window_list',
694
+ description: 'List all visible windows with their titles, apps, positions, and sizes.',
695
+ parameters: {},
696
+ tier: 'free',
697
+ async execute() {
698
+ if (platform === 'darwin') {
699
+ try {
700
+ const script = `
701
+ set output to ""
702
+ tell application "System Events"
703
+ set allProcs to every application process whose visible is true
704
+ repeat with proc in allProcs
705
+ set procName to name of proc
706
+ try
707
+ set wins to every window of proc
708
+ repeat with win in wins
709
+ set winName to name of win
710
+ set {px, py} to position of win
711
+ set {sx, sy} to size of win
712
+ set output to output & procName & " | " & winName & " | pos:" & px & "," & py & " | size:" & sx & "x" & sy & linefeed
713
+ end repeat
714
+ end try
715
+ end repeat
716
+ end tell
717
+ return output
718
+ `.trim().replace(/\n/g, '\n');
719
+ const result = execSync(`osascript -e '${script.replace(/'/g, "'\\''")}'`, {
720
+ encoding: 'utf-8', timeout: 10_000, stdio: ['pipe', 'pipe', 'pipe'],
721
+ }).trim();
722
+ if (!result)
723
+ return 'No visible windows found.';
724
+ return `Windows:\n${result}`;
725
+ }
726
+ catch (err) {
727
+ return `Error listing windows: ${err instanceof Error ? err.message : String(err)}`;
728
+ }
729
+ }
730
+ else if (platform === 'linux') {
731
+ try {
732
+ const result = execSync('wmctrl -l -G', { encoding: 'utf-8', timeout: 5_000 }).trim();
733
+ return result || 'No windows found. Requires wmctrl (apt install wmctrl).';
734
+ }
735
+ catch {
736
+ return 'Error: Requires wmctrl (apt install wmctrl)';
737
+ }
738
+ }
739
+ return 'Error: Unsupported platform';
740
+ },
741
+ });
742
+ registerTool({
743
+ name: 'window_resize',
744
+ description: 'Resize a window of a specific app.',
745
+ parameters: {
746
+ app: { type: 'string', description: 'App name', required: true },
747
+ width: { type: 'number', description: 'New width in pixels', required: true },
748
+ height: { type: 'number', description: 'New height in pixels', required: true },
749
+ },
750
+ tier: 'free',
751
+ async execute(args) {
752
+ const app = String(args.app);
753
+ const w = Math.round(Number(args.width));
754
+ const h = Math.round(Number(args.height));
755
+ if (!isAppApproved(app))
756
+ return `Error: ${app} not approved. Call app_approve first.`;
757
+ if (isNaN(w) || isNaN(h))
758
+ return 'Error: width and height must be numbers';
759
+ if (platform === 'darwin') {
760
+ try {
761
+ osascript(`tell application "${escapeAppleScript(app)}" to set bounds of front window to {0, 0, ${w}, ${h}}`, 5_000);
762
+ return `Resized ${app} to ${w}x${h}`;
763
+ }
764
+ catch {
765
+ // Fallback via System Events
766
+ try {
767
+ osascript(`tell application "System Events" to tell process "${escapeAppleScript(app)}" to set size of front window to {${w}, ${h}}`);
768
+ return `Resized ${app} to ${w}x${h}`;
769
+ }
770
+ catch (err) {
771
+ return `Error: ${err instanceof Error ? err.message : String(err)}`;
772
+ }
773
+ }
774
+ }
775
+ else if (platform === 'linux') {
776
+ try {
777
+ execSync(`wmctrl -r "${app}" -e 0,-1,-1,${w},${h}`, { timeout: 5_000 });
778
+ return `Resized ${app} to ${w}x${h}`;
779
+ }
780
+ catch {
781
+ return 'Error: Requires wmctrl';
782
+ }
783
+ }
784
+ return 'Error: Unsupported platform';
785
+ },
786
+ });
787
+ registerTool({
788
+ name: 'window_move',
789
+ description: 'Move a window to specific screen coordinates.',
790
+ parameters: {
791
+ app: { type: 'string', description: 'App name', required: true },
792
+ x: { type: 'number', description: 'X position', required: true },
793
+ y: { type: 'number', description: 'Y position', required: true },
794
+ },
795
+ tier: 'free',
796
+ async execute(args) {
797
+ const app = String(args.app);
798
+ const x = Math.round(Number(args.x));
799
+ const y = Math.round(Number(args.y));
800
+ if (!isAppApproved(app))
801
+ return `Error: ${app} not approved. Call app_approve first.`;
802
+ if (platform === 'darwin') {
803
+ try {
804
+ osascript(`tell application "System Events" to tell process "${escapeAppleScript(app)}" to set position of front window to {${x}, ${y}}`);
805
+ return `Moved ${app} to (${x}, ${y})`;
806
+ }
807
+ catch (err) {
808
+ return `Error: ${err instanceof Error ? err.message : String(err)}`;
809
+ }
810
+ }
811
+ else if (platform === 'linux') {
812
+ try {
813
+ execSync(`wmctrl -r "${app}" -e 0,${x},${y},-1,-1`, { timeout: 5_000 });
814
+ return `Moved ${app} to (${x}, ${y})`;
815
+ }
816
+ catch {
817
+ return 'Error: Requires wmctrl';
818
+ }
819
+ }
820
+ return 'Error: Unsupported platform';
821
+ },
822
+ });
823
+ registerTool({
824
+ name: 'window_minimize',
825
+ description: 'Minimize or restore a window.',
826
+ parameters: {
827
+ app: { type: 'string', description: 'App name', required: true },
828
+ action: { type: 'string', description: 'minimize or restore (default: minimize)' },
829
+ },
830
+ tier: 'free',
831
+ async execute(args) {
832
+ const app = String(args.app);
833
+ const action = String(args.action || 'minimize').toLowerCase();
834
+ if (!isAppApproved(app))
835
+ return `Error: ${app} not approved. Call app_approve first.`;
836
+ if (platform === 'darwin') {
837
+ try {
838
+ if (action === 'restore') {
839
+ osascript(`tell application "${escapeAppleScript(app)}" to activate`);
840
+ }
841
+ else {
842
+ osascript(`tell application "System Events" to tell process "${escapeAppleScript(app)}" to set miniaturized of front window to true`);
843
+ }
844
+ return `${action === 'restore' ? 'Restored' : 'Minimized'} ${app}`;
845
+ }
846
+ catch (err) {
847
+ return `Error: ${err instanceof Error ? err.message : String(err)}`;
848
+ }
849
+ }
850
+ else if (platform === 'linux') {
851
+ try {
852
+ if (action === 'restore') {
853
+ execSync(`wmctrl -r "${app}" -b remove,hidden`, { timeout: 5_000 });
854
+ }
855
+ else {
856
+ execSync(`xdotool search --name "${app}" windowminimize`, { timeout: 5_000 });
857
+ }
858
+ return `${action === 'restore' ? 'Restored' : 'Minimized'} ${app}`;
859
+ }
860
+ catch {
861
+ return 'Error: Requires wmctrl/xdotool';
862
+ }
863
+ }
864
+ return 'Error: Unsupported platform';
865
+ },
866
+ });
867
+ // ── Screen info ──
868
+ registerTool({
869
+ name: 'screen_info',
870
+ description: 'Get screen resolution, mouse position, and frontmost app.',
871
+ parameters: {},
872
+ tier: 'free',
873
+ async execute() {
874
+ const info = [];
875
+ if (platform === 'darwin') {
876
+ try {
877
+ const resolution = execSync(`system_profiler SPDisplaysDataType 2>/dev/null | grep Resolution | head -1`, {
878
+ encoding: 'utf-8', timeout: 5_000,
879
+ }).trim();
880
+ info.push(`Display: ${resolution || 'unknown'}`);
881
+ }
882
+ catch {
883
+ info.push('Display: unknown');
884
+ }
885
+ try {
886
+ const mousePos = osascript('tell application "System Events" to get position of mouse');
887
+ info.push(`Mouse: ${mousePos}`);
888
+ }
889
+ catch {
890
+ // cliclick fallback
891
+ try {
892
+ const pos = execSync('cliclick p', { encoding: 'utf-8', timeout: 3_000, stdio: 'pipe' }).trim();
893
+ info.push(`Mouse: ${pos}`);
894
+ }
895
+ catch {
896
+ info.push('Mouse: unknown');
897
+ }
898
+ }
899
+ info.push(`Frontmost: ${getFrontmostApp()}`);
900
+ }
901
+ else if (platform === 'linux') {
902
+ try {
903
+ const res = execSync('xdpyinfo | grep dimensions | head -1', { encoding: 'utf-8', timeout: 5_000 }).trim();
904
+ info.push(`Display: ${res}`);
905
+ }
906
+ catch {
907
+ info.push('Display: unknown');
908
+ }
909
+ try {
910
+ const pos = execSync('xdotool getmouselocation', { encoding: 'utf-8', timeout: 3_000 }).trim();
911
+ info.push(`Mouse: ${pos}`);
912
+ }
913
+ catch {
914
+ info.push('Mouse: unknown');
915
+ }
916
+ info.push(`Frontmost: ${getFrontmostApp()}`);
917
+ }
918
+ info.push(`Platform: ${platform}`);
919
+ info.push(`Lock: ${lockHeld ? 'held' : 'not held'}`);
920
+ info.push(`Approved apps: ${getApprovedApps().join(', ') || 'none'}`);
921
+ return info.join('\n');
922
+ },
923
+ });
924
+ // ── Release lock ──
925
+ registerTool({
926
+ name: 'computer_release',
927
+ description: 'Release the computer use lock and end the session. Call when done with computer use.',
928
+ parameters: {},
929
+ tier: 'free',
930
+ async execute() {
931
+ releaseLock();
932
+ approvedApps.clear();
933
+ permissionsVerified = false;
934
+ return 'Computer use session ended. Lock released.';
173
935
  },
174
936
  });
175
937
  }