@projectservan8n/cnapse 0.5.7 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1465,6 +1465,7 @@ var TelegramBotService = class extends EventEmitter {
1465
1465
  bot = null;
1466
1466
  isRunning = false;
1467
1467
  allowedChatIds = /* @__PURE__ */ new Set();
1468
+ chatHistory = /* @__PURE__ */ new Map();
1468
1469
  constructor() {
1469
1470
  super();
1470
1471
  }
@@ -1622,13 +1623,52 @@ ${result.error}
1622
1623
  if (ctx.message.text.startsWith("/")) {
1623
1624
  return;
1624
1625
  }
1626
+ const chatId = ctx.chat.id;
1627
+ const userText = ctx.message.text;
1628
+ const from = ctx.from.username || ctx.from.first_name || "User";
1625
1629
  const message = {
1626
- chatId: ctx.chat.id,
1627
- text: ctx.message.text,
1628
- from: ctx.from.username || ctx.from.first_name || "User"
1630
+ chatId,
1631
+ text: userText,
1632
+ from
1629
1633
  };
1630
1634
  this.emit("message", message);
1631
- this.emit("command", "chat", ctx.message.text, ctx.chat.id);
1635
+ if (!this.chatHistory.has(chatId)) {
1636
+ this.chatHistory.set(chatId, []);
1637
+ }
1638
+ const history = this.chatHistory.get(chatId);
1639
+ history.push({ role: "user", content: userText });
1640
+ if (history.length > 10) {
1641
+ history.splice(0, history.length - 10);
1642
+ }
1643
+ try {
1644
+ await ctx.sendChatAction("typing");
1645
+ const isVisionRequest = /screen|see|look|what('?s| is) (on|visible)|show me|screenshot/i.test(userText);
1646
+ let response;
1647
+ if (isVisionRequest) {
1648
+ const screenshot = await captureScreenshot();
1649
+ if (screenshot) {
1650
+ response = await chatWithVision(history, screenshot);
1651
+ } else {
1652
+ response = await chat(history);
1653
+ }
1654
+ } else {
1655
+ response = await chat(history);
1656
+ }
1657
+ history.push({ role: "assistant", content: response.content });
1658
+ const responseText = response.content || "(no response)";
1659
+ if (responseText.length > 4e3) {
1660
+ const chunks = responseText.match(/.{1,4000}/gs) || [responseText];
1661
+ for (const chunk of chunks) {
1662
+ await ctx.reply(chunk);
1663
+ }
1664
+ } else {
1665
+ await ctx.reply(responseText);
1666
+ }
1667
+ } catch (error) {
1668
+ const errorMsg = error instanceof Error ? error.message : "Unknown error";
1669
+ await ctx.reply(`\u274C Error: ${errorMsg}`);
1670
+ this.emit("error", new Error(errorMsg));
1671
+ }
1632
1672
  });
1633
1673
  this.bot.catch((err2) => {
1634
1674
  this.emit("error", err2);
@@ -2352,16 +2392,19 @@ async function main() {
2352
2392
  const key = args[2];
2353
2393
  if (!provider || !key) {
2354
2394
  console.log("Usage: cnapse auth <provider> <api-key>");
2355
- console.log("Providers: openrouter, anthropic, openai");
2395
+ console.log("Providers: openrouter, anthropic, openai, telegram");
2356
2396
  process.exit(1);
2357
2397
  }
2358
- if (!["openrouter", "anthropic", "openai"].includes(provider)) {
2398
+ if (!["openrouter", "anthropic", "openai", "telegram"].includes(provider)) {
2359
2399
  console.log(`Invalid provider: ${provider}`);
2360
- console.log("Valid providers: openrouter, anthropic, openai");
2400
+ console.log("Valid providers: openrouter, anthropic, openai, telegram");
2361
2401
  process.exit(1);
2362
2402
  }
2363
2403
  setApiKey(provider, key);
2364
2404
  console.log(`\u2713 ${provider} API key saved`);
2405
+ if (provider === "telegram") {
2406
+ console.log("Start the bot with: cnapse, then /telegram or Ctrl+T");
2407
+ }
2365
2408
  process.exit(0);
2366
2409
  }
2367
2410
  case "config": {
@@ -2408,32 +2451,58 @@ async function main() {
2408
2451
  case "help":
2409
2452
  case "--help":
2410
2453
  case "-h": {
2454
+ const cyan = "\x1B[36m";
2455
+ const green = "\x1B[32m";
2456
+ const yellow = "\x1B[33m";
2457
+ const magenta = "\x1B[35m";
2458
+ const bold = "\x1B[1m";
2459
+ const dim = "\x1B[2m";
2460
+ const reset = "\x1B[0m";
2411
2461
  console.log(`
2412
- C-napse - Autonomous PC Intelligence
2413
-
2414
- Usage:
2415
- cnapse Start interactive chat
2416
- cnapse init Interactive setup wizard
2417
- cnapse config Interactive configuration
2418
- cnapse config show Show current configuration
2419
- cnapse config set <k> <v> Set config value
2420
- cnapse auth <provider> <key> Set API key
2421
- cnapse help Show this help
2422
-
2423
- Providers:
2424
- ollama - Local AI (default, free)
2425
- openrouter - OpenRouter API (many models)
2426
- anthropic - Anthropic Claude
2427
- openai - OpenAI GPT
2428
-
2429
- Quick Start:
2430
- cnapse init # Interactive setup
2431
- cnapse config # Change provider/model
2432
-
2433
- Manual Setup:
2434
- cnapse auth openrouter sk-or-xxxxx
2435
- cnapse config set provider openrouter
2436
- cnapse config set model qwen/qwen-2.5-coder-32b-instruct
2462
+ ${cyan}${bold}\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557
2463
+ \u2551 \u2551
2464
+ \u2551 ${magenta}\u2588\u2588\u2588\u2588\u2588\u2588\u2557 ${cyan}\u2588\u2588\u2588\u2557 \u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557${reset}${cyan}${bold} \u2551
2465
+ \u2551 ${magenta}\u2588\u2588\u2554\u2550\u2550\u2550\u2550\u255D ${cyan}\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2551\u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2557\u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2557\u2588\u2588\u2554\u2550\u2550\u2550\u2550\u255D\u2588\u2588\u2554\u2550\u2550\u2550\u2550\u255D${reset}${cyan}${bold} \u2551
2466
+ \u2551 ${magenta}\u2588\u2588\u2551 \u2588\u2588\u2588\u2588\u2588\u2557${cyan}\u2588\u2588\u2554\u2588\u2588\u2557 \u2588\u2588\u2551\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2551\u2588\u2588\u2588\u2588\u2588\u2588\u2554\u255D\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2588\u2588\u2588\u2557${reset}${cyan}${bold} \u2551
2467
+ \u2551 ${magenta}\u2588\u2588\u2551 \u255A\u2550\u2550\u2550\u2550\u255D${cyan}\u2588\u2588\u2551\u255A\u2588\u2588\u2557\u2588\u2588\u2551\u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2551\u2588\u2588\u2554\u2550\u2550\u2550\u255D \u255A\u2550\u2550\u2550\u2550\u2588\u2588\u2551\u2588\u2588\u2554\u2550\u2550\u255D${reset}${cyan}${bold} \u2551
2468
+ \u2551 ${magenta}\u255A\u2588\u2588\u2588\u2588\u2588\u2588\u2557 ${cyan}\u2588\u2588\u2551 \u255A\u2588\u2588\u2588\u2588\u2551\u2588\u2588\u2551 \u2588\u2588\u2551\u2588\u2588\u2551 \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2551\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557${reset}${cyan}${bold} \u2551
2469
+ \u2551 ${magenta} \u255A\u2550\u2550\u2550\u2550\u2550\u255D ${cyan}\u255A\u2550\u255D \u255A\u2550\u2550\u2550\u255D\u255A\u2550\u255D \u255A\u2550\u255D\u255A\u2550\u255D \u255A\u2550\u2550\u2550\u2550\u2550\u2550\u255D\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u255D${reset}${cyan}${bold} \u2551
2470
+ \u2551 \u2551
2471
+ \u2551 ${reset}${dim}Autonomous PC Intelligence${reset}${cyan}${bold} \u2551
2472
+ \u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D${reset}
2473
+
2474
+ ${yellow}${bold}USAGE${reset}
2475
+ ${green}cnapse${reset} Start interactive chat
2476
+ ${green}cnapse init${reset} Interactive setup wizard
2477
+ ${green}cnapse config${reset} Interactive configuration
2478
+ ${green}cnapse config show${reset} Show current configuration
2479
+ ${green}cnapse auth <provider> <key>${reset} Set API key
2480
+ ${green}cnapse help${reset} Show this help
2481
+
2482
+ ${yellow}${bold}PROVIDERS${reset}
2483
+ ${cyan}ollama${reset} Local AI ${dim}(default, free, private)${reset}
2484
+ ${cyan}openrouter${reset} OpenRouter API ${dim}(many models, pay-per-use)${reset}
2485
+ ${cyan}anthropic${reset} Anthropic Claude ${dim}(best reasoning)${reset}
2486
+ ${cyan}openai${reset} OpenAI GPT ${dim}(reliable)${reset}
2487
+ ${cyan}telegram${reset} Telegram bot token ${dim}(remote control)${reset}
2488
+
2489
+ ${yellow}${bold}QUICK START${reset}
2490
+ ${dim}# Interactive setup - easiest way${reset}
2491
+ ${green}cnapse init${reset}
2492
+
2493
+ ${dim}# Manual setup with OpenRouter${reset}
2494
+ ${green}cnapse auth openrouter sk-or-v1-xxxxx${reset}
2495
+ ${green}cnapse config set provider openrouter${reset}
2496
+
2497
+ ${dim}# Add Telegram for remote control${reset}
2498
+ ${green}cnapse auth telegram YOUR_BOT_TOKEN${reset}
2499
+
2500
+ ${yellow}${bold}IN-APP SHORTCUTS${reset}
2501
+ ${cyan}Ctrl+H${reset} Help menu ${cyan}Ctrl+P${reset} Change provider
2502
+ ${cyan}Ctrl+E${reset} Screen watch ${cyan}Ctrl+T${reset} Toggle Telegram
2503
+ ${cyan}Ctrl+L${reset} Clear chat ${cyan}Ctrl+C${reset} Exit
2504
+
2505
+ ${dim}GitHub: https://github.com/projectservan8n/C-napse${reset}
2437
2506
  `);
2438
2507
  process.exit(0);
2439
2508
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@projectservan8n/cnapse",
3
- "version": "0.5.7",
3
+ "version": "0.6.0",
4
4
  "description": "Autonomous PC intelligence - AI assistant for desktop automation",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -16,7 +16,13 @@ Available tools:
16
16
  - getActiveWindow() - Get info about the currently focused window
17
17
  - listWindows() - List all open windows
18
18
  - focusWindow(title) - Focus a window by title (partial match)
19
+ - minimizeWindow(title?) - Minimize a window by title, or active window if no title
20
+ - maximizeWindow(title?) - Maximize a window by title, or active window if no title
21
+ - closeWindow(title?) - Close a window by title, or active window if no title
22
+ - restoreWindow(title) - Restore a minimized window by title
19
23
  - scrollMouse(amount) - Scroll mouse wheel (positive=up, negative=down)
24
+ - dragMouse(startX, startY, endX, endY) - Drag mouse from one point to another
25
+ - getMousePosition() - Get current mouse position
20
26
 
21
27
  Guidelines:
22
28
  1. Always confirm dangerous actions (like closing windows with unsaved work)
@@ -25,6 +31,13 @@ Guidelines:
25
31
  4. Report what you see/do at each step
26
32
  5. If something fails, try alternative approaches
27
33
 
34
+ Window control examples:
35
+ - minimizeWindow("Visual Studio Code") - Minimize VS Code
36
+ - minimizeWindow() - Minimize the currently active window
37
+ - maximizeWindow("Chrome") - Maximize Chrome
38
+ - closeWindow("Notepad") - Close Notepad
39
+ - restoreWindow("Discord") - Restore minimized Discord
40
+
28
41
  Common keyboard shortcuts:
29
42
  - Copy: control+c
30
43
  - Paste: control+v
@@ -52,6 +65,10 @@ When asked to open an application:
52
65
  'getActiveWindow',
53
66
  'listWindows',
54
67
  'focusWindow',
68
+ 'minimizeWindow',
69
+ 'maximizeWindow',
70
+ 'closeWindow',
71
+ 'restoreWindow',
55
72
  'scrollMouse',
56
73
  'dragMouse',
57
74
  'getMousePosition',
@@ -114,6 +114,14 @@ export async function executeTool(call: ToolCall): Promise<ToolResult> {
114
114
  return await computer.listWindows();
115
115
  case 'focusWindow':
116
116
  return await computer.focusWindow(args.title as string);
117
+ case 'minimizeWindow':
118
+ return await computer.minimizeWindow(args.title as string | undefined);
119
+ case 'maximizeWindow':
120
+ return await computer.maximizeWindow(args.title as string | undefined);
121
+ case 'closeWindow':
122
+ return await computer.closeWindow(args.title as string | undefined);
123
+ case 'restoreWindow':
124
+ return await computer.restoreWindow(args.title as string);
117
125
  case 'scrollMouse':
118
126
  return await computer.scrollMouse(args.amount as number);
119
127
  case 'dragMouse':
package/src/index.tsx CHANGED
@@ -13,23 +13,26 @@ async function main() {
13
13
 
14
14
  switch (command) {
15
15
  case 'auth': {
16
- const provider = args[1] as 'openrouter' | 'anthropic' | 'openai';
16
+ const provider = args[1] as 'openrouter' | 'anthropic' | 'openai' | 'telegram';
17
17
  const key = args[2];
18
18
 
19
19
  if (!provider || !key) {
20
20
  console.log('Usage: cnapse auth <provider> <api-key>');
21
- console.log('Providers: openrouter, anthropic, openai');
21
+ console.log('Providers: openrouter, anthropic, openai, telegram');
22
22
  process.exit(1);
23
23
  }
24
24
 
25
- if (!['openrouter', 'anthropic', 'openai'].includes(provider)) {
25
+ if (!['openrouter', 'anthropic', 'openai', 'telegram'].includes(provider)) {
26
26
  console.log(`Invalid provider: ${provider}`);
27
- console.log('Valid providers: openrouter, anthropic, openai');
27
+ console.log('Valid providers: openrouter, anthropic, openai, telegram');
28
28
  process.exit(1);
29
29
  }
30
30
 
31
31
  setApiKey(provider, key);
32
32
  console.log(`✓ ${provider} API key saved`);
33
+ if (provider === 'telegram') {
34
+ console.log('Start the bot with: cnapse, then /telegram or Ctrl+T');
35
+ }
33
36
  process.exit(0);
34
37
  }
35
38
 
@@ -84,32 +87,60 @@ async function main() {
84
87
  case 'help':
85
88
  case '--help':
86
89
  case '-h': {
90
+ // Colorful help using ANSI escape codes
91
+ const cyan = '\x1b[36m';
92
+ const green = '\x1b[32m';
93
+ const yellow = '\x1b[33m';
94
+ const magenta = '\x1b[35m';
95
+ const bold = '\x1b[1m';
96
+ const dim = '\x1b[2m';
97
+ const reset = '\x1b[0m';
98
+
87
99
  console.log(`
88
- C-napse - Autonomous PC Intelligence
89
-
90
- Usage:
91
- cnapse Start interactive chat
92
- cnapse init Interactive setup wizard
93
- cnapse config Interactive configuration
94
- cnapse config show Show current configuration
95
- cnapse config set <k> <v> Set config value
96
- cnapse auth <provider> <key> Set API key
97
- cnapse help Show this help
98
-
99
- Providers:
100
- ollama - Local AI (default, free)
101
- openrouter - OpenRouter API (many models)
102
- anthropic - Anthropic Claude
103
- openai - OpenAI GPT
104
-
105
- Quick Start:
106
- cnapse init # Interactive setup
107
- cnapse config # Change provider/model
108
-
109
- Manual Setup:
110
- cnapse auth openrouter sk-or-xxxxx
111
- cnapse config set provider openrouter
112
- cnapse config set model qwen/qwen-2.5-coder-32b-instruct
100
+ ${cyan}${bold}╔═══════════════════════════════════════════════════════════╗
101
+ ║ ║
102
+ ║ ${magenta}██████╗ ${cyan}███╗ ██╗ █████╗ ██████╗ ███████╗███████╗${reset}${cyan}${bold} ║
103
+ ║ ${magenta}██╔════╝ ${cyan}████╗ ██║██╔══██╗██╔══██╗██╔════╝██╔════╝${reset}${cyan}${bold} ║
104
+ ║ ${magenta}██║ █████╗${cyan}██╔██╗ ██║███████║██████╔╝███████╗█████╗${reset}${cyan}${bold} ║
105
+ ║ ${magenta}██║ ╚════╝${cyan}██║╚██╗██║██╔══██║██╔═══╝ ╚════██║██╔══╝${reset}${cyan}${bold} ║
106
+ ║ ${magenta}╚██████╗ ${cyan}██║ ╚████║██║ ██║██║ ███████║███████╗${reset}${cyan}${bold} ║
107
+ ║ ${magenta} ╚═════╝ ${cyan}╚═╝ ╚═══╝╚═╝ ╚═╝╚═╝ ╚══════╝╚══════╝${reset}${cyan}${bold} ║
108
+ ║ ║
109
+ ║ ${reset}${dim}Autonomous PC Intelligence${reset}${cyan}${bold} ║
110
+ ╚═══════════════════════════════════════════════════════════╝${reset}
111
+
112
+ ${yellow}${bold}USAGE${reset}
113
+ ${green}cnapse${reset} Start interactive chat
114
+ ${green}cnapse init${reset} Interactive setup wizard
115
+ ${green}cnapse config${reset} Interactive configuration
116
+ ${green}cnapse config show${reset} Show current configuration
117
+ ${green}cnapse auth <provider> <key>${reset} Set API key
118
+ ${green}cnapse help${reset} Show this help
119
+
120
+ ${yellow}${bold}PROVIDERS${reset}
121
+ ${cyan}ollama${reset} Local AI ${dim}(default, free, private)${reset}
122
+ ${cyan}openrouter${reset} OpenRouter API ${dim}(many models, pay-per-use)${reset}
123
+ ${cyan}anthropic${reset} Anthropic Claude ${dim}(best reasoning)${reset}
124
+ ${cyan}openai${reset} OpenAI GPT ${dim}(reliable)${reset}
125
+ ${cyan}telegram${reset} Telegram bot token ${dim}(remote control)${reset}
126
+
127
+ ${yellow}${bold}QUICK START${reset}
128
+ ${dim}# Interactive setup - easiest way${reset}
129
+ ${green}cnapse init${reset}
130
+
131
+ ${dim}# Manual setup with OpenRouter${reset}
132
+ ${green}cnapse auth openrouter sk-or-v1-xxxxx${reset}
133
+ ${green}cnapse config set provider openrouter${reset}
134
+
135
+ ${dim}# Add Telegram for remote control${reset}
136
+ ${green}cnapse auth telegram YOUR_BOT_TOKEN${reset}
137
+
138
+ ${yellow}${bold}IN-APP SHORTCUTS${reset}
139
+ ${cyan}Ctrl+H${reset} Help menu ${cyan}Ctrl+P${reset} Change provider
140
+ ${cyan}Ctrl+E${reset} Screen watch ${cyan}Ctrl+T${reset} Toggle Telegram
141
+ ${cyan}Ctrl+L${reset} Clear chat ${cyan}Ctrl+C${reset} Exit
142
+
143
+ ${dim}GitHub: https://github.com/projectservan8n/C-napse${reset}
113
144
  `);
114
145
  process.exit(0);
115
146
  }
@@ -6,6 +6,7 @@ import { EventEmitter } from 'events';
6
6
  import { getConfig, getApiKey } from '../lib/config.js';
7
7
  import { describeScreen, captureScreenshot } from '../lib/vision.js';
8
8
  import { runCommand } from '../tools/shell.js';
9
+ import { chat as chatWithAI, chatWithVision, Message } from '../lib/api.js';
9
10
 
10
11
  export interface TelegramMessage {
11
12
  chatId: number;
@@ -25,6 +26,7 @@ export class TelegramBotService extends EventEmitter {
25
26
  private bot: any = null;
26
27
  private isRunning = false;
27
28
  private allowedChatIds: Set<number> = new Set();
29
+ private chatHistory: Map<number, Message[]> = new Map();
28
30
 
29
31
  constructor() {
30
32
  super();
@@ -205,7 +207,7 @@ export class TelegramBotService extends EventEmitter {
205
207
  await ctx.reply(status);
206
208
  });
207
209
 
208
- // Handle text messages - forward to AI
210
+ // Handle text messages - forward to AI and respond
209
211
  this.bot.on('text', async (ctx: any) => {
210
212
  if (!this.isAllowed(ctx.chat.id)) {
211
213
  return;
@@ -216,14 +218,71 @@ export class TelegramBotService extends EventEmitter {
216
218
  return;
217
219
  }
218
220
 
221
+ const chatId = ctx.chat.id;
222
+ const userText = ctx.message.text;
223
+ const from = ctx.from.username || ctx.from.first_name || 'User';
224
+
219
225
  const message: TelegramMessage = {
220
- chatId: ctx.chat.id,
221
- text: ctx.message.text,
222
- from: ctx.from.username || ctx.from.first_name || 'User',
226
+ chatId,
227
+ text: userText,
228
+ from,
223
229
  };
224
230
 
225
231
  this.emit('message', message);
226
- this.emit('command', 'chat', ctx.message.text, ctx.chat.id);
232
+
233
+ // Get or initialize chat history for this user
234
+ if (!this.chatHistory.has(chatId)) {
235
+ this.chatHistory.set(chatId, []);
236
+ }
237
+ const history = this.chatHistory.get(chatId)!;
238
+
239
+ // Add user message to history
240
+ history.push({ role: 'user', content: userText });
241
+
242
+ // Keep only last 10 messages for context
243
+ if (history.length > 10) {
244
+ history.splice(0, history.length - 10);
245
+ }
246
+
247
+ try {
248
+ // Send typing indicator
249
+ await ctx.sendChatAction('typing');
250
+
251
+ // Check if this looks like a screen/vision request
252
+ const isVisionRequest = /screen|see|look|what('?s| is) (on|visible)|show me|screenshot/i.test(userText);
253
+
254
+ let response;
255
+ if (isVisionRequest) {
256
+ // Capture screenshot and use vision
257
+ const screenshot = await captureScreenshot();
258
+ if (screenshot) {
259
+ response = await chatWithVision(history, screenshot);
260
+ } else {
261
+ response = await chatWithAI(history);
262
+ }
263
+ } else {
264
+ response = await chatWithAI(history);
265
+ }
266
+
267
+ // Add assistant response to history
268
+ history.push({ role: 'assistant', content: response.content });
269
+
270
+ // Send response (split if too long for Telegram)
271
+ const responseText = response.content || '(no response)';
272
+ if (responseText.length > 4000) {
273
+ // Split into chunks
274
+ const chunks = responseText.match(/.{1,4000}/gs) || [responseText];
275
+ for (const chunk of chunks) {
276
+ await ctx.reply(chunk);
277
+ }
278
+ } else {
279
+ await ctx.reply(responseText);
280
+ }
281
+ } catch (error) {
282
+ const errorMsg = error instanceof Error ? error.message : 'Unknown error';
283
+ await ctx.reply(`❌ Error: ${errorMsg}`);
284
+ this.emit('error', new Error(errorMsg));
285
+ }
227
286
  });
228
287
 
229
288
  // Error handling
@@ -288,6 +288,188 @@ export async function focusWindow(title: string): Promise<ToolResult> {
288
288
  }
289
289
  }
290
290
 
291
+ /**
292
+ * Minimize a window by title (or active window if no title)
293
+ */
294
+ export async function minimizeWindow(title?: string): Promise<ToolResult> {
295
+ try {
296
+ if (process.platform === 'win32') {
297
+ if (title) {
298
+ const escaped = title.replace(/'/g, "''");
299
+ const script = `
300
+ $proc = Get-Process | Where-Object { $_.MainWindowTitle -like '*${escaped}*' -and $_.MainWindowHandle -ne 0 } | Select-Object -First 1
301
+ if ($proc) {
302
+ Add-Type @"
303
+ using System;
304
+ using System.Runtime.InteropServices;
305
+ public class Win32 {
306
+ [DllImport("user32.dll")]
307
+ public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
308
+ }
309
+ "@
310
+ [Win32]::ShowWindow($proc.MainWindowHandle, 6)
311
+ Write-Output "Minimized: $($proc.MainWindowTitle)"
312
+ } else {
313
+ Write-Output "NOT_FOUND"
314
+ }`;
315
+ const { stdout } = await execAsync(`powershell -Command "${script.replace(/\n/g, ' ')}"`, { shell: 'cmd.exe' });
316
+ if (stdout.includes('NOT_FOUND')) {
317
+ return err(`Window containing "${title}" not found`);
318
+ }
319
+ return ok(stdout.trim());
320
+ } else {
321
+ // Minimize active window using Alt+Space, N
322
+ await execAsync(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('% n')"`, { shell: 'cmd.exe' });
323
+ return ok('Minimized active window');
324
+ }
325
+ } else if (process.platform === 'darwin') {
326
+ if (title) {
327
+ await execAsync(`osascript -e 'tell application "${title}" to set miniaturized of window 1 to true'`);
328
+ } else {
329
+ await execAsync(`osascript -e 'tell application "System Events" to keystroke "m" using command down'`);
330
+ }
331
+ return ok(`Minimized window${title ? `: ${title}` : ''}`);
332
+ } else {
333
+ if (title) {
334
+ await execAsync(`wmctrl -r "${title}" -b add,hidden`);
335
+ } else {
336
+ await execAsync(`xdotool getactivewindow windowminimize`);
337
+ }
338
+ return ok(`Minimized window${title ? `: ${title}` : ''}`);
339
+ }
340
+ } catch (error) {
341
+ return err(`Failed to minimize window: ${error instanceof Error ? error.message : 'Unknown error'}`);
342
+ }
343
+ }
344
+
345
+ /**
346
+ * Maximize a window by title (or active window if no title)
347
+ */
348
+ export async function maximizeWindow(title?: string): Promise<ToolResult> {
349
+ try {
350
+ if (process.platform === 'win32') {
351
+ if (title) {
352
+ const escaped = title.replace(/'/g, "''");
353
+ const script = `
354
+ $proc = Get-Process | Where-Object { $_.MainWindowTitle -like '*${escaped}*' -and $_.MainWindowHandle -ne 0 } | Select-Object -First 1
355
+ if ($proc) {
356
+ Add-Type @"
357
+ using System;
358
+ using System.Runtime.InteropServices;
359
+ public class Win32 {
360
+ [DllImport("user32.dll")]
361
+ public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
362
+ }
363
+ "@
364
+ [Win32]::ShowWindow($proc.MainWindowHandle, 3)
365
+ Write-Output "Maximized: $($proc.MainWindowTitle)"
366
+ } else {
367
+ Write-Output "NOT_FOUND"
368
+ }`;
369
+ const { stdout } = await execAsync(`powershell -Command "${script.replace(/\n/g, ' ')}"`, { shell: 'cmd.exe' });
370
+ if (stdout.includes('NOT_FOUND')) {
371
+ return err(`Window containing "${title}" not found`);
372
+ }
373
+ return ok(stdout.trim());
374
+ } else {
375
+ // Maximize active window using Alt+Space, X
376
+ await execAsync(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('% x')"`, { shell: 'cmd.exe' });
377
+ return ok('Maximized active window');
378
+ }
379
+ } else if (process.platform === 'darwin') {
380
+ if (title) {
381
+ await execAsync(`osascript -e 'tell application "${title}" to set zoomed of window 1 to true'`);
382
+ } else {
383
+ await execAsync(`osascript -e 'tell application "System Events" to keystroke "f" using {control down, command down}'`);
384
+ }
385
+ return ok(`Maximized window${title ? `: ${title}` : ''}`);
386
+ } else {
387
+ if (title) {
388
+ await execAsync(`wmctrl -r "${title}" -b add,maximized_vert,maximized_horz`);
389
+ } else {
390
+ await execAsync(`wmctrl -r :ACTIVE: -b add,maximized_vert,maximized_horz`);
391
+ }
392
+ return ok(`Maximized window${title ? `: ${title}` : ''}`);
393
+ }
394
+ } catch (error) {
395
+ return err(`Failed to maximize window: ${error instanceof Error ? error.message : 'Unknown error'}`);
396
+ }
397
+ }
398
+
399
+ /**
400
+ * Close a window by title (or active window if no title)
401
+ */
402
+ export async function closeWindow(title?: string): Promise<ToolResult> {
403
+ try {
404
+ if (process.platform === 'win32') {
405
+ if (title) {
406
+ const escaped = title.replace(/'/g, "''");
407
+ await execAsync(`powershell -Command "Get-Process | Where-Object { $_.MainWindowTitle -like '*${escaped}*' } | ForEach-Object { $_.CloseMainWindow() }"`, { shell: 'cmd.exe' });
408
+ return ok(`Closed window: ${title}`);
409
+ } else {
410
+ await execAsync(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('%{F4}')"`, { shell: 'cmd.exe' });
411
+ return ok('Closed active window');
412
+ }
413
+ } else if (process.platform === 'darwin') {
414
+ if (title) {
415
+ await execAsync(`osascript -e 'tell application "${title}" to close window 1'`);
416
+ } else {
417
+ await execAsync(`osascript -e 'tell application "System Events" to keystroke "w" using command down'`);
418
+ }
419
+ return ok(`Closed window${title ? `: ${title}` : ''}`);
420
+ } else {
421
+ if (title) {
422
+ await execAsync(`wmctrl -c "${title}"`);
423
+ } else {
424
+ await execAsync(`xdotool getactivewindow windowclose`);
425
+ }
426
+ return ok(`Closed window${title ? `: ${title}` : ''}`);
427
+ }
428
+ } catch (error) {
429
+ return err(`Failed to close window: ${error instanceof Error ? error.message : 'Unknown error'}`);
430
+ }
431
+ }
432
+
433
+ /**
434
+ * Restore a minimized window by title
435
+ */
436
+ export async function restoreWindow(title: string): Promise<ToolResult> {
437
+ try {
438
+ if (process.platform === 'win32') {
439
+ const escaped = title.replace(/'/g, "''");
440
+ const script = `
441
+ $proc = Get-Process | Where-Object { $_.MainWindowTitle -like '*${escaped}*' -and $_.MainWindowHandle -ne 0 } | Select-Object -First 1
442
+ if ($proc) {
443
+ Add-Type @"
444
+ using System;
445
+ using System.Runtime.InteropServices;
446
+ public class Win32 {
447
+ [DllImport("user32.dll")]
448
+ public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
449
+ }
450
+ "@
451
+ [Win32]::ShowWindow($proc.MainWindowHandle, 9)
452
+ Write-Output "Restored: $($proc.MainWindowTitle)"
453
+ } else {
454
+ Write-Output "NOT_FOUND"
455
+ }`;
456
+ const { stdout } = await execAsync(`powershell -Command "${script.replace(/\n/g, ' ')}"`, { shell: 'cmd.exe' });
457
+ if (stdout.includes('NOT_FOUND')) {
458
+ return err(`Window containing "${title}" not found`);
459
+ }
460
+ return ok(stdout.trim());
461
+ } else if (process.platform === 'darwin') {
462
+ await execAsync(`osascript -e 'tell application "${title}" to set miniaturized of window 1 to false'`);
463
+ return ok(`Restored window: ${title}`);
464
+ } else {
465
+ await execAsync(`wmctrl -r "${title}" -b remove,hidden`);
466
+ return ok(`Restored window: ${title}`);
467
+ }
468
+ } catch (error) {
469
+ return err(`Failed to restore window: ${error instanceof Error ? error.message : 'Unknown error'}`);
470
+ }
471
+ }
472
+
291
473
  /**
292
474
  * Scroll mouse wheel
293
475
  */
@@ -383,6 +565,10 @@ export function getComputerTools() {
383
565
  getActiveWindow,
384
566
  listWindows,
385
567
  focusWindow,
568
+ minimizeWindow,
569
+ maximizeWindow,
570
+ closeWindow,
571
+ restoreWindow,
386
572
  scrollMouse,
387
573
  dragMouse,
388
574
  getMousePosition,
@@ -436,6 +622,26 @@ export const computerTools = [
436
622
  description: 'Focus a window by title',
437
623
  parameters: { type: 'object', properties: { title: { type: 'string' } }, required: ['title'] },
438
624
  },
625
+ {
626
+ name: 'minimizeWindow',
627
+ description: 'Minimize a window by title (or active window if no title given)',
628
+ parameters: { type: 'object', properties: { title: { type: 'string', description: 'Window title to minimize (partial match). Leave empty for active window.' } } },
629
+ },
630
+ {
631
+ name: 'maximizeWindow',
632
+ description: 'Maximize a window by title (or active window if no title given)',
633
+ parameters: { type: 'object', properties: { title: { type: 'string', description: 'Window title to maximize (partial match). Leave empty for active window.' } } },
634
+ },
635
+ {
636
+ name: 'closeWindow',
637
+ description: 'Close a window by title (or active window if no title given)',
638
+ parameters: { type: 'object', properties: { title: { type: 'string', description: 'Window title to close (partial match). Leave empty for active window.' } } },
639
+ },
640
+ {
641
+ name: 'restoreWindow',
642
+ description: 'Restore a minimized window by title',
643
+ parameters: { type: 'object', properties: { title: { type: 'string', description: 'Window title to restore (partial match)' } }, required: ['title'] },
644
+ },
439
645
  {
440
646
  name: 'scrollMouse',
441
647
  description: 'Scroll mouse wheel (positive=up, negative=down)',