@projectservan8n/cnapse 0.5.8 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1465,6 +1465,7 @@ var TelegramBotService = class extends EventEmitter {
1465
1465
  bot = null;
1466
1466
  isRunning = false;
1467
1467
  allowedChatIds = /* @__PURE__ */ new Set();
1468
+ chatHistory = /* @__PURE__ */ new Map();
1468
1469
  constructor() {
1469
1470
  super();
1470
1471
  }
@@ -1622,13 +1623,52 @@ ${result.error}
1622
1623
  if (ctx.message.text.startsWith("/")) {
1623
1624
  return;
1624
1625
  }
1626
+ const chatId = ctx.chat.id;
1627
+ const userText = ctx.message.text;
1628
+ const from = ctx.from.username || ctx.from.first_name || "User";
1625
1629
  const message = {
1626
- chatId: ctx.chat.id,
1627
- text: ctx.message.text,
1628
- from: ctx.from.username || ctx.from.first_name || "User"
1630
+ chatId,
1631
+ text: userText,
1632
+ from
1629
1633
  };
1630
1634
  this.emit("message", message);
1631
- this.emit("command", "chat", ctx.message.text, ctx.chat.id);
1635
+ if (!this.chatHistory.has(chatId)) {
1636
+ this.chatHistory.set(chatId, []);
1637
+ }
1638
+ const history = this.chatHistory.get(chatId);
1639
+ history.push({ role: "user", content: userText });
1640
+ if (history.length > 10) {
1641
+ history.splice(0, history.length - 10);
1642
+ }
1643
+ try {
1644
+ await ctx.sendChatAction("typing");
1645
+ const isVisionRequest = /screen|see|look|what('?s| is) (on|visible)|show me|screenshot/i.test(userText);
1646
+ let response;
1647
+ if (isVisionRequest) {
1648
+ const screenshot = await captureScreenshot();
1649
+ if (screenshot) {
1650
+ response = await chatWithVision(history, screenshot);
1651
+ } else {
1652
+ response = await chat(history);
1653
+ }
1654
+ } else {
1655
+ response = await chat(history);
1656
+ }
1657
+ history.push({ role: "assistant", content: response.content });
1658
+ const responseText = response.content || "(no response)";
1659
+ if (responseText.length > 4e3) {
1660
+ const chunks = responseText.match(/.{1,4000}/gs) || [responseText];
1661
+ for (const chunk of chunks) {
1662
+ await ctx.reply(chunk);
1663
+ }
1664
+ } else {
1665
+ await ctx.reply(responseText);
1666
+ }
1667
+ } catch (error) {
1668
+ const errorMsg = error instanceof Error ? error.message : "Unknown error";
1669
+ await ctx.reply(`\u274C Error: ${errorMsg}`);
1670
+ this.emit("error", new Error(errorMsg));
1671
+ }
1632
1672
  });
1633
1673
  this.bot.catch((err2) => {
1634
1674
  this.emit("error", err2);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@projectservan8n/cnapse",
3
- "version": "0.5.8",
3
+ "version": "0.6.0",
4
4
  "description": "Autonomous PC intelligence - AI assistant for desktop automation",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -16,7 +16,13 @@ Available tools:
16
16
  - getActiveWindow() - Get info about the currently focused window
17
17
  - listWindows() - List all open windows
18
18
  - focusWindow(title) - Focus a window by title (partial match)
19
+ - minimizeWindow(title?) - Minimize a window by title, or active window if no title
20
+ - maximizeWindow(title?) - Maximize a window by title, or active window if no title
21
+ - closeWindow(title?) - Close a window by title, or active window if no title
22
+ - restoreWindow(title) - Restore a minimized window by title
19
23
  - scrollMouse(amount) - Scroll mouse wheel (positive=up, negative=down)
24
+ - dragMouse(startX, startY, endX, endY) - Drag mouse from one point to another
25
+ - getMousePosition() - Get current mouse position
20
26
 
21
27
  Guidelines:
22
28
  1. Always confirm dangerous actions (like closing windows with unsaved work)
@@ -25,6 +31,13 @@ Guidelines:
25
31
  4. Report what you see/do at each step
26
32
  5. If something fails, try alternative approaches
27
33
 
34
+ Window control examples:
35
+ - minimizeWindow("Visual Studio Code") - Minimize VS Code
36
+ - minimizeWindow() - Minimize the currently active window
37
+ - maximizeWindow("Chrome") - Maximize Chrome
38
+ - closeWindow("Notepad") - Close Notepad
39
+ - restoreWindow("Discord") - Restore minimized Discord
40
+
28
41
  Common keyboard shortcuts:
29
42
  - Copy: control+c
30
43
  - Paste: control+v
@@ -52,6 +65,10 @@ When asked to open an application:
52
65
  'getActiveWindow',
53
66
  'listWindows',
54
67
  'focusWindow',
68
+ 'minimizeWindow',
69
+ 'maximizeWindow',
70
+ 'closeWindow',
71
+ 'restoreWindow',
55
72
  'scrollMouse',
56
73
  'dragMouse',
57
74
  'getMousePosition',
@@ -114,6 +114,14 @@ export async function executeTool(call: ToolCall): Promise<ToolResult> {
114
114
  return await computer.listWindows();
115
115
  case 'focusWindow':
116
116
  return await computer.focusWindow(args.title as string);
117
+ case 'minimizeWindow':
118
+ return await computer.minimizeWindow(args.title as string | undefined);
119
+ case 'maximizeWindow':
120
+ return await computer.maximizeWindow(args.title as string | undefined);
121
+ case 'closeWindow':
122
+ return await computer.closeWindow(args.title as string | undefined);
123
+ case 'restoreWindow':
124
+ return await computer.restoreWindow(args.title as string);
117
125
  case 'scrollMouse':
118
126
  return await computer.scrollMouse(args.amount as number);
119
127
  case 'dragMouse':
@@ -6,6 +6,7 @@ import { EventEmitter } from 'events';
6
6
  import { getConfig, getApiKey } from '../lib/config.js';
7
7
  import { describeScreen, captureScreenshot } from '../lib/vision.js';
8
8
  import { runCommand } from '../tools/shell.js';
9
+ import { chat as chatWithAI, chatWithVision, Message } from '../lib/api.js';
9
10
 
10
11
  export interface TelegramMessage {
11
12
  chatId: number;
@@ -25,6 +26,7 @@ export class TelegramBotService extends EventEmitter {
25
26
  private bot: any = null;
26
27
  private isRunning = false;
27
28
  private allowedChatIds: Set<number> = new Set();
29
+ private chatHistory: Map<number, Message[]> = new Map();
28
30
 
29
31
  constructor() {
30
32
  super();
@@ -205,7 +207,7 @@ export class TelegramBotService extends EventEmitter {
205
207
  await ctx.reply(status);
206
208
  });
207
209
 
208
- // Handle text messages - forward to AI
210
+ // Handle text messages - forward to AI and respond
209
211
  this.bot.on('text', async (ctx: any) => {
210
212
  if (!this.isAllowed(ctx.chat.id)) {
211
213
  return;
@@ -216,14 +218,71 @@ export class TelegramBotService extends EventEmitter {
216
218
  return;
217
219
  }
218
220
 
221
+ const chatId = ctx.chat.id;
222
+ const userText = ctx.message.text;
223
+ const from = ctx.from.username || ctx.from.first_name || 'User';
224
+
219
225
  const message: TelegramMessage = {
220
- chatId: ctx.chat.id,
221
- text: ctx.message.text,
222
- from: ctx.from.username || ctx.from.first_name || 'User',
226
+ chatId,
227
+ text: userText,
228
+ from,
223
229
  };
224
230
 
225
231
  this.emit('message', message);
226
- this.emit('command', 'chat', ctx.message.text, ctx.chat.id);
232
+
233
+ // Get or initialize chat history for this user
234
+ if (!this.chatHistory.has(chatId)) {
235
+ this.chatHistory.set(chatId, []);
236
+ }
237
+ const history = this.chatHistory.get(chatId)!;
238
+
239
+ // Add user message to history
240
+ history.push({ role: 'user', content: userText });
241
+
242
+ // Keep only last 10 messages for context
243
+ if (history.length > 10) {
244
+ history.splice(0, history.length - 10);
245
+ }
246
+
247
+ try {
248
+ // Send typing indicator
249
+ await ctx.sendChatAction('typing');
250
+
251
+ // Check if this looks like a screen/vision request
252
+ const isVisionRequest = /screen|see|look|what('?s| is) (on|visible)|show me|screenshot/i.test(userText);
253
+
254
+ let response;
255
+ if (isVisionRequest) {
256
+ // Capture screenshot and use vision
257
+ const screenshot = await captureScreenshot();
258
+ if (screenshot) {
259
+ response = await chatWithVision(history, screenshot);
260
+ } else {
261
+ response = await chatWithAI(history);
262
+ }
263
+ } else {
264
+ response = await chatWithAI(history);
265
+ }
266
+
267
+ // Add assistant response to history
268
+ history.push({ role: 'assistant', content: response.content });
269
+
270
+ // Send response (split if too long for Telegram)
271
+ const responseText = response.content || '(no response)';
272
+ if (responseText.length > 4000) {
273
+ // Split into chunks
274
+ const chunks = responseText.match(/.{1,4000}/gs) || [responseText];
275
+ for (const chunk of chunks) {
276
+ await ctx.reply(chunk);
277
+ }
278
+ } else {
279
+ await ctx.reply(responseText);
280
+ }
281
+ } catch (error) {
282
+ const errorMsg = error instanceof Error ? error.message : 'Unknown error';
283
+ await ctx.reply(`❌ Error: ${errorMsg}`);
284
+ this.emit('error', new Error(errorMsg));
285
+ }
227
286
  });
228
287
 
229
288
  // Error handling
@@ -288,6 +288,188 @@ export async function focusWindow(title: string): Promise<ToolResult> {
288
288
  }
289
289
  }
290
290
 
291
/**
 * Quote `value` as a single word for a POSIX shell (`/bin/sh`).
 * Everything inside single quotes is literal; an embedded `'` is emitted as `'\''`.
 */
function quoteForPosixShell(value: string): string {
  return `'${value.replace(/'/g, "'\\''")}'`;
}

/**
 * Render `value` as an AppleScript string literal (backslash and double quote escaped).
 */
function toAppleScriptString(value: string): string {
  return `"${value.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`;
}

/**
 * Escape `value` for use inside a PowerShell single-quoted string.
 * PowerShell accepts the typographic quotes U+2018/2019/201A/201B as string
 * delimiters too, so each of them is doubled, not only the ASCII apostrophe.
 */
function escapePowerShellSingleQuoted(value: string): string {
  return value.replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&');
}

/**
 * Build a PowerShell expression that evaluates to a `-like` pattern matching any
 * window title that contains `title` literally (wildcard characters are escaped).
 */
function windowTitlePatternExpression(title: string): string {
  const literal = escapePowerShellSingleQuoted(title);
  return `('*' + [System.Management.Automation.WildcardPattern]::Escape('${literal}') + '*')`;
}

/**
 * Run a multi-line PowerShell script and return its trimmed stdout.
 *
 * The script is passed via -EncodedCommand (base64 of UTF-16LE) so that newlines
 * and double quotes survive cmd.exe untouched; here-strings in particular need
 * real line breaks and cannot be collapsed onto one line.
 */
async function runWindowsPowerShellScript(script: string): Promise<string> {
  const encoded = Buffer.from(script, 'utf16le').toString('base64');
  const { stdout } = await execAsync(`powershell -NoProfile -EncodedCommand ${encoded}`, { shell: 'cmd.exe' });
  return stdout.trim();
}

/**
 * Run a one-line AppleScript through `osascript -e`.
 */
async function runAppleScriptLine(script: string): Promise<void> {
  await execAsync(`osascript -e ${quoteForPosixShell(script)}`);
}

/**
 * Windows only: call user32 ShowWindow on the first process whose main window
 * title contains `title`.
 *
 * @param title       Partial window title to look for.
 * @param showCommand ShowWindow nCmdShow value (6 = minimize, 3 = maximize, 9 = restore).
 * @param verb        Past-tense label used in the success message, e.g. "Minimized".
 * @returns ok("<verb>: <full title>") or an error result when no window matched.
 */
async function showWindowByTitleOnWindows(title: string, showCommand: number, verb: string): Promise<ToolResult> {
  // NOTE: the here-string terminator ("@) must start at column 0 of its own line.
  const script = [
    "$ProgressPreference = 'SilentlyContinue'",
    `$pattern = ${windowTitlePatternExpression(title)}`,
    '$proc = Get-Process | Where-Object { $_.MainWindowHandle -ne 0 -and $_.MainWindowTitle -like $pattern } | Select-Object -First 1',
    'if ($proc) {',
    'Add-Type @"',
    'using System;',
    'using System.Runtime.InteropServices;',
    'public class Win32 {',
    '  [DllImport("user32.dll")]',
    '  public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);',
    '}',
    '"@',
    // Out-Null keeps ShowWindow's boolean return value out of stdout.
    `  [Win32]::ShowWindow($proc.MainWindowHandle, ${showCommand}) | Out-Null`,
    `  Write-Output "${verb}: $($proc.MainWindowTitle)"`,
    '} else {',
    '  Write-Output "NOT_FOUND"',
    '}',
  ].join('\n');

  const output = await runWindowsPowerShellScript(script);
  // Exact comparison: a window whose title merely contains "NOT_FOUND" is still a hit.
  if (output === 'NOT_FOUND') {
    return err(`Window containing "${title}" not found`);
  }
  return ok(output);
}

/**
 * Minimize a window by title (or the active window if no title is given).
 *
 * @param title Partial window title on Windows/Linux, application name on macOS.
 *              Omitted or empty means "the currently active window".
 * @returns A ToolResult describing what was minimized, or an error result on failure.
 */
export async function minimizeWindow(title?: string): Promise<ToolResult> {
  try {
    if (process.platform === 'win32') {
      if (title) {
        return await showWindowByTitleOnWindows(title, 6, 'Minimized');
      }
      // Minimize active window using Alt+Space, N
      await execAsync(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('% n')"`, { shell: 'cmd.exe' });
      return ok('Minimized active window');
    }

    if (process.platform === 'darwin') {
      if (title) {
        await runAppleScriptLine(`tell application ${toAppleScriptString(title)} to set miniaturized of window 1 to true`);
      } else {
        await runAppleScriptLine('tell application "System Events" to keystroke "m" using command down');
      }
      return ok(`Minimized window${title ? `: ${title}` : ''}`);
    }

    if (title) {
      // NOTE(review): relies on the window manager honouring the "hidden" state — confirm.
      await execAsync(`wmctrl -r ${quoteForPosixShell(title)} -b add,hidden`);
    } else {
      await execAsync(`xdotool getactivewindow windowminimize`);
    }
    return ok(`Minimized window${title ? `: ${title}` : ''}`);
  } catch (error) {
    return err(`Failed to minimize window: ${error instanceof Error ? error.message : 'Unknown error'}`);
  }
}

/**
 * Maximize a window by title (or the active window if no title is given).
 *
 * @param title Partial window title on Windows/Linux, application name on macOS.
 *              Omitted or empty means "the currently active window".
 * @returns A ToolResult describing what was maximized, or an error result on failure.
 */
export async function maximizeWindow(title?: string): Promise<ToolResult> {
  try {
    if (process.platform === 'win32') {
      if (title) {
        return await showWindowByTitleOnWindows(title, 3, 'Maximized');
      }
      // Maximize active window using Alt+Space, X
      await execAsync(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('% x')"`, { shell: 'cmd.exe' });
      return ok('Maximized active window');
    }

    if (process.platform === 'darwin') {
      if (title) {
        await runAppleScriptLine(`tell application ${toAppleScriptString(title)} to set zoomed of window 1 to true`);
      } else {
        await runAppleScriptLine('tell application "System Events" to keystroke "f" using {control down, command down}');
      }
      return ok(`Maximized window${title ? `: ${title}` : ''}`);
    }

    const target = title ? quoteForPosixShell(title) : ':ACTIVE:';
    await execAsync(`wmctrl -r ${target} -b add,maximized_vert,maximized_horz`);
    return ok(`Maximized window${title ? `: ${title}` : ''}`);
  } catch (error) {
    return err(`Failed to maximize window: ${error instanceof Error ? error.message : 'Unknown error'}`);
  }
}

/**
 * Close a window by title (or the active window if no title is given).
 *
 * On Windows every process whose main window title contains `title` is asked to
 * close via CloseMainWindow; an error result is returned when none matched.
 *
 * @param title Partial window title on Windows/Linux, application name on macOS.
 *              Omitted or empty means "the currently active window".
 * @returns A ToolResult describing what was closed, or an error result on failure.
 */
export async function closeWindow(title?: string): Promise<ToolResult> {
  try {
    if (process.platform === 'win32') {
      if (title) {
        const script = [
          "$ProgressPreference = 'SilentlyContinue'",
          `$pattern = ${windowTitlePatternExpression(title)}`,
          '$procs = @(Get-Process | Where-Object { $_.MainWindowHandle -ne 0 -and $_.MainWindowTitle -like $pattern })',
          'if ($procs.Count -eq 0) {',
          '  Write-Output "NOT_FOUND"',
          '} else {',
          '  $procs | ForEach-Object { $_.CloseMainWindow() | Out-Null }',
          '}',
        ].join('\n');
        const output = await runWindowsPowerShellScript(script);
        if (output === 'NOT_FOUND') {
          return err(`Window containing "${title}" not found`);
        }
        return ok(`Closed window: ${title}`);
      }
      // Close active window using Alt+F4
      await execAsync(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('%{F4}')"`, { shell: 'cmd.exe' });
      return ok('Closed active window');
    }

    if (process.platform === 'darwin') {
      if (title) {
        await runAppleScriptLine(`tell application ${toAppleScriptString(title)} to close window 1`);
      } else {
        await runAppleScriptLine('tell application "System Events" to keystroke "w" using command down');
      }
      return ok(`Closed window${title ? `: ${title}` : ''}`);
    }

    if (title) {
      await execAsync(`wmctrl -c ${quoteForPosixShell(title)}`);
    } else {
      await execAsync(`xdotool getactivewindow windowclose`);
    }
    return ok(`Closed window${title ? `: ${title}` : ''}`);
  } catch (error) {
    return err(`Failed to close window: ${error instanceof Error ? error.message : 'Unknown error'}`);
  }
}

/**
 * Restore a minimized window by title.
 *
 * @param title Partial window title on Windows/Linux, application name on macOS. Required.
 * @returns A ToolResult describing what was restored, or an error result on failure
 *          (including when `title` is missing or empty).
 */
export async function restoreWindow(title: string): Promise<ToolResult> {
  // Tool arguments come from a model and may be absent at runtime despite the type;
  // an empty title would otherwise match an arbitrary window.
  if (!title) {
    return err('Failed to restore window: a window title is required');
  }

  try {
    if (process.platform === 'win32') {
      return await showWindowByTitleOnWindows(title, 9, 'Restored');
    }

    if (process.platform === 'darwin') {
      await runAppleScriptLine(`tell application ${toAppleScriptString(title)} to set miniaturized of window 1 to false`);
      return ok(`Restored window: ${title}`);
    }

    await execAsync(`wmctrl -r ${quoteForPosixShell(title)} -b remove,hidden`);
    return ok(`Restored window: ${title}`);
  } catch (error) {
    return err(`Failed to restore window: ${error instanceof Error ? error.message : 'Unknown error'}`);
  }
}
472
+
291
473
  /**
292
474
  * Scroll mouse wheel
293
475
  */
@@ -383,6 +565,10 @@ export function getComputerTools() {
383
565
  getActiveWindow,
384
566
  listWindows,
385
567
  focusWindow,
568
+ minimizeWindow,
569
+ maximizeWindow,
570
+ closeWindow,
571
+ restoreWindow,
386
572
  scrollMouse,
387
573
  dragMouse,
388
574
  getMousePosition,
@@ -436,6 +622,26 @@ export const computerTools = [
436
622
  description: 'Focus a window by title',
437
623
  parameters: { type: 'object', properties: { title: { type: 'string' } }, required: ['title'] },
438
624
  },
625
+ {
626
+ name: 'minimizeWindow',
627
+ description: 'Minimize a window by title (or active window if no title given)',
628
+ parameters: { type: 'object', properties: { title: { type: 'string', description: 'Window title to minimize (partial match). Leave empty for active window.' } } },
629
+ },
630
+ {
631
+ name: 'maximizeWindow',
632
+ description: 'Maximize a window by title (or active window if no title given)',
633
+ parameters: { type: 'object', properties: { title: { type: 'string', description: 'Window title to maximize (partial match). Leave empty for active window.' } } },
634
+ },
635
+ {
636
+ name: 'closeWindow',
637
+ description: 'Close a window by title (or active window if no title given)',
638
+ parameters: { type: 'object', properties: { title: { type: 'string', description: 'Window title to close (partial match). Leave empty for active window.' } } },
639
+ },
640
+ {
641
+ name: 'restoreWindow',
642
+ description: 'Restore a minimized window by title',
643
+ parameters: { type: 'object', properties: { title: { type: 'string', description: 'Window title to restore (partial match)' } }, required: ['title'] },
644
+ },
439
645
  {
440
646
  name: 'scrollMouse',
441
647
  description: 'Scroll mouse wheel (positive=up, negative=down)',