@projectservan8n/cnapse 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,12 +1,59 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
- ProviderSelector,
3
+ ProviderSelector
4
+ } from "./chunk-7SDY7OPA.js";
5
+ import {
6
+ askAI,
7
+ getPageText,
8
+ googleDocsType,
9
+ googleSheetsType,
10
+ openGoogleDoc,
11
+ openGoogleSheet,
12
+ openUrl,
13
+ research,
14
+ scroll,
15
+ sendGmail,
16
+ sendOutlook,
17
+ webSearch
18
+ } from "./chunk-MOKGR7WE.js";
19
+ import {
20
+ chat,
21
+ chatWithVision
22
+ } from "./chunk-GP73OJCZ.js";
23
+ import {
24
+ clickMouse,
25
+ closeWindow,
26
+ doubleClick,
27
+ findAndClick,
28
+ focusWindow,
29
+ getActiveWindow,
30
+ getMousePosition,
31
+ keyCombo,
32
+ listDir,
33
+ listWindows,
34
+ maximizeWindow,
35
+ minimizeWindow,
36
+ moveMouse,
37
+ pressKey,
38
+ readFile,
39
+ restoreWindow,
40
+ runCommand,
41
+ scrollMouse,
42
+ typeText,
43
+ typeTextHuman,
44
+ writeFile
45
+ } from "./chunk-TFHK5CYF.js";
46
+ import {
47
+ captureScreenshot,
48
+ describeScreen
49
+ } from "./chunk-OIVTPXE4.js";
50
+ import {
4
51
  getApiKey,
5
52
  getConfig,
6
53
  setApiKey,
7
54
  setModel,
8
55
  setProvider
9
- } from "./chunk-OPX7FFL6.js";
56
+ } from "./chunk-COKO6V5J.js";
10
57
 
11
58
  // src/index.tsx
12
59
  import { render } from "ink";
@@ -194,580 +241,6 @@ function HelpMenu({ onClose, onSelect }) {
194
241
 
195
242
  // src/hooks/useChat.ts
196
243
  import { useState as useState2, useCallback, useRef, useEffect } from "react";
197
-
198
- // src/lib/system.ts
199
- import os from "os";
200
- import { exec } from "child_process";
201
- import { promisify } from "util";
202
- var execAsync = promisify(exec);
203
- var cachedSystemInfo = null;
204
- async function getSystemInfo() {
205
- if (cachedSystemInfo) return cachedSystemInfo;
206
- const platform = os.platform();
207
- const cpus = os.cpus();
208
- let osName = platform;
209
- const osVersion = os.release();
210
- if (platform === "win32") {
211
- try {
212
- const { stdout } = await execAsync("wmic os get Caption /value", { timeout: 5e3 });
213
- const match = stdout.match(/Caption=(.+)/);
214
- if (match) osName = match[1].trim();
215
- } catch {
216
- osName = `Windows ${osVersion}`;
217
- }
218
- } else if (platform === "darwin") {
219
- try {
220
- const { stdout } = await execAsync("sw_vers -productName && sw_vers -productVersion", { timeout: 5e3 });
221
- const lines = stdout.trim().split("\n");
222
- osName = `${lines[0]} ${lines[1]}`;
223
- } catch {
224
- osName = `macOS ${osVersion}`;
225
- }
226
- } else if (platform === "linux") {
227
- try {
228
- const { stdout } = await execAsync("cat /etc/os-release | grep PRETTY_NAME", { timeout: 5e3 });
229
- const match = stdout.match(/PRETTY_NAME="(.+)"/);
230
- if (match) osName = match[1];
231
- } catch {
232
- osName = `Linux ${osVersion}`;
233
- }
234
- }
235
- cachedSystemInfo = {
236
- platform,
237
- osName,
238
- osVersion,
239
- arch: os.arch(),
240
- cpuModel: cpus[0]?.model || "Unknown CPU",
241
- cpuCores: cpus.length,
242
- totalMemoryGB: Math.round(os.totalmem() / 1024 ** 3 * 10) / 10,
243
- freeMemoryGB: Math.round(os.freemem() / 1024 ** 3 * 10) / 10,
244
- username: os.userInfo().username,
245
- hostname: os.hostname(),
246
- homeDir: os.homedir(),
247
- shell: process.env.SHELL || process.env.COMSPEC || "unknown"
248
- };
249
- return cachedSystemInfo;
250
- }
251
- async function getSystemContext() {
252
- const info = await getSystemInfo();
253
- return `SYSTEM INFO:
254
- - OS: ${info.osName} (${info.arch})
255
- - CPU: ${info.cpuModel} (${info.cpuCores} cores)
256
- - RAM: ${info.totalMemoryGB}GB total, ${info.freeMemoryGB}GB free
257
- - User: ${info.username}@${info.hostname}
258
- - Home: ${info.homeDir}
259
- - Shell: ${info.shell}`;
260
- }
261
- function getCwd() {
262
- return process.cwd();
263
- }
264
-
265
- // src/lib/api.ts
266
- var BASE_PROMPT = `You are C-napse, an AI assistant for PC automation running on the user's desktop.
267
- You have access to their system and can help with coding, file management, shell commands, and more.
268
-
269
- When responding:
270
- - Be direct and practical
271
- - Use markdown formatting for code blocks
272
- - If asked to do something, explain what you'll do first
273
- - Give commands specific to the user's OS (use the system info below)
274
- - Be aware of the user's current working directory`;
275
- var systemContextCache = null;
276
- async function getSystemPrompt() {
277
- if (!systemContextCache) {
278
- systemContextCache = await getSystemContext();
279
- }
280
- const cwd = getCwd();
281
- return `${BASE_PROMPT}
282
-
283
- ${systemContextCache}
284
- - Current directory: ${cwd}`;
285
- }
286
- async function chat(messages, systemPrompt) {
287
- const config = getConfig();
288
- const finalPrompt = systemPrompt || await getSystemPrompt();
289
- const allMessages = [
290
- { role: "system", content: finalPrompt },
291
- ...messages
292
- ];
293
- switch (config.provider) {
294
- case "openrouter":
295
- return chatOpenRouter(allMessages, config.model);
296
- case "ollama":
297
- return chatOllama(allMessages, config.model);
298
- case "anthropic":
299
- return chatAnthropic(allMessages, config.model);
300
- case "openai":
301
- return chatOpenAI(allMessages, config.model);
302
- default:
303
- throw new Error(`Unknown provider: ${config.provider}`);
304
- }
305
- }
306
- async function chatOpenRouter(messages, model) {
307
- const apiKey = getApiKey("openrouter");
308
- if (!apiKey) {
309
- throw new Error("OpenRouter API key not configured. Run: cnapse auth openrouter <key>");
310
- }
311
- const config = getConfig();
312
- const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
313
- method: "POST",
314
- headers: {
315
- "Authorization": `Bearer ${apiKey}`,
316
- "Content-Type": "application/json",
317
- "HTTP-Referer": config.openrouter.siteUrl,
318
- "X-Title": config.openrouter.appName
319
- },
320
- body: JSON.stringify({
321
- model,
322
- messages,
323
- max_tokens: 2048,
324
- temperature: 0.7
325
- })
326
- });
327
- if (!response.ok) {
328
- const error = await response.text();
329
- throw new Error(`OpenRouter error: ${response.status} - ${error}`);
330
- }
331
- const data = await response.json();
332
- const content = data.choices?.[0]?.message?.content || "";
333
- return { content, model };
334
- }
335
- async function chatOllama(messages, model) {
336
- const config = getConfig();
337
- const response = await fetch(`${config.ollamaHost}/api/chat`, {
338
- method: "POST",
339
- headers: { "Content-Type": "application/json" },
340
- body: JSON.stringify({
341
- model,
342
- messages,
343
- stream: false
344
- })
345
- });
346
- if (!response.ok) {
347
- const error = await response.text();
348
- throw new Error(`Ollama error: ${response.status} - ${error}`);
349
- }
350
- const data = await response.json();
351
- const content = data.message?.content || "";
352
- return { content, model };
353
- }
354
- async function chatAnthropic(messages, model) {
355
- const apiKey = getApiKey("anthropic");
356
- if (!apiKey) {
357
- throw new Error("Anthropic API key not configured. Run: cnapse auth anthropic <key>");
358
- }
359
- const systemMsg = messages.find((m) => m.role === "system");
360
- const chatMessages = messages.filter((m) => m.role !== "system");
361
- const response = await fetch("https://api.anthropic.com/v1/messages", {
362
- method: "POST",
363
- headers: {
364
- "x-api-key": apiKey,
365
- "Content-Type": "application/json",
366
- "anthropic-version": "2023-06-01"
367
- },
368
- body: JSON.stringify({
369
- model,
370
- max_tokens: 2048,
371
- system: systemMsg?.content || "",
372
- messages: chatMessages
373
- })
374
- });
375
- if (!response.ok) {
376
- const error = await response.text();
377
- throw new Error(`Anthropic error: ${response.status} - ${error}`);
378
- }
379
- const data = await response.json();
380
- const content = data.content?.[0]?.text || "";
381
- return { content, model };
382
- }
383
- async function chatOpenAI(messages, model) {
384
- const apiKey = getApiKey("openai");
385
- if (!apiKey) {
386
- throw new Error("OpenAI API key not configured. Run: cnapse auth openai <key>");
387
- }
388
- const response = await fetch("https://api.openai.com/v1/chat/completions", {
389
- method: "POST",
390
- headers: {
391
- "Authorization": `Bearer ${apiKey}`,
392
- "Content-Type": "application/json"
393
- },
394
- body: JSON.stringify({
395
- model,
396
- messages,
397
- max_tokens: 2048,
398
- temperature: 0.7
399
- })
400
- });
401
- if (!response.ok) {
402
- const error = await response.text();
403
- throw new Error(`OpenAI error: ${response.status} - ${error}`);
404
- }
405
- const data = await response.json();
406
- const content = data.choices?.[0]?.message?.content || "";
407
- return { content, model };
408
- }
409
- async function chatWithVision(messages, screenshotBase64) {
410
- const config = getConfig();
411
- const systemPrompt = await getSystemPrompt();
412
- const visionPrompt = systemPrompt + "\n\nYou can see the user's screen. Describe what you see and help them with their request.";
413
- switch (config.provider) {
414
- case "openrouter":
415
- return chatWithVisionOpenRouter(messages, screenshotBase64, visionPrompt);
416
- case "ollama":
417
- return chatWithVisionOllama(messages, screenshotBase64, visionPrompt);
418
- case "anthropic":
419
- return chatWithVisionAnthropic(messages, screenshotBase64, visionPrompt);
420
- case "openai":
421
- return chatWithVisionOpenAI(messages, screenshotBase64, visionPrompt);
422
- default:
423
- throw new Error(`Vision not supported for provider: ${config.provider}`);
424
- }
425
- }
426
- async function chatWithVisionOpenRouter(messages, screenshot, systemPrompt) {
427
- const apiKey = getApiKey("openrouter");
428
- if (!apiKey) throw new Error("OpenRouter API key not configured");
429
- const config = getConfig();
430
- let model = config.model;
431
- if (!model.includes("gpt-5") && !model.includes("claude") && !model.includes("gemini")) {
432
- model = "openai/gpt-5-nano";
433
- }
434
- const lastUserIdx = messages.length - 1;
435
- const visionMessages = messages.map((m, i) => {
436
- if (i === lastUserIdx && m.role === "user") {
437
- return {
438
- role: "user",
439
- content: [
440
- { type: "text", text: m.content },
441
- { type: "image_url", image_url: { url: `data:image/png;base64,${screenshot}` } }
442
- ]
443
- };
444
- }
445
- return m;
446
- });
447
- const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
448
- method: "POST",
449
- headers: {
450
- "Authorization": `Bearer ${apiKey}`,
451
- "Content-Type": "application/json",
452
- "HTTP-Referer": config.openrouter.siteUrl,
453
- "X-Title": config.openrouter.appName
454
- },
455
- body: JSON.stringify({
456
- model,
457
- messages: [{ role: "system", content: systemPrompt }, ...visionMessages],
458
- max_tokens: 2048
459
- })
460
- });
461
- if (!response.ok) {
462
- const error = await response.text();
463
- throw new Error(`OpenRouter vision error: ${response.status} - ${error}`);
464
- }
465
- const data = await response.json();
466
- return { content: data.choices?.[0]?.message?.content || "", model };
467
- }
468
- async function chatWithVisionOllama(messages, screenshot, systemPrompt) {
469
- const config = getConfig();
470
- const visionModels = ["llava", "llama3.2-vision", "bakllava"];
471
- const model = visionModels.find((m) => config.model.includes(m)) || "llava";
472
- const lastUserMsg = messages.filter((m) => m.role === "user").pop();
473
- const response = await fetch(`${config.ollamaHost}/api/generate`, {
474
- method: "POST",
475
- headers: { "Content-Type": "application/json" },
476
- body: JSON.stringify({
477
- model,
478
- prompt: `${systemPrompt}
479
-
480
- User: ${lastUserMsg?.content || "What do you see?"}`,
481
- images: [screenshot],
482
- stream: false
483
- })
484
- });
485
- if (!response.ok) {
486
- const error = await response.text();
487
- throw new Error(`Ollama vision error: ${error}`);
488
- }
489
- const data = await response.json();
490
- return { content: data.response || "", model };
491
- }
492
- async function chatWithVisionAnthropic(messages, screenshot, systemPrompt) {
493
- const apiKey = getApiKey("anthropic");
494
- if (!apiKey) throw new Error("Anthropic API key not configured");
495
- const chatMessages = messages.filter((m) => m.role !== "system");
496
- const lastUserIdx = chatMessages.length - 1;
497
- const visionMessages = chatMessages.map((m, i) => {
498
- if (i === lastUserIdx && m.role === "user") {
499
- return {
500
- role: "user",
501
- content: [
502
- { type: "image", source: { type: "base64", media_type: "image/png", data: screenshot } },
503
- { type: "text", text: m.content }
504
- ]
505
- };
506
- }
507
- return { role: m.role, content: m.content };
508
- });
509
- const response = await fetch("https://api.anthropic.com/v1/messages", {
510
- method: "POST",
511
- headers: {
512
- "x-api-key": apiKey,
513
- "anthropic-version": "2023-06-01",
514
- "Content-Type": "application/json"
515
- },
516
- body: JSON.stringify({
517
- model: "claude-3-5-sonnet-20241022",
518
- max_tokens: 2048,
519
- system: systemPrompt,
520
- messages: visionMessages
521
- })
522
- });
523
- if (!response.ok) {
524
- const error = await response.text();
525
- throw new Error(`Anthropic vision error: ${error}`);
526
- }
527
- const data = await response.json();
528
- return { content: data.content?.[0]?.text || "", model: "claude-3-5-sonnet-20241022" };
529
- }
530
- async function chatWithVisionOpenAI(messages, screenshot, systemPrompt) {
531
- const apiKey = getApiKey("openai");
532
- if (!apiKey) throw new Error("OpenAI API key not configured");
533
- const lastUserIdx = messages.length - 1;
534
- const visionMessages = messages.map((m, i) => {
535
- if (i === lastUserIdx && m.role === "user") {
536
- return {
537
- role: "user",
538
- content: [
539
- { type: "text", text: m.content },
540
- { type: "image_url", image_url: { url: `data:image/png;base64,${screenshot}` } }
541
- ]
542
- };
543
- }
544
- return m;
545
- });
546
- const response = await fetch("https://api.openai.com/v1/chat/completions", {
547
- method: "POST",
548
- headers: {
549
- "Authorization": `Bearer ${apiKey}`,
550
- "Content-Type": "application/json"
551
- },
552
- body: JSON.stringify({
553
- model: "gpt-4o",
554
- messages: [{ role: "system", content: systemPrompt }, ...visionMessages],
555
- max_tokens: 2048
556
- })
557
- });
558
- if (!response.ok) {
559
- const error = await response.text();
560
- throw new Error(`OpenAI vision error: ${error}`);
561
- }
562
- const data = await response.json();
563
- return { content: data.choices?.[0]?.message?.content || "", model: "gpt-4o" };
564
- }
565
-
566
- // src/lib/vision.ts
567
- async function describeScreen() {
568
- const screenshot = await captureScreenshot();
569
- if (!screenshot) {
570
- throw new Error("Failed to capture screenshot");
571
- }
572
- const config = getConfig();
573
- const description = await analyzeWithVision(screenshot, config.provider);
574
- return { description, screenshot };
575
- }
576
- async function captureScreenshot() {
577
- try {
578
- const screenshotDesktop = await import("screenshot-desktop");
579
- const buffer = await screenshotDesktop.default({ format: "png" });
580
- return buffer.toString("base64");
581
- } catch {
582
- return captureScreenFallback();
583
- }
584
- }
585
- async function captureScreenFallback() {
586
- const { exec: exec5 } = await import("child_process");
587
- const { promisify: promisify5 } = await import("util");
588
- const { tmpdir } = await import("os");
589
- const { join: join3 } = await import("path");
590
- const { readFile: readFile2, unlink } = await import("fs/promises");
591
- const execAsync5 = promisify5(exec5);
592
- const tempFile = join3(tmpdir(), `cnapse-screen-${Date.now()}.png`);
593
- try {
594
- const platform = process.platform;
595
- if (platform === "win32") {
596
- await execAsync5(`
597
- Add-Type -AssemblyName System.Windows.Forms
598
- $screen = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds
599
- $bitmap = New-Object System.Drawing.Bitmap($screen.Width, $screen.Height)
600
- $graphics = [System.Drawing.Graphics]::FromImage($bitmap)
601
- $graphics.CopyFromScreen($screen.Location, [System.Drawing.Point]::Empty, $screen.Size)
602
- $bitmap.Save("${tempFile.replace(/\\/g, "\\\\")}")
603
- $graphics.Dispose()
604
- $bitmap.Dispose()
605
- `, { shell: "powershell.exe" });
606
- } else if (platform === "darwin") {
607
- await execAsync5(`screencapture -x "${tempFile}"`);
608
- } else {
609
- await execAsync5(`gnome-screenshot -f "${tempFile}" 2>/dev/null || scrot "${tempFile}" 2>/dev/null || import -window root "${tempFile}"`);
610
- }
611
- const imageBuffer = await readFile2(tempFile);
612
- await unlink(tempFile).catch(() => {
613
- });
614
- return imageBuffer.toString("base64");
615
- } catch {
616
- return null;
617
- }
618
- }
619
- async function analyzeWithVision(base64Image, provider) {
620
- const prompt = `Look at this screenshot and describe:
621
- 1. What application or window is visible
622
- 2. Key UI elements you can see (buttons, text fields, menus)
623
- 3. What the user appears to be doing or could do next
624
- 4. Any notable content or state
625
-
626
- Be concise but helpful.`;
627
- switch (provider) {
628
- case "ollama":
629
- return analyzeWithOllama(base64Image, prompt);
630
- case "openrouter":
631
- return analyzeWithOpenRouter(base64Image, prompt);
632
- case "anthropic":
633
- return analyzeWithAnthropic(base64Image, prompt);
634
- case "openai":
635
- return analyzeWithOpenAI(base64Image, prompt);
636
- default:
637
- throw new Error(`Vision not supported for provider: ${provider}`);
638
- }
639
- }
640
- async function analyzeWithOllama(base64Image, prompt) {
641
- const config = getConfig();
642
- const ollamaHost = config.ollamaHost || "http://localhost:11434";
643
- const visionModels = ["llava", "llama3.2-vision", "bakllava", "llava-llama3"];
644
- const model = visionModels.find((m) => config.model.includes(m)) || "llava";
645
- const response = await fetch(`${ollamaHost}/api/generate`, {
646
- method: "POST",
647
- headers: { "Content-Type": "application/json" },
648
- body: JSON.stringify({
649
- model,
650
- prompt,
651
- images: [base64Image],
652
- stream: false
653
- })
654
- });
655
- if (!response.ok) {
656
- const text = await response.text();
657
- throw new Error(`Ollama vision error: ${text}`);
658
- }
659
- const data = await response.json();
660
- return data.response || "Unable to analyze image";
661
- }
662
- async function analyzeWithOpenRouter(base64Image, prompt) {
663
- const apiKey = getApiKey("openrouter");
664
- if (!apiKey) throw new Error("OpenRouter API key not configured");
665
- const model = "anthropic/claude-3-5-sonnet";
666
- const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
667
- method: "POST",
668
- headers: {
669
- "Authorization": `Bearer ${apiKey}`,
670
- "Content-Type": "application/json",
671
- "HTTP-Referer": "https://c-napse.up.railway.app",
672
- "X-Title": "C-napse"
673
- },
674
- body: JSON.stringify({
675
- model,
676
- messages: [
677
- {
678
- role: "user",
679
- content: [
680
- { type: "text", text: prompt },
681
- {
682
- type: "image_url",
683
- image_url: { url: `data:image/png;base64,${base64Image}` }
684
- }
685
- ]
686
- }
687
- ],
688
- max_tokens: 1e3
689
- })
690
- });
691
- if (!response.ok) {
692
- const text = await response.text();
693
- throw new Error(`OpenRouter vision error: ${text}`);
694
- }
695
- const data = await response.json();
696
- return data.choices?.[0]?.message?.content || "Unable to analyze image";
697
- }
698
- async function analyzeWithAnthropic(base64Image, prompt) {
699
- const apiKey = getApiKey("anthropic");
700
- if (!apiKey) throw new Error("Anthropic API key not configured");
701
- const response = await fetch("https://api.anthropic.com/v1/messages", {
702
- method: "POST",
703
- headers: {
704
- "x-api-key": apiKey,
705
- "anthropic-version": "2023-06-01",
706
- "Content-Type": "application/json"
707
- },
708
- body: JSON.stringify({
709
- model: "claude-3-5-sonnet-20241022",
710
- max_tokens: 1e3,
711
- messages: [
712
- {
713
- role: "user",
714
- content: [
715
- {
716
- type: "image",
717
- source: {
718
- type: "base64",
719
- media_type: "image/png",
720
- data: base64Image
721
- }
722
- },
723
- { type: "text", text: prompt }
724
- ]
725
- }
726
- ]
727
- })
728
- });
729
- if (!response.ok) {
730
- const text = await response.text();
731
- throw new Error(`Anthropic vision error: ${text}`);
732
- }
733
- const data = await response.json();
734
- return data.content?.[0]?.text || "Unable to analyze image";
735
- }
736
- async function analyzeWithOpenAI(base64Image, prompt) {
737
- const apiKey = getApiKey("openai");
738
- if (!apiKey) throw new Error("OpenAI API key not configured");
739
- const response = await fetch("https://api.openai.com/v1/chat/completions", {
740
- method: "POST",
741
- headers: {
742
- "Authorization": `Bearer ${apiKey}`,
743
- "Content-Type": "application/json"
744
- },
745
- body: JSON.stringify({
746
- model: "gpt-4-vision-preview",
747
- messages: [
748
- {
749
- role: "user",
750
- content: [
751
- { type: "text", text: prompt },
752
- {
753
- type: "image_url",
754
- image_url: { url: `data:image/png;base64,${base64Image}` }
755
- }
756
- ]
757
- }
758
- ],
759
- max_tokens: 1e3
760
- })
761
- });
762
- if (!response.ok) {
763
- const text = await response.text();
764
- throw new Error(`OpenAI vision error: ${text}`);
765
- }
766
- const data = await response.json();
767
- return data.choices?.[0]?.message?.content || "Unable to analyze image";
768
- }
769
-
770
- // src/hooks/useChat.ts
771
244
  var WELCOME_MESSAGE = {
772
245
  id: "0",
773
246
  role: "system",
@@ -831,8 +304,8 @@ function useChat(screenWatch = false) {
831
304
  (m) => m.id === assistantId ? { ...m, content: response.content || "(no response)", isStreaming: false } : m
832
305
  )
833
306
  );
834
- } catch (err2) {
835
- const errorMsg = err2 instanceof Error ? err2.message : "Unknown error";
307
+ } catch (err) {
308
+ const errorMsg = err instanceof Error ? err.message : "Unknown error";
836
309
  setError(errorMsg);
837
310
  setMessages(
838
311
  (prev) => prev.map(
@@ -872,10 +345,10 @@ function useVision() {
872
345
  setLastDescription(result.description);
873
346
  setLastScreenshot(result.screenshot);
874
347
  return result.description;
875
- } catch (err2) {
876
- const errorMsg = err2 instanceof Error ? err2.message : "Vision analysis failed";
348
+ } catch (err) {
349
+ const errorMsg = err instanceof Error ? err.message : "Vision analysis failed";
877
350
  setError(errorMsg);
878
- throw err2;
351
+ throw err;
879
352
  } finally {
880
353
  setIsAnalyzing(false);
881
354
  }
@@ -895,735 +368,15 @@ import { useState as useState4, useCallback as useCallback3, useEffect as useEff
895
368
  // src/services/telegram.ts
896
369
  import { EventEmitter } from "events";
897
370
 
898
- // src/tools/shell.ts
899
- import { exec as exec4 } from "child_process";
900
- import { promisify as promisify4 } from "util";
901
-
902
- // src/tools/filesystem.ts
903
- import { promises as fs } from "fs";
904
- import { join, dirname } from "path";
905
- async function readFile(path2) {
906
- try {
907
- const content = await fs.readFile(path2, "utf-8");
908
- return ok(content);
909
- } catch (error) {
910
- return err(`Failed to read file: ${error.message}`);
911
- }
912
- }
913
- async function writeFile(path2, content) {
914
- try {
915
- const dir = dirname(path2);
916
- await fs.mkdir(dir, { recursive: true });
917
- await fs.writeFile(path2, content, "utf-8");
918
- return ok(`Written ${content.length} bytes to ${path2}`);
919
- } catch (error) {
920
- return err(`Failed to write file: ${error.message}`);
921
- }
922
- }
923
- async function listDir(path2, recursive = false) {
924
- try {
925
- const stat = await fs.stat(path2);
926
- if (!stat.isDirectory()) {
927
- return err(`Not a directory: ${path2}`);
928
- }
929
- const entries = [];
930
- async function walkDir(dir, prefix) {
931
- const items = await fs.readdir(dir, { withFileTypes: true });
932
- for (const item of items) {
933
- const displayPath = prefix ? `${prefix}/${item.name}` : item.name;
934
- if (item.isDirectory()) {
935
- entries.push(`${displayPath}/`);
936
- if (recursive) {
937
- await walkDir(join(dir, item.name), displayPath);
938
- }
939
- } else {
940
- entries.push(displayPath);
941
- }
942
- }
943
- }
944
- await walkDir(path2, "");
945
- entries.sort();
946
- return ok(entries.join("\n"));
947
- } catch (error) {
948
- return err(`Failed to list directory: ${error.message}`);
949
- }
950
- }
951
-
952
- // src/tools/clipboard.ts
953
- import clipboardy from "clipboardy";
954
-
955
- // src/tools/process.ts
956
- import { exec as exec2 } from "child_process";
957
- import { promisify as promisify2 } from "util";
958
- var execAsync2 = promisify2(exec2);
959
-
960
- // src/tools/computer.ts
961
- import { exec as exec3 } from "child_process";
962
- import { promisify as promisify3 } from "util";
963
- var execAsync3 = promisify3(exec3);
964
- async function moveMouse(x, y) {
965
- try {
966
- if (process.platform === "win32") {
967
- await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.Cursor]::Position = New-Object System.Drawing.Point(${x}, ${y})"`, { shell: "cmd.exe" });
968
- } else if (process.platform === "darwin") {
969
- await execAsync3(`cliclick m:${x},${y}`);
970
- } else {
971
- await execAsync3(`xdotool mousemove ${x} ${y}`);
972
- }
973
- return ok(`Mouse moved to (${x}, ${y})`);
974
- } catch (error) {
975
- return err(`Failed to move mouse: ${error instanceof Error ? error.message : "Unknown error"}`);
976
- }
977
- }
978
- async function clickMouse(button = "left") {
979
- try {
980
- if (process.platform === "win32") {
981
- const script = `
982
- Add-Type -MemberDefinition @"
983
- [DllImport("user32.dll",CharSet=CharSet.Auto,CallingConvention=CallingConvention.StdCall)]
984
- public static extern void mouse_event(long dwFlags, long dx, long dy, long cButtons, long dwExtraInfo);
985
- "@ -Name Mouse -Namespace Win32
986
- ${button === "left" ? "[Win32.Mouse]::mouse_event(0x02, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x04, 0, 0, 0, 0)" : button === "right" ? "[Win32.Mouse]::mouse_event(0x08, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x10, 0, 0, 0, 0)" : "[Win32.Mouse]::mouse_event(0x20, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x40, 0, 0, 0, 0)"}`;
987
- await execAsync3(`powershell -Command "${script.replace(/\n/g, " ")}"`, { shell: "cmd.exe" });
988
- } else if (process.platform === "darwin") {
989
- await execAsync3(`cliclick c:.`);
990
- } else {
991
- const btn = button === "left" ? "1" : button === "right" ? "3" : "2";
992
- await execAsync3(`xdotool click ${btn}`);
993
- }
994
- return ok(`Clicked ${button} button`);
995
- } catch (error) {
996
- return err(`Failed to click: ${error instanceof Error ? error.message : "Unknown error"}`);
997
- }
998
- }
999
- async function doubleClick() {
1000
- try {
1001
- if (process.platform === "win32") {
1002
- const script = `
1003
- Add-Type -MemberDefinition @"
1004
- [DllImport("user32.dll",CharSet=CharSet.Auto,CallingConvention=CallingConvention.StdCall)]
1005
- public static extern void mouse_event(long dwFlags, long dx, long dy, long cButtons, long dwExtraInfo);
1006
- "@ -Name Mouse -Namespace Win32
1007
- [Win32.Mouse]::mouse_event(0x02, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x04, 0, 0, 0, 0)
1008
- Start-Sleep -Milliseconds 50
1009
- [Win32.Mouse]::mouse_event(0x02, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x04, 0, 0, 0, 0)`;
1010
- await execAsync3(`powershell -Command "${script.replace(/\n/g, " ")}"`, { shell: "cmd.exe" });
1011
- } else if (process.platform === "darwin") {
1012
- await execAsync3(`cliclick dc:.`);
1013
- } else {
1014
- await execAsync3(`xdotool click --repeat 2 --delay 50 1`);
1015
- }
1016
- return ok("Double clicked");
1017
- } catch (error) {
1018
- return err(`Failed to double click: ${error instanceof Error ? error.message : "Unknown error"}`);
1019
- }
1020
- }
1021
- async function typeText(text) {
1022
- try {
1023
- if (process.platform === "win32") {
1024
- const escapedText = text.replace(/'/g, "''").replace(/[+^%~(){}[\]]/g, "{$&}");
1025
- await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${escapedText}')"`, { shell: "cmd.exe" });
1026
- } else if (process.platform === "darwin") {
1027
- const escaped = text.replace(/'/g, "'\\''");
1028
- await execAsync3(`osascript -e 'tell application "System Events" to keystroke "${escaped}"'`);
1029
- } else {
1030
- const escaped = text.replace(/'/g, "'\\''");
1031
- await execAsync3(`xdotool type '${escaped}'`);
1032
- }
1033
- return ok(`Typed: ${text}`);
1034
- } catch (error) {
1035
- return err(`Failed to type: ${error instanceof Error ? error.message : "Unknown error"}`);
1036
- }
1037
- }
1038
- async function pressKey(key) {
1039
- try {
1040
- if (process.platform === "win32") {
1041
- const winKeyMap = {
1042
- "enter": "{ENTER}",
1043
- "return": "{ENTER}",
1044
- "escape": "{ESC}",
1045
- "esc": "{ESC}",
1046
- "tab": "{TAB}",
1047
- "space": " ",
1048
- "backspace": "{BACKSPACE}",
1049
- "delete": "{DELETE}",
1050
- "up": "{UP}",
1051
- "down": "{DOWN}",
1052
- "left": "{LEFT}",
1053
- "right": "{RIGHT}",
1054
- "home": "{HOME}",
1055
- "end": "{END}",
1056
- "pageup": "{PGUP}",
1057
- "pagedown": "{PGDN}",
1058
- "f1": "{F1}",
1059
- "f2": "{F2}",
1060
- "f3": "{F3}",
1061
- "f4": "{F4}",
1062
- "f5": "{F5}",
1063
- "f6": "{F6}",
1064
- "f7": "{F7}",
1065
- "f8": "{F8}",
1066
- "f9": "{F9}",
1067
- "f10": "{F10}",
1068
- "f11": "{F11}",
1069
- "f12": "{F12}"
1070
- };
1071
- const winKey = winKeyMap[key.toLowerCase()] || key;
1072
- await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${winKey}')"`, { shell: "cmd.exe" });
1073
- } else if (process.platform === "darwin") {
1074
- const macKeyMap = {
1075
- "return": 36,
1076
- "enter": 36,
1077
- "escape": 53,
1078
- "esc": 53,
1079
- "tab": 48,
1080
- "space": 49,
1081
- "backspace": 51,
1082
- "delete": 117,
1083
- "up": 126,
1084
- "down": 125,
1085
- "left": 123,
1086
- "right": 124
1087
- };
1088
- const keyCode = macKeyMap[key.toLowerCase()];
1089
- if (keyCode) {
1090
- await execAsync3(`osascript -e 'tell application "System Events" to key code ${keyCode}'`);
1091
- } else {
1092
- await execAsync3(`osascript -e 'tell application "System Events" to keystroke "${key}"'`);
1093
- }
1094
- } else {
1095
- await execAsync3(`xdotool key ${key}`);
1096
- }
1097
- return ok(`Pressed: ${key}`);
1098
- } catch (error) {
1099
- return err(`Failed to press key: ${error instanceof Error ? error.message : "Unknown error"}`);
1100
- }
1101
- }
1102
/**
 * Press a key combination such as ["control", "c"] on the current platform.
 *
 * Named keys ("return", "tab", "f5", ...) are translated to the platform's
 * key token instead of being typed out as literal letters, and every value
 * that reaches a shell or script is escaped.
 *
 * @param {string[]} keys - Modifier and regular key names, in any letter case.
 * @returns {Promise<object>} Result built by `ok(...)` on success or `err(...)` on failure.
 */
async function keyCombo(keys) {
  // Quote a string for POSIX sh so it is passed through verbatim.
  const shQuote = (text) => `'${String(text).replace(/'/g, "'\\''")}'`;
  try {
    const lowered = keys.map((k) => k.toLowerCase());
    if (process.platform === "win32") {
      const hasWin = lowered.some((k) => k === "meta" || k === "win");
      if (hasWin && lowered.includes("r")) {
        // SendKeys cannot press the Windows key, so the Run dialog is opened through its shell CLSID.
        await execAsync3(`powershell -Command "$shell = New-Object -ComObject WScript.Shell; $shell.Run('explorer shell:::{2559a1f3-21d7-11d4-bdaf-00c04f60b9f0}')"`, { shell: "cmd.exe" });
        return ok(`Pressed: ${keys.join("+")}`);
      }
      const modifierMap = new Map([
        ["control", "^"],
        ["ctrl", "^"],
        ["alt", "%"],
        ["shift", "+"]
      ]);
      // SendKeys tokens for keys that must not be sent letter by letter.
      const namedKeys = new Map([
        ["return", "{ENTER}"],
        ["enter", "{ENTER}"],
        ["escape", "{ESC}"],
        ["esc", "{ESC}"],
        ["tab", "{TAB}"],
        ["space", " "],
        ["backspace", "{BACKSPACE}"],
        ["delete", "{DELETE}"],
        ["up", "{UP}"],
        ["down", "{DOWN}"],
        ["left", "{LEFT}"],
        ["right", "{RIGHT}"],
        ["home", "{HOME}"],
        ["end", "{END}"],
        ["pageup", "{PGUP}"],
        ["pagedown", "{PGDN}"]
      ]);
      for (let n = 1; n <= 12; n++) {
        namedKeys.set(`f${n}`, `{F${n}}`);
      }
      let combo = "";
      const regularKeys = [];
      for (const key of lowered) {
        if (modifierMap.has(key)) {
          combo += modifierMap.get(key);
        } else if (key !== "meta" && key !== "win") {
          // Characters that are SendKeys syntax must be wrapped in braces to be sent literally.
          regularKeys.push(namedKeys.get(key) ?? key.replace(/[+^%~(){}[\]]/g, "{$&}"));
        }
      }
      combo += regularKeys.join("");
      const script = `Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${combo.replace(/'/g, "''")}')`;
      // -EncodedCommand takes base64 of the UTF-16LE script and sidesteps cmd.exe quoting entirely.
      const encoded = Buffer.from(script, "utf16le").toString("base64");
      await execAsync3(`powershell -EncodedCommand ${encoded}`, { shell: "cmd.exe" });
    } else if (process.platform === "darwin") {
      const modMap = new Map([
        ["control", "control down"],
        ["ctrl", "control down"],
        ["alt", "option down"],
        ["shift", "shift down"],
        ["command", "command down"],
        ["meta", "command down"]
      ]);
      // Virtual key codes for keys that `keystroke` would otherwise type as text.
      const macKeyCodes = new Map([
        ["return", 36],
        ["enter", 36],
        ["escape", 53],
        ["esc", 53],
        ["tab", 48],
        ["space", 49],
        ["backspace", 51],
        ["delete", 117],
        ["up", 126],
        ["down", 125],
        ["left", 123],
        ["right", 124]
      ]);
      const modifiers = lowered.filter((k) => modMap.has(k));
      const regular = keys.filter((k) => !modMap.has(k.toLowerCase()));
      const keyCode = regular.length === 1 ? macKeyCodes.get(regular[0].toLowerCase()) : void 0;
      let cmd;
      if (keyCode !== void 0) {
        cmd = `tell application "System Events" to key code ${keyCode}`;
      } else {
        const text = regular.join("").replace(/\\/g, "\\\\").replace(/"/g, '\\"');
        cmd = `tell application "System Events" to keystroke "${text}"`;
      }
      if (modifiers.length > 0) {
        cmd += ` using {${modifiers.map((m) => modMap.get(m)).join(", ")}}`;
      }
      await execAsync3(`osascript -e ${shQuote(cmd)}`);
    } else {
      await execAsync3(`xdotool key ${shQuote(keys.join("+"))}`);
    }
    return ok(`Pressed: ${keys.join("+")}`);
  } catch (error) {
    return err(`Failed to press combo: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
}
1153
/**
 * Report the title (Windows, Linux) or application name (macOS) of the
 * window that currently has focus.
 *
 * @returns {Promise<object>} `ok(title)` on success, `err(message)` on failure.
 */
async function getActiveWindow() {
  try {
    if (process.platform === "win32") {
      // Built line by line: a PowerShell here-string needs a newline after @" and
      // its closing "@ at the start of a line, so the script must not be flattened.
      const script = [
        'Add-Type @"',
        "using System;",
        "using System.Runtime.InteropServices;",
        "using System.Text;",
        "public class Win32 {",
        '[DllImport("user32.dll")]',
        "public static extern IntPtr GetForegroundWindow();",
        '[DllImport("user32.dll", CharSet = CharSet.Unicode)]',
        "public static extern int GetWindowText(IntPtr hWnd, StringBuilder text, int count);",
        "}",
        '"@',
        "$hwnd = [Win32]::GetForegroundWindow()",
        "$sb = New-Object System.Text.StringBuilder 256",
        // Discard the character count so only the title reaches stdout.
        "[void][Win32]::GetWindowText($hwnd, $sb, 256)",
        "$sb.ToString()"
      ].join("\n");
      // -EncodedCommand (base64 of UTF-16LE) keeps newlines and quotes intact through cmd.exe.
      const encoded = Buffer.from(script, "utf16le").toString("base64");
      const { stdout } = await execAsync3(`powershell -EncodedCommand ${encoded}`, { shell: "cmd.exe" });
      return ok(stdout.trim() || "Unknown window");
    } else if (process.platform === "darwin") {
      const { stdout } = await execAsync3(`osascript -e 'tell application "System Events" to get name of first application process whose frontmost is true'`);
      return ok(stdout.trim());
    } else {
      const { stdout } = await execAsync3(`xdotool getactivewindow getwindowname`);
      return ok(stdout.trim());
    }
  } catch (error) {
    return err(`Failed to get active window: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
}
1185
/**
 * List the open windows (Windows, Linux) or visible application processes
 * (macOS) as raw command output.
 *
 * @returns {Promise<object>} `ok(stdout)` on success, `err(message)` on failure.
 */
async function listWindows() {
  const platform = process.platform;
  try {
    let result;
    if (platform === "win32") {
      result = await execAsync3(`powershell -Command "Get-Process | Where-Object {$_.MainWindowTitle} | Select-Object ProcessName, MainWindowTitle | Format-Table -AutoSize"`, { shell: "cmd.exe" });
    } else if (platform === "darwin") {
      result = await execAsync3(`osascript -e 'tell application "System Events" to get name of every application process whose visible is true'`);
    } else {
      result = await execAsync3(`wmctrl -l`);
    }
    return ok(result.stdout);
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : "Unknown error";
    return err(`Failed to list windows: ${reason}`);
  }
}
1201
/**
 * Bring a window (Windows, Linux) or application (macOS) to the foreground.
 *
 * The title is escaped for each layer it passes through, so quotes or shell
 * metacharacters in it cannot break out of the command.
 *
 * @param {string} title - Window title (Windows, Linux) or application name (macOS).
 * @returns {Promise<object>} `ok(message)` on success, `err(message)` on failure.
 */
async function focusWindow(title) {
  // Quote a string for POSIX sh so it is passed through verbatim.
  const shQuote = (text) => `'${String(text).replace(/'/g, "'\\''")}'`;
  try {
    if (process.platform === "win32") {
      const escaped = title.replace(/'/g, "''");
      const script = `$wshell = New-Object -ComObject wscript.shell; $wshell.AppActivate('${escaped}')`;
      // -EncodedCommand (base64 of UTF-16LE) avoids cmd.exe quoting problems with the title.
      const encoded = Buffer.from(script, "utf16le").toString("base64");
      await execAsync3(`powershell -EncodedCommand ${encoded}`, { shell: "cmd.exe" });
    } else if (process.platform === "darwin") {
      const appName = title.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
      await execAsync3(`osascript -e ${shQuote(`tell application "${appName}" to activate`)}`);
    } else {
      await execAsync3(`wmctrl -a ${shQuote(title)}`);
    }
    return ok(`Focused window: ${title}`);
  } catch (error) {
    return err(`Failed to focus window: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
}
1216
/**
 * Minimize a window.
 *
 * @param {string} [title] - Part of the window title (Windows), application
 *   name (macOS) or window title (Linux). When omitted, the active window is minimized.
 * @returns {Promise<object>} `ok(message)` on success, `err(message)` on failure
 *   or when no matching window is found on Windows.
 */
async function minimizeWindow(title) {
  // Quote a string for POSIX sh so it is passed through verbatim.
  const shQuote = (text) => `'${String(text).replace(/'/g, "'\\''")}'`;
  try {
    if (process.platform === "win32") {
      if (title) {
        const escaped = title.replace(/'/g, "''");
        // Built line by line: a PowerShell here-string needs a newline after @" and
        // its closing "@ at the start of a line, so the script must not be flattened.
        const script = [
          `$proc = Get-Process | Where-Object { $_.MainWindowTitle -like '*${escaped}*' -and $_.MainWindowHandle -ne 0 } | Select-Object -First 1`,
          "if ($proc) {",
          'Add-Type @"',
          "using System;",
          "using System.Runtime.InteropServices;",
          "public class Win32 {",
          '[DllImport("user32.dll")]',
          "public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);",
          "}",
          '"@',
          // 6 = SW_MINIMIZE. The boolean result is discarded so it does not pollute stdout.
          "[void][Win32]::ShowWindow($proc.MainWindowHandle, 6)",
          'Write-Output "Minimized: $($proc.MainWindowTitle)"',
          "} else {",
          'Write-Output "NOT_FOUND"',
          "}"
        ].join("\n");
        // -EncodedCommand (base64 of UTF-16LE) keeps newlines and quotes intact through cmd.exe.
        const encoded = Buffer.from(script, "utf16le").toString("base64");
        const { stdout } = await execAsync3(`powershell -EncodedCommand ${encoded}`, { shell: "cmd.exe" });
        if (stdout.includes("NOT_FOUND")) {
          return err(`Window containing "${title}" not found`);
        }
        return ok(stdout.trim());
      } else {
        // Alt+Space opens the window menu, "n" picks Minimize.
        await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('% n')"`, { shell: "cmd.exe" });
        return ok("Minimized active window");
      }
    } else if (process.platform === "darwin") {
      if (title) {
        const appName = title.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
        await execAsync3(`osascript -e ${shQuote(`tell application "${appName}" to set miniaturized of window 1 to true`)}`);
      } else {
        await execAsync3(`osascript -e 'tell application "System Events" to keystroke "m" using command down'`);
      }
      return ok(`Minimized window${title ? `: ${title}` : ""}`);
    } else {
      if (title) {
        await execAsync3(`wmctrl -r ${shQuote(title)} -b add,hidden`);
      } else {
        await execAsync3(`xdotool getactivewindow windowminimize`);
      }
      return ok(`Minimized window${title ? `: ${title}` : ""}`);
    }
  } catch (error) {
    return err(`Failed to minimize window: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
}
1265
/**
 * Maximize a window.
 *
 * @param {string} [title] - Part of the window title (Windows), application
 *   name (macOS) or window title (Linux). When omitted, the active window is maximized.
 * @returns {Promise<object>} `ok(message)` on success, `err(message)` on failure
 *   or when no matching window is found on Windows.
 */
async function maximizeWindow(title) {
  // Quote a string for POSIX sh so it is passed through verbatim.
  const shQuote = (text) => `'${String(text).replace(/'/g, "'\\''")}'`;
  try {
    if (process.platform === "win32") {
      if (title) {
        const escaped = title.replace(/'/g, "''");
        // Built line by line: a PowerShell here-string needs a newline after @" and
        // its closing "@ at the start of a line, so the script must not be flattened.
        const script = [
          `$proc = Get-Process | Where-Object { $_.MainWindowTitle -like '*${escaped}*' -and $_.MainWindowHandle -ne 0 } | Select-Object -First 1`,
          "if ($proc) {",
          'Add-Type @"',
          "using System;",
          "using System.Runtime.InteropServices;",
          "public class Win32 {",
          '[DllImport("user32.dll")]',
          "public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);",
          "}",
          '"@',
          // 3 = SW_MAXIMIZE. The boolean result is discarded so it does not pollute stdout.
          "[void][Win32]::ShowWindow($proc.MainWindowHandle, 3)",
          'Write-Output "Maximized: $($proc.MainWindowTitle)"',
          "} else {",
          'Write-Output "NOT_FOUND"',
          "}"
        ].join("\n");
        // -EncodedCommand (base64 of UTF-16LE) keeps newlines and quotes intact through cmd.exe.
        const encoded = Buffer.from(script, "utf16le").toString("base64");
        const { stdout } = await execAsync3(`powershell -EncodedCommand ${encoded}`, { shell: "cmd.exe" });
        if (stdout.includes("NOT_FOUND")) {
          return err(`Window containing "${title}" not found`);
        }
        return ok(stdout.trim());
      } else {
        // Alt+Space opens the window menu, "x" picks Maximize.
        await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('% x')"`, { shell: "cmd.exe" });
        return ok("Maximized active window");
      }
    } else if (process.platform === "darwin") {
      if (title) {
        const appName = title.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
        await execAsync3(`osascript -e ${shQuote(`tell application "${appName}" to set zoomed of window 1 to true`)}`);
      } else {
        await execAsync3(`osascript -e 'tell application "System Events" to keystroke "f" using {control down, command down}'`);
      }
      return ok(`Maximized window${title ? `: ${title}` : ""}`);
    } else {
      if (title) {
        await execAsync3(`wmctrl -r ${shQuote(title)} -b add,maximized_vert,maximized_horz`);
      } else {
        await execAsync3(`wmctrl -r :ACTIVE: -b add,maximized_vert,maximized_horz`);
      }
      return ok(`Maximized window${title ? `: ${title}` : ""}`);
    }
  } catch (error) {
    return err(`Failed to maximize window: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
}
1314
/**
 * Close a window.
 *
 * @param {string} [title] - Part of the window title (Windows), application
 *   name (macOS) or window title (Linux). When omitted, the active window is closed.
 * @returns {Promise<object>} `ok(message)` on success, `err(message)` on failure.
 */
async function closeWindow(title) {
  // Quote a string for POSIX sh so it is passed through verbatim.
  const shQuote = (text) => `'${String(text).replace(/'/g, "'\\''")}'`;
  try {
    if (process.platform === "win32") {
      if (title) {
        const escaped = title.replace(/'/g, "''");
        const script = `Get-Process | Where-Object { $_.MainWindowTitle -like '*${escaped}*' } | ForEach-Object { [void]$_.CloseMainWindow() }`;
        // -EncodedCommand (base64 of UTF-16LE) avoids cmd.exe quoting problems with the title.
        const encoded = Buffer.from(script, "utf16le").toString("base64");
        await execAsync3(`powershell -EncodedCommand ${encoded}`, { shell: "cmd.exe" });
        return ok(`Closed window: ${title}`);
      } else {
        // Alt+F4 closes whichever window currently has focus.
        await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('%{F4}')"`, { shell: "cmd.exe" });
        return ok("Closed active window");
      }
    } else if (process.platform === "darwin") {
      if (title) {
        const appName = title.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
        await execAsync3(`osascript -e ${shQuote(`tell application "${appName}" to close window 1`)}`);
      } else {
        await execAsync3(`osascript -e 'tell application "System Events" to keystroke "w" using command down'`);
      }
      return ok(`Closed window${title ? `: ${title}` : ""}`);
    } else {
      if (title) {
        await execAsync3(`wmctrl -c ${shQuote(title)}`);
      } else {
        await execAsync3(`xdotool getactivewindow windowclose`);
      }
      return ok(`Closed window${title ? `: ${title}` : ""}`);
    }
  } catch (error) {
    return err(`Failed to close window: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
}
1344
/**
 * Restore a minimized window.
 *
 * @param {string} title - Part of the window title (Windows), application
 *   name (macOS) or window title (Linux).
 * @returns {Promise<object>} `ok(message)` on success, `err(message)` on failure
 *   or when no matching window is found on Windows.
 */
async function restoreWindow(title) {
  // Quote a string for POSIX sh so it is passed through verbatim.
  const shQuote = (text) => `'${String(text).replace(/'/g, "'\\''")}'`;
  try {
    if (process.platform === "win32") {
      const escaped = title.replace(/'/g, "''");
      // Built line by line: a PowerShell here-string needs a newline after @" and
      // its closing "@ at the start of a line, so the script must not be flattened.
      const script = [
        `$proc = Get-Process | Where-Object { $_.MainWindowTitle -like '*${escaped}*' -and $_.MainWindowHandle -ne 0 } | Select-Object -First 1`,
        "if ($proc) {",
        'Add-Type @"',
        "using System;",
        "using System.Runtime.InteropServices;",
        "public class Win32 {",
        '[DllImport("user32.dll")]',
        "public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);",
        "}",
        '"@',
        // 9 = SW_RESTORE. The boolean result is discarded so it does not pollute stdout.
        "[void][Win32]::ShowWindow($proc.MainWindowHandle, 9)",
        'Write-Output "Restored: $($proc.MainWindowTitle)"',
        "} else {",
        'Write-Output "NOT_FOUND"',
        "}"
      ].join("\n");
      // -EncodedCommand (base64 of UTF-16LE) keeps newlines and quotes intact through cmd.exe.
      const encoded = Buffer.from(script, "utf16le").toString("base64");
      const { stdout } = await execAsync3(`powershell -EncodedCommand ${encoded}`, { shell: "cmd.exe" });
      if (stdout.includes("NOT_FOUND")) {
        return err(`Window containing "${title}" not found`);
      }
      return ok(stdout.trim());
    } else if (process.platform === "darwin") {
      const appName = title.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
      await execAsync3(`osascript -e ${shQuote(`tell application "${appName}" to set miniaturized of window 1 to false`)}`);
      return ok(`Restored window: ${title}`);
    } else {
      await execAsync3(`wmctrl -r ${shQuote(title)} -b remove,hidden`);
      return ok(`Restored window: ${title}`);
    }
  } catch (error) {
    return err(`Failed to restore window: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
}
1380
/**
 * Scroll the mouse wheel.
 *
 * @param {number} amount - Number of wheel steps; positive scrolls up, zero or negative scrolls down.
 * @returns {Promise<object>} `ok(message)` on success, `err(message)` on failure.
 */
async function scrollMouse(amount) {
  try {
    if (process.platform === "win32") {
      // One wheel notch is 120 units; the sign carries the direction.
      const direction = 120 * amount;
      // Built line by line: a PowerShell here-string needs a newline after @" and
      // its closing "@ at the start of a line, so the script must not be flattened.
      const script = [
        'Add-Type -MemberDefinition @"',
        '[DllImport("user32.dll",CharSet=CharSet.Auto,CallingConvention=CallingConvention.StdCall)]',
        "public static extern void mouse_event(long dwFlags, long dx, long dy, long cButtons, long dwExtraInfo);",
        '"@ -Name Mouse -Namespace Win32',
        // 0x0800 = MOUSEEVENTF_WHEEL.
        `[Win32.Mouse]::mouse_event(0x0800, 0, 0, ${direction}, 0)`
      ].join("\n");
      // -EncodedCommand (base64 of UTF-16LE) keeps newlines and quotes intact through cmd.exe.
      const encoded = Buffer.from(script, "utf16le").toString("base64");
      await execAsync3(`powershell -EncodedCommand ${encoded}`, { shell: "cmd.exe" });
    } else if (process.platform === "darwin") {
      const dir = amount > 0 ? "u" : "d";
      await execAsync3(`cliclick -r ${dir}:${Math.abs(amount)}`);
    } else {
      // X11 maps wheel-up to button 4 and wheel-down to button 5.
      const btn = amount > 0 ? "4" : "5";
      await execAsync3(`xdotool click --repeat ${Math.abs(amount)} ${btn}`);
    }
    return ok(`Scrolled ${amount > 0 ? "up" : "down"} by ${Math.abs(amount)}`);
  } catch (error) {
    return err(`Failed to scroll: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
}
1403
/**
 * Read the current mouse cursor position.
 *
 * Windows and macOS answer with a "Mouse position: ..." message; Linux
 * returns the raw `xdotool getmouselocation --shell` output.
 *
 * @returns {Promise<object>} `ok(text)` on success, `err(message)` on failure.
 */
async function getMousePosition() {
  const platform = process.platform;
  try {
    if (platform === "win32") {
      const result = await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; $p = [System.Windows.Forms.Cursor]::Position; Write-Output \\"$($p.X),$($p.Y)\\""`, { shell: "cmd.exe" });
      return ok(`Mouse position: ${result.stdout.trim()}`);
    }
    if (platform === "darwin") {
      const result = await execAsync3(`cliclick p`);
      return ok(`Mouse position: ${result.stdout.trim()}`);
    }
    const result = await execAsync3(`xdotool getmouselocation --shell`);
    return ok(result.stdout);
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : "Unknown error";
    return err(`Failed to get mouse position: ${reason}`);
  }
}
1419
-
1420
- // src/tools/index.ts
1421
/**
 * Wrap tool output in a successful result object.
 *
 * @param {*} output - Value to report to the caller.
 * @returns {{success: boolean, output: *}} Result with `success` set to true.
 */
function ok(output) {
  const result = { success: true };
  result.output = output;
  return result;
}
1424
/**
 * Wrap an error message in a failed result object.
 *
 * @param {*} error - Description of what went wrong.
 * @returns {{success: boolean, output: string, error: *}} Result with
 *   `success` set to false and an empty `output`.
 */
function err(error) {
  const result = { success: false, output: "" };
  result.error = error;
  return result;
}
1427
-
1428
- // src/tools/shell.ts
1429
var execAsync4 = promisify4(exec4);
/**
 * Run a shell command and capture its output.
 *
 * Never throws: every failure is reported through the returned result object.
 *
 * @param {string} cmd - Command line to execute (cmd.exe on Windows, /bin/sh elsewhere).
 * @param {number} [timeout=30000] - Milliseconds before the command is killed.
 * @returns {Promise<{success: boolean, output: string, error?: string}>}
 *   On success `output` holds stdout, with stderr appended when it is non-blank.
 *   On failure `error` holds the timeout notice or the exit code and stderr.
 */
async function runCommand(cmd, timeout = 3e4) {
  try {
    const shell = process.platform === "win32" ? "cmd.exe" : "/bin/sh";
    const { stdout, stderr } = await execAsync4(cmd, {
      shell,
      timeout,
      maxBuffer: 10 * 1024 * 1024
      // 10MB
    });
    if (stderr && stderr.trim()) {
      // A zero exit code with stderr output still counts as success.
      return ok(`${stdout}\n[stderr]: ${stderr}`);
    }
    return ok(stdout || "(no output)");
  } catch (error) {
    if (error.killed) {
      return err(`Command timed out after ${timeout}ms`);
    }
    const stderr = error.stderr || "";
    const stdout = error.stdout || "";
    return {
      success: false,
      output: stdout,
      error: `Exit code: ${error.code || -1}\n${stderr}`
    };
  }
}
1460
-
1461
- // src/services/browser.ts
1462
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the delay has elapsed.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
1465
/**
 * Open a URL in the system's default browser.
 *
 * @param {string} url - Address to open; "https://" is prepended when it has no http(s) scheme.
 * @returns {Promise<{success: boolean, error?: string}>} Whether the launcher
 *   command was started successfully.
 */
async function openUrl(url) {
  if (typeof url !== "string" || url.trim() === "") {
    return { success: false, error: "No URL provided" };
  }
  // Match the scheme itself, so "httpbin.org" or "HTTP://x" are handled correctly.
  const fullUrl = /^https?:\/\//i.test(url) ? url : `https://${url}`;
  try {
    let command;
    if (process.platform === "win32") {
      // A double quote would end the quoted argument; percent-encode it instead.
      command = `start "" "${fullUrl.replace(/"/g, "%22")}"`;
    } else {
      // Single quotes stop the shell expanding $, backticks and the like;
      // a single quote inside the URL is percent-encoded.
      const quoted = `'${fullUrl.replace(/'/g, "%27")}'`;
      command = process.platform === "darwin" ? `open ${quoted}` : `xdg-open ${quoted}`;
    }
    // runCommand reports failure through its result instead of throwing.
    const result = await runCommand(command, 5e3);
    // A timeout is not treated as failure: the launcher can block for as long
    // as the browser it started stays open.
    const timedOut = typeof result.error === "string" && result.error.startsWith("Command timed out");
    if (!result.success && !timedOut) {
      return { success: false, error: result.error || "Failed to open URL" };
    }
    return { success: true };
  } catch (error) {
    return {
      success: false,
      error: error instanceof Error ? error.message : "Failed to open URL"
    };
  }
}
1483
/**
 * Open a Google search for the given query in the default browser.
 *
 * @param {string} query - Search terms.
 * @returns {Promise<object>} The result returned by `openUrl`.
 */
async function searchGoogle(query) {
  const encodedQuery = encodeURIComponent(query);
  return openUrl(`https://www.google.com/search?q=${encodedQuery}`);
}
1487
/**
 * Run a web search in the browser and describe the results page from a screenshot.
 *
 * @param {string} query - Search terms.
 * @param {string} [engine="google"] - "google", "bing" or "duckduckgo";
 *   any other value falls back to Google.
 * @returns {Promise<string>} Header line followed by the screen description.
 */
async function webSearch(query, engine = "google") {
  const encodedQuery = encodeURIComponent(query);
  const urls = {
    google: `https://www.google.com/search?q=${encodedQuery}`,
    bing: `https://www.bing.com/search?q=${encodedQuery}`,
    duckduckgo: `https://duckduckgo.com/?q=${encodedQuery}`
  };
  // Own-property check: an unknown engine must not pass undefined (or an
  // inherited member such as "constructor") on to openUrl.
  const target = Object.prototype.hasOwnProperty.call(urls, engine) ? urls[engine] : urls.google;
  await openUrl(target);
  // Give the results page time to render before capturing the screen.
  await sleep(3e3);
  const vision = await describeScreen();
  return `\u{1F50D} Search results for "${query}":\n\n${vision.description}`;
}
1500
/**
 * Ask a question on an AI or search website by driving the browser: open
 * the site, type the question, press Return, then describe the screen.
 *
 * @param {string} site - One of "perplexity", "chatgpt", "claude", "copilot" or "google".
 * @param {string} question - Text to type into the page.
 * @returns {Promise<{response: *, screenshot: *}>} The `description` and
 *   `screenshot` fields of the `describeScreen()` result.
 */
async function askAI(site, question) {
  const siteUrls = {
    perplexity: "https://www.perplexity.ai",
    chatgpt: "https://chat.openai.com",
    claude: "https://claude.ai",
    copilot: "https://copilot.microsoft.com",
    google: "https://www.google.com"
  };
  // Google waits 3 seconds for its results, every other site waits 10.
  const answerDelay = site === "google" ? 3e3 : 1e4;
  await openUrl(siteUrls[site]);
  await sleep(4e3);
  await typeText(question);
  await sleep(500);
  await pressKey("Return");
  await sleep(answerDelay);
  const { description, screenshot } = await describeScreen();
  return { response: description, screenshot };
}
1520
/**
 * Open Gmail's compose window in the browser, prefilled with any values given.
 *
 * @param {string} [to] - Recipient address.
 * @param {string} [subject] - Subject line.
 * @param {string} [body] - Message body.
 * @returns {Promise<boolean>} The `success` flag reported by `openUrl`.
 */
async function openGmailCompose(to, subject, body) {
  const extras = [];
  if (to) {
    extras.push(`&to=${encodeURIComponent(to)}`);
  }
  if (subject) {
    extras.push(`&su=${encodeURIComponent(subject)}`);
  }
  if (body) {
    extras.push(`&body=${encodeURIComponent(body)}`);
  }
  const composeUrl = "https://mail.google.com/mail/u/0/?fs=1&tf=cm" + extras.join("");
  const { success } = await openUrl(composeUrl);
  return success;
}
1528
/**
 * Send an email through Gmail's web UI: open a prefilled compose window,
 * then press Ctrl+Enter to send it.
 *
 * @param {{to: string, subject: string, body: string}} email - Message to send.
 * @returns {Promise<boolean>} True when the send shortcut was issued; false
 *   when the compose window could not be opened or an error was thrown.
 */
async function sendGmail(email) {
  try {
    const opened = await openGmailCompose(email.to, email.subject, email.body);
    if (!opened) {
      // Never fire Ctrl+Enter into whatever window happens to have focus.
      return false;
    }
    // Wait for the compose window to load before sending.
    await sleep(5e3);
    await keyCombo(["control", "Return"]);
    await sleep(2e3);
    return true;
  } catch {
    return false;
  }
}
1539
/**
 * Open Outlook on the web's compose deeplink, prefilled with any values given.
 *
 * @param {string} [to] - Recipient address.
 * @param {string} [subject] - Subject line.
 * @param {string} [body] - Message body.
 * @returns {Promise<boolean>} The `success` flag reported by `openUrl`.
 */
async function openOutlookCompose(to, subject, body) {
  const fields = [];
  if (to) {
    fields.push(`to=${encodeURIComponent(to)}&`);
  }
  if (subject) {
    fields.push(`subject=${encodeURIComponent(subject)}&`);
  }
  if (body) {
    fields.push(`body=${encodeURIComponent(body)}&`);
  }
  const composeUrl = "https://outlook.office.com/mail/deeplink/compose?" + fields.join("");
  const { success } = await openUrl(composeUrl);
  return success;
}
1547
/**
 * Send an email through Outlook on the web: open a prefilled compose
 * window, then press Ctrl+Enter to send it.
 *
 * @param {{to: string, subject: string, body: string}} email - Message to send.
 * @returns {Promise<boolean>} True when the send shortcut was issued; false
 *   when the compose window could not be opened or an error was thrown.
 */
async function sendOutlook(email) {
  try {
    const opened = await openOutlookCompose(email.to, email.subject, email.body);
    if (!opened) {
      // Never fire Ctrl+Enter into whatever window happens to have focus.
      return false;
    }
    // Wait for the compose window to load before sending.
    await sleep(5e3);
    await keyCombo(["control", "Return"]);
    await sleep(2e3);
    return true;
  } catch {
    return false;
  }
}
1558
/**
 * Open a new Google Sheets spreadsheet in the browser.
 *
 * @returns {Promise<boolean>} The `success` flag reported by `openUrl`.
 */
async function openGoogleSheet() {
  const { success } = await openUrl("https://docs.google.com/spreadsheets/create");
  return success;
}
1562
/**
 * Open a new Google Docs document in the browser.
 *
 * @returns {Promise<boolean>} The `success` flag reported by `openUrl`.
 */
async function openGoogleDoc() {
  const { success } = await openUrl("https://docs.google.com/document/create");
  return success;
}
1566
/**
 * Scroll the focused page by pressing Page Up / Page Down repeatedly.
 *
 * @param {string} direction - "down" scrolls down; any other value scrolls up.
 * @param {number} [amount=3] - Number of key presses.
 * @returns {Promise<void>}
 */
async function scroll(direction, amount = 3) {
  const pageKey = direction === "down" ? "pagedown" : "pageup";
  let pressed = 0;
  while (pressed < amount) {
    await pressKey(pageKey);
    // Short pause between presses.
    await sleep(200);
    pressed += 1;
  }
}
1573
/**
 * Describe what is currently on screen.
 *
 * @returns {Promise<*>} The `description` field of the `describeScreen()` result.
 */
async function getPageText() {
  const { description } = await describeScreen();
  return description;
}
1577
/**
 * Research a topic by opening a Google search and describing the results page.
 *
 * @param {string} topic - Subject to search for.
 * @param {number} [maxSources=3] - Accepted for callers but not used by this implementation.
 * @returns {Promise<{query: string, sources: object[], summary: *}>} A single
 *   source entry for the search page, with the screen description used as
 *   both its content and the summary.
 */
async function research(topic, maxSources = 3) {
  await searchGoogle(topic);
  // Let the results page load before capturing it.
  await sleep(3e3);
  const { description } = await describeScreen();
  const searchSource = {
    title: `Google search: ${topic}`,
    url: `https://www.google.com/search?q=${encodeURIComponent(topic)}`,
    content: description
  };
  return {
    query: topic,
    sources: [searchSource],
    summary: description
  };
}
1591
/**
 * Fill cells in the focused Google Sheet through keyboard input: Ctrl+G,
 * type the cell reference, Return, type the value, Return.
 *
 * @param {Array<{cell: string, value: string}>} cells - Cell references and the values to enter.
 * @returns {Promise<boolean>} True when every step ran without throwing, false otherwise.
 */
async function googleSheetsType(cells) {
  try {
    for (const entry of cells) {
      const target = entry.cell;
      const content = entry.value;
      await keyCombo(["control", "g"]);
      await sleep(500);
      await typeText(target);
      await pressKey("Return");
      await sleep(300);
      await typeText(content);
      await pressKey("Return");
      await sleep(200);
    }
  } catch {
    return false;
  }
  return true;
}
1608
/**
 * Type text into the focused Google Doc after a one-second pause.
 *
 * @param {string} text - Text to type.
 * @returns {Promise<boolean>} True when typing ran without throwing, false otherwise.
 */
async function googleDocsType(text) {
  try {
    await sleep(1e3);
    await typeText(text);
  } catch {
    return false;
  }
  return true;
}
1617
-
1618
371
  // src/lib/tasks.ts
1619
- import * as fs2 from "fs";
372
+ import * as fs from "fs";
1620
373
  import * as path from "path";
1621
- import * as os2 from "os";
1622
- var TASK_MEMORY_FILE = path.join(os2.homedir(), ".cnapse", "task-memory.json");
374
+ import * as os from "os";
375
+ var TASK_MEMORY_FILE = path.join(os.homedir(), ".cnapse", "task-memory.json");
1623
376
  function loadTaskMemory() {
1624
377
  try {
1625
- if (fs2.existsSync(TASK_MEMORY_FILE)) {
1626
- const data = fs2.readFileSync(TASK_MEMORY_FILE, "utf-8");
378
+ if (fs.existsSync(TASK_MEMORY_FILE)) {
379
+ const data = fs.readFileSync(TASK_MEMORY_FILE, "utf-8");
1627
380
  return JSON.parse(data);
1628
381
  }
1629
382
  } catch {
@@ -1650,10 +403,10 @@ function saveTaskPattern(input, steps) {
1650
403
  }
1651
404
  memory.patterns = memory.patterns.sort((a, b) => b.successCount - a.successCount).slice(0, 100);
1652
405
  const dir = path.dirname(TASK_MEMORY_FILE);
1653
- if (!fs2.existsSync(dir)) {
1654
- fs2.mkdirSync(dir, { recursive: true });
406
+ if (!fs.existsSync(dir)) {
407
+ fs.mkdirSync(dir, { recursive: true });
1655
408
  }
1656
- fs2.writeFileSync(TASK_MEMORY_FILE, JSON.stringify(memory, null, 2));
409
+ fs.writeFileSync(TASK_MEMORY_FILE, JSON.stringify(memory, null, 2));
1657
410
  } catch {
1658
411
  }
1659
412
  }
@@ -2004,9 +757,9 @@ async function executeStep(step) {
2004
757
  switch (actionType) {
2005
758
  case "open_app":
2006
759
  await keyCombo(["meta", "r"]);
2007
- await sleep2(500);
760
+ await sleep(500);
2008
761
  await typeText(params);
2009
- await sleep2(300);
762
+ await sleep(300);
2010
763
  await pressKey("Return");
2011
764
  step.result = `Opened ${params}`;
2012
765
  break;
@@ -2030,7 +783,7 @@ async function executeStep(step) {
2030
783
  break;
2031
784
  case "wait":
2032
785
  const seconds = parseInt(params) || 1;
2033
- await sleep2(seconds * 1e3);
786
+ await sleep(seconds * 1e3);
2034
787
  step.result = `Waited ${seconds}s`;
2035
788
  break;
2036
789
  case "focus_window":
@@ -2146,11 +899,11 @@ ${existingResult.output}`;
2146
899
  ${result.response}`;
2147
900
  } else {
2148
901
  await openUrl(`https://${site}`);
2149
- await sleep2(4e3);
902
+ await sleep(4e3);
2150
903
  await typeText(question);
2151
- await sleep2(300);
904
+ await sleep(300);
2152
905
  await pressKey("Return");
2153
- await sleep2(5e3);
906
+ await sleep(5e3);
2154
907
  const pageText = await getPageText();
2155
908
  step.result = `\u{1F4DD} Response from ${site}:
2156
909
 
@@ -2192,7 +945,7 @@ ${pageText.slice(0, 3e3)}`;
2192
945
  case "new": {
2193
946
  const sheetName = sheetArgs[0] || "Untitled spreadsheet";
2194
947
  await openGoogleSheet();
2195
- await sleep2(5e3);
948
+ await sleep(5e3);
2196
949
  step.result = `\u{1F4CA} Opened new Google Sheet: ${sheetName}`;
2197
950
  break;
2198
951
  }
@@ -2220,7 +973,7 @@ ${vision2.description}`;
2220
973
  case "new": {
2221
974
  const docName = docArgs[0] || "Untitled document";
2222
975
  await openGoogleDoc();
2223
- await sleep2(5e3);
976
+ await sleep(5e3);
2224
977
  step.result = `\u{1F4C4} Opened new Google Doc: ${docName}`;
2225
978
  break;
2226
979
  }
@@ -2298,99 +1051,206 @@ ${uiAnalysis.content}`;
2298
1051
  }
2299
1052
  case "adaptive_do": {
2300
1053
  const goal = params;
2301
- const maxAttempts = 5;
1054
+ const maxAttempts = 25;
2302
1055
  const actionHistory = [];
2303
1056
  let accomplished = false;
1057
+ let stuckCount = 0;
1058
+ const stuckThreshold = 3;
1059
+ let lastScreenHash = "";
1060
+ const { getLearner } = await import("./learner-KH3TFTD7.js");
1061
+ const learner = getLearner();
1062
+ await learner.load();
1063
+ const initialScreen = await describeScreen();
1064
+ const remembered = await learner.recall(goal, initialScreen.description);
1065
+ if (remembered && remembered.successCount > remembered.failCount) {
1066
+ actionHistory.push(`\u{1F4DA} Found remembered solution from ${remembered.source}`);
1067
+ }
2304
1068
  for (let attempt = 0; attempt < maxAttempts && !accomplished; attempt++) {
2305
1069
  const currentScreen = await describeScreen();
1070
+ const currentHash = currentScreen.screenshot.slice(0, 1e3);
1071
+ const screenChanged = currentHash !== lastScreenHash;
1072
+ if (!screenChanged && attempt > 0) {
1073
+ stuckCount++;
1074
+ } else {
1075
+ stuckCount = Math.max(0, stuckCount - 1);
1076
+ }
1077
+ lastScreenHash = currentHash;
2306
1078
  const nextAction = await chat([{
2307
1079
  role: "user",
2308
1080
  content: `GOAL: ${goal}
2309
1081
 
2310
1082
  CURRENT SCREEN: ${currentScreen.description}
2311
1083
 
2312
- PREVIOUS ACTIONS TAKEN:
2313
- ${actionHistory.length > 0 ? actionHistory.join("\n") : "None yet"}
1084
+ PREVIOUS ACTIONS:
1085
+ ${actionHistory.slice(-5).join("\n") || "None yet"}
1086
+
1087
+ ATTEMPT: ${attempt + 1}/${maxAttempts}
1088
+ STUCK COUNT: ${stuckCount} (will ask for help at ${stuckThreshold})
2314
1089
 
2315
1090
  Based on what you see, what's the SINGLE next action to take?
2316
- Options:
2317
- - click: Click (will click at current mouse position)
2318
- - type: Type something (specify text)
2319
- - press: Press a key (specify key like Enter, Tab, Escape)
2320
- - scroll: Scroll up/down
2321
- - navigate: Go to URL (opens in browser)
1091
+
1092
+ Available actions:
1093
+ - click: Click at current mouse position
1094
+ - clickAt: Click at coordinates (VALUE: x,y)
1095
+ - moveTo: Move mouse to coordinates (VALUE: x,y)
1096
+ - type: Type text (VALUE: text to type)
1097
+ - press: Press a key (VALUE: Enter, Tab, Escape, etc.)
1098
+ - keyCombo: Key combination (VALUE: command+s, control+c, etc.)
1099
+ - scroll: Scroll (VALUE: up or down)
1100
+ - navigate: Open URL (VALUE: full URL)
1101
+ - wait: Wait for something to load (VALUE: seconds)
1102
+ - findClick: Find element and click it (VALUE: description of element)
2322
1103
  - done: Goal is accomplished
2323
1104
  - stuck: Can't figure out what to do
2324
1105
 
2325
- Respond in format:
1106
+ Respond EXACTLY in this format:
2326
1107
  ACTION: <action_type>
2327
- VALUE: <text to type, URL to navigate, or key to press>
2328
- REASONING: <why>`
1108
+ VALUE: <parameter>
1109
+ REASONING: <brief why>`
2329
1110
  }]);
2330
1111
  const actionContent = nextAction.content;
2331
1112
  const actionMatch = actionContent.match(/ACTION:\s*(\w+)/i);
2332
1113
  const valueMatch = actionContent.match(/VALUE:\s*(.+?)(?:\n|$)/i);
2333
1114
  if (!actionMatch) {
2334
- actionHistory.push(`Attempt ${attempt + 1}: Couldn't parse action`);
1115
+ actionHistory.push(`[${attempt + 1}] \u26A0\uFE0F Couldn't parse action`);
2335
1116
  continue;
2336
1117
  }
2337
1118
  const action = actionMatch[1].toLowerCase();
2338
1119
  const value = valueMatch?.[1]?.trim() || "";
2339
1120
  if (action === "done") {
2340
1121
  accomplished = true;
2341
- actionHistory.push(`Attempt ${attempt + 1}: Goal accomplished!`);
1122
+ actionHistory.push(`[${attempt + 1}] \u2705 Goal accomplished!`);
1123
+ if (actionHistory.length > 1) {
1124
+ const lastSuccessfulAction = actionHistory[actionHistory.length - 2];
1125
+ const actionParts = lastSuccessfulAction.match(/→ (\w+)(?:\s*"(.+)")?/);
1126
+ if (actionParts) {
1127
+ await learner.learn(
1128
+ currentScreen.description.slice(0, 300),
1129
+ goal,
1130
+ actionParts[1],
1131
+ actionParts[2] || "",
1132
+ "self"
1133
+ );
1134
+ }
1135
+ }
2342
1136
  break;
2343
1137
  }
2344
- if (action === "stuck") {
2345
- actionHistory.push(`Attempt ${attempt + 1}: Got stuck, asking Perplexity for help...`);
2346
- const helpRequest = `I'm trying to: ${goal}
2347
-
2348
- I'm stuck. What should I do next? Be specific about what to click or type.`;
2349
- const advice = await askAI("perplexity", helpRequest);
2350
- actionHistory.push(`Got advice: ${advice.response.slice(0, 200)}...`);
1138
+ if (action === "stuck" || stuckCount >= stuckThreshold) {
1139
+ actionHistory.push(`[${attempt + 1}] \u{1F198} Asking for help...`);
1140
+ const suggestions = await learner.getHelp(
1141
+ goal,
1142
+ currentScreen.description,
1143
+ actionHistory.slice(-3)
1144
+ );
1145
+ if (suggestions.length > 0) {
1146
+ const suggestion = suggestions[0];
1147
+ actionHistory.push(`\u{1F4A1} Got suggestion from ${suggestion.source}: ${suggestion.value.slice(0, 100)}`);
1148
+ if (suggestion.action && suggestion.action !== "suggested") {
1149
+ try {
1150
+ await executeAdaptiveAction(suggestion.action, suggestion.value);
1151
+ actionHistory.push(`[${attempt + 1}] \u2192 ${suggestion.action} "${suggestion.value.slice(0, 30)}"`);
1152
+ await learner.learn(
1153
+ currentScreen.description.slice(0, 300),
1154
+ goal,
1155
+ suggestion.action,
1156
+ suggestion.value,
1157
+ suggestion.source
1158
+ );
1159
+ stuckCount = 0;
1160
+ } catch (e) {
1161
+ actionHistory.push(`[${attempt + 1}] \u274C Suggestion failed`);
1162
+ }
1163
+ }
1164
+ } else {
1165
+ actionHistory.push(`[${attempt + 1}] \u{1F615} No helpful suggestions found`);
1166
+ }
2351
1167
  continue;
2352
1168
  }
2353
1169
  try {
2354
- switch (action) {
2355
- case "click":
2356
- await clickMouse("left");
2357
- actionHistory.push(`Attempt ${attempt + 1}: Clicked`);
2358
- break;
2359
- case "type":
2360
- if (value) {
2361
- await typeText(value);
2362
- }
2363
- actionHistory.push(`Attempt ${attempt + 1}: Typed "${value}"`);
2364
- break;
2365
- case "press":
2366
- await pressKey(value || "Return");
2367
- actionHistory.push(`Attempt ${attempt + 1}: Pressed ${value || "Enter"}`);
2368
- break;
2369
- case "scroll":
2370
- await scroll(value.toLowerCase().includes("up") ? "up" : "down");
2371
- actionHistory.push(`Attempt ${attempt + 1}: Scrolled ${value || "down"}`);
2372
- break;
2373
- case "navigate":
2374
- const url = value.startsWith("http") ? value : `https://${value}`;
2375
- await openUrl(url);
2376
- actionHistory.push(`Attempt ${attempt + 1}: Opened ${url}`);
2377
- break;
2378
- default:
2379
- actionHistory.push(`Attempt ${attempt + 1}: Unknown action ${action}`);
2380
- }
1170
+ await executeAdaptiveAction(action, value);
1171
+ actionHistory.push(`[${attempt + 1}] \u2192 ${action}${value ? ` "${value.slice(0, 40)}"` : ""}`);
2381
1172
  } catch (e) {
2382
- actionHistory.push(`Attempt ${attempt + 1}: Action failed - ${e}`);
1173
+ actionHistory.push(`[${attempt + 1}] \u274C ${action} failed - ${e}`);
1174
+ await learner.recordFailure(goal, action, value);
2383
1175
  }
2384
- await sleep2(2e3);
1176
+ await sleep(1e3 + Math.random() * 1e3);
2385
1177
  }
2386
1178
  step.result = `\u{1F3AF} Adaptive Agent Result:
2387
1179
 
2388
1180
  Goal: ${goal}
2389
1181
  Accomplished: ${accomplished ? "Yes \u2705" : "Partial/No \u274C"}
1182
+ Attempts: ${Math.min(actionHistory.length, maxAttempts)}/${maxAttempts}
2390
1183
 
2391
1184
  Action Log:
2392
1185
  ${actionHistory.join("\n")}`;
2393
1186
  break;
1187
/**
 * Perform one action chosen by the adaptive agent.
 *
 * @param {string} action - Action name, matched case-insensitively: click,
 *   clickAt, moveTo, type, press, keyCombo, scroll, navigate, wait, findClick.
 * @param {string} value - Action parameter ("x,y" coordinates, text, key name, URL, seconds, ...).
 * @returns {Promise<void>}
 * @throws {Error} On an unknown action or malformed coordinates, so the
 *   caller logs a failure instead of a success.
 */
async function executeAdaptiveAction(action, value) {
  // Parse "x,y" into two integers; reject anything else rather than silently doing nothing.
  const parsePoint = (raw) => {
    const [x, y] = String(raw).split(",").map((part) => Number.parseInt(part.trim(), 10));
    if (Number.isNaN(x) || Number.isNaN(y)) {
      throw new Error(`Invalid coordinates: "${raw}" (expected x,y)`);
    }
    return [x, y];
  };
  // Suggested actions may arrive in camelCase while parsed ones are lower-cased.
  switch (String(action).toLowerCase()) {
    case "click":
      await clickMouse("left");
      break;
    case "clickat": {
      const [x, y] = parsePoint(value);
      await moveMouse(x, y);
      // Brief pause between moving the pointer and clicking.
      await sleep(100);
      await clickMouse("left");
      break;
    }
    case "moveto": {
      const [mx, my] = parsePoint(value);
      await moveMouse(mx, my);
      break;
    }
    case "type":
      if (value) {
        if (typeTextHuman) {
          await typeTextHuman(value, 50);
        } else {
          await typeText(value);
        }
      }
      break;
    case "press":
      await pressKey(value || "Return");
      break;
    case "keycombo": {
      const keys2 = value.split("+").map((k) => k.trim().toLowerCase());
      await keyCombo(keys2);
      break;
    }
    case "scroll":
      await scroll(value.toLowerCase().includes("up") ? "up" : "down");
      break;
    case "navigate": {
      const navUrl = value.startsWith("http") ? value : `https://${value}`;
      await openUrl(navUrl);
      // Give the page a moment to start loading.
      await sleep(2e3);
      break;
    }
    case "wait": {
      // Defaults to 2 seconds when the value is missing, zero or not a number.
      const seconds2 = Number.parseFloat(value) || 2;
      await sleep(seconds2 * 1e3);
      break;
    }
    case "findclick":
      if (findAndClick) {
        await findAndClick(value);
      } else {
        throw new Error("findAndClick not available");
      }
      break;
    default:
      throw new Error(`Unknown action: ${action}`);
  }
}
2394
1254
  }
2395
1255
  case "chat":
2396
1256
  step.result = `Task noted: ${params}`;
@@ -2429,7 +1289,7 @@ async function executeTask(task, onProgress) {
2429
1289
  task.completedAt = /* @__PURE__ */ new Date();
2430
1290
  return task;
2431
1291
  }
2432
- function sleep2(ms) {
1292
/**
 * Pause for the given number of milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Promise that settles when the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
2435
1295
  function getTaskMemoryStats() {
@@ -2444,8 +1304,8 @@ function getTaskMemoryStats() {
2444
1304
  }
2445
1305
  function clearTaskMemory() {
2446
1306
  try {
2447
- if (fs2.existsSync(TASK_MEMORY_FILE)) {
2448
- fs2.unlinkSync(TASK_MEMORY_FILE);
1307
+ if (fs.existsSync(TASK_MEMORY_FILE)) {
1308
+ fs.unlinkSync(TASK_MEMORY_FILE);
2449
1309
  }
2450
1310
  } catch {
2451
1311
  }
@@ -2545,6 +1405,7 @@ var TelegramBotService = class extends EventEmitter {
2545
1405
  isRunning = false;
2546
1406
  allowedChatIds = /* @__PURE__ */ new Set();
2547
1407
  chatHistory = /* @__PURE__ */ new Map();
1408
+ watchIntervals = /* @__PURE__ */ new Map();
2548
1409
  constructor() {
2549
1410
  super();
2550
1411
  }
@@ -2603,18 +1464,24 @@ var TelegramBotService = class extends EventEmitter {
2603
1464
  await ctx.reply(
2604
1465
  `\u{1F916} C-napse connected!
2605
1466
 
2606
- Commands:
1467
+ \u{1F4CB} Commands:
2607
1468
  /screen - Take screenshot
2608
1469
  /describe - Screenshot + AI description
2609
1470
  /task <desc> - Multi-step automation
2610
1471
  /run <cmd> - Execute shell command
2611
1472
  /status - System status
2612
1473
 
2613
- Examples:
2614
- \u2022 /task open folder E:/Test and list files
1474
+ \u{1F916} Autonomous Agent:
1475
+ /agent <goal> - Start autonomous agent
1476
+ /agent stop - Stop the agent
1477
+ /watch - Stream screen live
1478
+ /learn - View learned actions
1479
+
1480
+ \u{1F4A1} Examples:
1481
+ \u2022 /agent open safari and search for weather
2615
1482
  \u2022 /task open notepad and type hello
1483
+ \u2022 /watch 10 (screenshot every 10s)
2616
1484
  \u2022 minimize chrome
2617
- \u2022 what windows are open?
2618
1485
 
2619
1486
  Your chat ID: ${chatId}`
2620
1487
  );
@@ -2692,16 +1559,253 @@ ${result.error}
2692
1559
  return;
2693
1560
  }
2694
1561
  const config = getConfig();
1562
+ let agentStatus = "Not running";
1563
+ try {
1564
+ const { getAutonomousAgent } = await import("./autonomous-VGEVIXXQ.js");
1565
+ const agent = getAutonomousAgent();
1566
+ const state = agent.getState();
1567
+ if (state.isActive) {
1568
+ agentStatus = `Running (${state.attemptCount} attempts, goal: ${state.goal?.slice(0, 30)}...)`;
1569
+ }
1570
+ } catch {
1571
+ }
1572
+ let learnedCount = 0;
1573
+ try {
1574
+ const { getLearner } = await import("./learner-KH3TFTD7.js");
1575
+ const learner = getLearner();
1576
+ const stats = learner.getStats();
1577
+ learnedCount = stats.memorySize;
1578
+ } catch {
1579
+ }
2695
1580
  const status = [
2696
1581
  "\u{1F4CA} C-napse Status",
2697
1582
  "",
2698
1583
  `Provider: ${config.provider}`,
2699
1584
  `Model: ${config.model}`,
2700
1585
  `Platform: ${process.platform}`,
2701
- `Node: ${process.version}`
1586
+ `Node: ${process.version}`,
1587
+ "",
1588
+ "\u{1F916} Agent Status:",
1589
+ ` Agent: ${agentStatus}`,
1590
+ ` Learned actions: ${learnedCount}`
2702
1591
  ].join("\n");
2703
1592
  await ctx.reply(status);
2704
1593
  });
1594
// Telegram /agent command: "/agent <goal>" starts the autonomous agent and relays
// its progress to the chat; "stop", "pause", "resume" and "status" control or
// inspect the agent. Sent without arguments it replies with usage help.
this.bot.command("agent", async (ctx) => {
  if (!this.isAllowed(ctx.chat.id)) {
    await ctx.reply("\u26D4 Not authorized. Send /start first.");
    return;
  }
  // Drop the command token, including the "@BotName" suffix used in group chats.
  const args2 = ctx.message.text.replace(/^\/agent(?:@\w+)?/, "").trim();
  const { getAutonomousAgent } = await import("./autonomous-VGEVIXXQ.js");
  const agent = getAutonomousAgent();
  if (args2 === "stop") {
    agent.stop();
    await ctx.reply("\u{1F6D1} Agent stopped");
    return;
  }
  if (args2 === "pause") {
    agent.pause();
    await ctx.reply("\u23F8\uFE0F Agent paused");
    return;
  }
  if (args2 === "resume") {
    agent.resume();
    await ctx.reply("\u25B6\uFE0F Agent resumed");
    return;
  }
  if (args2 === "status") {
    const state = agent.getState();
    // Only the five most recent history entries are reported.
    const history = agent.getHistory().slice(-5);
    let statusMsg = "\u{1F916} Agent Status\n\n";
    statusMsg += `Goal: ${state.goal || "None"}\n`;
    statusMsg += `Active: ${state.isActive ? "Yes" : "No"}\n`;
    statusMsg += `Attempts: ${state.attemptCount}\n`;
    statusMsg += `Confidence: ${state.confidence}%\n`;
    statusMsg += `Stuck count: ${state.stuckCount}\n\n`;
    if (history.length > 0) {
      statusMsg += "Recent actions:\n";
      history.forEach((h) => {
        statusMsg += `\u2022 ${h.action}: ${h.value?.slice(0, 30) || ""} (${h.result})\n`;
      });
    }
    await ctx.reply(statusMsg);
    return;
  }
  if (!args2) {
    await ctx.reply(
      "\u{1F916} Autonomous Agent Commands:\n\n/agent <goal> - Start agent with a goal\n/agent stop - Stop the agent\n/agent pause - Pause the agent\n/agent resume - Resume the agent\n/agent status - Get agent status\n\nExamples:\n\u2022 /agent open safari and search for weather\n\u2022 /agent compose an email in gmail to john@test.com\n\u2022 /agent book a flight on kayak.com from NYC to LA"
    );
    return;
  }
  const goal = args2;
  await ctx.reply(`\u{1F680} Starting agent with goal:\n"${goal}"\n\nSending updates...`);

  // Progress reporting is best-effort. These callbacks are invoked by the agent's
  // event emitter and by setInterval, neither of which awaits them, so any
  // failure (e.g. Telegram rejecting a message) is contained here rather than
  // surfacing as an unhandled promise rejection.
  const bestEffort = (handler) => async (payload) => {
    try {
      await handler(payload);
    } catch {
      // Intentionally ignored: a lost progress message must not affect the agent.
    }
  };

  // Capture the screen and send it as a photo; `makeCaption` is evaluated after
  // the capture so the caption reflects the state at send time.
  const sendScreenshot = async (makeCaption) => {
    const screenshot = await captureScreenshot();
    if (screenshot) {
      const buffer = Buffer.from(screenshot, "base64");
      await ctx.replyWithPhoto({ source: buffer }, { caption: makeCaption() });
    }
  };

  let lastUpdate = Date.now();
  let screenshotTicks = 0;
  let screenshotInterval;
  let detached = false;

  const onAttempt = bestEffort(async (data) => {
    // Throttle attempt notifications to at most one every 3 seconds.
    if (Date.now() - lastUpdate < 3e3) return;
    lastUpdate = Date.now();
    await ctx.reply(`\u{1F504} Attempt ${data.count}/${data.max}`);
  });
  const onDecided = bestEffort(async (decision) => {
    await ctx.reply(`\u{1F4AD} ${decision.action}: ${decision.value?.slice(0, 50) || ""}`);
  });
  const onError = bestEffort(async (data) => {
    await ctx.reply(`\u274C Error: ${data.error}`);
  });

  // Remove every listener and timer registered by this invocation (idempotent).
  const detach = () => {
    if (detached) return;
    detached = true;
    clearInterval(screenshotInterval);
    agent.off("attempt", onAttempt);
    agent.off("decided", onDecided);
    agent.off("completed", onCompleted);
    agent.off("error", onError);
  };

  const onCompleted = bestEffort(async (data) => {
    try {
      const emoji = data.success ? "\u2705" : "\u274C";
      await ctx.reply(`${emoji} Agent ${data.success ? "completed" : "stopped"} after ${data.attempts} attempts`);
      try {
        await sendScreenshot(() => (data.success ? "\u2705 Goal accomplished!" : "\u{1F4F8} Final state"));
      } catch {
        // The final screenshot is optional.
      }
    } finally {
      // Always unhook, even when the completion message could not be delivered.
      detach();
    }
  });

  // Checked every 5 s; a progress screenshot is sent on every 6th tick (~30 s).
  screenshotInterval = setInterval(bestEffort(async () => {
    if (!agent.getState().isActive) {
      clearInterval(screenshotInterval);
      return;
    }
    screenshotTicks++;
    if (screenshotTicks % 6 === 0) {
      await sendScreenshot(() => `\u{1F4F8} Progress update (attempt ${agent.getState().attemptCount})`);
    }
  }), 5e3);

  agent.on("attempt", onAttempt);
  agent.on("decided", onDecided);
  agent.on("completed", onCompleted);
  agent.on("error", onError);

  // Not awaited on purpose: the command handler returns while the agent runs.
  agent.start(goal).then(
    () => {
      clearInterval(screenshotInterval);
    },
    bestEffort(async (err) => {
      // A rejected start may never emit "completed", so unhook here to avoid
      // leaking listeners into the next run.
      detach();
      const reason = err instanceof Error ? err.message : String(err);
      await ctx.reply(`\u274C Agent error: ${reason}`);
    })
  );
});
1715
// Telegram /watch command: "/watch [seconds]" streams screenshots to the chat at
// a fixed interval (default 5 s, clamped to 3-60 s); "/watch stop" ends the
// stream. One stream is tracked per chat id in this.watchIntervals.
this.bot.command("watch", async (ctx) => {
  if (!this.isAllowed(ctx.chat.id)) {
    await ctx.reply("\u26D4 Not authorized. Send /start first.");
    return;
  }
  // Drop the command token, including the "@BotName" suffix used in group chats.
  const args2 = ctx.message.text.replace(/^\/watch(?:@\w+)?/, "").trim();
  const chatId = ctx.chat.id;
  if (!this.watchIntervals) {
    this.watchIntervals = /* @__PURE__ */ new Map();
  }
  if (args2 === "stop") {
    if (this.watchIntervals.has(chatId)) {
      clearInterval(this.watchIntervals.get(chatId));
      this.watchIntervals.delete(chatId);
      await ctx.reply("\u{1F441}\uFE0F Screen streaming stopped");
    } else {
      await ctx.reply("Not currently streaming");
    }
    return;
  }
  // Non-numeric or zero input falls back to 5 seconds before clamping.
  const intervalSeconds = Number.parseInt(args2, 10) || 5;
  const intervalMs = Math.max(3e3, Math.min(6e4, intervalSeconds * 1e3));
  if (this.watchIntervals.has(chatId)) {
    clearInterval(this.watchIntervals.get(chatId));
  }
  await ctx.reply(`\u{1F441}\uFE0F Starting screen stream (every ${intervalMs / 1e3}s)\nSend /watch stop to end`);

  // Capture and send one frame. Failures are ignored so a single bad capture or
  // upload does not end the stream, and a tick is skipped while the previous
  // frame is still being captured/uploaded so sends never pile up.
  let sending = false;
  const sendFrame = async (makeCaption) => {
    if (sending) return;
    sending = true;
    try {
      const screenshot = await captureScreenshot();
      if (screenshot) {
        const buffer = Buffer.from(screenshot, "base64");
        await ctx.replyWithPhoto({ source: buffer }, { caption: makeCaption() });
      }
    } catch {
      // Best-effort: the next tick tries again.
    } finally {
      sending = false;
    }
  };

  await sendFrame(() => "\u{1F4F8} Stream started");
  const interval = setInterval(() => {
    void sendFrame(() => `\u{1F4F8} ${(/* @__PURE__ */ new Date()).toLocaleTimeString()}`);
  }, intervalMs);
  // Another /watch for this chat may have registered a timer while we were
  // awaiting above; clear it so it is not orphaned when the entry is replaced.
  clearInterval(this.watchIntervals.get(chatId));
  this.watchIntervals.set(chatId, interval);
});
1764
// Telegram /learn command: lists the ten most recent learned actions;
// "/learn stats" reports aggregate counters and "/learn clear" wipes the memory.
this.bot.command("learn", async (ctx) => {
  const authorized = this.isAllowed(ctx.chat.id);
  if (!authorized) {
    await ctx.reply("\u26D4 Not authorized. Send /start first.");
    return;
  }
  const args2 = ctx.message.text.replace("/learn", "").trim();
  const learnerModule = await import("./learner-KH3TFTD7.js");
  const learner = learnerModule.getLearner();
  await learner.load();

  if (args2 === "clear") {
    await learner.clearMemory();
    await ctx.reply("\u{1F5D1}\uFE0F Learned actions cleared");
    return;
  }

  if (args2 === "stats") {
    const stats = learner.getStats();
    const header = `\u{1F4CA} Learning Stats:\n\nTotal learned: ${stats.memorySize}\nTotal successes: ${stats.totalSuccesses}\nTotal attempts: ${stats.totalAttempts}\n\nSources:\n`;
    const sourceLines = Object.entries(stats.sourceCounts).map(([source, count]) => `\u2022 ${source}: ${count}`);
    await ctx.reply(header + sourceLines.join("\n"));
    return;
  }

  // Only the last ten entries are shown.
  const learned = learner.getAllLearned().slice(-10);
  if (learned.length === 0) {
    await ctx.reply("\u{1F4DA} No learned actions yet\n\nThe agent learns from successful actions and remembers them for next time.");
    return;
  }

  const entries = learned.map((l, i) => {
    const title = `${i + 1}. ${l.goal.slice(0, 40)}\n`;
    const action = ` \u2192 ${l.actionType}: ${l.actionValue.slice(0, 30)}\n`;
    const tally = ` (${l.successCount}\u2705 ${l.failCount}\u274C from ${l.source})\n\n`;
    return title + action + tally;
  });
  await ctx.reply("\u{1F4DA} Recent Learned Actions:\n\n" + entries.join(""));
});
2705
1809
  this.bot.command("task", async (ctx) => {
2706
1810
  if (!this.isAllowed(ctx.chat.id)) {
2707
1811
  await ctx.reply("\u26D4 Not authorized. Send /start first.");
@@ -2824,8 +1928,8 @@ ${result.error}
2824
1928
  this.emit("error", new Error(errorMsg));
2825
1929
  }
2826
1930
  });
2827
- this.bot.catch((err2) => {
2828
- this.emit("error", err2);
1931
+ this.bot.catch((err) => {
1932
+ this.emit("error", err);
2829
1933
  });
2830
1934
  }
2831
1935
  /**
@@ -2967,15 +2071,15 @@ function useTelegram(onMessage) {
2967
2071
  setLastMessage(msg);
2968
2072
  onMessageRef.current?.(msg);
2969
2073
  });
2970
- bot.on("error", (err2) => {
2971
- setError(err2.message);
2074
+ bot.on("error", (err) => {
2075
+ setError(err.message);
2972
2076
  });
2973
2077
  await bot.start();
2974
2078
  setIsEnabled(true);
2975
- } catch (err2) {
2976
- const errorMsg = err2 instanceof Error ? err2.message : "Failed to start Telegram bot";
2079
+ } catch (err) {
2080
+ const errorMsg = err instanceof Error ? err.message : "Failed to start Telegram bot";
2977
2081
  setError(errorMsg);
2978
- throw err2;
2082
+ throw err;
2979
2083
  } finally {
2980
2084
  setIsStarting(false);
2981
2085
  }
@@ -2986,10 +2090,10 @@ function useTelegram(onMessage) {
2986
2090
  const bot = getTelegramBot();
2987
2091
  await bot.stop();
2988
2092
  setIsEnabled(false);
2989
- } catch (err2) {
2990
- const errorMsg = err2 instanceof Error ? err2.message : "Failed to stop Telegram bot";
2093
+ } catch (err) {
2094
+ const errorMsg = err instanceof Error ? err.message : "Failed to stop Telegram bot";
2991
2095
  setError(errorMsg);
2992
- throw err2;
2096
+ throw err;
2993
2097
  }
2994
2098
  }, [isEnabled]);
2995
2099
  const toggle = useCallback3(async () => {
@@ -3030,10 +2134,10 @@ function useTasks(onProgress) {
3030
2134
  });
3031
2135
  setCurrentTask(result);
3032
2136
  return result;
3033
- } catch (err2) {
3034
- const errorMsg = err2 instanceof Error ? err2.message : "Task failed";
2137
+ } catch (err) {
2138
+ const errorMsg = err instanceof Error ? err.message : "Task failed";
3035
2139
  setError(errorMsg);
3036
- throw err2;
2140
+ throw err;
3037
2141
  } finally {
3038
2142
  setIsRunning(false);
3039
2143
  setCurrentStep(null);
@@ -3175,7 +2279,7 @@ Type /help for commands`);
3175
2279
  chat2.addSystemMessage(`\u{1F5A5}\uFE0F Screen:
3176
2280
 
3177
2281
  ${description}`);
3178
- } catch (err2) {
2282
+ } catch (err) {
3179
2283
  chat2.addSystemMessage(`\u274C ${vision.error || "Vision failed"}`);
3180
2284
  } finally {
3181
2285
  setStatus("Ready");
@@ -3331,7 +2435,7 @@ async function main() {
3331
2435
  case "config": {
3332
2436
  const subcommand = args[1];
3333
2437
  if (!subcommand) {
3334
- const { ProviderSelector: ProviderSelector2 } = await import("./ProviderSelector-MXRZFAOB.js");
2438
+ const { ProviderSelector: ProviderSelector2 } = await import("./ProviderSelector-GZYF26LL.js");
3335
2439
  const { Box: Box7 } = await import("ink");
3336
2440
  render(
3337
2441
  /* @__PURE__ */ jsx7(Box7, { flexDirection: "column", padding: 1, children: /* @__PURE__ */ jsx7(
@@ -3447,7 +2551,7 @@ ${dim}GitHub: https://github.com/projectservan8n/C-napse${reset}
3447
2551
  process.exit(0);
3448
2552
  }
3449
2553
  case "init": {
3450
- const { ProviderSelector: ProviderSelector2 } = await import("./ProviderSelector-MXRZFAOB.js");
2554
+ const { ProviderSelector: ProviderSelector2 } = await import("./ProviderSelector-GZYF26LL.js");
3451
2555
  const { Box: Box7, Text: Text7 } = await import("ink");
3452
2556
  render(
3453
2557
  /* @__PURE__ */ jsxs6(Box7, { flexDirection: "column", padding: 1, children: [