@projectservan8n/cnapse 0.8.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,12 +1,59 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
- ProviderSelector,
3
+ ProviderSelector
4
+ } from "./chunk-7SDY7OPA.js";
5
+ import {
6
+ askAI,
7
+ getPageText,
8
+ googleDocsType,
9
+ googleSheetsType,
10
+ openGoogleDoc,
11
+ openGoogleSheet,
12
+ openUrl,
13
+ research,
14
+ scroll,
15
+ sendGmail,
16
+ sendOutlook,
17
+ webSearch
18
+ } from "./chunk-MOKGR7WE.js";
19
+ import {
20
+ chat,
21
+ chatWithVision
22
+ } from "./chunk-GP73OJCZ.js";
23
+ import {
24
+ clickMouse,
25
+ closeWindow,
26
+ doubleClick,
27
+ findAndClick,
28
+ focusWindow,
29
+ getActiveWindow,
30
+ getMousePosition,
31
+ keyCombo,
32
+ listDir,
33
+ listWindows,
34
+ maximizeWindow,
35
+ minimizeWindow,
36
+ moveMouse,
37
+ pressKey,
38
+ readFile,
39
+ restoreWindow,
40
+ runCommand,
41
+ scrollMouse,
42
+ typeText,
43
+ typeTextHuman,
44
+ writeFile
45
+ } from "./chunk-TFHK5CYF.js";
46
+ import {
47
+ captureScreenshot,
48
+ describeScreen
49
+ } from "./chunk-OIVTPXE4.js";
50
+ import {
4
51
  getApiKey,
5
52
  getConfig,
6
53
  setApiKey,
7
54
  setModel,
8
55
  setProvider
9
- } from "./chunk-OPX7FFL6.js";
56
+ } from "./chunk-COKO6V5J.js";
10
57
 
11
58
  // src/index.tsx
12
59
  import { render } from "ink";
@@ -194,580 +241,6 @@ function HelpMenu({ onClose, onSelect }) {
194
241
 
195
242
  // src/hooks/useChat.ts
196
243
  import { useState as useState2, useCallback, useRef, useEffect } from "react";
197
-
198
- // src/lib/system.ts
199
- import os from "os";
200
- import { exec } from "child_process";
201
- import { promisify } from "util";
202
- var execAsync = promisify(exec);
203
- var cachedSystemInfo = null;
204
- async function getSystemInfo() {
205
- if (cachedSystemInfo) return cachedSystemInfo;
206
- const platform = os.platform();
207
- const cpus = os.cpus();
208
- let osName = platform;
209
- const osVersion = os.release();
210
- if (platform === "win32") {
211
- try {
212
- const { stdout } = await execAsync("wmic os get Caption /value", { timeout: 5e3 });
213
- const match = stdout.match(/Caption=(.+)/);
214
- if (match) osName = match[1].trim();
215
- } catch {
216
- osName = `Windows ${osVersion}`;
217
- }
218
- } else if (platform === "darwin") {
219
- try {
220
- const { stdout } = await execAsync("sw_vers -productName && sw_vers -productVersion", { timeout: 5e3 });
221
- const lines = stdout.trim().split("\n");
222
- osName = `${lines[0]} ${lines[1]}`;
223
- } catch {
224
- osName = `macOS ${osVersion}`;
225
- }
226
- } else if (platform === "linux") {
227
- try {
228
- const { stdout } = await execAsync("cat /etc/os-release | grep PRETTY_NAME", { timeout: 5e3 });
229
- const match = stdout.match(/PRETTY_NAME="(.+)"/);
230
- if (match) osName = match[1];
231
- } catch {
232
- osName = `Linux ${osVersion}`;
233
- }
234
- }
235
- cachedSystemInfo = {
236
- platform,
237
- osName,
238
- osVersion,
239
- arch: os.arch(),
240
- cpuModel: cpus[0]?.model || "Unknown CPU",
241
- cpuCores: cpus.length,
242
- totalMemoryGB: Math.round(os.totalmem() / 1024 ** 3 * 10) / 10,
243
- freeMemoryGB: Math.round(os.freemem() / 1024 ** 3 * 10) / 10,
244
- username: os.userInfo().username,
245
- hostname: os.hostname(),
246
- homeDir: os.homedir(),
247
- shell: process.env.SHELL || process.env.COMSPEC || "unknown"
248
- };
249
- return cachedSystemInfo;
250
- }
251
- async function getSystemContext() {
252
- const info = await getSystemInfo();
253
- return `SYSTEM INFO:
254
- - OS: ${info.osName} (${info.arch})
255
- - CPU: ${info.cpuModel} (${info.cpuCores} cores)
256
- - RAM: ${info.totalMemoryGB}GB total, ${info.freeMemoryGB}GB free
257
- - User: ${info.username}@${info.hostname}
258
- - Home: ${info.homeDir}
259
- - Shell: ${info.shell}`;
260
- }
261
- function getCwd() {
262
- return process.cwd();
263
- }
264
-
265
- // src/lib/api.ts
266
- var BASE_PROMPT = `You are C-napse, an AI assistant for PC automation running on the user's desktop.
267
- You have access to their system and can help with coding, file management, shell commands, and more.
268
-
269
- When responding:
270
- - Be direct and practical
271
- - Use markdown formatting for code blocks
272
- - If asked to do something, explain what you'll do first
273
- - Give commands specific to the user's OS (use the system info below)
274
- - Be aware of the user's current working directory`;
275
- var systemContextCache = null;
276
- async function getSystemPrompt() {
277
- if (!systemContextCache) {
278
- systemContextCache = await getSystemContext();
279
- }
280
- const cwd = getCwd();
281
- return `${BASE_PROMPT}
282
-
283
- ${systemContextCache}
284
- - Current directory: ${cwd}`;
285
- }
286
- async function chat(messages, systemPrompt) {
287
- const config = getConfig();
288
- const finalPrompt = systemPrompt || await getSystemPrompt();
289
- const allMessages = [
290
- { role: "system", content: finalPrompt },
291
- ...messages
292
- ];
293
- switch (config.provider) {
294
- case "openrouter":
295
- return chatOpenRouter(allMessages, config.model);
296
- case "ollama":
297
- return chatOllama(allMessages, config.model);
298
- case "anthropic":
299
- return chatAnthropic(allMessages, config.model);
300
- case "openai":
301
- return chatOpenAI(allMessages, config.model);
302
- default:
303
- throw new Error(`Unknown provider: ${config.provider}`);
304
- }
305
- }
306
- async function chatOpenRouter(messages, model) {
307
- const apiKey = getApiKey("openrouter");
308
- if (!apiKey) {
309
- throw new Error("OpenRouter API key not configured. Run: cnapse auth openrouter <key>");
310
- }
311
- const config = getConfig();
312
- const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
313
- method: "POST",
314
- headers: {
315
- "Authorization": `Bearer ${apiKey}`,
316
- "Content-Type": "application/json",
317
- "HTTP-Referer": config.openrouter.siteUrl,
318
- "X-Title": config.openrouter.appName
319
- },
320
- body: JSON.stringify({
321
- model,
322
- messages,
323
- max_tokens: 2048,
324
- temperature: 0.7
325
- })
326
- });
327
- if (!response.ok) {
328
- const error = await response.text();
329
- throw new Error(`OpenRouter error: ${response.status} - ${error}`);
330
- }
331
- const data = await response.json();
332
- const content = data.choices?.[0]?.message?.content || "";
333
- return { content, model };
334
- }
335
- async function chatOllama(messages, model) {
336
- const config = getConfig();
337
- const response = await fetch(`${config.ollamaHost}/api/chat`, {
338
- method: "POST",
339
- headers: { "Content-Type": "application/json" },
340
- body: JSON.stringify({
341
- model,
342
- messages,
343
- stream: false
344
- })
345
- });
346
- if (!response.ok) {
347
- const error = await response.text();
348
- throw new Error(`Ollama error: ${response.status} - ${error}`);
349
- }
350
- const data = await response.json();
351
- const content = data.message?.content || "";
352
- return { content, model };
353
- }
354
- async function chatAnthropic(messages, model) {
355
- const apiKey = getApiKey("anthropic");
356
- if (!apiKey) {
357
- throw new Error("Anthropic API key not configured. Run: cnapse auth anthropic <key>");
358
- }
359
- const systemMsg = messages.find((m) => m.role === "system");
360
- const chatMessages = messages.filter((m) => m.role !== "system");
361
- const response = await fetch("https://api.anthropic.com/v1/messages", {
362
- method: "POST",
363
- headers: {
364
- "x-api-key": apiKey,
365
- "Content-Type": "application/json",
366
- "anthropic-version": "2023-06-01"
367
- },
368
- body: JSON.stringify({
369
- model,
370
- max_tokens: 2048,
371
- system: systemMsg?.content || "",
372
- messages: chatMessages
373
- })
374
- });
375
- if (!response.ok) {
376
- const error = await response.text();
377
- throw new Error(`Anthropic error: ${response.status} - ${error}`);
378
- }
379
- const data = await response.json();
380
- const content = data.content?.[0]?.text || "";
381
- return { content, model };
382
- }
383
- async function chatOpenAI(messages, model) {
384
- const apiKey = getApiKey("openai");
385
- if (!apiKey) {
386
- throw new Error("OpenAI API key not configured. Run: cnapse auth openai <key>");
387
- }
388
- const response = await fetch("https://api.openai.com/v1/chat/completions", {
389
- method: "POST",
390
- headers: {
391
- "Authorization": `Bearer ${apiKey}`,
392
- "Content-Type": "application/json"
393
- },
394
- body: JSON.stringify({
395
- model,
396
- messages,
397
- max_tokens: 2048,
398
- temperature: 0.7
399
- })
400
- });
401
- if (!response.ok) {
402
- const error = await response.text();
403
- throw new Error(`OpenAI error: ${response.status} - ${error}`);
404
- }
405
- const data = await response.json();
406
- const content = data.choices?.[0]?.message?.content || "";
407
- return { content, model };
408
- }
409
- async function chatWithVision(messages, screenshotBase64) {
410
- const config = getConfig();
411
- const systemPrompt = await getSystemPrompt();
412
- const visionPrompt = systemPrompt + "\n\nYou can see the user's screen. Describe what you see and help them with their request.";
413
- switch (config.provider) {
414
- case "openrouter":
415
- return chatWithVisionOpenRouter(messages, screenshotBase64, visionPrompt);
416
- case "ollama":
417
- return chatWithVisionOllama(messages, screenshotBase64, visionPrompt);
418
- case "anthropic":
419
- return chatWithVisionAnthropic(messages, screenshotBase64, visionPrompt);
420
- case "openai":
421
- return chatWithVisionOpenAI(messages, screenshotBase64, visionPrompt);
422
- default:
423
- throw new Error(`Vision not supported for provider: ${config.provider}`);
424
- }
425
- }
426
- async function chatWithVisionOpenRouter(messages, screenshot, systemPrompt) {
427
- const apiKey = getApiKey("openrouter");
428
- if (!apiKey) throw new Error("OpenRouter API key not configured");
429
- const config = getConfig();
430
- let model = config.model;
431
- if (!model.includes("gpt-5") && !model.includes("claude") && !model.includes("gemini")) {
432
- model = "openai/gpt-5-nano";
433
- }
434
- const lastUserIdx = messages.length - 1;
435
- const visionMessages = messages.map((m, i) => {
436
- if (i === lastUserIdx && m.role === "user") {
437
- return {
438
- role: "user",
439
- content: [
440
- { type: "text", text: m.content },
441
- { type: "image_url", image_url: { url: `data:image/png;base64,${screenshot}` } }
442
- ]
443
- };
444
- }
445
- return m;
446
- });
447
- const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
448
- method: "POST",
449
- headers: {
450
- "Authorization": `Bearer ${apiKey}`,
451
- "Content-Type": "application/json",
452
- "HTTP-Referer": config.openrouter.siteUrl,
453
- "X-Title": config.openrouter.appName
454
- },
455
- body: JSON.stringify({
456
- model,
457
- messages: [{ role: "system", content: systemPrompt }, ...visionMessages],
458
- max_tokens: 2048
459
- })
460
- });
461
- if (!response.ok) {
462
- const error = await response.text();
463
- throw new Error(`OpenRouter vision error: ${response.status} - ${error}`);
464
- }
465
- const data = await response.json();
466
- return { content: data.choices?.[0]?.message?.content || "", model };
467
- }
468
- async function chatWithVisionOllama(messages, screenshot, systemPrompt) {
469
- const config = getConfig();
470
- const visionModels = ["llava", "llama3.2-vision", "bakllava"];
471
- const model = visionModels.find((m) => config.model.includes(m)) || "llava";
472
- const lastUserMsg = messages.filter((m) => m.role === "user").pop();
473
- const response = await fetch(`${config.ollamaHost}/api/generate`, {
474
- method: "POST",
475
- headers: { "Content-Type": "application/json" },
476
- body: JSON.stringify({
477
- model,
478
- prompt: `${systemPrompt}
479
-
480
- User: ${lastUserMsg?.content || "What do you see?"}`,
481
- images: [screenshot],
482
- stream: false
483
- })
484
- });
485
- if (!response.ok) {
486
- const error = await response.text();
487
- throw new Error(`Ollama vision error: ${error}`);
488
- }
489
- const data = await response.json();
490
- return { content: data.response || "", model };
491
- }
492
- async function chatWithVisionAnthropic(messages, screenshot, systemPrompt) {
493
- const apiKey = getApiKey("anthropic");
494
- if (!apiKey) throw new Error("Anthropic API key not configured");
495
- const chatMessages = messages.filter((m) => m.role !== "system");
496
- const lastUserIdx = chatMessages.length - 1;
497
- const visionMessages = chatMessages.map((m, i) => {
498
- if (i === lastUserIdx && m.role === "user") {
499
- return {
500
- role: "user",
501
- content: [
502
- { type: "image", source: { type: "base64", media_type: "image/png", data: screenshot } },
503
- { type: "text", text: m.content }
504
- ]
505
- };
506
- }
507
- return { role: m.role, content: m.content };
508
- });
509
- const response = await fetch("https://api.anthropic.com/v1/messages", {
510
- method: "POST",
511
- headers: {
512
- "x-api-key": apiKey,
513
- "anthropic-version": "2023-06-01",
514
- "Content-Type": "application/json"
515
- },
516
- body: JSON.stringify({
517
- model: "claude-3-5-sonnet-20241022",
518
- max_tokens: 2048,
519
- system: systemPrompt,
520
- messages: visionMessages
521
- })
522
- });
523
- if (!response.ok) {
524
- const error = await response.text();
525
- throw new Error(`Anthropic vision error: ${error}`);
526
- }
527
- const data = await response.json();
528
- return { content: data.content?.[0]?.text || "", model: "claude-3-5-sonnet-20241022" };
529
- }
530
- async function chatWithVisionOpenAI(messages, screenshot, systemPrompt) {
531
- const apiKey = getApiKey("openai");
532
- if (!apiKey) throw new Error("OpenAI API key not configured");
533
- const lastUserIdx = messages.length - 1;
534
- const visionMessages = messages.map((m, i) => {
535
- if (i === lastUserIdx && m.role === "user") {
536
- return {
537
- role: "user",
538
- content: [
539
- { type: "text", text: m.content },
540
- { type: "image_url", image_url: { url: `data:image/png;base64,${screenshot}` } }
541
- ]
542
- };
543
- }
544
- return m;
545
- });
546
- const response = await fetch("https://api.openai.com/v1/chat/completions", {
547
- method: "POST",
548
- headers: {
549
- "Authorization": `Bearer ${apiKey}`,
550
- "Content-Type": "application/json"
551
- },
552
- body: JSON.stringify({
553
- model: "gpt-4o",
554
- messages: [{ role: "system", content: systemPrompt }, ...visionMessages],
555
- max_tokens: 2048
556
- })
557
- });
558
- if (!response.ok) {
559
- const error = await response.text();
560
- throw new Error(`OpenAI vision error: ${error}`);
561
- }
562
- const data = await response.json();
563
- return { content: data.choices?.[0]?.message?.content || "", model: "gpt-4o" };
564
- }
565
-
566
- // src/lib/vision.ts
567
- async function describeScreen() {
568
- const screenshot = await captureScreenshot();
569
- if (!screenshot) {
570
- throw new Error("Failed to capture screenshot");
571
- }
572
- const config = getConfig();
573
- const description = await analyzeWithVision(screenshot, config.provider);
574
- return { description, screenshot };
575
- }
576
- async function captureScreenshot() {
577
- try {
578
- const screenshotDesktop = await import("screenshot-desktop");
579
- const buffer = await screenshotDesktop.default({ format: "png" });
580
- return buffer.toString("base64");
581
- } catch {
582
- return captureScreenFallback();
583
- }
584
- }
585
- async function captureScreenFallback() {
586
- const { exec: exec5 } = await import("child_process");
587
- const { promisify: promisify5 } = await import("util");
588
- const { tmpdir } = await import("os");
589
- const { join: join4 } = await import("path");
590
- const { readFile: readFile2, unlink } = await import("fs/promises");
591
- const execAsync5 = promisify5(exec5);
592
- const tempFile = join4(tmpdir(), `cnapse-screen-${Date.now()}.png`);
593
- try {
594
- const platform = process.platform;
595
- if (platform === "win32") {
596
- await execAsync5(`
597
- Add-Type -AssemblyName System.Windows.Forms
598
- $screen = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds
599
- $bitmap = New-Object System.Drawing.Bitmap($screen.Width, $screen.Height)
600
- $graphics = [System.Drawing.Graphics]::FromImage($bitmap)
601
- $graphics.CopyFromScreen($screen.Location, [System.Drawing.Point]::Empty, $screen.Size)
602
- $bitmap.Save("${tempFile.replace(/\\/g, "\\\\")}")
603
- $graphics.Dispose()
604
- $bitmap.Dispose()
605
- `, { shell: "powershell.exe" });
606
- } else if (platform === "darwin") {
607
- await execAsync5(`screencapture -x "${tempFile}"`);
608
- } else {
609
- await execAsync5(`gnome-screenshot -f "${tempFile}" 2>/dev/null || scrot "${tempFile}" 2>/dev/null || import -window root "${tempFile}"`);
610
- }
611
- const imageBuffer = await readFile2(tempFile);
612
- await unlink(tempFile).catch(() => {
613
- });
614
- return imageBuffer.toString("base64");
615
- } catch {
616
- return null;
617
- }
618
- }
619
- async function analyzeWithVision(base64Image, provider) {
620
- const prompt = `Look at this screenshot and describe:
621
- 1. What application or window is visible
622
- 2. Key UI elements you can see (buttons, text fields, menus)
623
- 3. What the user appears to be doing or could do next
624
- 4. Any notable content or state
625
-
626
- Be concise but helpful.`;
627
- switch (provider) {
628
- case "ollama":
629
- return analyzeWithOllama(base64Image, prompt);
630
- case "openrouter":
631
- return analyzeWithOpenRouter(base64Image, prompt);
632
- case "anthropic":
633
- return analyzeWithAnthropic(base64Image, prompt);
634
- case "openai":
635
- return analyzeWithOpenAI(base64Image, prompt);
636
- default:
637
- throw new Error(`Vision not supported for provider: ${provider}`);
638
- }
639
- }
640
- async function analyzeWithOllama(base64Image, prompt) {
641
- const config = getConfig();
642
- const ollamaHost = config.ollamaHost || "http://localhost:11434";
643
- const visionModels = ["llava", "llama3.2-vision", "bakllava", "llava-llama3"];
644
- const model = visionModels.find((m) => config.model.includes(m)) || "llava";
645
- const response = await fetch(`${ollamaHost}/api/generate`, {
646
- method: "POST",
647
- headers: { "Content-Type": "application/json" },
648
- body: JSON.stringify({
649
- model,
650
- prompt,
651
- images: [base64Image],
652
- stream: false
653
- })
654
- });
655
- if (!response.ok) {
656
- const text = await response.text();
657
- throw new Error(`Ollama vision error: ${text}`);
658
- }
659
- const data = await response.json();
660
- return data.response || "Unable to analyze image";
661
- }
662
- async function analyzeWithOpenRouter(base64Image, prompt) {
663
- const apiKey = getApiKey("openrouter");
664
- if (!apiKey) throw new Error("OpenRouter API key not configured");
665
- const model = "anthropic/claude-3-5-sonnet";
666
- const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
667
- method: "POST",
668
- headers: {
669
- "Authorization": `Bearer ${apiKey}`,
670
- "Content-Type": "application/json",
671
- "HTTP-Referer": "https://c-napse.up.railway.app",
672
- "X-Title": "C-napse"
673
- },
674
- body: JSON.stringify({
675
- model,
676
- messages: [
677
- {
678
- role: "user",
679
- content: [
680
- { type: "text", text: prompt },
681
- {
682
- type: "image_url",
683
- image_url: { url: `data:image/png;base64,${base64Image}` }
684
- }
685
- ]
686
- }
687
- ],
688
- max_tokens: 1e3
689
- })
690
- });
691
- if (!response.ok) {
692
- const text = await response.text();
693
- throw new Error(`OpenRouter vision error: ${text}`);
694
- }
695
- const data = await response.json();
696
- return data.choices?.[0]?.message?.content || "Unable to analyze image";
697
- }
698
- async function analyzeWithAnthropic(base64Image, prompt) {
699
- const apiKey = getApiKey("anthropic");
700
- if (!apiKey) throw new Error("Anthropic API key not configured");
701
- const response = await fetch("https://api.anthropic.com/v1/messages", {
702
- method: "POST",
703
- headers: {
704
- "x-api-key": apiKey,
705
- "anthropic-version": "2023-06-01",
706
- "Content-Type": "application/json"
707
- },
708
- body: JSON.stringify({
709
- model: "claude-3-5-sonnet-20241022",
710
- max_tokens: 1e3,
711
- messages: [
712
- {
713
- role: "user",
714
- content: [
715
- {
716
- type: "image",
717
- source: {
718
- type: "base64",
719
- media_type: "image/png",
720
- data: base64Image
721
- }
722
- },
723
- { type: "text", text: prompt }
724
- ]
725
- }
726
- ]
727
- })
728
- });
729
- if (!response.ok) {
730
- const text = await response.text();
731
- throw new Error(`Anthropic vision error: ${text}`);
732
- }
733
- const data = await response.json();
734
- return data.content?.[0]?.text || "Unable to analyze image";
735
- }
736
- async function analyzeWithOpenAI(base64Image, prompt) {
737
- const apiKey = getApiKey("openai");
738
- if (!apiKey) throw new Error("OpenAI API key not configured");
739
- const response = await fetch("https://api.openai.com/v1/chat/completions", {
740
- method: "POST",
741
- headers: {
742
- "Authorization": `Bearer ${apiKey}`,
743
- "Content-Type": "application/json"
744
- },
745
- body: JSON.stringify({
746
- model: "gpt-4-vision-preview",
747
- messages: [
748
- {
749
- role: "user",
750
- content: [
751
- { type: "text", text: prompt },
752
- {
753
- type: "image_url",
754
- image_url: { url: `data:image/png;base64,${base64Image}` }
755
- }
756
- ]
757
- }
758
- ],
759
- max_tokens: 1e3
760
- })
761
- });
762
- if (!response.ok) {
763
- const text = await response.text();
764
- throw new Error(`OpenAI vision error: ${text}`);
765
- }
766
- const data = await response.json();
767
- return data.choices?.[0]?.message?.content || "Unable to analyze image";
768
- }
769
-
770
- // src/hooks/useChat.ts
771
244
  var WELCOME_MESSAGE = {
772
245
  id: "0",
773
246
  role: "system",
@@ -810,1028 +283,100 @@ function useChat(screenWatch = false) {
810
283
  timestamp: /* @__PURE__ */ new Date(),
811
284
  isStreaming: true
812
285
  };
813
- setMessages((prev) => [...prev, userMsg, assistantMsg]);
814
- setIsProcessing(true);
815
- try {
816
- const apiMessages = messages.filter((m) => m.role === "user" || m.role === "assistant").slice(-10).map((m) => ({ role: m.role, content: m.content }));
817
- apiMessages.push({ role: "user", content });
818
- let response;
819
- if (screenWatchRef.current) {
820
- const screenshot = await captureScreenshot();
821
- if (screenshot) {
822
- response = await chatWithVision(apiMessages, screenshot);
823
- } else {
824
- response = await chat(apiMessages);
825
- }
826
- } else {
827
- response = await chat(apiMessages);
828
- }
829
- setMessages(
830
- (prev) => prev.map(
831
- (m) => m.id === assistantId ? { ...m, content: response.content || "(no response)", isStreaming: false } : m
832
- )
833
- );
834
- } catch (err2) {
835
- const errorMsg = err2 instanceof Error ? err2.message : "Unknown error";
836
- setError(errorMsg);
837
- setMessages(
838
- (prev) => prev.map(
839
- (m) => m.id === assistantId ? { ...m, content: `Error: ${errorMsg}`, isStreaming: false } : m
840
- )
841
- );
842
- } finally {
843
- setIsProcessing(false);
844
- }
845
- }, [messages, isProcessing]);
846
- const clearMessages = useCallback(() => {
847
- setMessages([WELCOME_MESSAGE]);
848
- setError(null);
849
- }, []);
850
- return {
851
- messages,
852
- isProcessing,
853
- error,
854
- sendMessage,
855
- addSystemMessage,
856
- clearMessages
857
- };
858
- }
859
-
860
- // src/hooks/useVision.ts
861
- import { useState as useState3, useCallback as useCallback2 } from "react";
862
- function useVision() {
863
- const [isAnalyzing, setIsAnalyzing] = useState3(false);
864
- const [lastDescription, setLastDescription] = useState3(null);
865
- const [lastScreenshot, setLastScreenshot] = useState3(null);
866
- const [error, setError] = useState3(null);
867
- const analyze = useCallback2(async () => {
868
- setIsAnalyzing(true);
869
- setError(null);
870
- try {
871
- const result = await describeScreen();
872
- setLastDescription(result.description);
873
- setLastScreenshot(result.screenshot);
874
- return result.description;
875
- } catch (err2) {
876
- const errorMsg = err2 instanceof Error ? err2.message : "Vision analysis failed";
877
- setError(errorMsg);
878
- throw err2;
879
- } finally {
880
- setIsAnalyzing(false);
881
- }
882
- }, []);
883
- return {
884
- isAnalyzing,
885
- lastDescription,
886
- lastScreenshot,
887
- error,
888
- analyze
889
- };
890
- }
891
-
892
- // src/hooks/useTelegram.ts
893
- import { useState as useState4, useCallback as useCallback3, useEffect as useEffect2, useRef as useRef2 } from "react";
894
-
895
- // src/services/telegram.ts
896
- import { EventEmitter } from "events";
897
-
898
- // src/tools/shell.ts
899
- import { exec as exec4 } from "child_process";
900
- import { promisify as promisify4 } from "util";
901
-
902
- // src/tools/filesystem.ts
903
- import { promises as fs } from "fs";
904
- import { join, dirname } from "path";
905
- async function readFile(path3) {
906
- try {
907
- const content = await fs.readFile(path3, "utf-8");
908
- return ok(content);
909
- } catch (error) {
910
- return err(`Failed to read file: ${error.message}`);
911
- }
912
- }
913
- async function writeFile(path3, content) {
914
- try {
915
- const dir = dirname(path3);
916
- await fs.mkdir(dir, { recursive: true });
917
- await fs.writeFile(path3, content, "utf-8");
918
- return ok(`Written ${content.length} bytes to ${path3}`);
919
- } catch (error) {
920
- return err(`Failed to write file: ${error.message}`);
921
- }
922
- }
923
- async function listDir(path3, recursive = false) {
924
- try {
925
- const stat = await fs.stat(path3);
926
- if (!stat.isDirectory()) {
927
- return err(`Not a directory: ${path3}`);
928
- }
929
- const entries = [];
930
- async function walkDir(dir, prefix) {
931
- const items = await fs.readdir(dir, { withFileTypes: true });
932
- for (const item of items) {
933
- const displayPath = prefix ? `${prefix}/${item.name}` : item.name;
934
- if (item.isDirectory()) {
935
- entries.push(`${displayPath}/`);
936
- if (recursive) {
937
- await walkDir(join(dir, item.name), displayPath);
938
- }
939
- } else {
940
- entries.push(displayPath);
941
- }
942
- }
943
- }
944
- await walkDir(path3, "");
945
- entries.sort();
946
- return ok(entries.join("\n"));
947
- } catch (error) {
948
- return err(`Failed to list directory: ${error.message}`);
949
- }
950
- }
951
-
952
- // src/tools/clipboard.ts
953
- import clipboardy from "clipboardy";
954
-
955
- // src/tools/process.ts
956
- import { exec as exec2 } from "child_process";
957
- import { promisify as promisify2 } from "util";
958
- var execAsync2 = promisify2(exec2);
959
-
960
- // src/tools/computer.ts
961
- import { exec as exec3 } from "child_process";
962
- import { promisify as promisify3 } from "util";
963
- var execAsync3 = promisify3(exec3);
964
- async function moveMouse(x, y) {
965
- try {
966
- if (process.platform === "win32") {
967
- await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.Cursor]::Position = New-Object System.Drawing.Point(${x}, ${y})"`, { shell: "cmd.exe" });
968
- } else if (process.platform === "darwin") {
969
- await execAsync3(`cliclick m:${x},${y}`);
970
- } else {
971
- await execAsync3(`xdotool mousemove ${x} ${y}`);
972
- }
973
- return ok(`Mouse moved to (${x}, ${y})`);
974
- } catch (error) {
975
- return err(`Failed to move mouse: ${error instanceof Error ? error.message : "Unknown error"}`);
976
- }
977
- }
978
- async function clickMouse(button = "left") {
979
- try {
980
- if (process.platform === "win32") {
981
- const script = `
982
- Add-Type -MemberDefinition @"
983
- [DllImport("user32.dll",CharSet=CharSet.Auto,CallingConvention=CallingConvention.StdCall)]
984
- public static extern void mouse_event(long dwFlags, long dx, long dy, long cButtons, long dwExtraInfo);
985
- "@ -Name Mouse -Namespace Win32
986
- ${button === "left" ? "[Win32.Mouse]::mouse_event(0x02, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x04, 0, 0, 0, 0)" : button === "right" ? "[Win32.Mouse]::mouse_event(0x08, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x10, 0, 0, 0, 0)" : "[Win32.Mouse]::mouse_event(0x20, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x40, 0, 0, 0, 0)"}`;
987
- await execAsync3(`powershell -Command "${script.replace(/\n/g, " ")}"`, { shell: "cmd.exe" });
988
- } else if (process.platform === "darwin") {
989
- await execAsync3(`cliclick c:.`);
990
- } else {
991
- const btn = button === "left" ? "1" : button === "right" ? "3" : "2";
992
- await execAsync3(`xdotool click ${btn}`);
993
- }
994
- return ok(`Clicked ${button} button`);
995
- } catch (error) {
996
- return err(`Failed to click: ${error instanceof Error ? error.message : "Unknown error"}`);
997
- }
998
- }
999
- async function doubleClick() {
1000
- try {
1001
- if (process.platform === "win32") {
1002
- const script = `
1003
- Add-Type -MemberDefinition @"
1004
- [DllImport("user32.dll",CharSet=CharSet.Auto,CallingConvention=CallingConvention.StdCall)]
1005
- public static extern void mouse_event(long dwFlags, long dx, long dy, long cButtons, long dwExtraInfo);
1006
- "@ -Name Mouse -Namespace Win32
1007
- [Win32.Mouse]::mouse_event(0x02, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x04, 0, 0, 0, 0)
1008
- Start-Sleep -Milliseconds 50
1009
- [Win32.Mouse]::mouse_event(0x02, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x04, 0, 0, 0, 0)`;
1010
- await execAsync3(`powershell -Command "${script.replace(/\n/g, " ")}"`, { shell: "cmd.exe" });
1011
- } else if (process.platform === "darwin") {
1012
- await execAsync3(`cliclick dc:.`);
1013
- } else {
1014
- await execAsync3(`xdotool click --repeat 2 --delay 50 1`);
1015
- }
1016
- return ok("Double clicked");
1017
- } catch (error) {
1018
- return err(`Failed to double click: ${error instanceof Error ? error.message : "Unknown error"}`);
1019
- }
1020
- }
1021
- async function typeText(text) {
1022
- try {
1023
- if (process.platform === "win32") {
1024
- const escapedText = text.replace(/'/g, "''").replace(/[+^%~(){}[\]]/g, "{$&}");
1025
- await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${escapedText}')"`, { shell: "cmd.exe" });
1026
- } else if (process.platform === "darwin") {
1027
- const escaped = text.replace(/'/g, "'\\''");
1028
- await execAsync3(`osascript -e 'tell application "System Events" to keystroke "${escaped}"'`);
1029
- } else {
1030
- const escaped = text.replace(/'/g, "'\\''");
1031
- await execAsync3(`xdotool type '${escaped}'`);
1032
- }
1033
- return ok(`Typed: ${text}`);
1034
- } catch (error) {
1035
- return err(`Failed to type: ${error instanceof Error ? error.message : "Unknown error"}`);
1036
- }
1037
- }
1038
- async function pressKey(key) {
1039
- try {
1040
- if (process.platform === "win32") {
1041
- const winKeyMap = {
1042
- "enter": "{ENTER}",
1043
- "return": "{ENTER}",
1044
- "escape": "{ESC}",
1045
- "esc": "{ESC}",
1046
- "tab": "{TAB}",
1047
- "space": " ",
1048
- "backspace": "{BACKSPACE}",
1049
- "delete": "{DELETE}",
1050
- "up": "{UP}",
1051
- "down": "{DOWN}",
1052
- "left": "{LEFT}",
1053
- "right": "{RIGHT}",
1054
- "home": "{HOME}",
1055
- "end": "{END}",
1056
- "pageup": "{PGUP}",
1057
- "pagedown": "{PGDN}",
1058
- "f1": "{F1}",
1059
- "f2": "{F2}",
1060
- "f3": "{F3}",
1061
- "f4": "{F4}",
1062
- "f5": "{F5}",
1063
- "f6": "{F6}",
1064
- "f7": "{F7}",
1065
- "f8": "{F8}",
1066
- "f9": "{F9}",
1067
- "f10": "{F10}",
1068
- "f11": "{F11}",
1069
- "f12": "{F12}"
1070
- };
1071
- const winKey = winKeyMap[key.toLowerCase()] || key;
1072
- await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${winKey}')"`, { shell: "cmd.exe" });
1073
- } else if (process.platform === "darwin") {
1074
- const macKeyMap = {
1075
- "return": 36,
1076
- "enter": 36,
1077
- "escape": 53,
1078
- "esc": 53,
1079
- "tab": 48,
1080
- "space": 49,
1081
- "backspace": 51,
1082
- "delete": 117,
1083
- "up": 126,
1084
- "down": 125,
1085
- "left": 123,
1086
- "right": 124
1087
- };
1088
- const keyCode = macKeyMap[key.toLowerCase()];
1089
- if (keyCode) {
1090
- await execAsync3(`osascript -e 'tell application "System Events" to key code ${keyCode}'`);
1091
- } else {
1092
- await execAsync3(`osascript -e 'tell application "System Events" to keystroke "${key}"'`);
1093
- }
1094
- } else {
1095
- await execAsync3(`xdotool key ${key}`);
1096
- }
1097
- return ok(`Pressed: ${key}`);
1098
- } catch (error) {
1099
- return err(`Failed to press key: ${error instanceof Error ? error.message : "Unknown error"}`);
1100
- }
1101
- }
1102
- async function keyCombo(keys) {
1103
- try {
1104
- if (process.platform === "win32") {
1105
- const hasWin = keys.some((k) => k.toLowerCase() === "meta" || k.toLowerCase() === "win");
1106
- const hasR = keys.some((k) => k.toLowerCase() === "r");
1107
- if (hasWin && hasR) {
1108
- await execAsync3(`powershell -Command "$shell = New-Object -ComObject WScript.Shell; $shell.Run('explorer shell:::{2559a1f3-21d7-11d4-bdaf-00c04f60b9f0}')"`, { shell: "cmd.exe" });
1109
- return ok(`Pressed: ${keys.join("+")}`);
1110
- }
1111
- const modifierMap = {
1112
- "control": "^",
1113
- "ctrl": "^",
1114
- "alt": "%",
1115
- "shift": "+"
1116
- };
1117
- let combo = "";
1118
- const regularKeys = [];
1119
- for (const key of keys) {
1120
- const lower = key.toLowerCase();
1121
- if (modifierMap[lower]) {
1122
- combo += modifierMap[lower];
1123
- } else if (lower !== "meta" && lower !== "win") {
1124
- regularKeys.push(key.toLowerCase());
1125
- }
1126
- }
1127
- combo += regularKeys.join("");
1128
- await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${combo}')"`, { shell: "cmd.exe" });
1129
- } else if (process.platform === "darwin") {
1130
- const modifiers = keys.filter((k) => ["control", "ctrl", "alt", "shift", "command", "meta"].includes(k.toLowerCase()));
1131
- const regular = keys.filter((k) => !["control", "ctrl", "alt", "shift", "command", "meta"].includes(k.toLowerCase()));
1132
- let cmd = 'tell application "System Events" to keystroke "' + regular.join("") + '"';
1133
- if (modifiers.length > 0) {
1134
- const modMap = {
1135
- "control": "control down",
1136
- "ctrl": "control down",
1137
- "alt": "option down",
1138
- "shift": "shift down",
1139
- "command": "command down",
1140
- "meta": "command down"
1141
- };
1142
- cmd += " using {" + modifiers.map((m) => modMap[m.toLowerCase()]).join(", ") + "}";
1143
- }
1144
- await execAsync3(`osascript -e '${cmd}'`);
1145
- } else {
1146
- await execAsync3(`xdotool key ${keys.join("+")}`);
1147
- }
1148
- return ok(`Pressed: ${keys.join("+")}`);
1149
- } catch (error) {
1150
- return err(`Failed to press combo: ${error instanceof Error ? error.message : "Unknown error"}`);
1151
- }
1152
- }
1153
- async function getActiveWindow() {
1154
- try {
1155
- if (process.platform === "win32") {
1156
- const script = `
1157
- Add-Type @"
1158
- using System;
1159
- using System.Runtime.InteropServices;
1160
- using System.Text;
1161
- public class Win32 {
1162
- [DllImport("user32.dll")]
1163
- public static extern IntPtr GetForegroundWindow();
1164
- [DllImport("user32.dll")]
1165
- public static extern int GetWindowText(IntPtr hWnd, StringBuilder text, int count);
1166
- }
1167
- "@
1168
- $hwnd = [Win32]::GetForegroundWindow()
1169
- $sb = New-Object System.Text.StringBuilder 256
1170
- [Win32]::GetWindowText($hwnd, $sb, 256)
1171
- $sb.ToString()`;
1172
- const { stdout } = await execAsync3(`powershell -Command "${script.replace(/\n/g, " ")}"`, { shell: "cmd.exe" });
1173
- return ok(stdout.trim() || "Unknown window");
1174
- } else if (process.platform === "darwin") {
1175
- const { stdout } = await execAsync3(`osascript -e 'tell application "System Events" to get name of first application process whose frontmost is true'`);
1176
- return ok(stdout.trim());
1177
- } else {
1178
- const { stdout } = await execAsync3(`xdotool getactivewindow getwindowname`);
1179
- return ok(stdout.trim());
1180
- }
1181
- } catch (error) {
1182
- return err(`Failed to get active window: ${error instanceof Error ? error.message : "Unknown error"}`);
1183
- }
1184
- }
1185
- async function listWindows() {
1186
- try {
1187
- if (process.platform === "win32") {
1188
- const { stdout } = await execAsync3(`powershell -Command "Get-Process | Where-Object {$_.MainWindowTitle} | Select-Object ProcessName, MainWindowTitle | Format-Table -AutoSize"`, { shell: "cmd.exe" });
1189
- return ok(stdout);
1190
- } else if (process.platform === "darwin") {
1191
- const { stdout } = await execAsync3(`osascript -e 'tell application "System Events" to get name of every application process whose visible is true'`);
1192
- return ok(stdout);
1193
- } else {
1194
- const { stdout } = await execAsync3(`wmctrl -l`);
1195
- return ok(stdout);
1196
- }
1197
- } catch (error) {
1198
- return err(`Failed to list windows: ${error instanceof Error ? error.message : "Unknown error"}`);
1199
- }
1200
- }
1201
- async function focusWindow(title) {
1202
- try {
1203
- if (process.platform === "win32") {
1204
- const escaped = title.replace(/'/g, "''");
1205
- await execAsync3(`powershell -Command "$wshell = New-Object -ComObject wscript.shell; $wshell.AppActivate('${escaped}')"`, { shell: "cmd.exe" });
1206
- } else if (process.platform === "darwin") {
1207
- await execAsync3(`osascript -e 'tell application "${title}" to activate'`);
1208
- } else {
1209
- await execAsync3(`wmctrl -a "${title}"`);
1210
- }
1211
- return ok(`Focused window: ${title}`);
1212
- } catch (error) {
1213
- return err(`Failed to focus window: ${error instanceof Error ? error.message : "Unknown error"}`);
1214
- }
1215
- }
1216
- async function minimizeWindow(title) {
1217
- try {
1218
- if (process.platform === "win32") {
1219
- if (title) {
1220
- const escaped = title.replace(/'/g, "''");
1221
- const script = `
1222
- $proc = Get-Process | Where-Object { $_.MainWindowTitle -like '*${escaped}*' -and $_.MainWindowHandle -ne 0 } | Select-Object -First 1
1223
- if ($proc) {
1224
- Add-Type @"
1225
- using System;
1226
- using System.Runtime.InteropServices;
1227
- public class Win32 {
1228
- [DllImport("user32.dll")]
1229
- public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
1230
- }
1231
- "@
1232
- [Win32]::ShowWindow($proc.MainWindowHandle, 6)
1233
- Write-Output "Minimized: $($proc.MainWindowTitle)"
1234
- } else {
1235
- Write-Output "NOT_FOUND"
1236
- }`;
1237
- const { stdout } = await execAsync3(`powershell -Command "${script.replace(/\n/g, " ")}"`, { shell: "cmd.exe" });
1238
- if (stdout.includes("NOT_FOUND")) {
1239
- return err(`Window containing "${title}" not found`);
1240
- }
1241
- return ok(stdout.trim());
1242
- } else {
1243
- await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('% n')"`, { shell: "cmd.exe" });
1244
- return ok("Minimized active window");
1245
- }
1246
- } else if (process.platform === "darwin") {
1247
- if (title) {
1248
- await execAsync3(`osascript -e 'tell application "${title}" to set miniaturized of window 1 to true'`);
1249
- } else {
1250
- await execAsync3(`osascript -e 'tell application "System Events" to keystroke "m" using command down'`);
1251
- }
1252
- return ok(`Minimized window${title ? `: ${title}` : ""}`);
1253
- } else {
1254
- if (title) {
1255
- await execAsync3(`wmctrl -r "${title}" -b add,hidden`);
1256
- } else {
1257
- await execAsync3(`xdotool getactivewindow windowminimize`);
1258
- }
1259
- return ok(`Minimized window${title ? `: ${title}` : ""}`);
1260
- }
1261
- } catch (error) {
1262
- return err(`Failed to minimize window: ${error instanceof Error ? error.message : "Unknown error"}`);
1263
- }
1264
- }
1265
- async function maximizeWindow(title) {
1266
- try {
1267
- if (process.platform === "win32") {
1268
- if (title) {
1269
- const escaped = title.replace(/'/g, "''");
1270
- const script = `
1271
- $proc = Get-Process | Where-Object { $_.MainWindowTitle -like '*${escaped}*' -and $_.MainWindowHandle -ne 0 } | Select-Object -First 1
1272
- if ($proc) {
1273
- Add-Type @"
1274
- using System;
1275
- using System.Runtime.InteropServices;
1276
- public class Win32 {
1277
- [DllImport("user32.dll")]
1278
- public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
1279
- }
1280
- "@
1281
- [Win32]::ShowWindow($proc.MainWindowHandle, 3)
1282
- Write-Output "Maximized: $($proc.MainWindowTitle)"
1283
- } else {
1284
- Write-Output "NOT_FOUND"
1285
- }`;
1286
- const { stdout } = await execAsync3(`powershell -Command "${script.replace(/\n/g, " ")}"`, { shell: "cmd.exe" });
1287
- if (stdout.includes("NOT_FOUND")) {
1288
- return err(`Window containing "${title}" not found`);
1289
- }
1290
- return ok(stdout.trim());
1291
- } else {
1292
- await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('% x')"`, { shell: "cmd.exe" });
1293
- return ok("Maximized active window");
1294
- }
1295
- } else if (process.platform === "darwin") {
1296
- if (title) {
1297
- await execAsync3(`osascript -e 'tell application "${title}" to set zoomed of window 1 to true'`);
1298
- } else {
1299
- await execAsync3(`osascript -e 'tell application "System Events" to keystroke "f" using {control down, command down}'`);
1300
- }
1301
- return ok(`Maximized window${title ? `: ${title}` : ""}`);
1302
- } else {
1303
- if (title) {
1304
- await execAsync3(`wmctrl -r "${title}" -b add,maximized_vert,maximized_horz`);
1305
- } else {
1306
- await execAsync3(`wmctrl -r :ACTIVE: -b add,maximized_vert,maximized_horz`);
1307
- }
1308
- return ok(`Maximized window${title ? `: ${title}` : ""}`);
1309
- }
1310
- } catch (error) {
1311
- return err(`Failed to maximize window: ${error instanceof Error ? error.message : "Unknown error"}`);
1312
- }
1313
- }
1314
- async function closeWindow(title) {
1315
- try {
1316
- if (process.platform === "win32") {
1317
- if (title) {
1318
- const escaped = title.replace(/'/g, "''");
1319
- await execAsync3(`powershell -Command "Get-Process | Where-Object { $_.MainWindowTitle -like '*${escaped}*' } | ForEach-Object { $_.CloseMainWindow() }"`, { shell: "cmd.exe" });
1320
- return ok(`Closed window: ${title}`);
1321
- } else {
1322
- await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('%{F4}')"`, { shell: "cmd.exe" });
1323
- return ok("Closed active window");
1324
- }
1325
- } else if (process.platform === "darwin") {
1326
- if (title) {
1327
- await execAsync3(`osascript -e 'tell application "${title}" to close window 1'`);
1328
- } else {
1329
- await execAsync3(`osascript -e 'tell application "System Events" to keystroke "w" using command down'`);
1330
- }
1331
- return ok(`Closed window${title ? `: ${title}` : ""}`);
1332
- } else {
1333
- if (title) {
1334
- await execAsync3(`wmctrl -c "${title}"`);
1335
- } else {
1336
- await execAsync3(`xdotool getactivewindow windowclose`);
1337
- }
1338
- return ok(`Closed window${title ? `: ${title}` : ""}`);
1339
- }
1340
- } catch (error) {
1341
- return err(`Failed to close window: ${error instanceof Error ? error.message : "Unknown error"}`);
1342
- }
1343
- }
1344
- async function restoreWindow(title) {
1345
- try {
1346
- if (process.platform === "win32") {
1347
- const escaped = title.replace(/'/g, "''");
1348
- const script = `
1349
- $proc = Get-Process | Where-Object { $_.MainWindowTitle -like '*${escaped}*' -and $_.MainWindowHandle -ne 0 } | Select-Object -First 1
1350
- if ($proc) {
1351
- Add-Type @"
1352
- using System;
1353
- using System.Runtime.InteropServices;
1354
- public class Win32 {
1355
- [DllImport("user32.dll")]
1356
- public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
1357
- }
1358
- "@
1359
- [Win32]::ShowWindow($proc.MainWindowHandle, 9)
1360
- Write-Output "Restored: $($proc.MainWindowTitle)"
1361
- } else {
1362
- Write-Output "NOT_FOUND"
1363
- }`;
1364
- const { stdout } = await execAsync3(`powershell -Command "${script.replace(/\n/g, " ")}"`, { shell: "cmd.exe" });
1365
- if (stdout.includes("NOT_FOUND")) {
1366
- return err(`Window containing "${title}" not found`);
1367
- }
1368
- return ok(stdout.trim());
1369
- } else if (process.platform === "darwin") {
1370
- await execAsync3(`osascript -e 'tell application "${title}" to set miniaturized of window 1 to false'`);
1371
- return ok(`Restored window: ${title}`);
1372
- } else {
1373
- await execAsync3(`wmctrl -r "${title}" -b remove,hidden`);
1374
- return ok(`Restored window: ${title}`);
1375
- }
1376
- } catch (error) {
1377
- return err(`Failed to restore window: ${error instanceof Error ? error.message : "Unknown error"}`);
1378
- }
1379
- }
1380
- async function scrollMouse(amount) {
1381
- try {
1382
- if (process.platform === "win32") {
1383
- const direction = amount > 0 ? 120 * Math.abs(amount) : -120 * Math.abs(amount);
1384
- const script = `
1385
- Add-Type -MemberDefinition @"
1386
- [DllImport("user32.dll",CharSet=CharSet.Auto,CallingConvention=CallingConvention.StdCall)]
1387
- public static extern void mouse_event(long dwFlags, long dx, long dy, long cButtons, long dwExtraInfo);
1388
- "@ -Name Mouse -Namespace Win32
1389
- [Win32.Mouse]::mouse_event(0x0800, 0, 0, ${direction}, 0)`;
1390
- await execAsync3(`powershell -Command "${script.replace(/\n/g, " ")}"`, { shell: "cmd.exe" });
1391
- } else if (process.platform === "darwin") {
1392
- const dir = amount > 0 ? "u" : "d";
1393
- await execAsync3(`cliclick -r ${dir}:${Math.abs(amount)}`);
1394
- } else {
1395
- const btn = amount > 0 ? "4" : "5";
1396
- await execAsync3(`xdotool click --repeat ${Math.abs(amount)} ${btn}`);
1397
- }
1398
- return ok(`Scrolled ${amount > 0 ? "up" : "down"} by ${Math.abs(amount)}`);
1399
- } catch (error) {
1400
- return err(`Failed to scroll: ${error instanceof Error ? error.message : "Unknown error"}`);
1401
- }
1402
- }
1403
- async function getMousePosition() {
1404
- try {
1405
- if (process.platform === "win32") {
1406
- const { stdout } = await execAsync3(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; $p = [System.Windows.Forms.Cursor]::Position; Write-Output \\"$($p.X),$($p.Y)\\""`, { shell: "cmd.exe" });
1407
- return ok(`Mouse position: ${stdout.trim()}`);
1408
- } else if (process.platform === "darwin") {
1409
- const { stdout } = await execAsync3(`cliclick p`);
1410
- return ok(`Mouse position: ${stdout.trim()}`);
1411
- } else {
1412
- const { stdout } = await execAsync3(`xdotool getmouselocation --shell`);
1413
- return ok(stdout);
1414
- }
1415
- } catch (error) {
1416
- return err(`Failed to get mouse position: ${error instanceof Error ? error.message : "Unknown error"}`);
1417
- }
1418
- }
1419
-
1420
- // src/tools/index.ts
1421
- function ok(output) {
1422
- return { success: true, output };
1423
- }
1424
- function err(error) {
1425
- return { success: false, output: "", error };
1426
- }
1427
-
1428
- // src/tools/shell.ts
1429
- var execAsync4 = promisify4(exec4);
1430
- async function runCommand(cmd, timeout = 3e4) {
1431
- try {
1432
- const isWindows = process.platform === "win32";
1433
- const shell = isWindows ? "cmd.exe" : "/bin/sh";
1434
- const shellArg = isWindows ? "/C" : "-c";
1435
- const { stdout, stderr } = await execAsync4(cmd, {
1436
- shell,
1437
- timeout,
1438
- maxBuffer: 10 * 1024 * 1024
1439
- // 10MB
1440
- });
1441
- if (stderr && stderr.trim()) {
1442
- return ok(`${stdout}
1443
- [stderr]: ${stderr}`);
1444
- }
1445
- return ok(stdout || "(no output)");
1446
- } catch (error) {
1447
- if (error.killed) {
1448
- return err(`Command timed out after ${timeout}ms`);
1449
- }
1450
- const stderr = error.stderr || "";
1451
- const stdout = error.stdout || "";
1452
- return {
1453
- success: false,
1454
- output: stdout,
1455
- error: `Exit code: ${error.code || -1}
1456
- ${stderr}`
1457
- };
1458
- }
1459
- }
1460
-
1461
- // src/services/browser.ts
1462
- import { chromium } from "playwright";
1463
- import * as path from "path";
1464
- import * as os2 from "os";
1465
- import * as fs2 from "fs";
1466
- var context = null;
1467
- var activePage = null;
1468
- var defaultConfig = {
1469
- headless: false,
1470
- // Show browser so user can see what's happening
1471
- slowMo: 50,
1472
- // Slight delay for visibility
1473
- viewport: { width: 1280, height: 800 },
1474
- useSystemBrowser: true
1475
- // Default to using system Chrome
1476
- };
1477
- function findSystemBrowser() {
1478
- const possiblePaths = [
1479
- // Chrome paths
1480
- path.join(process.env["PROGRAMFILES"] || "", "Google", "Chrome", "Application", "chrome.exe"),
1481
- path.join(process.env["PROGRAMFILES(X86)"] || "", "Google", "Chrome", "Application", "chrome.exe"),
1482
- path.join(process.env["LOCALAPPDATA"] || "", "Google", "Chrome", "Application", "chrome.exe"),
1483
- // Edge paths (fallback)
1484
- path.join(process.env["PROGRAMFILES"] || "", "Microsoft", "Edge", "Application", "msedge.exe"),
1485
- path.join(process.env["PROGRAMFILES(X86)"] || "", "Microsoft", "Edge", "Application", "msedge.exe")
1486
- ];
1487
- for (const browserPath of possiblePaths) {
1488
- if (fs2.existsSync(browserPath)) {
1489
- return browserPath;
1490
- }
1491
- }
1492
- return null;
1493
- }
1494
- function getChromeUserDataDir() {
1495
- const cnapseProfile = path.join(os2.homedir(), ".cnapse", "chrome-profile");
1496
- if (!fs2.existsSync(cnapseProfile)) {
1497
- fs2.mkdirSync(cnapseProfile, { recursive: true });
1498
- }
1499
- return cnapseProfile;
1500
- }
1501
- async function initBrowser(config = {}) {
1502
- const cfg = { ...defaultConfig, ...config };
1503
- if (!context) {
1504
- const browserPath = cfg.useSystemBrowser ? findSystemBrowser() : null;
1505
- const userDataDir = getChromeUserDataDir();
1506
- if (browserPath && cfg.useSystemBrowser) {
1507
- context = await chromium.launchPersistentContext(userDataDir, {
1508
- headless: cfg.headless,
1509
- slowMo: cfg.slowMo,
1510
- viewport: cfg.viewport,
1511
- executablePath: browserPath,
1512
- channel: void 0,
1513
- // Don't use channel when specifying executablePath
1514
- args: [
1515
- "--disable-blink-features=AutomationControlled",
1516
- // Less bot detection
1517
- "--no-first-run",
1518
- "--no-default-browser-check"
1519
- ]
1520
- });
1521
- } else {
1522
- context = await chromium.launchPersistentContext(userDataDir, {
1523
- headless: cfg.headless,
1524
- slowMo: cfg.slowMo,
1525
- viewport: cfg.viewport,
1526
- args: [
1527
- "--disable-blink-features=AutomationControlled"
1528
- ]
1529
- });
1530
- }
1531
- }
1532
- const pages = context.pages();
1533
- if (pages.length > 0) {
1534
- activePage = pages[0];
1535
- } else {
1536
- activePage = await context.newPage();
1537
- }
1538
- return activePage;
1539
- }
1540
- async function getPage() {
1541
- if (!activePage) {
1542
- return initBrowser();
1543
- }
1544
- return activePage;
1545
- }
1546
- async function navigateTo(url) {
1547
- const page = await getPage();
1548
- await page.goto(url, { waitUntil: "domcontentloaded" });
1549
- }
1550
- async function takeScreenshot() {
1551
- const page = await getPage();
1552
- const buffer = await page.screenshot({ type: "png" });
1553
- return buffer.toString("base64");
1554
- }
1555
- async function clickElement(selector, timeout = 1e4) {
1556
- const page = await getPage();
1557
- try {
1558
- await page.click(selector, { timeout });
1559
- return true;
1560
- } catch {
1561
- return false;
1562
- }
1563
- }
1564
- async function typeInElement(selector, text, timeout = 1e4) {
1565
- const page = await getPage();
1566
- try {
1567
- await page.fill(selector, text, { timeout });
1568
- return true;
1569
- } catch {
1570
- return false;
1571
- }
1572
- }
1573
- async function pressKey2(key) {
1574
- const page = await getPage();
1575
- await page.keyboard.press(key);
1576
- }
1577
- async function scroll(direction, amount = 500) {
1578
- const page = await getPage();
1579
- await page.mouse.wheel(0, direction === "down" ? amount : -amount);
1580
- }
1581
- async function getPageText() {
1582
- const page = await getPage();
1583
- return await page.evaluate(() => document.body.innerText);
1584
- }
1585
- async function elementExists(selector) {
1586
- const page = await getPage();
1587
- try {
1588
- const element = await page.$(selector);
1589
- return element !== null;
1590
- } catch {
1591
- return false;
1592
- }
1593
- }
1594
- var aiChatConfigs = {
1595
- perplexity: {
1596
- url: "https://www.perplexity.ai",
1597
- inputSelector: 'textarea[placeholder*="Ask"]',
1598
- submitKey: "Enter",
1599
- responseSelector: '.prose, [class*="answer"], [class*="response"]',
1600
- waitForResponse: 15e3
1601
- },
1602
- chatgpt: {
1603
- url: "https://chat.openai.com",
1604
- inputSelector: 'textarea[id="prompt-textarea"], textarea[data-id="root"]',
1605
- submitSelector: 'button[data-testid="send-button"]',
1606
- responseSelector: '[data-message-author-role="assistant"]',
1607
- waitForResponse: 2e4
1608
- },
1609
- claude: {
1610
- url: "https://claude.ai",
1611
- inputSelector: '[contenteditable="true"], textarea',
1612
- submitKey: "Enter",
1613
- responseSelector: '[data-testid="message-content"]',
1614
- waitForResponse: 2e4
1615
- },
1616
- copilot: {
1617
- url: "https://copilot.microsoft.com",
1618
- inputSelector: 'textarea, [contenteditable="true"]',
1619
- submitKey: "Enter",
1620
- responseSelector: '[class*="response"], [class*="message"]',
1621
- waitForResponse: 15e3
1622
- },
1623
- google: {
1624
- url: "https://www.google.com",
1625
- inputSelector: 'textarea[name="q"], input[name="q"]',
1626
- submitKey: "Enter",
1627
- responseSelector: "#search",
1628
- waitForResponse: 5e3
1629
- }
1630
- };
1631
- async function askAI(site, question, includeScreenshot = false) {
1632
- const config = aiChatConfigs[site];
1633
- if (!config) {
1634
- throw new Error(`Unknown AI site: ${site}`);
1635
- }
1636
- const page = await getPage();
1637
- await page.goto(config.url, { waitUntil: "domcontentloaded" });
1638
- await page.waitForTimeout(2e3);
1639
- try {
1640
- await page.waitForSelector(config.inputSelector, { timeout: 1e4 });
1641
- await page.fill(config.inputSelector, question);
1642
- } catch {
1643
- await page.click(config.inputSelector);
1644
- await page.type(config.inputSelector, question, { delay: 30 });
1645
- }
1646
- if (config.submitSelector) {
1647
- await page.click(config.submitSelector);
1648
- } else if (config.submitKey) {
1649
- await page.keyboard.press(config.submitKey);
1650
- }
1651
- await page.waitForTimeout(config.waitForResponse);
1652
- let response = "";
1653
- try {
1654
- const elements = await page.$$(config.responseSelector);
1655
- if (elements.length > 0) {
1656
- const lastElement = elements[elements.length - 1];
1657
- response = await lastElement.textContent() || "";
1658
- }
1659
- } catch {
1660
- response = await getPageText();
1661
- }
1662
- let screenshot;
1663
- if (includeScreenshot) {
1664
- screenshot = await takeScreenshot();
1665
- }
1666
- return { response: response.trim(), screenshot };
1667
- }
1668
- async function getFullAIResponse(site, maxScrolls = 5) {
1669
- const config = aiChatConfigs[site];
1670
- const page = await getPage();
1671
- const responseParts = [];
1672
- for (let i = 0; i < maxScrolls; i++) {
286
+ setMessages((prev) => [...prev, userMsg, assistantMsg]);
287
+ setIsProcessing(true);
1673
288
  try {
1674
- const elements = await page.$$(config.responseSelector);
1675
- if (elements.length > 0) {
1676
- const lastElement = elements[elements.length - 1];
1677
- const text = await lastElement.textContent();
1678
- if (text) {
1679
- responseParts.push(text.trim());
289
+ const apiMessages = messages.filter((m) => m.role === "user" || m.role === "assistant").slice(-10).map((m) => ({ role: m.role, content: m.content }));
290
+ apiMessages.push({ role: "user", content });
291
+ let response;
292
+ if (screenWatchRef.current) {
293
+ const screenshot = await captureScreenshot();
294
+ if (screenshot) {
295
+ response = await chatWithVision(apiMessages, screenshot);
296
+ } else {
297
+ response = await chat(apiMessages);
1680
298
  }
299
+ } else {
300
+ response = await chat(apiMessages);
1681
301
  }
1682
- await page.mouse.wheel(0, 500);
1683
- await page.waitForTimeout(1e3);
1684
- const atBottom = await page.evaluate(() => {
1685
- return window.innerHeight + window.scrollY >= document.body.scrollHeight - 100;
1686
- });
1687
- if (atBottom) break;
1688
- } catch {
1689
- break;
302
+ setMessages(
303
+ (prev) => prev.map(
304
+ (m) => m.id === assistantId ? { ...m, content: response.content || "(no response)", isStreaming: false } : m
305
+ )
306
+ );
307
+ } catch (err) {
308
+ const errorMsg = err instanceof Error ? err.message : "Unknown error";
309
+ setError(errorMsg);
310
+ setMessages(
311
+ (prev) => prev.map(
312
+ (m) => m.id === assistantId ? { ...m, content: `Error: ${errorMsg}`, isStreaming: false } : m
313
+ )
314
+ );
315
+ } finally {
316
+ setIsProcessing(false);
1690
317
  }
1691
- }
1692
- return responseParts;
1693
- }
1694
- async function sendGmail(email) {
1695
- const page = await getPage();
1696
- try {
1697
- await page.goto("https://mail.google.com/mail/u/0/#inbox?compose=new");
1698
- await page.waitForTimeout(3e3);
1699
- await page.waitForSelector('input[aria-label*="To"]', { timeout: 1e4 });
1700
- await page.fill('input[aria-label*="To"]', email.to);
1701
- await page.keyboard.press("Tab");
1702
- await page.fill('input[name="subjectbox"]', email.subject);
1703
- await page.keyboard.press("Tab");
1704
- await page.fill('[aria-label*="Message Body"], [role="textbox"]', email.body);
1705
- await page.keyboard.press("Control+Enter");
1706
- await page.waitForTimeout(2e3);
1707
- return true;
1708
- } catch {
1709
- return false;
1710
- }
1711
- }
1712
- async function sendOutlook(email) {
1713
- const page = await getPage();
1714
- try {
1715
- await page.goto("https://outlook.office.com/mail/0/inbox");
1716
- await page.waitForTimeout(3e3);
1717
- await page.click('button[aria-label*="New mail"], button[title*="New mail"]');
1718
- await page.waitForTimeout(2e3);
1719
- await page.fill('input[aria-label*="To"]', email.to);
1720
- await page.keyboard.press("Tab");
1721
- await page.fill('input[aria-label*="Subject"], input[placeholder*="Subject"]', email.subject);
1722
- await page.keyboard.press("Tab");
1723
- await page.fill('[aria-label*="Message body"], [role="textbox"]', email.body);
1724
- await page.click('button[aria-label*="Send"], button[title*="Send"]');
1725
- await page.waitForTimeout(2e3);
1726
- return true;
1727
- } catch {
1728
- return false;
1729
- }
1730
- }
1731
- async function googleSheetsType(cellData) {
1732
- const page = await getPage();
1733
- try {
1734
- await page.goto("https://docs.google.com/spreadsheets/create");
1735
- await page.waitForTimeout(5e3);
1736
- for (const { cell, value } of cellData) {
1737
- await page.click("input#t-name-box");
1738
- await page.fill("input#t-name-box", cell);
1739
- await page.keyboard.press("Enter");
1740
- await page.waitForTimeout(500);
1741
- await page.keyboard.type(value);
1742
- await page.keyboard.press("Enter");
1743
- await page.waitForTimeout(300);
1744
- }
1745
- return true;
1746
- } catch {
1747
- return false;
1748
- }
1749
- }
1750
- async function googleDocsType(text) {
1751
- const page = await getPage();
1752
- try {
1753
- await page.goto("https://docs.google.com/document/create");
1754
- await page.waitForTimeout(5e3);
1755
- await page.click(".kix-appview-editor");
1756
- await page.waitForTimeout(500);
1757
- await page.keyboard.type(text, { delay: 20 });
1758
- return true;
1759
- } catch {
1760
- return false;
1761
- }
1762
- }
1763
- async function webSearch(query, engine = "google") {
1764
- const page = await getPage();
1765
- const results = [];
1766
- const urls = {
1767
- google: "https://www.google.com",
1768
- bing: "https://www.bing.com",
1769
- duckduckgo: "https://duckduckgo.com"
1770
- };
1771
- const selectors = {
1772
- google: { input: 'textarea[name="q"]', results: "#search .g h3" },
1773
- bing: { input: 'input[name="q"]', results: "#b_results h2 a" },
1774
- duckduckgo: { input: 'input[name="q"]', results: "[data-result] h2" }
318
+ }, [messages, isProcessing]);
319
+ const clearMessages = useCallback(() => {
320
+ setMessages([WELCOME_MESSAGE]);
321
+ setError(null);
322
+ }, []);
323
+ return {
324
+ messages,
325
+ isProcessing,
326
+ error,
327
+ sendMessage,
328
+ addSystemMessage,
329
+ clearMessages
1775
330
  };
1776
- try {
1777
- await page.goto(urls[engine]);
1778
- await page.waitForTimeout(2e3);
1779
- await page.fill(selectors[engine].input, query);
1780
- await page.keyboard.press("Enter");
1781
- await page.waitForTimeout(3e3);
1782
- const elements = await page.$$(selectors[engine].results);
1783
- for (const el of elements.slice(0, 10)) {
1784
- const text = await el.textContent();
1785
- if (text) results.push(text);
1786
- }
1787
- } catch {
1788
- }
1789
- return results;
1790
331
  }
1791
- async function research(topic, maxSources = 3) {
1792
- const page = await getPage();
1793
- const sources = [];
1794
- await webSearch(topic);
1795
- await page.waitForTimeout(2e3);
1796
- for (let i = 0; i < maxSources; i++) {
332
+
333
+ // src/hooks/useVision.ts
334
+ import { useState as useState3, useCallback as useCallback2 } from "react";
335
+ function useVision() {
336
+ const [isAnalyzing, setIsAnalyzing] = useState3(false);
337
+ const [lastDescription, setLastDescription] = useState3(null);
338
+ const [lastScreenshot, setLastScreenshot] = useState3(null);
339
+ const [error, setError] = useState3(null);
340
+ const analyze = useCallback2(async () => {
341
+ setIsAnalyzing(true);
342
+ setError(null);
1797
343
  try {
1798
- const results = await page.$$("#search .g");
1799
- if (results[i]) {
1800
- const titleEl = await results[i].$("h3");
1801
- const linkEl = await results[i].$("a");
1802
- const title = await titleEl?.textContent() || "Unknown";
1803
- const url = await linkEl?.getAttribute("href") || "";
1804
- await titleEl?.click();
1805
- await page.waitForTimeout(3e3);
1806
- const content = await page.evaluate(() => {
1807
- const article = document.querySelector("article, main, .content, #content");
1808
- return article?.textContent?.slice(0, 2e3) || document.body.innerText.slice(0, 2e3);
1809
- });
1810
- sources.push({ title, url, content: content.trim() });
1811
- await page.goBack();
1812
- await page.waitForTimeout(1500);
1813
- }
1814
- } catch {
1815
- continue;
344
+ const result = await describeScreen();
345
+ setLastDescription(result.description);
346
+ setLastScreenshot(result.screenshot);
347
+ return result.description;
348
+ } catch (err) {
349
+ const errorMsg = err instanceof Error ? err.message : "Vision analysis failed";
350
+ setError(errorMsg);
351
+ throw err;
352
+ } finally {
353
+ setIsAnalyzing(false);
1816
354
  }
1817
- }
355
+ }, []);
1818
356
  return {
1819
- query: topic,
1820
- sources,
1821
- summary: ""
1822
- // To be filled by AI
357
+ isAnalyzing,
358
+ lastDescription,
359
+ lastScreenshot,
360
+ error,
361
+ analyze
1823
362
  };
1824
363
  }
1825
364
 
365
+ // src/hooks/useTelegram.ts
366
+ import { useState as useState4, useCallback as useCallback3, useEffect as useEffect2, useRef as useRef2 } from "react";
367
+
368
+ // src/services/telegram.ts
369
+ import { EventEmitter } from "events";
370
+
1826
371
  // src/lib/tasks.ts
1827
- import * as fs3 from "fs";
1828
- import * as path2 from "path";
1829
- import * as os3 from "os";
1830
- var TASK_MEMORY_FILE = path2.join(os3.homedir(), ".cnapse", "task-memory.json");
372
+ import * as fs from "fs";
373
+ import * as path from "path";
374
+ import * as os from "os";
375
+ var TASK_MEMORY_FILE = path.join(os.homedir(), ".cnapse", "task-memory.json");
1831
376
  function loadTaskMemory() {
1832
377
  try {
1833
- if (fs3.existsSync(TASK_MEMORY_FILE)) {
1834
- const data = fs3.readFileSync(TASK_MEMORY_FILE, "utf-8");
378
+ if (fs.existsSync(TASK_MEMORY_FILE)) {
379
+ const data = fs.readFileSync(TASK_MEMORY_FILE, "utf-8");
1835
380
  return JSON.parse(data);
1836
381
  }
1837
382
  } catch {
@@ -1857,11 +402,11 @@ function saveTaskPattern(input, steps) {
1857
402
  });
1858
403
  }
1859
404
  memory.patterns = memory.patterns.sort((a, b) => b.successCount - a.successCount).slice(0, 100);
1860
- const dir = path2.dirname(TASK_MEMORY_FILE);
1861
- if (!fs3.existsSync(dir)) {
1862
- fs3.mkdirSync(dir, { recursive: true });
405
+ const dir = path.dirname(TASK_MEMORY_FILE);
406
+ if (!fs.existsSync(dir)) {
407
+ fs.mkdirSync(dir, { recursive: true });
1863
408
  }
1864
- fs3.writeFileSync(TASK_MEMORY_FILE, JSON.stringify(memory, null, 2));
409
+ fs.writeFileSync(TASK_MEMORY_FILE, JSON.stringify(memory, null, 2));
1865
410
  } catch {
1866
411
  }
1867
412
  }
@@ -2348,31 +893,16 @@ ${existingResult.output}`;
2348
893
  const supportedSites = ["perplexity", "chatgpt", "claude", "copilot", "google"];
2349
894
  const siteLower = site.toLowerCase();
2350
895
  if (supportedSites.includes(siteLower)) {
2351
- const result = await askAI(siteLower, question, true);
2352
- if (result.response.length < 500) {
2353
- const fullParts = await getFullAIResponse(siteLower, 5);
2354
- if (fullParts.length > 0) {
2355
- step.result = `\u{1F4DD} ${site.charAt(0).toUpperCase() + site.slice(1)} says:
2356
-
2357
- ${fullParts.join("\n\n")}`;
2358
- break;
2359
- }
2360
- }
896
+ const result = await askAI(siteLower, question);
2361
897
  step.result = `\u{1F4DD} ${site.charAt(0).toUpperCase() + site.slice(1)} says:
2362
898
 
2363
899
  ${result.response}`;
2364
900
  } else {
2365
- await navigateTo(`https://${site}`);
2366
- await sleep(2e3);
2367
- const page = await getPage();
2368
- const inputs = ["textarea", 'input[type="text"]', 'input[type="search"]', '[contenteditable="true"]'];
2369
- for (const selector of inputs) {
2370
- if (await elementExists(selector)) {
2371
- await typeInElement(selector, question);
2372
- await pressKey2("Enter");
2373
- break;
2374
- }
2375
- }
901
+ await openUrl(`https://${site}`);
902
+ await sleep(4e3);
903
+ await typeText(question);
904
+ await sleep(300);
905
+ await pressKey("Return");
2376
906
  await sleep(5e3);
2377
907
  const pageText = await getPageText();
2378
908
  step.result = `\u{1F4DD} Response from ${site}:
@@ -2386,17 +916,8 @@ ${pageText.slice(0, 3e3)}`;
2386
916
  step.result = vision.description;
2387
917
  break;
2388
918
  case "web_search": {
2389
- const searchResults = await webSearch(params, "google");
2390
- if (searchResults.length > 0) {
2391
- step.result = `\u{1F50D} Search results for "${params}":
2392
-
2393
- ${searchResults.map((r, i) => `${i + 1}. ${r}`).join("\n")}`;
2394
- } else {
2395
- const pageText = await getPageText();
2396
- step.result = `\u{1F50D} Search results for "${params}":
2397
-
2398
- ${pageText.slice(0, 2e3)}`;
2399
- }
919
+ const searchResult = await webSearch(params, "google");
920
+ step.result = searchResult;
2400
921
  break;
2401
922
  }
2402
923
  case "send_email": {
@@ -2412,9 +933,9 @@ ${pageText.slice(0, 2e3)}`;
2412
933
  throw new Error(`Unsupported email provider: ${provider}. Use gmail or outlook.`);
2413
934
  }
2414
935
  if (success) {
2415
- step.result = `\u{1F4E7} Email sent via ${provider} to ${to}`;
936
+ step.result = `\u{1F4E7} Email composed via ${provider} to ${to} (check browser to confirm send)`;
2416
937
  } else {
2417
- throw new Error(`Failed to send email via ${provider}. Make sure you're logged in.`);
938
+ throw new Error(`Failed to open email via ${provider}. Make sure you're logged in.`);
2418
939
  }
2419
940
  break;
2420
941
  }
@@ -2423,9 +944,9 @@ ${pageText.slice(0, 2e3)}`;
2423
944
  switch (sheetCmd.toLowerCase()) {
2424
945
  case "new": {
2425
946
  const sheetName = sheetArgs[0] || "Untitled spreadsheet";
2426
- await navigateTo("https://docs.google.com/spreadsheets/create");
947
+ await openGoogleSheet();
2427
948
  await sleep(5e3);
2428
- step.result = `\u{1F4CA} Created Google Sheet: ${sheetName}`;
949
+ step.result = `\u{1F4CA} Opened new Google Sheet: ${sheetName}`;
2429
950
  break;
2430
951
  }
2431
952
  case "type": {
@@ -2436,13 +957,9 @@ ${pageText.slice(0, 2e3)}`;
2436
957
  break;
2437
958
  }
2438
959
  case "read": {
2439
- const screenshot = await takeScreenshot();
2440
- const analysis = await chat([{
2441
- role: "user",
2442
- content: "Describe the contents of this Google Sheet. List visible data in the cells."
2443
- }]);
960
+ const vision2 = await describeScreen();
2444
961
  step.result = `\u{1F4CA} Current sheet view:
2445
- ${analysis.content}`;
962
+ ${vision2.description}`;
2446
963
  break;
2447
964
  }
2448
965
  default:
@@ -2455,8 +972,9 @@ ${analysis.content}`;
2455
972
  switch (docCmd.toLowerCase()) {
2456
973
  case "new": {
2457
974
  const docName = docArgs[0] || "Untitled document";
2458
- const success = await googleDocsType("");
2459
- step.result = success ? `\u{1F4C4} Created Google Doc: ${docName}` : `\u{1F4C4} Could not create Google Doc`;
975
+ await openGoogleDoc();
976
+ await sleep(5e3);
977
+ step.result = `\u{1F4C4} Opened new Google Doc: ${docName}`;
2460
978
  break;
2461
979
  }
2462
980
  case "type": {
@@ -2473,15 +991,11 @@ ${analysis.content}`;
2473
991
  case "research": {
2474
992
  const researchQuery = params;
2475
993
  const researchData = await research(researchQuery, 3);
2476
- const sourceSummaries = researchData.sources.map(
2477
- (s, i) => `Source ${i + 1}: ${s.title}
2478
- ${s.content.slice(0, 500)}...`
2479
- ).join("\n\n");
2480
994
  const synthesis = await chat([{
2481
995
  role: "user",
2482
- content: `Based on the following research gathered about "${researchQuery}", provide a comprehensive summary:
996
+ content: `Based on the following search results about "${researchQuery}", provide a comprehensive summary:
2483
997
 
2484
- ${sourceSummaries}
998
+ ${researchData.summary}
2485
999
 
2486
1000
  Create a well-organized summary with:
2487
1001
  1. Key findings
@@ -2508,12 +1022,10 @@ Here's what I see on my screen: ${currentScreen.description}`;
2508
1022
  if (!supportedLLMs.includes(llmLower)) {
2509
1023
  throw new Error(`Unknown LLM: ${llmName}. Supported: ${supportedLLMs.join(", ")}`);
2510
1024
  }
2511
- const result = await askAI(llmLower, fullQuestion, false);
2512
- const fullParts = await getFullAIResponse(llmLower, 3);
2513
- const finalResponse = fullParts.length > 0 ? fullParts.join("\n\n") : result.response;
1025
+ const result = await askAI(llmLower, fullQuestion);
2514
1026
  step.result = `\u{1F916} ${llmName} says:
2515
1027
 
2516
- ${finalResponse}`;
1028
+ ${result.response}`;
2517
1029
  break;
2518
1030
  }
2519
1031
  case "learn_ui": {
@@ -2539,116 +1051,206 @@ ${uiAnalysis.content}`;
2539
1051
  }
2540
1052
  case "adaptive_do": {
2541
1053
  const goal = params;
2542
- const maxAttempts = 5;
1054
+ const maxAttempts = 25;
2543
1055
  const actionHistory = [];
2544
1056
  let accomplished = false;
2545
- const page = await getPage();
1057
+ let stuckCount = 0;
1058
+ const stuckThreshold = 3;
1059
+ let lastScreenHash = "";
1060
+ const { getLearner } = await import("./learner-KH3TFTD7.js");
1061
+ const learner = getLearner();
1062
+ await learner.load();
1063
+ const initialScreen = await describeScreen();
1064
+ const remembered = await learner.recall(goal, initialScreen.description);
1065
+ if (remembered && remembered.successCount > remembered.failCount) {
1066
+ actionHistory.push(`\u{1F4DA} Found remembered solution from ${remembered.source}`);
1067
+ }
2546
1068
  for (let attempt = 0; attempt < maxAttempts && !accomplished; attempt++) {
2547
- const screenshot = await takeScreenshot();
2548
- const currentState = await chat([{
2549
- role: "user",
2550
- content: `Describe what you see on this screen. What app/website is it? What elements are visible?`
2551
- }]);
1069
+ const currentScreen = await describeScreen();
1070
+ const currentHash = currentScreen.screenshot.slice(0, 1e3);
1071
+ const screenChanged = currentHash !== lastScreenHash;
1072
+ if (!screenChanged && attempt > 0) {
1073
+ stuckCount++;
1074
+ } else {
1075
+ stuckCount = Math.max(0, stuckCount - 1);
1076
+ }
1077
+ lastScreenHash = currentHash;
2552
1078
  const nextAction = await chat([{
2553
1079
  role: "user",
2554
1080
  content: `GOAL: ${goal}
2555
1081
 
2556
- CURRENT SCREEN: ${currentState.content}
1082
+ CURRENT SCREEN: ${currentScreen.description}
1083
+
1084
+ PREVIOUS ACTIONS:
1085
+ ${actionHistory.slice(-5).join("\n") || "None yet"}
2557
1086
 
2558
- PREVIOUS ACTIONS TAKEN:
2559
- ${actionHistory.length > 0 ? actionHistory.join("\n") : "None yet"}
1087
+ ATTEMPT: ${attempt + 1}/${maxAttempts}
1088
+ STUCK COUNT: ${stuckCount} (will ask for help at ${stuckThreshold})
2560
1089
 
2561
1090
  Based on what you see, what's the SINGLE next action to take?
2562
- Options:
2563
- - click: Click element (describe CSS selector or visible text)
2564
- - type: Type something (specify selector and text)
2565
- - press: Press a key (specify key)
2566
- - scroll: Scroll up/down
2567
- - navigate: Go to URL
1091
+
1092
+ Available actions:
1093
+ - click: Click at current mouse position
1094
+ - clickAt: Click at coordinates (VALUE: x,y)
1095
+ - moveTo: Move mouse to coordinates (VALUE: x,y)
1096
+ - type: Type text (VALUE: text to type)
1097
+ - press: Press a key (VALUE: Enter, Tab, Escape, etc.)
1098
+ - keyCombo: Key combination (VALUE: command+s, control+c, etc.)
1099
+ - scroll: Scroll (VALUE: up or down)
1100
+ - navigate: Open URL (VALUE: full URL)
1101
+ - wait: Wait for something to load (VALUE: seconds)
1102
+ - findClick: Find element and click it (VALUE: description of element)
2568
1103
  - done: Goal is accomplished
2569
1104
  - stuck: Can't figure out what to do
2570
1105
 
2571
- Respond in format:
1106
+ Respond EXACTLY in this format:
2572
1107
  ACTION: <action_type>
2573
- SELECTOR: <css selector or text to find>
2574
- VALUE: <text to type or URL>
2575
- REASONING: <why>`
1108
+ VALUE: <parameter>
1109
+ REASONING: <brief why>`
2576
1110
  }]);
2577
1111
  const actionContent = nextAction.content;
2578
1112
  const actionMatch = actionContent.match(/ACTION:\s*(\w+)/i);
2579
- const selectorMatch = actionContent.match(/SELECTOR:\s*(.+?)(?:\n|$)/i);
2580
1113
  const valueMatch = actionContent.match(/VALUE:\s*(.+?)(?:\n|$)/i);
2581
1114
  if (!actionMatch) {
2582
- actionHistory.push(`Attempt ${attempt + 1}: Couldn't parse action`);
1115
+ actionHistory.push(`[${attempt + 1}] \u26A0\uFE0F Couldn't parse action`);
2583
1116
  continue;
2584
1117
  }
2585
1118
  const action = actionMatch[1].toLowerCase();
2586
- const selector = selectorMatch?.[1]?.trim() || "";
2587
1119
  const value = valueMatch?.[1]?.trim() || "";
2588
1120
  if (action === "done") {
2589
1121
  accomplished = true;
2590
- actionHistory.push(`Attempt ${attempt + 1}: Goal accomplished!`);
1122
+ actionHistory.push(`[${attempt + 1}] \u2705 Goal accomplished!`);
1123
+ if (actionHistory.length > 1) {
1124
+ const lastSuccessfulAction = actionHistory[actionHistory.length - 2];
1125
+ const actionParts = lastSuccessfulAction.match(/→ (\w+)(?:\s*"(.+)")?/);
1126
+ if (actionParts) {
1127
+ await learner.learn(
1128
+ currentScreen.description.slice(0, 300),
1129
+ goal,
1130
+ actionParts[1],
1131
+ actionParts[2] || "",
1132
+ "self"
1133
+ );
1134
+ }
1135
+ }
2591
1136
  break;
2592
1137
  }
2593
- if (action === "stuck") {
2594
- actionHistory.push(`Attempt ${attempt + 1}: Got stuck, asking Perplexity for help...`);
2595
- const helpRequest = `I'm trying to: ${goal}
2596
-
2597
- I'm stuck. What should I do next? Be specific about what to click or type.`;
2598
- const advice = await askAI("perplexity", helpRequest, false);
2599
- actionHistory.push(`Got advice: ${advice.response.slice(0, 200)}...`);
2600
- await navigateTo(page.url());
1138
+ if (action === "stuck" || stuckCount >= stuckThreshold) {
1139
+ actionHistory.push(`[${attempt + 1}] \u{1F198} Asking for help...`);
1140
+ const suggestions = await learner.getHelp(
1141
+ goal,
1142
+ currentScreen.description,
1143
+ actionHistory.slice(-3)
1144
+ );
1145
+ if (suggestions.length > 0) {
1146
+ const suggestion = suggestions[0];
1147
+ actionHistory.push(`\u{1F4A1} Got suggestion from ${suggestion.source}: ${suggestion.value.slice(0, 100)}`);
1148
+ if (suggestion.action && suggestion.action !== "suggested") {
1149
+ try {
1150
+ await executeAdaptiveAction(suggestion.action, suggestion.value);
1151
+ actionHistory.push(`[${attempt + 1}] \u2192 ${suggestion.action} "${suggestion.value.slice(0, 30)}"`);
1152
+ await learner.learn(
1153
+ currentScreen.description.slice(0, 300),
1154
+ goal,
1155
+ suggestion.action,
1156
+ suggestion.value,
1157
+ suggestion.source
1158
+ );
1159
+ stuckCount = 0;
1160
+ } catch (e) {
1161
+ actionHistory.push(`[${attempt + 1}] \u274C Suggestion failed`);
1162
+ }
1163
+ }
1164
+ } else {
1165
+ actionHistory.push(`[${attempt + 1}] \u{1F615} No helpful suggestions found`);
1166
+ }
2601
1167
  continue;
2602
1168
  }
2603
1169
  try {
2604
- switch (action) {
2605
- case "click":
2606
- if (selector) {
2607
- const clicked = await clickElement(selector);
2608
- if (!clicked) {
2609
- await page.getByText(selector).first().click({ timeout: 5e3 });
2610
- }
2611
- }
2612
- actionHistory.push(`Attempt ${attempt + 1}: Clicked "${selector}"`);
2613
- break;
2614
- case "type":
2615
- if (selector && value) {
2616
- const typed = await typeInElement(selector, value);
2617
- if (!typed) {
2618
- await page.getByPlaceholder(selector).first().fill(value);
2619
- }
2620
- }
2621
- actionHistory.push(`Attempt ${attempt + 1}: Typed "${value}" in "${selector}"`);
2622
- break;
2623
- case "press":
2624
- await pressKey2(value || selector);
2625
- actionHistory.push(`Attempt ${attempt + 1}: Pressed ${value || selector}`);
2626
- break;
2627
- case "scroll":
2628
- await scroll(value.toLowerCase().includes("up") ? "up" : "down");
2629
- actionHistory.push(`Attempt ${attempt + 1}: Scrolled ${value || "down"}`);
2630
- break;
2631
- case "navigate":
2632
- const url = value.startsWith("http") ? value : `https://${value}`;
2633
- await navigateTo(url);
2634
- actionHistory.push(`Attempt ${attempt + 1}: Navigated to ${url}`);
2635
- break;
2636
- default:
2637
- actionHistory.push(`Attempt ${attempt + 1}: Unknown action ${action}`);
2638
- }
1170
+ await executeAdaptiveAction(action, value);
1171
+ actionHistory.push(`[${attempt + 1}] \u2192 ${action}${value ? ` "${value.slice(0, 40)}"` : ""}`);
2639
1172
  } catch (e) {
2640
- actionHistory.push(`Attempt ${attempt + 1}: Action failed - ${e}`);
1173
+ actionHistory.push(`[${attempt + 1}] \u274C ${action} failed - ${e}`);
1174
+ await learner.recordFailure(goal, action, value);
2641
1175
  }
2642
- await sleep(2e3);
1176
+ await sleep(1e3 + Math.random() * 1e3);
2643
1177
  }
2644
1178
  step.result = `\u{1F3AF} Adaptive Agent Result:
2645
1179
 
2646
1180
  Goal: ${goal}
2647
1181
  Accomplished: ${accomplished ? "Yes \u2705" : "Partial/No \u274C"}
1182
+ Attempts: ${Math.min(actionHistory.length, maxAttempts)}/${maxAttempts}
2648
1183
 
2649
1184
  Action Log:
2650
1185
  ${actionHistory.join("\n")}`;
2651
1186
  break;
1187
+ async function executeAdaptiveAction(action, value) {
1188
+ switch (action) {
1189
+ case "click":
1190
+ await clickMouse("left");
1191
+ break;
1192
+ case "clickat":
1193
+ case "clickAt": {
1194
+ const [x, y] = value.split(",").map((n) => parseInt(n.trim()));
1195
+ if (!isNaN(x) && !isNaN(y)) {
1196
+ await moveMouse(x, y);
1197
+ await sleep(100);
1198
+ await clickMouse("left");
1199
+ }
1200
+ break;
1201
+ }
1202
+ case "moveto":
1203
+ case "moveTo": {
1204
+ const [mx, my] = value.split(",").map((n) => parseInt(n.trim()));
1205
+ if (!isNaN(mx) && !isNaN(my)) {
1206
+ await moveMouse(mx, my);
1207
+ }
1208
+ break;
1209
+ }
1210
+ case "type":
1211
+ if (value) {
1212
+ if (typeTextHuman) {
1213
+ await typeTextHuman(value, 50);
1214
+ } else {
1215
+ await typeText(value);
1216
+ }
1217
+ }
1218
+ break;
1219
+ case "press":
1220
+ await pressKey(value || "Return");
1221
+ break;
1222
+ case "keycombo":
1223
+ case "keyCombo": {
1224
+ const keys2 = value.split("+").map((k) => k.trim().toLowerCase());
1225
+ await keyCombo(keys2);
1226
+ break;
1227
+ }
1228
+ case "scroll":
1229
+ await scroll(value.toLowerCase().includes("up") ? "up" : "down");
1230
+ break;
1231
+ case "navigate": {
1232
+ const navUrl = value.startsWith("http") ? value : `https://${value}`;
1233
+ await openUrl(navUrl);
1234
+ await sleep(2e3);
1235
+ break;
1236
+ }
1237
+ case "wait": {
1238
+ const seconds2 = parseFloat(value) || 2;
1239
+ await sleep(seconds2 * 1e3);
1240
+ break;
1241
+ }
1242
+ case "findclick":
1243
+ case "findClick":
1244
+ if (findAndClick) {
1245
+ await findAndClick(value);
1246
+ } else {
1247
+ throw new Error("findAndClick not available");
1248
+ }
1249
+ break;
1250
+ default:
1251
+ throw new Error(`Unknown action: ${action}`);
1252
+ }
1253
+ }
2652
1254
  }
2653
1255
  case "chat":
2654
1256
  step.result = `Task noted: ${params}`;
@@ -2702,8 +1304,8 @@ function getTaskMemoryStats() {
2702
1304
  }
2703
1305
  function clearTaskMemory() {
2704
1306
  try {
2705
- if (fs3.existsSync(TASK_MEMORY_FILE)) {
2706
- fs3.unlinkSync(TASK_MEMORY_FILE);
1307
+ if (fs.existsSync(TASK_MEMORY_FILE)) {
1308
+ fs.unlinkSync(TASK_MEMORY_FILE);
2707
1309
  }
2708
1310
  } catch {
2709
1311
  }
@@ -2803,6 +1405,7 @@ var TelegramBotService = class extends EventEmitter {
2803
1405
  isRunning = false;
2804
1406
  allowedChatIds = /* @__PURE__ */ new Set();
2805
1407
  chatHistory = /* @__PURE__ */ new Map();
1408
+ watchIntervals = /* @__PURE__ */ new Map();
2806
1409
  constructor() {
2807
1410
  super();
2808
1411
  }
@@ -2861,18 +1464,24 @@ var TelegramBotService = class extends EventEmitter {
2861
1464
  await ctx.reply(
2862
1465
  `\u{1F916} C-napse connected!
2863
1466
 
2864
- Commands:
1467
+ \u{1F4CB} Commands:
2865
1468
  /screen - Take screenshot
2866
1469
  /describe - Screenshot + AI description
2867
1470
  /task <desc> - Multi-step automation
2868
1471
  /run <cmd> - Execute shell command
2869
1472
  /status - System status
2870
1473
 
2871
- Examples:
2872
- \u2022 /task open folder E:/Test and list files
1474
+ \u{1F916} Autonomous Agent:
1475
+ /agent <goal> - Start autonomous agent
1476
+ /agent stop - Stop the agent
1477
+ /watch - Stream screen live
1478
+ /learn - View learned actions
1479
+
1480
+ \u{1F4A1} Examples:
1481
+ \u2022 /agent open safari and search for weather
2873
1482
  \u2022 /task open notepad and type hello
1483
+ \u2022 /watch 10 (screenshot every 10s)
2874
1484
  \u2022 minimize chrome
2875
- \u2022 what windows are open?
2876
1485
 
2877
1486
  Your chat ID: ${chatId}`
2878
1487
  );
@@ -2950,16 +1559,253 @@ ${result.error}
2950
1559
  return;
2951
1560
  }
2952
1561
  const config = getConfig();
1562
+ let agentStatus = "Not running";
1563
+ try {
1564
+ const { getAutonomousAgent } = await import("./autonomous-VGEVIXXQ.js");
1565
+ const agent = getAutonomousAgent();
1566
+ const state = agent.getState();
1567
+ if (state.isActive) {
1568
+ agentStatus = `Running (${state.attemptCount} attempts, goal: ${state.goal?.slice(0, 30)}...)`;
1569
+ }
1570
+ } catch {
1571
+ }
1572
+ let learnedCount = 0;
1573
+ try {
1574
+ const { getLearner } = await import("./learner-KH3TFTD7.js");
1575
+ const learner = getLearner();
1576
+ const stats = learner.getStats();
1577
+ learnedCount = stats.memorySize;
1578
+ } catch {
1579
+ }
2953
1580
  const status = [
2954
1581
  "\u{1F4CA} C-napse Status",
2955
1582
  "",
2956
1583
  `Provider: ${config.provider}`,
2957
1584
  `Model: ${config.model}`,
2958
1585
  `Platform: ${process.platform}`,
2959
- `Node: ${process.version}`
1586
+ `Node: ${process.version}`,
1587
+ "",
1588
+ "\u{1F916} Agent Status:",
1589
+ ` Agent: ${agentStatus}`,
1590
+ ` Learned actions: ${learnedCount}`
2960
1591
  ].join("\n");
2961
1592
  await ctx.reply(status);
2962
1593
  });
1594
+ this.bot.command("agent", async (ctx) => {
1595
+ if (!this.isAllowed(ctx.chat.id)) {
1596
+ await ctx.reply("\u26D4 Not authorized. Send /start first.");
1597
+ return;
1598
+ }
1599
+ const args2 = ctx.message.text.replace("/agent", "").trim();
1600
+ const { getAutonomousAgent } = await import("./autonomous-VGEVIXXQ.js");
1601
+ const agent = getAutonomousAgent();
1602
+ if (args2 === "stop") {
1603
+ agent.stop();
1604
+ await ctx.reply("\u{1F6D1} Agent stopped");
1605
+ return;
1606
+ }
1607
+ if (args2 === "pause") {
1608
+ agent.pause();
1609
+ await ctx.reply("\u23F8\uFE0F Agent paused");
1610
+ return;
1611
+ }
1612
+ if (args2 === "resume") {
1613
+ agent.resume();
1614
+ await ctx.reply("\u25B6\uFE0F Agent resumed");
1615
+ return;
1616
+ }
1617
+ if (args2 === "status") {
1618
+ const state = agent.getState();
1619
+ const history = agent.getHistory().slice(-5);
1620
+ let statusMsg = "\u{1F916} Agent Status\n\n";
1621
+ statusMsg += `Goal: ${state.goal || "None"}
1622
+ `;
1623
+ statusMsg += `Active: ${state.isActive ? "Yes" : "No"}
1624
+ `;
1625
+ statusMsg += `Attempts: ${state.attemptCount}
1626
+ `;
1627
+ statusMsg += `Confidence: ${state.confidence}%
1628
+ `;
1629
+ statusMsg += `Stuck count: ${state.stuckCount}
1630
+
1631
+ `;
1632
+ if (history.length > 0) {
1633
+ statusMsg += "Recent actions:\n";
1634
+ history.forEach((h) => {
1635
+ statusMsg += `\u2022 ${h.action}: ${h.value?.slice(0, 30) || ""} (${h.result})
1636
+ `;
1637
+ });
1638
+ }
1639
+ await ctx.reply(statusMsg);
1640
+ return;
1641
+ }
1642
+ if (!args2) {
1643
+ await ctx.reply(
1644
+ "\u{1F916} Autonomous Agent Commands:\n\n/agent <goal> - Start agent with a goal\n/agent stop - Stop the agent\n/agent pause - Pause the agent\n/agent resume - Resume the agent\n/agent status - Get agent status\n\nExamples:\n\u2022 /agent open safari and search for weather\n\u2022 /agent compose an email in gmail to john@test.com\n\u2022 /agent book a flight on kayak.com from NYC to LA"
1645
+ );
1646
+ return;
1647
+ }
1648
+ const goal = args2;
1649
+ await ctx.reply(`\u{1F680} Starting agent with goal:
1650
+ "${goal}"
1651
+
1652
+ Sending updates...`);
1653
+ const chatId = ctx.chat.id;
1654
+ let lastUpdate = Date.now();
1655
+ let screenshotCount = 0;
1656
+ const onAttempt = async (data) => {
1657
+ if (Date.now() - lastUpdate < 3e3) return;
1658
+ lastUpdate = Date.now();
1659
+ await ctx.reply(`\u{1F504} Attempt ${data.count}/${data.max}`);
1660
+ };
1661
+ const onDecided = async (decision) => {
1662
+ await ctx.reply(`\u{1F4AD} ${decision.action}: ${decision.value?.slice(0, 50) || ""}`);
1663
+ };
1664
+ const onCompleted = async (data) => {
1665
+ const emoji = data.success ? "\u2705" : "\u274C";
1666
+ await ctx.reply(`${emoji} Agent ${data.success ? "completed" : "stopped"} after ${data.attempts} attempts`);
1667
+ try {
1668
+ const screenshot = await captureScreenshot();
1669
+ if (screenshot) {
1670
+ const buffer = Buffer.from(screenshot, "base64");
1671
+ await ctx.replyWithPhoto({ source: buffer }, {
1672
+ caption: data.success ? "\u2705 Goal accomplished!" : "\u{1F4F8} Final state"
1673
+ });
1674
+ }
1675
+ } catch {
1676
+ }
1677
+ agent.off("attempt", onAttempt);
1678
+ agent.off("decided", onDecided);
1679
+ agent.off("completed", onCompleted);
1680
+ agent.off("error", onError);
1681
+ };
1682
+ const onError = async (data) => {
1683
+ await ctx.reply(`\u274C Error: ${data.error}`);
1684
+ };
1685
+ const screenshotInterval = setInterval(async () => {
1686
+ if (!agent.getState().isActive) {
1687
+ clearInterval(screenshotInterval);
1688
+ return;
1689
+ }
1690
+ screenshotCount++;
1691
+ if (screenshotCount % 6 === 0) {
1692
+ try {
1693
+ const screenshot = await captureScreenshot();
1694
+ if (screenshot) {
1695
+ const buffer = Buffer.from(screenshot, "base64");
1696
+ await ctx.replyWithPhoto({ source: buffer }, {
1697
+ caption: `\u{1F4F8} Progress update (attempt ${agent.getState().attemptCount})`
1698
+ });
1699
+ }
1700
+ } catch {
1701
+ }
1702
+ }
1703
+ }, 5e3);
1704
+ agent.on("attempt", onAttempt);
1705
+ agent.on("decided", onDecided);
1706
+ agent.on("completed", onCompleted);
1707
+ agent.on("error", onError);
1708
+ agent.start(goal).then((result) => {
1709
+ clearInterval(screenshotInterval);
1710
+ }).catch((err) => {
1711
+ clearInterval(screenshotInterval);
1712
+ ctx.reply(`\u274C Agent error: ${err.message}`);
1713
+ });
1714
+ });
1715
+ this.bot.command("watch", async (ctx) => {
1716
+ if (!this.isAllowed(ctx.chat.id)) {
1717
+ await ctx.reply("\u26D4 Not authorized. Send /start first.");
1718
+ return;
1719
+ }
1720
+ const args2 = ctx.message.text.replace("/watch", "").trim();
1721
+ const chatId = ctx.chat.id;
1722
+ if (args2 === "stop") {
1723
+ if (this.watchIntervals?.has(chatId)) {
1724
+ clearInterval(this.watchIntervals.get(chatId));
1725
+ this.watchIntervals.delete(chatId);
1726
+ await ctx.reply("\u{1F441}\uFE0F Screen streaming stopped");
1727
+ } else {
1728
+ await ctx.reply("Not currently streaming");
1729
+ }
1730
+ return;
1731
+ }
1732
+ const intervalSeconds = parseInt(args2) || 5;
1733
+ const intervalMs = Math.max(3e3, Math.min(6e4, intervalSeconds * 1e3));
1734
+ if (!this.watchIntervals) {
1735
+ this.watchIntervals = /* @__PURE__ */ new Map();
1736
+ }
1737
+ if (this.watchIntervals.has(chatId)) {
1738
+ clearInterval(this.watchIntervals.get(chatId));
1739
+ }
1740
+ await ctx.reply(`\u{1F441}\uFE0F Starting screen stream (every ${intervalMs / 1e3}s)
1741
+ Send /watch stop to end`);
1742
+ try {
1743
+ const screenshot = await captureScreenshot();
1744
+ if (screenshot) {
1745
+ const buffer = Buffer.from(screenshot, "base64");
1746
+ await ctx.replyWithPhoto({ source: buffer }, { caption: "\u{1F4F8} Stream started" });
1747
+ }
1748
+ } catch {
1749
+ }
1750
+ const interval = setInterval(async () => {
1751
+ try {
1752
+ const screenshot = await captureScreenshot();
1753
+ if (screenshot) {
1754
+ const buffer = Buffer.from(screenshot, "base64");
1755
+ await ctx.replyWithPhoto({ source: buffer }, {
1756
+ caption: `\u{1F4F8} ${(/* @__PURE__ */ new Date()).toLocaleTimeString()}`
1757
+ });
1758
+ }
1759
+ } catch {
1760
+ }
1761
+ }, intervalMs);
1762
+ this.watchIntervals.set(chatId, interval);
1763
+ });
1764
+ this.bot.command("learn", async (ctx) => {
1765
+ if (!this.isAllowed(ctx.chat.id)) {
1766
+ await ctx.reply("\u26D4 Not authorized. Send /start first.");
1767
+ return;
1768
+ }
1769
+ const args2 = ctx.message.text.replace("/learn", "").trim();
1770
+ const { getLearner } = await import("./learner-KH3TFTD7.js");
1771
+ const learner = getLearner();
1772
+ await learner.load();
1773
+ if (args2 === "clear") {
1774
+ await learner.clearMemory();
1775
+ await ctx.reply("\u{1F5D1}\uFE0F Learned actions cleared");
1776
+ return;
1777
+ }
1778
+ if (args2 === "stats") {
1779
+ const stats = learner.getStats();
1780
+ await ctx.reply(
1781
+ `\u{1F4CA} Learning Stats:
1782
+
1783
+ Total learned: ${stats.memorySize}
1784
+ Total successes: ${stats.totalSuccesses}
1785
+ Total attempts: ${stats.totalAttempts}
1786
+
1787
+ Sources:
1788
+ ` + Object.entries(stats.sourceCounts).map(([source, count]) => `\u2022 ${source}: ${count}`).join("\n")
1789
+ );
1790
+ return;
1791
+ }
1792
+ const learned = learner.getAllLearned().slice(-10);
1793
+ if (learned.length === 0) {
1794
+ await ctx.reply("\u{1F4DA} No learned actions yet\n\nThe agent learns from successful actions and remembers them for next time.");
1795
+ return;
1796
+ }
1797
+ let msg = "\u{1F4DA} Recent Learned Actions:\n\n";
1798
+ learned.forEach((l, i) => {
1799
+ msg += `${i + 1}. ${l.goal.slice(0, 40)}
1800
+ `;
1801
+ msg += ` \u2192 ${l.actionType}: ${l.actionValue.slice(0, 30)}
1802
+ `;
1803
+ msg += ` (${l.successCount}\u2705 ${l.failCount}\u274C from ${l.source})
1804
+
1805
+ `;
1806
+ });
1807
+ await ctx.reply(msg);
1808
+ });
2963
1809
  this.bot.command("task", async (ctx) => {
2964
1810
  if (!this.isAllowed(ctx.chat.id)) {
2965
1811
  await ctx.reply("\u26D4 Not authorized. Send /start first.");
@@ -3082,8 +1928,8 @@ ${result.error}
3082
1928
  this.emit("error", new Error(errorMsg));
3083
1929
  }
3084
1930
  });
3085
- this.bot.catch((err2) => {
3086
- this.emit("error", err2);
1931
+ this.bot.catch((err) => {
1932
+ this.emit("error", err);
3087
1933
  });
3088
1934
  }
3089
1935
  /**
@@ -3225,15 +2071,15 @@ function useTelegram(onMessage) {
3225
2071
  setLastMessage(msg);
3226
2072
  onMessageRef.current?.(msg);
3227
2073
  });
3228
- bot.on("error", (err2) => {
3229
- setError(err2.message);
2074
+ bot.on("error", (err) => {
2075
+ setError(err.message);
3230
2076
  });
3231
2077
  await bot.start();
3232
2078
  setIsEnabled(true);
3233
- } catch (err2) {
3234
- const errorMsg = err2 instanceof Error ? err2.message : "Failed to start Telegram bot";
2079
+ } catch (err) {
2080
+ const errorMsg = err instanceof Error ? err.message : "Failed to start Telegram bot";
3235
2081
  setError(errorMsg);
3236
- throw err2;
2082
+ throw err;
3237
2083
  } finally {
3238
2084
  setIsStarting(false);
3239
2085
  }
@@ -3244,10 +2090,10 @@ function useTelegram(onMessage) {
3244
2090
  const bot = getTelegramBot();
3245
2091
  await bot.stop();
3246
2092
  setIsEnabled(false);
3247
- } catch (err2) {
3248
- const errorMsg = err2 instanceof Error ? err2.message : "Failed to stop Telegram bot";
2093
+ } catch (err) {
2094
+ const errorMsg = err instanceof Error ? err.message : "Failed to stop Telegram bot";
3249
2095
  setError(errorMsg);
3250
- throw err2;
2096
+ throw err;
3251
2097
  }
3252
2098
  }, [isEnabled]);
3253
2099
  const toggle = useCallback3(async () => {
@@ -3288,10 +2134,10 @@ function useTasks(onProgress) {
3288
2134
  });
3289
2135
  setCurrentTask(result);
3290
2136
  return result;
3291
- } catch (err2) {
3292
- const errorMsg = err2 instanceof Error ? err2.message : "Task failed";
2137
+ } catch (err) {
2138
+ const errorMsg = err instanceof Error ? err.message : "Task failed";
3293
2139
  setError(errorMsg);
3294
- throw err2;
2140
+ throw err;
3295
2141
  } finally {
3296
2142
  setIsRunning(false);
3297
2143
  setCurrentStep(null);
@@ -3433,7 +2279,7 @@ Type /help for commands`);
3433
2279
  chat2.addSystemMessage(`\u{1F5A5}\uFE0F Screen:
3434
2280
 
3435
2281
  ${description}`);
3436
- } catch (err2) {
2282
+ } catch (err) {
3437
2283
  chat2.addSystemMessage(`\u274C ${vision.error || "Vision failed"}`);
3438
2284
  } finally {
3439
2285
  setStatus("Ready");
@@ -3589,7 +2435,7 @@ async function main() {
3589
2435
  case "config": {
3590
2436
  const subcommand = args[1];
3591
2437
  if (!subcommand) {
3592
- const { ProviderSelector: ProviderSelector2 } = await import("./ProviderSelector-MXRZFAOB.js");
2438
+ const { ProviderSelector: ProviderSelector2 } = await import("./ProviderSelector-GZYF26LL.js");
3593
2439
  const { Box: Box7 } = await import("ink");
3594
2440
  render(
3595
2441
  /* @__PURE__ */ jsx7(Box7, { flexDirection: "column", padding: 1, children: /* @__PURE__ */ jsx7(
@@ -3705,7 +2551,7 @@ ${dim}GitHub: https://github.com/projectservan8n/C-napse${reset}
3705
2551
  process.exit(0);
3706
2552
  }
3707
2553
  case "init": {
3708
- const { ProviderSelector: ProviderSelector2 } = await import("./ProviderSelector-MXRZFAOB.js");
2554
+ const { ProviderSelector: ProviderSelector2 } = await import("./ProviderSelector-GZYF26LL.js");
3709
2555
  const { Box: Box7, Text: Text7 } = await import("ink");
3710
2556
  render(
3711
2557
  /* @__PURE__ */ jsxs6(Box7, { flexDirection: "column", padding: 1, children: [