@projectservan8n/cnapse 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ProviderSelector-MXRZFAOB.js +6 -0
- package/dist/chunk-OPX7FFL6.js +391 -0
- package/dist/index.js +733 -702
- package/package.json +17 -16
- package/src/agents/executor.ts +20 -13
- package/src/index.tsx +32 -6
- package/src/lib/tasks.ts +307 -323
- package/src/services/browser.ts +669 -0
- package/src/tools/index.ts +0 -1
- package/dist/ConfigUI-I2CJVODT.js +0 -305
- package/dist/Setup-KGYXCA7Y.js +0 -177
- package/dist/chunk-COKO6V5J.js +0 -50
- package/src/components/ConfigUI.tsx +0 -352
- package/src/components/Setup.tsx +0 -202
- package/src/lib/screen.ts +0 -118
- package/src/tools/vision.ts +0 -65
package/src/lib/tasks.ts
CHANGED
|
@@ -4,11 +4,12 @@
|
|
|
4
4
|
* Uses chain-of-thought prompting + learning from past tasks
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
import { chat, Message } from './api.js';
|
|
7
|
+
import { chat, chatWithVision, Message } from './api.js';
|
|
8
8
|
import * as computer from '../tools/computer.js';
|
|
9
|
-
import { describeScreen } from './vision.js';
|
|
9
|
+
import { describeScreen, captureScreenshot } from './vision.js';
|
|
10
10
|
import * as filesystem from '../tools/filesystem.js';
|
|
11
11
|
import { runCommand } from '../tools/shell.js';
|
|
12
|
+
import * as browser from '../services/browser.js';
|
|
12
13
|
import * as fs from 'fs';
|
|
13
14
|
import * as path from 'path';
|
|
14
15
|
import * as os from 'os';
|
|
@@ -227,6 +228,12 @@ Before outputting steps, THINK through these questions:
|
|
|
227
228
|
### Research
|
|
228
229
|
- research: Multi-step web research - searches, gathers info, summarizes (e.g., "research:What are the latest AI trends in 2024?")
|
|
229
230
|
|
|
231
|
+
### Adaptive/Learning
|
|
232
|
+
- ask_llm: Ask another LLM for help with a screenshot (e.g., "ask_llm:perplexity|How do I do X in this app?")
|
|
233
|
+
- ask_llm: Supports: perplexity, chatgpt, claude, copilot - sends screenshot + question, gets answer
|
|
234
|
+
- adaptive_do: Try to accomplish something, if stuck ask LLMs for help (e.g., "adaptive_do:book a flight to NYC on kayak.com")
|
|
235
|
+
- learn_ui: Take screenshot and learn how to interact with current UI (e.g., "learn_ui:What buttons can I click here?")
|
|
236
|
+
|
|
230
237
|
### Utility
|
|
231
238
|
- wait: Wait N seconds (e.g., "wait:2" - use 1-3s for app loads)
|
|
232
239
|
- screenshot: Capture and describe screen
|
|
@@ -374,6 +381,39 @@ Output:
|
|
|
374
381
|
{ "description": "Type the content", "action": "google_docs:type|Project Status Report\n\nDate: Today\n\nSummary:\nThe project is on track. All milestones have been met.\n\nNext Steps:\n- Complete testing\n- Deploy to production" }
|
|
375
382
|
]
|
|
376
383
|
|
|
384
|
+
### Example 12: "I don't know how to use this app, can you figure it out?"
|
|
385
|
+
Thinking:
|
|
386
|
+
- Goal: Learn the current UI and understand how to use it
|
|
387
|
+
- How: Use learn_ui to take screenshot and analyze
|
|
388
|
+
- Sequence: Screenshot -> AI analysis -> report back
|
|
389
|
+
|
|
390
|
+
Output:
|
|
391
|
+
[
|
|
392
|
+
{ "description": "Analyze current UI", "action": "learn_ui:What are all the buttons, menus, and interactive elements I can use?" }
|
|
393
|
+
]
|
|
394
|
+
|
|
395
|
+
### Example 13: "book a hotel on booking.com for next weekend"
|
|
396
|
+
Thinking:
|
|
397
|
+
- Goal: Complex task on unfamiliar website - need adaptive approach
|
|
398
|
+
- How: Use adaptive_do which will try, and if stuck ask LLMs for help
|
|
399
|
+
- Sequence: Single adaptive action handles the complexity
|
|
400
|
+
|
|
401
|
+
Output:
|
|
402
|
+
[
|
|
403
|
+
{ "description": "Adaptively book hotel", "action": "adaptive_do:Go to booking.com and book a hotel for next weekend" }
|
|
404
|
+
]
|
|
405
|
+
|
|
406
|
+
### Example 14: "I'm stuck, ask Claude how to proceed"
|
|
407
|
+
Thinking:
|
|
408
|
+
- Goal: Get help from another LLM with current screen context
|
|
409
|
+
- How: Use ask_llm with claude and send screenshot
|
|
410
|
+
- Sequence: Screenshot + question -> Get answer
|
|
411
|
+
|
|
412
|
+
Output:
|
|
413
|
+
[
|
|
414
|
+
{ "description": "Ask Claude for help with screenshot", "action": "ask_llm:claude|I'm stuck on this screen. What should I do next to accomplish my task?" }
|
|
415
|
+
]
|
|
416
|
+
|
|
377
417
|
## YOUR TASK
|
|
378
418
|
Now parse this request: "${input}"
|
|
379
419
|
|
|
@@ -616,94 +656,49 @@ ${existingResult.output}`;
|
|
|
616
656
|
|
|
617
657
|
case 'browse_and_ask': {
|
|
618
658
|
// Format: browse_and_ask:site|question
|
|
659
|
+
// Using Playwright for reliable browser automation
|
|
619
660
|
const [site, ...questionParts] = params.split('|');
|
|
620
661
|
const question = questionParts.join('|');
|
|
621
662
|
|
|
622
|
-
//
|
|
623
|
-
const
|
|
624
|
-
|
|
625
|
-
chatgpt: { url: 'https://chat.openai.com', loadTime: 4, responseTime: 15 },
|
|
626
|
-
claude: { url: 'https://claude.ai', loadTime: 4, responseTime: 15 },
|
|
627
|
-
google: { url: 'https://www.google.com', loadTime: 2, responseTime: 3 },
|
|
628
|
-
bing: { url: 'https://www.bing.com', loadTime: 2, responseTime: 3 },
|
|
629
|
-
bard: { url: 'https://bard.google.com', loadTime: 3, responseTime: 12 },
|
|
630
|
-
copilot: { url: 'https://copilot.microsoft.com', loadTime: 3, responseTime: 12 },
|
|
631
|
-
};
|
|
632
|
-
|
|
633
|
-
const siteConfig = sites[site.toLowerCase()] || { url: `https://${site}`, loadTime: 3, responseTime: 10 };
|
|
634
|
-
|
|
635
|
-
// Open the site
|
|
636
|
-
if (process.platform === 'win32') {
|
|
637
|
-
await runCommand(`start "" "${siteConfig.url}"`, 5000);
|
|
638
|
-
} else if (process.platform === 'darwin') {
|
|
639
|
-
await runCommand(`open "${siteConfig.url}"`, 5000);
|
|
640
|
-
} else {
|
|
641
|
-
await runCommand(`xdg-open "${siteConfig.url}"`, 5000);
|
|
642
|
-
}
|
|
643
|
-
|
|
644
|
-
// Wait for page to load
|
|
645
|
-
await sleep(siteConfig.loadTime * 1000);
|
|
646
|
-
|
|
647
|
-
// Type the question (most sites have autofocus on search/input)
|
|
648
|
-
await computer.typeText(question);
|
|
649
|
-
await sleep(300);
|
|
650
|
-
|
|
651
|
-
// Press Enter to submit
|
|
652
|
-
await computer.pressKey('Return');
|
|
653
|
-
|
|
654
|
-
// Wait for AI to generate response
|
|
655
|
-
await sleep(siteConfig.responseTime * 1000);
|
|
656
|
-
|
|
657
|
-
// Capture multiple screenshots by scrolling to get full response
|
|
658
|
-
const extractedParts: string[] = [];
|
|
659
|
-
const maxScrolls = 5; // Maximum number of scroll captures
|
|
660
|
-
|
|
661
|
-
for (let scrollIndex = 0; scrollIndex < maxScrolls; scrollIndex++) {
|
|
662
|
-
// Capture current view
|
|
663
|
-
const screenResult = await describeScreen();
|
|
664
|
-
|
|
665
|
-
// Ask AI to extract just the response text from what it sees
|
|
666
|
-
const extractPrompt = `You are looking at screenshot ${scrollIndex + 1} of ${site}. The user asked: "${question}"
|
|
663
|
+
// Check if site is a supported AI chat
|
|
664
|
+
const supportedSites = ['perplexity', 'chatgpt', 'claude', 'copilot', 'google'];
|
|
665
|
+
const siteLower = site.toLowerCase();
|
|
667
666
|
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
- Any disclaimers, suggestions, or "related questions"
|
|
672
|
-
- Any "Sources" or citation links
|
|
673
|
-
- Any text you already extracted (avoid duplicates)
|
|
667
|
+
if (supportedSites.includes(siteLower)) {
|
|
668
|
+
// Use Playwright's AI chat helper
|
|
669
|
+
const result = await browser.askAI(siteLower as any, question, true);
|
|
674
670
|
|
|
675
|
-
|
|
671
|
+
// If response seems short, try getting full response by scrolling
|
|
672
|
+
if (result.response.length < 500) {
|
|
673
|
+
const fullParts = await browser.getFullAIResponse(siteLower as any, 5);
|
|
674
|
+
if (fullParts.length > 0) {
|
|
675
|
+
step.result = `📝 ${site.charAt(0).toUpperCase() + site.slice(1)} says:\n\n${fullParts.join('\n\n')}`;
|
|
676
|
+
break;
|
|
677
|
+
}
|
|
678
|
+
}
|
|
676
679
|
|
|
677
|
-
|
|
680
|
+
step.result = `📝 ${site.charAt(0).toUpperCase() + site.slice(1)} says:\n\n${result.response}`;
|
|
681
|
+
} else {
|
|
682
|
+
// Generic site - open and type
|
|
683
|
+
await browser.navigateTo(`https://${site}`);
|
|
684
|
+
await sleep(2000);
|
|
678
685
|
|
|
679
|
-
|
|
680
|
-
const
|
|
686
|
+
// Try to find and fill any input
|
|
687
|
+
const page = await browser.getPage();
|
|
688
|
+
const inputs = ['textarea', 'input[type="text"]', 'input[type="search"]', '[contenteditable="true"]'];
|
|
681
689
|
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
// Check for "no response" indicators
|
|
688
|
-
if (extracted.toLowerCase().includes('response not ready') ||
|
|
689
|
-
extracted.toLowerCase().includes('no response visible') ||
|
|
690
|
-
extracted.toLowerCase().includes('no additional text')) {
|
|
691
|
-
if (scrollIndex === 0) {
|
|
692
|
-
extractedParts.push('Response not ready yet or page still loading.');
|
|
690
|
+
for (const selector of inputs) {
|
|
691
|
+
if (await browser.elementExists(selector)) {
|
|
692
|
+
await browser.typeInElement(selector, question);
|
|
693
|
+
await browser.pressKey('Enter');
|
|
694
|
+
break;
|
|
693
695
|
}
|
|
694
|
-
break;
|
|
695
696
|
}
|
|
696
697
|
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
await computer.scrollMouse(-5); // Scroll down
|
|
701
|
-
await sleep(1000); // Wait for scroll animation
|
|
698
|
+
await sleep(5000);
|
|
699
|
+
const pageText = await browser.getPageText();
|
|
700
|
+
step.result = `📝 Response from ${site}:\n\n${pageText.slice(0, 3000)}`;
|
|
702
701
|
}
|
|
703
|
-
|
|
704
|
-
// Combine all extracted parts
|
|
705
|
-
const fullResponse = extractedParts.join('\n\n');
|
|
706
|
-
step.result = `📝 ${site.charAt(0).toUpperCase() + site.slice(1)} says:\n\n${fullResponse}`;
|
|
707
702
|
break;
|
|
708
703
|
}
|
|
709
704
|
|
|
@@ -713,181 +708,73 @@ Just give me the actual answer text, word for word as it appears. If there's no
|
|
|
713
708
|
break;
|
|
714
709
|
|
|
715
710
|
case 'web_search': {
|
|
716
|
-
//
|
|
717
|
-
|
|
718
|
-
await computer.keyCombo(['meta', 'r']); // Win+R
|
|
719
|
-
await sleep(500);
|
|
720
|
-
await computer.typeText('chrome'); // Try Chrome first
|
|
721
|
-
await computer.pressKey('Return');
|
|
722
|
-
await sleep(2000);
|
|
723
|
-
|
|
724
|
-
// Go to Google (Ctrl+L to focus address bar)
|
|
725
|
-
await computer.keyCombo(['control', 'l']);
|
|
726
|
-
await sleep(300);
|
|
727
|
-
await computer.typeText('google.com');
|
|
728
|
-
await computer.pressKey('Return');
|
|
729
|
-
await sleep(2000);
|
|
711
|
+
// Use Playwright for reliable web search
|
|
712
|
+
const searchResults = await browser.webSearch(params, 'google');
|
|
730
713
|
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
const searchScreen = await describeScreen();
|
|
739
|
-
const searchExtract = await chat([{
|
|
740
|
-
role: 'user',
|
|
741
|
-
content: `Extract the top search results from this Google search page. For each result, include:
|
|
742
|
-
- Title
|
|
743
|
-
- Brief snippet/description
|
|
744
|
-
- URL if visible
|
|
745
|
-
|
|
746
|
-
Format as a numbered list. Be concise.`
|
|
747
|
-
}]);
|
|
748
|
-
|
|
749
|
-
step.result = `🔍 Search results for "${params}":\n\n${searchExtract.content}`;
|
|
714
|
+
if (searchResults.length > 0) {
|
|
715
|
+
step.result = `🔍 Search results for "${params}":\n\n${searchResults.map((r, i) => `${i + 1}. ${r}`).join('\n')}`;
|
|
716
|
+
} else {
|
|
717
|
+
// Fallback: get page text
|
|
718
|
+
const pageText = await browser.getPageText();
|
|
719
|
+
step.result = `🔍 Search results for "${params}":\n\n${pageText.slice(0, 2000)}`;
|
|
720
|
+
}
|
|
750
721
|
break;
|
|
751
722
|
}
|
|
752
723
|
|
|
753
724
|
case 'send_email': {
|
|
754
|
-
//
|
|
725
|
+
// Use Playwright for reliable email sending
|
|
755
726
|
// Format: send_email:provider|to|subject|body
|
|
756
727
|
const [provider, to, subject, ...bodyParts] = params.split('|');
|
|
757
728
|
const body = bodyParts.join('|');
|
|
758
729
|
|
|
759
|
-
|
|
760
|
-
await computer.keyCombo(['meta', 'r']); // Win+R
|
|
761
|
-
await sleep(500);
|
|
762
|
-
await computer.typeText('chrome');
|
|
763
|
-
await computer.pressKey('Return');
|
|
764
|
-
await sleep(2000);
|
|
765
|
-
|
|
766
|
-
// Navigate to email service
|
|
767
|
-
await computer.keyCombo(['control', 'l']); // Focus address bar
|
|
768
|
-
await sleep(300);
|
|
730
|
+
const emailData = { to, subject, body };
|
|
769
731
|
|
|
732
|
+
let success = false;
|
|
770
733
|
if (provider.toLowerCase() === 'gmail') {
|
|
771
|
-
await
|
|
772
|
-
await computer.pressKey('Return');
|
|
773
|
-
await sleep(4000); // Wait for Gmail to load
|
|
774
|
-
|
|
775
|
-
// Click Compose button (use keyboard shortcut 'c')
|
|
776
|
-
await computer.typeText('c'); // Gmail shortcut for compose
|
|
777
|
-
await sleep(2000); // Wait for compose window
|
|
778
|
-
|
|
779
|
-
// Fill in fields
|
|
780
|
-
await computer.typeText(to); // To field is focused
|
|
781
|
-
await sleep(300);
|
|
782
|
-
await computer.pressKey('Tab'); // Move to subject
|
|
783
|
-
await sleep(200);
|
|
784
|
-
await computer.typeText(subject);
|
|
785
|
-
await sleep(300);
|
|
786
|
-
await computer.pressKey('Tab'); // Move to body
|
|
787
|
-
await sleep(200);
|
|
788
|
-
await computer.typeText(body);
|
|
789
|
-
await sleep(500);
|
|
790
|
-
|
|
791
|
-
// Send with Ctrl+Enter
|
|
792
|
-
await computer.keyCombo(['control', 'Return']);
|
|
793
|
-
|
|
734
|
+
success = await browser.sendGmail(emailData);
|
|
794
735
|
} else if (provider.toLowerCase() === 'outlook') {
|
|
795
|
-
await
|
|
796
|
-
await computer.pressKey('Return');
|
|
797
|
-
await sleep(4000); // Wait for Outlook to load
|
|
798
|
-
|
|
799
|
-
// Click New mail (use keyboard shortcut 'n')
|
|
800
|
-
await computer.typeText('n'); // Outlook shortcut for new mail
|
|
801
|
-
await sleep(2000);
|
|
802
|
-
|
|
803
|
-
// Fill in fields
|
|
804
|
-
await computer.typeText(to);
|
|
805
|
-
await sleep(300);
|
|
806
|
-
await computer.pressKey('Tab');
|
|
807
|
-
await sleep(200);
|
|
808
|
-
await computer.typeText(subject);
|
|
809
|
-
await sleep(300);
|
|
810
|
-
await computer.pressKey('Tab');
|
|
811
|
-
await sleep(200);
|
|
812
|
-
await computer.typeText(body);
|
|
813
|
-
await sleep(500);
|
|
814
|
-
|
|
815
|
-
// Send with Ctrl+Enter
|
|
816
|
-
await computer.keyCombo(['control', 'Return']);
|
|
736
|
+
success = await browser.sendOutlook(emailData);
|
|
817
737
|
} else {
|
|
818
738
|
throw new Error(`Unsupported email provider: ${provider}. Use gmail or outlook.`);
|
|
819
739
|
}
|
|
820
740
|
|
|
821
|
-
|
|
822
|
-
|
|
741
|
+
if (success) {
|
|
742
|
+
step.result = `📧 Email sent via ${provider} to ${to}`;
|
|
743
|
+
} else {
|
|
744
|
+
throw new Error(`Failed to send email via ${provider}. Make sure you're logged in.`);
|
|
745
|
+
}
|
|
823
746
|
break;
|
|
824
747
|
}
|
|
825
748
|
|
|
826
749
|
case 'google_sheets': {
|
|
827
|
-
//
|
|
750
|
+
// Use Playwright for Google Sheets
|
|
828
751
|
// Format: google_sheets:command|arg1|arg2...
|
|
829
752
|
const [sheetCmd, ...sheetArgs] = params.split('|');
|
|
830
753
|
|
|
831
754
|
switch (sheetCmd.toLowerCase()) {
|
|
832
755
|
case 'new': {
|
|
833
756
|
const sheetName = sheetArgs[0] || 'Untitled spreadsheet';
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
await computer.keyCombo(['meta', 'r']);
|
|
837
|
-
await sleep(500);
|
|
838
|
-
await computer.typeText('chrome');
|
|
839
|
-
await computer.pressKey('Return');
|
|
840
|
-
await sleep(2000);
|
|
841
|
-
|
|
842
|
-
await computer.keyCombo(['control', 'l']);
|
|
843
|
-
await sleep(300);
|
|
844
|
-
await computer.typeText('sheets.google.com');
|
|
845
|
-
await computer.pressKey('Return');
|
|
846
|
-
await sleep(3000);
|
|
847
|
-
|
|
848
|
-
// Click "Blank" to create new (or use keyboard)
|
|
849
|
-
// Usually there's a + or Blank option, let's try clicking near top
|
|
850
|
-
await computer.pressKey('Tab'); // Navigate
|
|
851
|
-
await computer.pressKey('Tab');
|
|
852
|
-
await computer.pressKey('Return'); // Create blank
|
|
853
|
-
await sleep(3000);
|
|
854
|
-
|
|
855
|
-
// Rename: click on title or use File > Rename
|
|
856
|
-
await computer.keyCombo(['alt', 'f']); // File menu
|
|
857
|
-
await sleep(500);
|
|
858
|
-
await computer.typeText('r'); // Rename option
|
|
859
|
-
await sleep(500);
|
|
860
|
-
await computer.keyCombo(['control', 'a']); // Select all
|
|
861
|
-
await computer.typeText(sheetName);
|
|
862
|
-
await computer.pressKey('Return');
|
|
863
|
-
await sleep(500);
|
|
864
|
-
await computer.pressKey('Escape'); // Close any dialog
|
|
865
|
-
|
|
757
|
+
await browser.navigateTo('https://docs.google.com/spreadsheets/create');
|
|
758
|
+
await sleep(5000);
|
|
866
759
|
step.result = `📊 Created Google Sheet: ${sheetName}`;
|
|
867
760
|
break;
|
|
868
761
|
}
|
|
869
762
|
case 'type': {
|
|
870
763
|
const cell = sheetArgs[0] || 'A1';
|
|
871
764
|
const cellValue = sheetArgs.slice(1).join('|');
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
await computer.typeText(cell);
|
|
877
|
-
await computer.pressKey('Return');
|
|
878
|
-
await sleep(300);
|
|
879
|
-
|
|
880
|
-
// Type the value
|
|
881
|
-
await computer.typeText(cellValue);
|
|
882
|
-
await computer.pressKey('Return'); // Confirm and move down
|
|
883
|
-
await sleep(200);
|
|
884
|
-
|
|
885
|
-
step.result = `📊 Typed "${cellValue}" in cell ${cell}`;
|
|
765
|
+
const success = await browser.googleSheetsType([{ cell, value: cellValue }]);
|
|
766
|
+
step.result = success
|
|
767
|
+
? `📊 Typed "${cellValue}" in cell ${cell}`
|
|
768
|
+
: `📊 Could not type in cell ${cell}`;
|
|
886
769
|
break;
|
|
887
770
|
}
|
|
888
771
|
case 'read': {
|
|
889
|
-
const
|
|
890
|
-
|
|
772
|
+
const screenshot = await browser.takeScreenshot();
|
|
773
|
+
const analysis = await chat([{
|
|
774
|
+
role: 'user',
|
|
775
|
+
content: 'Describe the contents of this Google Sheet. List visible data in the cells.'
|
|
776
|
+
}]);
|
|
777
|
+
step.result = `📊 Current sheet view:\n${analysis.content}`;
|
|
891
778
|
break;
|
|
892
779
|
}
|
|
893
780
|
default:
|
|
@@ -897,52 +784,25 @@ Format as a numbered list. Be concise.`
|
|
|
897
784
|
}
|
|
898
785
|
|
|
899
786
|
case 'google_docs': {
|
|
900
|
-
//
|
|
787
|
+
// Use Playwright for Google Docs
|
|
901
788
|
// Format: google_docs:command|arg1|arg2...
|
|
902
789
|
const [docCmd, ...docArgs] = params.split('|');
|
|
903
790
|
|
|
904
791
|
switch (docCmd.toLowerCase()) {
|
|
905
792
|
case 'new': {
|
|
906
793
|
const docName = docArgs[0] || 'Untitled document';
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
await computer.typeText('chrome');
|
|
912
|
-
await computer.pressKey('Return');
|
|
913
|
-
await sleep(2000);
|
|
914
|
-
|
|
915
|
-
await computer.keyCombo(['control', 'l']);
|
|
916
|
-
await sleep(300);
|
|
917
|
-
await computer.typeText('docs.google.com');
|
|
918
|
-
await computer.pressKey('Return');
|
|
919
|
-
await sleep(3000);
|
|
920
|
-
|
|
921
|
-
// Click "Blank" to create new
|
|
922
|
-
await computer.pressKey('Tab');
|
|
923
|
-
await computer.pressKey('Tab');
|
|
924
|
-
await computer.pressKey('Return');
|
|
925
|
-
await sleep(3000);
|
|
926
|
-
|
|
927
|
-
// Rename using File > Rename
|
|
928
|
-
await computer.keyCombo(['alt', 'f']); // File menu
|
|
929
|
-
await sleep(500);
|
|
930
|
-
await computer.typeText('r'); // Rename
|
|
931
|
-
await sleep(500);
|
|
932
|
-
await computer.keyCombo(['control', 'a']); // Select all
|
|
933
|
-
await computer.typeText(docName);
|
|
934
|
-
await computer.pressKey('Return');
|
|
935
|
-
await sleep(500);
|
|
936
|
-
await computer.pressKey('Escape'); // Close dialog, focus doc
|
|
937
|
-
|
|
938
|
-
step.result = `📄 Created Google Doc: ${docName}`;
|
|
794
|
+
const success = await browser.googleDocsType('');
|
|
795
|
+
step.result = success
|
|
796
|
+
? `📄 Created Google Doc: ${docName}`
|
|
797
|
+
: `📄 Could not create Google Doc`;
|
|
939
798
|
break;
|
|
940
799
|
}
|
|
941
800
|
case 'type': {
|
|
942
801
|
const docText = docArgs.join('|');
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
802
|
+
const success = await browser.googleDocsType(docText);
|
|
803
|
+
step.result = success
|
|
804
|
+
? `📄 Typed content in Google Doc`
|
|
805
|
+
: `📄 Could not type in Google Doc`;
|
|
946
806
|
break;
|
|
947
807
|
}
|
|
948
808
|
default:
|
|
@@ -952,83 +812,23 @@ Format as a numbered list. Be concise.`
|
|
|
952
812
|
}
|
|
953
813
|
|
|
954
814
|
case 'research': {
|
|
955
|
-
//
|
|
815
|
+
// Use Playwright for multi-step research
|
|
956
816
|
const researchQuery = params;
|
|
957
|
-
const researchResults: string[] = [];
|
|
958
817
|
|
|
959
|
-
//
|
|
960
|
-
await
|
|
961
|
-
await sleep(500);
|
|
962
|
-
await computer.typeText('chrome');
|
|
963
|
-
await computer.pressKey('Return');
|
|
964
|
-
await sleep(2000);
|
|
965
|
-
|
|
966
|
-
await computer.keyCombo(['control', 'l']); // Focus address bar
|
|
967
|
-
await sleep(300);
|
|
968
|
-
await computer.typeText('google.com');
|
|
969
|
-
await computer.pressKey('Return');
|
|
970
|
-
await sleep(2000);
|
|
818
|
+
// Use browser.research which handles search, clicking, gathering
|
|
819
|
+
const researchData = await browser.research(researchQuery, 3);
|
|
971
820
|
|
|
972
|
-
//
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
821
|
+
// Format sources
|
|
822
|
+
const sourceSummaries = researchData.sources.map((s, i) =>
|
|
823
|
+
`Source ${i + 1}: ${s.title}\n${s.content.slice(0, 500)}...`
|
|
824
|
+
).join('\n\n');
|
|
976
825
|
|
|
977
|
-
//
|
|
978
|
-
let searchScreen = await describeScreen();
|
|
979
|
-
const initialResults = await chat([{
|
|
980
|
-
role: 'user',
|
|
981
|
-
content: `Extract the key information from these Google search results about: "${researchQuery}"
|
|
982
|
-
Include any relevant facts, numbers, dates, or key points visible. Be thorough but concise.`
|
|
983
|
-
}]);
|
|
984
|
-
researchResults.push(`Search Results:\n${initialResults.content}`);
|
|
985
|
-
|
|
986
|
-
// Step 2: Click on first result (Tab to navigate, Enter to click)
|
|
987
|
-
await computer.pressKey('Tab');
|
|
988
|
-
await sleep(200);
|
|
989
|
-
await computer.pressKey('Tab');
|
|
990
|
-
await sleep(200);
|
|
991
|
-
await computer.pressKey('Return'); // Click first result
|
|
992
|
-
await sleep(4000); // Wait for page load
|
|
993
|
-
|
|
994
|
-
// Extract content from the page
|
|
995
|
-
searchScreen = await describeScreen();
|
|
996
|
-
const pageContent = await chat([{
|
|
997
|
-
role: 'user',
|
|
998
|
-
content: `Extract the main content and key information from this webpage about: "${researchQuery}"
|
|
999
|
-
Ignore ads, navigation, footers. Focus on the actual article/content.`
|
|
1000
|
-
}]);
|
|
1001
|
-
researchResults.push(`\nSource 1 Content:\n${pageContent.content}`);
|
|
1002
|
-
|
|
1003
|
-
// Step 3: Go back (Alt+Left) and check another source
|
|
1004
|
-
await computer.keyCombo(['alt', 'Left']); // Browser back
|
|
1005
|
-
await sleep(2000);
|
|
1006
|
-
|
|
1007
|
-
// Scroll down a bit to see more results
|
|
1008
|
-
await computer.scrollMouse(-3);
|
|
1009
|
-
await sleep(500);
|
|
1010
|
-
|
|
1011
|
-
// Navigate to second result
|
|
1012
|
-
await computer.pressKey('Tab');
|
|
1013
|
-
await computer.pressKey('Tab');
|
|
1014
|
-
await computer.pressKey('Tab');
|
|
1015
|
-
await computer.pressKey('Return');
|
|
1016
|
-
await sleep(4000);
|
|
1017
|
-
|
|
1018
|
-
searchScreen = await describeScreen();
|
|
1019
|
-
const pageContent2 = await chat([{
|
|
1020
|
-
role: 'user',
|
|
1021
|
-
content: `Extract additional information from this webpage about: "${researchQuery}"
|
|
1022
|
-
Look for details not covered in the previous source.`
|
|
1023
|
-
}]);
|
|
1024
|
-
researchResults.push(`\nSource 2 Content:\n${pageContent2.content}`);
|
|
1025
|
-
|
|
1026
|
-
// Step 4: Synthesize all gathered information
|
|
826
|
+
// Ask AI to synthesize
|
|
1027
827
|
const synthesis = await chat([{
|
|
1028
828
|
role: 'user',
|
|
1029
829
|
content: `Based on the following research gathered about "${researchQuery}", provide a comprehensive summary:
|
|
1030
830
|
|
|
1031
|
-
${
|
|
831
|
+
${sourceSummaries}
|
|
1032
832
|
|
|
1033
833
|
Create a well-organized summary with:
|
|
1034
834
|
1. Key findings
|
|
@@ -1043,6 +843,190 @@ Be thorough but concise.`
|
|
|
1043
843
|
break;
|
|
1044
844
|
}
|
|
1045
845
|
|
|
846
|
+
case 'ask_llm': {
|
|
847
|
+
// Use Playwright to ask another LLM for help with a screenshot
|
|
848
|
+
// Format: ask_llm:llm_name|question
|
|
849
|
+
const [llmName, ...questionParts] = params.split('|');
|
|
850
|
+
const question = questionParts.join('|');
|
|
851
|
+
|
|
852
|
+
// Take screenshot first to describe current context
|
|
853
|
+
const currentScreen = await describeScreen();
|
|
854
|
+
|
|
855
|
+
// Compose the question with screen context
|
|
856
|
+
const fullQuestion = `I'm looking at my screen and I need help. ${question}\n\nHere's what I see on my screen: ${currentScreen.description}`;
|
|
857
|
+
|
|
858
|
+
// Supported LLMs
|
|
859
|
+
const supportedLLMs = ['perplexity', 'chatgpt', 'claude', 'copilot'];
|
|
860
|
+
const llmLower = llmName.toLowerCase();
|
|
861
|
+
|
|
862
|
+
if (!supportedLLMs.includes(llmLower)) {
|
|
863
|
+
throw new Error(`Unknown LLM: ${llmName}. Supported: ${supportedLLMs.join(', ')}`);
|
|
864
|
+
}
|
|
865
|
+
|
|
866
|
+
// Use Playwright's AI chat helper
|
|
867
|
+
const result = await browser.askAI(llmLower as any, fullQuestion, false);
|
|
868
|
+
|
|
869
|
+
// Get full response if needed
|
|
870
|
+
const fullParts = await browser.getFullAIResponse(llmLower as any, 3);
|
|
871
|
+
const finalResponse = fullParts.length > 0 ? fullParts.join('\n\n') : result.response;
|
|
872
|
+
|
|
873
|
+
step.result = `🤖 ${llmName} says:\n\n${finalResponse}`;
|
|
874
|
+
break;
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
case 'learn_ui': {
|
|
878
|
+
// Take screenshot and analyze the UI to learn how to interact
|
|
879
|
+
const uiScreen = await describeScreen();
|
|
880
|
+
|
|
881
|
+
const uiAnalysis = await chat([{
|
|
882
|
+
role: 'user',
|
|
883
|
+
content: `Analyze this screenshot and identify all interactive UI elements. List:
|
|
884
|
+
1. All clickable buttons and their likely functions
|
|
885
|
+
2. Text input fields
|
|
886
|
+
3. Menus and dropdowns
|
|
887
|
+
4. Links
|
|
888
|
+
5. Any keyboard shortcuts visible
|
|
889
|
+
6. The main actions available in this interface
|
|
890
|
+
|
|
891
|
+
Question: ${params}
|
|
892
|
+
|
|
893
|
+
Be specific about locations (top-left, center, etc.) and what each element does.`
|
|
894
|
+
}]);
|
|
895
|
+
|
|
896
|
+
step.result = `🔍 UI Analysis:\n\n${uiAnalysis.content}`;
|
|
897
|
+
break;
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
case 'adaptive_do': {
|
|
901
|
+
// Adaptive agent using Playwright: try to accomplish something, ask LLMs if stuck
|
|
902
|
+
const goal = params;
|
|
903
|
+
const maxAttempts = 5;
|
|
904
|
+
const actionHistory: string[] = [];
|
|
905
|
+
let accomplished = false;
|
|
906
|
+
|
|
907
|
+
// Initialize browser
|
|
908
|
+
const page = await browser.getPage();
|
|
909
|
+
|
|
910
|
+
for (let attempt = 0; attempt < maxAttempts && !accomplished; attempt++) {
|
|
911
|
+
// Take screenshot and analyze current state
|
|
912
|
+
const screenshot = await browser.takeScreenshot();
|
|
913
|
+
const currentState = await chat([{
|
|
914
|
+
role: 'user',
|
|
915
|
+
content: `Describe what you see on this screen. What app/website is it? What elements are visible?`
|
|
916
|
+
}]);
|
|
917
|
+
|
|
918
|
+
// Ask our AI what to do next
|
|
919
|
+
const nextAction = await chat([{
|
|
920
|
+
role: 'user',
|
|
921
|
+
content: `GOAL: ${goal}
|
|
922
|
+
|
|
923
|
+
CURRENT SCREEN: ${currentState.content}
|
|
924
|
+
|
|
925
|
+
PREVIOUS ACTIONS TAKEN:
|
|
926
|
+
${actionHistory.length > 0 ? actionHistory.join('\n') : 'None yet'}
|
|
927
|
+
|
|
928
|
+
Based on what you see, what's the SINGLE next action to take?
|
|
929
|
+
Options:
|
|
930
|
+
- click: Click element (describe CSS selector or visible text)
|
|
931
|
+
- type: Type something (specify selector and text)
|
|
932
|
+
- press: Press a key (specify key)
|
|
933
|
+
- scroll: Scroll up/down
|
|
934
|
+
- navigate: Go to URL
|
|
935
|
+
- done: Goal is accomplished
|
|
936
|
+
- stuck: Can't figure out what to do
|
|
937
|
+
|
|
938
|
+
Respond in format:
|
|
939
|
+
ACTION: <action_type>
|
|
940
|
+
SELECTOR: <css selector or text to find>
|
|
941
|
+
VALUE: <text to type or URL>
|
|
942
|
+
REASONING: <why>`
|
|
943
|
+
}]);
|
|
944
|
+
|
|
945
|
+
const actionContent = nextAction.content;
|
|
946
|
+
|
|
947
|
+
// Parse the action
|
|
948
|
+
const actionMatch = actionContent.match(/ACTION:\s*(\w+)/i);
|
|
949
|
+
const selectorMatch = actionContent.match(/SELECTOR:\s*(.+?)(?:\n|$)/i);
|
|
950
|
+
const valueMatch = actionContent.match(/VALUE:\s*(.+?)(?:\n|$)/i);
|
|
951
|
+
|
|
952
|
+
if (!actionMatch) {
|
|
953
|
+
actionHistory.push(`Attempt ${attempt + 1}: Couldn't parse action`);
|
|
954
|
+
continue;
|
|
955
|
+
}
|
|
956
|
+
|
|
957
|
+
const action = actionMatch[1].toLowerCase();
|
|
958
|
+
const selector = selectorMatch?.[1]?.trim() || '';
|
|
959
|
+
const value = valueMatch?.[1]?.trim() || '';
|
|
960
|
+
|
|
961
|
+
if (action === 'done') {
|
|
962
|
+
accomplished = true;
|
|
963
|
+
actionHistory.push(`Attempt ${attempt + 1}: Goal accomplished!`);
|
|
964
|
+
break;
|
|
965
|
+
}
|
|
966
|
+
|
|
967
|
+
if (action === 'stuck') {
|
|
968
|
+
// Ask Perplexity for help using Playwright
|
|
969
|
+
actionHistory.push(`Attempt ${attempt + 1}: Got stuck, asking Perplexity for help...`);
|
|
970
|
+
|
|
971
|
+
const helpRequest = `I'm trying to: ${goal}\n\nI'm stuck. What should I do next? Be specific about what to click or type.`;
|
|
972
|
+
const advice = await browser.askAI('perplexity', helpRequest, false);
|
|
973
|
+
actionHistory.push(`Got advice: ${advice.response.slice(0, 200)}...`);
|
|
974
|
+
|
|
975
|
+
// Navigate back to continue
|
|
976
|
+
await browser.navigateTo(page.url());
|
|
977
|
+
continue;
|
|
978
|
+
}
|
|
979
|
+
|
|
980
|
+
// Execute the action using Playwright
|
|
981
|
+
try {
|
|
982
|
+
switch (action) {
|
|
983
|
+
case 'click':
|
|
984
|
+
// Try to click by selector or text
|
|
985
|
+
if (selector) {
|
|
986
|
+
const clicked = await browser.clickElement(selector);
|
|
987
|
+
if (!clicked) {
|
|
988
|
+
// Try by text
|
|
989
|
+
await page.getByText(selector).first().click({ timeout: 5000 });
|
|
990
|
+
}
|
|
991
|
+
}
|
|
992
|
+
actionHistory.push(`Attempt ${attempt + 1}: Clicked "${selector}"`);
|
|
993
|
+
break;
|
|
994
|
+
case 'type':
|
|
995
|
+
if (selector && value) {
|
|
996
|
+
const typed = await browser.typeInElement(selector, value);
|
|
997
|
+
if (!typed) {
|
|
998
|
+
await page.getByPlaceholder(selector).first().fill(value);
|
|
999
|
+
}
|
|
1000
|
+
}
|
|
1001
|
+
actionHistory.push(`Attempt ${attempt + 1}: Typed "${value}" in "${selector}"`);
|
|
1002
|
+
break;
|
|
1003
|
+
case 'press':
|
|
1004
|
+
await browser.pressKey(value || selector);
|
|
1005
|
+
actionHistory.push(`Attempt ${attempt + 1}: Pressed ${value || selector}`);
|
|
1006
|
+
break;
|
|
1007
|
+
case 'scroll':
|
|
1008
|
+
await browser.scroll(value.toLowerCase().includes('up') ? 'up' : 'down');
|
|
1009
|
+
actionHistory.push(`Attempt ${attempt + 1}: Scrolled ${value || 'down'}`);
|
|
1010
|
+
break;
|
|
1011
|
+
case 'navigate':
|
|
1012
|
+
const url = value.startsWith('http') ? value : `https://${value}`;
|
|
1013
|
+
await browser.navigateTo(url);
|
|
1014
|
+
actionHistory.push(`Attempt ${attempt + 1}: Navigated to ${url}`);
|
|
1015
|
+
break;
|
|
1016
|
+
default:
|
|
1017
|
+
actionHistory.push(`Attempt ${attempt + 1}: Unknown action ${action}`);
|
|
1018
|
+
}
|
|
1019
|
+
} catch (e) {
|
|
1020
|
+
actionHistory.push(`Attempt ${attempt + 1}: Action failed - ${e}`);
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1023
|
+
await sleep(2000); // Wait for UI to update
|
|
1024
|
+
}
|
|
1025
|
+
|
|
1026
|
+
step.result = `🎯 Adaptive Agent Result:\n\nGoal: ${goal}\nAccomplished: ${accomplished ? 'Yes ✅' : 'Partial/No ❌'}\n\nAction Log:\n${actionHistory.join('\n')}`;
|
|
1027
|
+
break;
|
|
1028
|
+
}
|
|
1029
|
+
|
|
1046
1030
|
case 'chat':
|
|
1047
1031
|
// This is a fallback - just describe what user wants
|
|
1048
1032
|
step.result = `Task noted: ${params}`;
|