@projectservan8n/cnapse 0.6.3 → 0.8.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/src/lib/tasks.ts CHANGED
@@ -4,11 +4,12 @@
4
4
  * Uses chain-of-thought prompting + learning from past tasks
5
5
  */
6
6
 
7
- import { chat, Message } from './api.js';
7
+ import { chat, chatWithVision, Message } from './api.js';
8
8
  import * as computer from '../tools/computer.js';
9
- import { describeScreen } from './vision.js';
9
+ import { describeScreen, captureScreenshot } from './vision.js';
10
10
  import * as filesystem from '../tools/filesystem.js';
11
11
  import { runCommand } from '../tools/shell.js';
12
+ import * as browser from '../services/browser.js';
12
13
  import * as fs from 'fs';
13
14
  import * as path from 'path';
14
15
  import * as os from 'os';
@@ -211,7 +212,27 @@ Before outputting steps, THINK through these questions:
211
212
  ### Web Browsing
212
213
  - open_url: Open URL in default browser (e.g., "open_url:https://perplexity.ai")
213
214
  - browse_and_ask: Open AI website, type question, wait for response (e.g., "browse_and_ask:perplexity|What is the capital of France?")
214
- - browse_and_ask: Supports: perplexity, chatgpt, claude, google
215
+ - browse_and_ask: Supports: perplexity, chatgpt, claude, google, copilot, bard
216
+ - web_search: Search Google and extract results (e.g., "web_search:best restaurants in NYC")
217
+
218
+ ### Email
219
+ - send_email: Send email via Gmail or Outlook web (e.g., "send_email:gmail|to@email.com|Subject|Body text here")
220
+ - send_email: Supports: gmail, outlook
221
+
222
+ ### Google Apps (via browser)
223
+ - google_sheets: Interact with Google Sheets (e.g., "google_sheets:new|My Spreadsheet" or "google_sheets:type|A1|Hello World")
224
+ - google_sheets: Commands: new (create), open (open existing), type (type in cell), read (screenshot current view)
225
+ - google_docs: Interact with Google Docs (e.g., "google_docs:new|My Document" or "google_docs:type|Hello World")
226
+ - google_docs: Commands: new (create), open (open existing), type (type text)
227
+
228
+ ### Research
229
+ - research: Multi-step web research - searches, gathers info, summarizes (e.g., "research:What are the latest AI trends in 2024?")
230
+
231
+ ### Adaptive/Learning
232
+ - ask_llm: Ask another LLM for help with a screenshot (e.g., "ask_llm:perplexity|How do I do X in this app?")
233
+ - ask_llm: Supports: perplexity, chatgpt, claude, copilot - sends screenshot + question, gets answer
234
+ - adaptive_do: Try to accomplish something, if stuck ask LLMs for help (e.g., "adaptive_do:book a flight to NYC on kayak.com")
235
+ - learn_ui: Take screenshot and learn how to interact with current UI (e.g., "learn_ui:What buttons can I click here?")
215
236
 
216
237
  ### Utility
217
238
  - wait: Wait N seconds (e.g., "wait:2" - use 1-3s for app loads)
@@ -302,14 +323,95 @@ Output:
302
323
  ### Example 7: "search google for weather today"
303
324
  Thinking:
304
325
  - Goal: Open Google and search for something
305
- - How: Use browse_and_ask with google target
306
- - Sequence: Open Google, search, capture results
326
+ - How: Use web_search for quick results extraction
327
+ - Sequence: Search and get results
307
328
 
308
329
  Output:
309
330
  [
310
- { "description": "Search Google", "action": "browse_and_ask:google|weather today" },
311
- { "description": "Wait for results", "action": "wait:2" },
312
- { "description": "Capture search results", "action": "screenshot" }
331
+ { "description": "Search Google for weather", "action": "web_search:weather today" }
332
+ ]
333
+
334
+ ### Example 8: "send an email to john@example.com about the meeting tomorrow"
335
+ Thinking:
336
+ - Goal: Compose and send an email via Gmail
337
+ - How: Use send_email with gmail, recipient, subject, body
338
+ - Sequence: Open Gmail, compose, fill fields, send
339
+
340
+ Output:
341
+ [
342
+ { "description": "Send email via Gmail", "action": "send_email:gmail|john@example.com|Meeting Tomorrow|Hi John, this is a reminder about our meeting tomorrow. Please let me know if you have any questions." }
343
+ ]
344
+
345
+ ### Example 9: "create a new google sheet called Sales Report and add headers"
346
+ Thinking:
347
+ - Goal: Create a new Google Sheet and add content
348
+ - How: Use google_sheets to create new, then type in cells
349
+ - Sequence: Create sheet -> Navigate to cells -> Type headers
350
+
351
+ Output:
352
+ [
353
+ { "description": "Create new Google Sheet", "action": "google_sheets:new|Sales Report" },
354
+ { "description": "Wait for sheet to load", "action": "wait:3" },
355
+ { "description": "Type header in A1", "action": "google_sheets:type|A1|Product" },
356
+ { "description": "Type header in B1", "action": "google_sheets:type|B1|Quantity" },
357
+ { "description": "Type header in C1", "action": "google_sheets:type|C1|Price" }
358
+ ]
359
+
360
+ ### Example 10: "research the latest news about AI regulations"
361
+ Thinking:
362
+ - Goal: Do multi-step research on a topic
363
+ - How: Use research action which handles searching, gathering, summarizing
364
+ - Sequence: Single research action does it all
365
+
366
+ Output:
367
+ [
368
+ { "description": "Research AI regulations news", "action": "research:latest news about AI regulations 2024" }
369
+ ]
370
+
371
+ ### Example 11: "write a document in google docs about project status"
372
+ Thinking:
373
+ - Goal: Create a Google Doc and write content
374
+ - How: Use google_docs to create and type
375
+ - Sequence: Create doc -> Type content
376
+
377
+ Output:
378
+ [
379
+ { "description": "Create new Google Doc", "action": "google_docs:new|Project Status Report" },
380
+ { "description": "Wait for doc to load", "action": "wait:3" },
381
+ { "description": "Type the content", "action": "google_docs:type|Project Status Report\n\nDate: Today\n\nSummary:\nThe project is on track. All milestones have been met.\n\nNext Steps:\n- Complete testing\n- Deploy to production" }
382
+ ]
383
+
384
+ ### Example 12: "I don't know how to use this app, can you figure it out?"
385
+ Thinking:
386
+ - Goal: Learn the current UI and understand how to use it
387
+ - How: Use learn_ui to take screenshot and analyze
388
+ - Sequence: Screenshot -> AI analysis -> report back
389
+
390
+ Output:
391
+ [
392
+ { "description": "Analyze current UI", "action": "learn_ui:What are all the buttons, menus, and interactive elements I can use?" }
393
+ ]
394
+
395
+ ### Example 13: "book a hotel on booking.com for next weekend"
396
+ Thinking:
397
+ - Goal: Complex task on unfamiliar website - need adaptive approach
398
+ - How: Use adaptive_do which will try, and if stuck ask LLMs for help
399
+ - Sequence: Single adaptive action handles the complexity
400
+
401
+ Output:
402
+ [
403
+ { "description": "Adaptively book hotel", "action": "adaptive_do:Go to booking.com and book a hotel for next weekend" }
404
+ ]
405
+
406
+ ### Example 14: "I'm stuck, ask Claude how to proceed"
407
+ Thinking:
408
+ - Goal: Get help from another LLM with current screen context
409
+ - How: Use ask_llm with claude and send screenshot
410
+ - Sequence: Screenshot + question -> Get answer
411
+
412
+ Output:
413
+ [
414
+ { "description": "Ask Claude for help with screenshot", "action": "ask_llm:claude|I'm stuck on this screen. What should I do next to accomplish my task?" }
313
415
  ]
314
416
 
315
417
  ## YOUR TASK
@@ -554,102 +656,377 @@ ${existingResult.output}`;
554
656
 
555
657
  case 'browse_and_ask': {
556
658
  // Format: browse_and_ask:site|question
659
+ // Using Playwright for reliable browser automation
557
660
  const [site, ...questionParts] = params.split('|');
558
661
  const question = questionParts.join('|');
559
662
 
560
- // Site-specific URLs and response wait times
561
- const sites: Record<string, { url: string; loadTime: number; responseTime: number }> = {
562
- perplexity: { url: 'https://www.perplexity.ai', loadTime: 3, responseTime: 10 },
563
- chatgpt: { url: 'https://chat.openai.com', loadTime: 4, responseTime: 15 },
564
- claude: { url: 'https://claude.ai', loadTime: 4, responseTime: 15 },
565
- google: { url: 'https://www.google.com', loadTime: 2, responseTime: 3 },
566
- bing: { url: 'https://www.bing.com', loadTime: 2, responseTime: 3 },
567
- bard: { url: 'https://bard.google.com', loadTime: 3, responseTime: 12 },
568
- copilot: { url: 'https://copilot.microsoft.com', loadTime: 3, responseTime: 12 },
569
- };
663
+ // Check if site is a supported AI chat
664
+ const supportedSites = ['perplexity', 'chatgpt', 'claude', 'copilot', 'google'];
665
+ const siteLower = site.toLowerCase();
570
666
 
571
- const siteConfig = sites[site.toLowerCase()] || { url: `https://${site}`, loadTime: 3, responseTime: 10 };
667
+ if (supportedSites.includes(siteLower)) {
668
+ // Use Playwright's AI chat helper
669
+ const result = await browser.askAI(siteLower as any, question, true);
572
670
 
573
- // Open the site
574
- if (process.platform === 'win32') {
575
- await runCommand(`start "" "${siteConfig.url}"`, 5000);
576
- } else if (process.platform === 'darwin') {
577
- await runCommand(`open "${siteConfig.url}"`, 5000);
671
+ // If response seems short, try getting full response by scrolling
672
+ if (result.response.length < 500) {
673
+ const fullParts = await browser.getFullAIResponse(siteLower as any, 5);
674
+ if (fullParts.length > 0) {
675
+ step.result = `📝 ${site.charAt(0).toUpperCase() + site.slice(1)} says:\n\n${fullParts.join('\n\n')}`;
676
+ break;
677
+ }
678
+ }
679
+
680
+ step.result = `📝 ${site.charAt(0).toUpperCase() + site.slice(1)} says:\n\n${result.response}`;
578
681
  } else {
579
- await runCommand(`xdg-open "${siteConfig.url}"`, 5000);
682
+ // Generic site - open and type
683
+ await browser.navigateTo(`https://${site}`);
684
+ await sleep(2000);
685
+
686
+ // Try to find and fill any input
687
+ const page = await browser.getPage();
688
+ const inputs = ['textarea', 'input[type="text"]', 'input[type="search"]', '[contenteditable="true"]'];
689
+
690
+ for (const selector of inputs) {
691
+ if (await browser.elementExists(selector)) {
692
+ await browser.typeInElement(selector, question);
693
+ await browser.pressKey('Enter');
694
+ break;
695
+ }
696
+ }
697
+
698
+ await sleep(5000);
699
+ const pageText = await browser.getPageText();
700
+ step.result = `📝 Response from ${site}:\n\n${pageText.slice(0, 3000)}`;
580
701
  }
702
+ break;
703
+ }
581
704
 
582
- // Wait for page to load
583
- await sleep(siteConfig.loadTime * 1000);
705
+ case 'screenshot':
706
+ const vision = await describeScreen();
707
+ step.result = vision.description;
708
+ break;
584
709
 
585
- // Type the question (most sites have autofocus on search/input)
586
- await computer.typeText(question);
587
- await sleep(300);
710
+ case 'web_search': {
711
+ // Use Playwright for reliable web search
712
+ const searchResults = await browser.webSearch(params, 'google');
588
713
 
589
- // Press Enter to submit
590
- await computer.pressKey('Return');
714
+ if (searchResults.length > 0) {
715
+ step.result = `🔍 Search results for "${params}":\n\n${searchResults.map((r, i) => `${i + 1}. ${r}`).join('\n')}`;
716
+ } else {
717
+ // Fallback: get page text
718
+ const pageText = await browser.getPageText();
719
+ step.result = `🔍 Search results for "${params}":\n\n${pageText.slice(0, 2000)}`;
720
+ }
721
+ break;
722
+ }
723
+
724
+ case 'send_email': {
725
+ // Use Playwright for reliable email sending
726
+ // Format: send_email:provider|to|subject|body
727
+ const [provider, to, subject, ...bodyParts] = params.split('|');
728
+ const body = bodyParts.join('|');
729
+
730
+ const emailData = { to, subject, body };
731
+
732
+ let success = false;
733
+ if (provider.toLowerCase() === 'gmail') {
734
+ success = await browser.sendGmail(emailData);
735
+ } else if (provider.toLowerCase() === 'outlook') {
736
+ success = await browser.sendOutlook(emailData);
737
+ } else {
738
+ throw new Error(`Unsupported email provider: ${provider}. Use gmail or outlook.`);
739
+ }
740
+
741
+ if (success) {
742
+ step.result = `📧 Email sent via ${provider} to ${to}`;
743
+ } else {
744
+ throw new Error(`Failed to send email via ${provider}. Make sure you're logged in.`);
745
+ }
746
+ break;
747
+ }
748
+
749
+ case 'google_sheets': {
750
+ // Use Playwright for Google Sheets
751
+ // Format: google_sheets:command|arg1|arg2...
752
+ const [sheetCmd, ...sheetArgs] = params.split('|');
753
+
754
+ switch (sheetCmd.toLowerCase()) {
755
+ case 'new': {
756
+ const sheetName = sheetArgs[0] || 'Untitled spreadsheet';
757
+ await browser.navigateTo('https://docs.google.com/spreadsheets/create');
758
+ await sleep(5000);
759
+ step.result = `📊 Created Google Sheet: ${sheetName}`;
760
+ break;
761
+ }
762
+ case 'type': {
763
+ const cell = sheetArgs[0] || 'A1';
764
+ const cellValue = sheetArgs.slice(1).join('|');
765
+ const success = await browser.googleSheetsType([{ cell, value: cellValue }]);
766
+ step.result = success
767
+ ? `📊 Typed "${cellValue}" in cell ${cell}`
768
+ : `📊 Could not type in cell ${cell}`;
769
+ break;
770
+ }
771
+ case 'read': {
772
+ const screenshot = await browser.takeScreenshot();
773
+ const analysis = await chat([{
774
+ role: 'user',
775
+ content: 'Describe the contents of this Google Sheet. List visible data in the cells.'
776
+ }]);
777
+ step.result = `📊 Current sheet view:\n${analysis.content}`;
778
+ break;
779
+ }
780
+ default:
781
+ throw new Error(`Unknown google_sheets command: ${sheetCmd}`);
782
+ }
783
+ break;
784
+ }
785
+
786
+ case 'google_docs': {
787
+ // Use Playwright for Google Docs
788
+ // Format: google_docs:command|arg1|arg2...
789
+ const [docCmd, ...docArgs] = params.split('|');
790
+
791
+ switch (docCmd.toLowerCase()) {
792
+ case 'new': {
793
+ const docName = docArgs[0] || 'Untitled document';
794
+ const success = await browser.googleDocsType('');
795
+ step.result = success
796
+ ? `📄 Created Google Doc: ${docName}`
797
+ : `📄 Could not create Google Doc`;
798
+ break;
799
+ }
800
+ case 'type': {
801
+ const docText = docArgs.join('|');
802
+ const success = await browser.googleDocsType(docText);
803
+ step.result = success
804
+ ? `📄 Typed content in Google Doc`
805
+ : `📄 Could not type in Google Doc`;
806
+ break;
807
+ }
808
+ default:
809
+ throw new Error(`Unknown google_docs command: ${docCmd}`);
810
+ }
811
+ break;
812
+ }
591
813
 
592
- // Wait for AI to generate response
593
- await sleep(siteConfig.responseTime * 1000);
814
+ case 'research': {
815
+ // Use Playwright for multi-step research
816
+ const researchQuery = params;
594
817
 
595
- // Capture multiple screenshots by scrolling to get full response
596
- const extractedParts: string[] = [];
597
- const maxScrolls = 5; // Maximum number of scroll captures
818
+ // Use browser.research which handles search, clicking, gathering
819
+ const researchData = await browser.research(researchQuery, 3);
598
820
 
599
- for (let scrollIndex = 0; scrollIndex < maxScrolls; scrollIndex++) {
600
- // Capture current view
601
- const screenResult = await describeScreen();
821
+ // Format sources
822
+ const sourceSummaries = researchData.sources.map((s, i) =>
823
+ `Source ${i + 1}: ${s.title}\n${s.content.slice(0, 500)}...`
824
+ ).join('\n\n');
602
825
 
603
- // Ask AI to extract just the response text from what it sees
604
- const extractPrompt = `You are looking at screenshot ${scrollIndex + 1} of ${site}. The user asked: "${question}"
826
+ // Ask AI to synthesize
827
+ const synthesis = await chat([{
828
+ role: 'user',
829
+ content: `Based on the following research gathered about "${researchQuery}", provide a comprehensive summary:
605
830
 
606
- Extract ONLY the AI's response/answer text visible on screen. Do NOT include:
607
- - The user's question
608
- - Any UI elements, buttons, navigation, or headers
609
- - Any disclaimers, suggestions, or "related questions"
610
- - Any "Sources" or citation links
611
- - Any text you already extracted (avoid duplicates)
831
+ ${sourceSummaries}
612
832
 
613
- ${scrollIndex > 0 ? `Previous parts already extracted:\n${extractedParts.join('\n---\n')}\n\nOnly extract NEW text that continues from where we left off.` : ''}
833
+ Create a well-organized summary with:
834
+ 1. Key findings
835
+ 2. Important details
836
+ 3. Any notable facts or statistics
837
+ 4. Conclusion
614
838
 
615
- Just give me the actual answer text, word for word as it appears. If there's no more response text visible, respond with exactly: "END_OF_RESPONSE"`;
839
+ Be thorough but concise.`
840
+ }]);
616
841
 
617
- const extractResponse = await chat([{ role: 'user', content: extractPrompt }]);
618
- const extracted = extractResponse.content.trim();
842
+ step.result = `🔬 Research Summary: ${researchQuery}\n\n${synthesis.content}`;
843
+ break;
844
+ }
619
845
 
620
- // Check if we've reached the end
621
- if (extracted === 'END_OF_RESPONSE' || extracted.includes('END_OF_RESPONSE')) {
622
- break;
846
+ case 'ask_llm': {
847
+ // Use Playwright to ask another LLM for help with a screenshot
848
+ // Format: ask_llm:llm_name|question
849
+ const [llmName, ...questionParts] = params.split('|');
850
+ const question = questionParts.join('|');
851
+
852
+ // Take screenshot first to describe current context
853
+ const currentScreen = await describeScreen();
854
+
855
+ // Compose the question with screen context
856
+ const fullQuestion = `I'm looking at my screen and I need help. ${question}\n\nHere's what I see on my screen: ${currentScreen.description}`;
857
+
858
+ // Supported LLMs
859
+ const supportedLLMs = ['perplexity', 'chatgpt', 'claude', 'copilot'];
860
+ const llmLower = llmName.toLowerCase();
861
+
862
+ if (!supportedLLMs.includes(llmLower)) {
863
+ throw new Error(`Unknown LLM: ${llmName}. Supported: ${supportedLLMs.join(', ')}`);
864
+ }
865
+
866
+ // Use Playwright's AI chat helper
867
+ const result = await browser.askAI(llmLower as any, fullQuestion, false);
868
+
869
+ // Get full response if needed
870
+ const fullParts = await browser.getFullAIResponse(llmLower as any, 3);
871
+ const finalResponse = fullParts.length > 0 ? fullParts.join('\n\n') : result.response;
872
+
873
+ step.result = `🤖 ${llmName} says:\n\n${finalResponse}`;
874
+ break;
875
+ }
876
+
877
+ case 'learn_ui': {
878
+ // Take screenshot and analyze the UI to learn how to interact
879
+ const uiScreen = await describeScreen();
880
+
881
+ const uiAnalysis = await chat([{
882
+ role: 'user',
883
+ content: `Analyze this screenshot and identify all interactive UI elements. List:
884
+ 1. All clickable buttons and their likely functions
885
+ 2. Text input fields
886
+ 3. Menus and dropdowns
887
+ 4. Links
888
+ 5. Any keyboard shortcuts visible
889
+ 6. The main actions available in this interface
890
+
891
+ Question: ${params}
892
+
893
+ Be specific about locations (top-left, center, etc.) and what each element does.`
894
+ }]);
895
+
896
+ step.result = `🔍 UI Analysis:\n\n${uiAnalysis.content}`;
897
+ break;
898
+ }
899
+
900
+ case 'adaptive_do': {
901
+ // Adaptive agent using Playwright: try to accomplish something, ask LLMs if stuck
902
+ const goal = params;
903
+ const maxAttempts = 5;
904
+ const actionHistory: string[] = [];
905
+ let accomplished = false;
906
+
907
+ // Initialize browser
908
+ const page = await browser.getPage();
909
+
910
+ for (let attempt = 0; attempt < maxAttempts && !accomplished; attempt++) {
911
+ // Take screenshot and analyze current state
912
+ const screenshot = await browser.takeScreenshot();
913
+ const currentState = await chat([{
914
+ role: 'user',
915
+ content: `Describe what you see on this screen. What app/website is it? What elements are visible?`
916
+ }]);
917
+
918
+ // Ask our AI what to do next
919
+ const nextAction = await chat([{
920
+ role: 'user',
921
+ content: `GOAL: ${goal}
922
+
923
+ CURRENT SCREEN: ${currentState.content}
924
+
925
+ PREVIOUS ACTIONS TAKEN:
926
+ ${actionHistory.length > 0 ? actionHistory.join('\n') : 'None yet'}
927
+
928
+ Based on what you see, what's the SINGLE next action to take?
929
+ Options:
930
+ - click: Click element (describe CSS selector or visible text)
931
+ - type: Type something (specify selector and text)
932
+ - press: Press a key (specify key)
933
+ - scroll: Scroll up/down
934
+ - navigate: Go to URL
935
+ - done: Goal is accomplished
936
+ - stuck: Can't figure out what to do
937
+
938
+ Respond in format:
939
+ ACTION: <action_type>
940
+ SELECTOR: <css selector or text to find>
941
+ VALUE: <text to type or URL>
942
+ REASONING: <why>`
943
+ }]);
944
+
945
+ const actionContent = nextAction.content;
946
+
947
+ // Parse the action
948
+ const actionMatch = actionContent.match(/ACTION:\s*(\w+)/i);
949
+ const selectorMatch = actionContent.match(/SELECTOR:\s*(.+?)(?:\n|$)/i);
950
+ const valueMatch = actionContent.match(/VALUE:\s*(.+?)(?:\n|$)/i);
951
+
952
+ if (!actionMatch) {
953
+ actionHistory.push(`Attempt ${attempt + 1}: Couldn't parse action`);
954
+ continue;
623
955
  }
624
956
 
625
- // Check for "no response" indicators
626
- if (extracted.toLowerCase().includes('response not ready') ||
627
- extracted.toLowerCase().includes('no response visible') ||
628
- extracted.toLowerCase().includes('no additional text')) {
629
- if (scrollIndex === 0) {
630
- extractedParts.push('Response not ready yet or page still loading.');
631
- }
957
+ const action = actionMatch[1].toLowerCase();
958
+ const selector = selectorMatch?.[1]?.trim() || '';
959
+ const value = valueMatch?.[1]?.trim() || '';
960
+
961
+ if (action === 'done') {
962
+ accomplished = true;
963
+ actionHistory.push(`Attempt ${attempt + 1}: Goal accomplished!`);
632
964
  break;
633
965
  }
634
966
 
635
- extractedParts.push(extracted);
967
+ if (action === 'stuck') {
968
+ // Ask Perplexity for help using Playwright
969
+ actionHistory.push(`Attempt ${attempt + 1}: Got stuck, asking Perplexity for help...`);
970
+
971
+ const helpRequest = `I'm trying to: ${goal}\n\nI'm stuck. What should I do next? Be specific about what to click or type.`;
972
+ const advice = await browser.askAI('perplexity', helpRequest, false);
973
+ actionHistory.push(`Got advice: ${advice.response.slice(0, 200)}...`);
974
+
975
+ // Navigate back to continue
976
+ await browser.navigateTo(page.url());
977
+ continue;
978
+ }
979
+
980
+ // Execute the action using Playwright
981
+ try {
982
+ switch (action) {
983
+ case 'click':
984
+ // Try to click by selector or text
985
+ if (selector) {
986
+ const clicked = await browser.clickElement(selector);
987
+ if (!clicked) {
988
+ // Try by text
989
+ await page.getByText(selector).first().click({ timeout: 5000 });
990
+ }
991
+ }
992
+ actionHistory.push(`Attempt ${attempt + 1}: Clicked "${selector}"`);
993
+ break;
994
+ case 'type':
995
+ if (selector && value) {
996
+ const typed = await browser.typeInElement(selector, value);
997
+ if (!typed) {
998
+ await page.getByPlaceholder(selector).first().fill(value);
999
+ }
1000
+ }
1001
+ actionHistory.push(`Attempt ${attempt + 1}: Typed "${value}" in "${selector}"`);
1002
+ break;
1003
+ case 'press':
1004
+ await browser.pressKey(value || selector);
1005
+ actionHistory.push(`Attempt ${attempt + 1}: Pressed ${value || selector}`);
1006
+ break;
1007
+ case 'scroll':
1008
+ await browser.scroll(value.toLowerCase().includes('up') ? 'up' : 'down');
1009
+ actionHistory.push(`Attempt ${attempt + 1}: Scrolled ${value || 'down'}`);
1010
+ break;
1011
+ case 'navigate':
1012
+ const url = value.startsWith('http') ? value : `https://${value}`;
1013
+ await browser.navigateTo(url);
1014
+ actionHistory.push(`Attempt ${attempt + 1}: Navigated to ${url}`);
1015
+ break;
1016
+ default:
1017
+ actionHistory.push(`Attempt ${attempt + 1}: Unknown action ${action}`);
1018
+ }
1019
+ } catch (e) {
1020
+ actionHistory.push(`Attempt ${attempt + 1}: Action failed - ${e}`);
1021
+ }
636
1022
 
637
- // Scroll down to see more content
638
- await computer.scrollMouse(-5); // Scroll down
639
- await sleep(1000); // Wait for scroll animation
1023
+ await sleep(2000); // Wait for UI to update
640
1024
  }
641
1025
 
642
- // Combine all extracted parts
643
- const fullResponse = extractedParts.join('\n\n');
644
- step.result = `📝 ${site.charAt(0).toUpperCase() + site.slice(1)} says:\n\n${fullResponse}`;
1026
+ step.result = `🎯 Adaptive Agent Result:\n\nGoal: ${goal}\nAccomplished: ${accomplished ? 'Yes ✅' : 'Partial/No ❌'}\n\nAction Log:\n${actionHistory.join('\n')}`;
645
1027
  break;
646
1028
  }
647
1029
 
648
- case 'screenshot':
649
- const vision = await describeScreen();
650
- step.result = vision.description;
651
- break;
652
-
653
1030
  case 'chat':
654
1031
  // This is a fallback - just describe what user wants
655
1032
  step.result = `Task noted: ${params}`;