@projectservan8n/cnapse 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +380 -14
- package/package.json +1 -1
- package/src/lib/tasks.ts +463 -16
package/dist/index.js
CHANGED
|
@@ -2380,7 +2380,21 @@ Before outputting steps, THINK through these questions:
|
|
|
2380
2380
|
### Web Browsing
|
|
2381
2381
|
- open_url: Open URL in default browser (e.g., "open_url:https://perplexity.ai")
|
|
2382
2382
|
- browse_and_ask: Open AI website, type question, wait for response (e.g., "browse_and_ask:perplexity|What is the capital of France?")
|
|
2383
|
-
- browse_and_ask: Supports: perplexity, chatgpt, claude, google
|
|
2383
|
+
- browse_and_ask: Supports: perplexity, chatgpt, claude, google, copilot, bard
|
|
2384
|
+
- web_search: Search Google and extract results (e.g., "web_search:best restaurants in NYC")
|
|
2385
|
+
|
|
2386
|
+
### Email
|
|
2387
|
+
- send_email: Send email via Gmail or Outlook web (e.g., "send_email:gmail|to@email.com|Subject|Body text here")
|
|
2388
|
+
- send_email: Supports: gmail, outlook
|
|
2389
|
+
|
|
2390
|
+
### Google Apps (via browser)
|
|
2391
|
+
- google_sheets: Interact with Google Sheets (e.g., "google_sheets:new|My Spreadsheet" or "google_sheets:type|A1|Hello World")
|
|
2392
|
+
- google_sheets: Commands: new (create), open (open existing), type (type in cell), read (screenshot current view)
|
|
2393
|
+
- google_docs: Interact with Google Docs (e.g., "google_docs:new|My Document" or "google_docs:type|Hello World")
|
|
2394
|
+
- google_docs: Commands: new (create), open (open existing), type (type text)
|
|
2395
|
+
|
|
2396
|
+
### Research
|
|
2397
|
+
- research: Multi-step web research - searches, gathers info, summarizes (e.g., "research:What are the latest AI trends in 2024?")
|
|
2384
2398
|
|
|
2385
2399
|
### Utility
|
|
2386
2400
|
- wait: Wait N seconds (e.g., "wait:2" - use 1-3s for app loads)
|
|
@@ -2471,14 +2485,71 @@ Output:
|
|
|
2471
2485
|
### Example 7: "search google for weather today"
|
|
2472
2486
|
Thinking:
|
|
2473
2487
|
- Goal: Open Google and search for something
|
|
2474
|
-
- How: Use
|
|
2475
|
-
- Sequence:
|
|
2488
|
+
- How: Use web_search for quick results extraction
|
|
2489
|
+
- Sequence: Search and get results
|
|
2490
|
+
|
|
2491
|
+
Output:
|
|
2492
|
+
[
|
|
2493
|
+
{ "description": "Search Google for weather", "action": "web_search:weather today" }
|
|
2494
|
+
]
|
|
2495
|
+
|
|
2496
|
+
### Example 8: "send an email to john@example.com about the meeting tomorrow"
|
|
2497
|
+
Thinking:
|
|
2498
|
+
- Goal: Compose and send an email via Gmail
|
|
2499
|
+
- How: Use send_email with gmail, recipient, subject, body
|
|
2500
|
+
- Sequence: Open Gmail, compose, fill fields, send
|
|
2501
|
+
|
|
2502
|
+
Output:
|
|
2503
|
+
[
|
|
2504
|
+
{ "description": "Send email via Gmail", "action": "send_email:gmail|john@example.com|Meeting Tomorrow|Hi John, this is a reminder about our meeting tomorrow. Please let me know if you have any questions." }
|
|
2505
|
+
]
|
|
2506
|
+
|
|
2507
|
+
### Example 9: "create a new google sheet called Sales Report and add headers"
|
|
2508
|
+
Thinking:
|
|
2509
|
+
- Goal: Create a new Google Sheet and add content
|
|
2510
|
+
- How: Use google_sheets to create new, then type in cells
|
|
2511
|
+
- Sequence: Create sheet -> Navigate to cells -> Type headers
|
|
2476
2512
|
|
|
2477
2513
|
Output:
|
|
2478
2514
|
[
|
|
2479
|
-
{ "description": "
|
|
2480
|
-
{ "description": "Wait for
|
|
2481
|
-
{ "description": "
|
|
2515
|
+
{ "description": "Create new Google Sheet", "action": "google_sheets:new|Sales Report" },
|
|
2516
|
+
{ "description": "Wait for sheet to load", "action": "wait:3" },
|
|
2517
|
+
{ "description": "Type header in A1", "action": "google_sheets:type|A1|Product" },
|
|
2518
|
+
{ "description": "Type header in B1", "action": "google_sheets:type|B1|Quantity" },
|
|
2519
|
+
{ "description": "Type header in C1", "action": "google_sheets:type|C1|Price" }
|
|
2520
|
+
]
|
|
2521
|
+
|
|
2522
|
+
### Example 10: "research the latest news about AI regulations"
|
|
2523
|
+
Thinking:
|
|
2524
|
+
- Goal: Do multi-step research on a topic
|
|
2525
|
+
- How: Use research action which handles searching, gathering, summarizing
|
|
2526
|
+
- Sequence: Single research action does it all
|
|
2527
|
+
|
|
2528
|
+
Output:
|
|
2529
|
+
[
|
|
2530
|
+
{ "description": "Research AI regulations news", "action": "research:latest news about AI regulations 2024" }
|
|
2531
|
+
]
|
|
2532
|
+
|
|
2533
|
+
### Example 11: "write a document in google docs about project status"
|
|
2534
|
+
Thinking:
|
|
2535
|
+
- Goal: Create a Google Doc and write content
|
|
2536
|
+
- How: Use google_docs to create and type
|
|
2537
|
+
- Sequence: Create doc -> Type content
|
|
2538
|
+
|
|
2539
|
+
Output:
|
|
2540
|
+
[
|
|
2541
|
+
{ "description": "Create new Google Doc", "action": "google_docs:new|Project Status Report" },
|
|
2542
|
+
{ "description": "Wait for doc to load", "action": "wait:3" },
|
|
2543
|
+
{ "description": "Type the content", "action": "google_docs:type|Project Status Report
|
|
2544
|
+
|
|
2545
|
+
Date: Today
|
|
2546
|
+
|
|
2547
|
+
Summary:
|
|
2548
|
+
The project is on track. All milestones have been met.
|
|
2549
|
+
|
|
2550
|
+
Next Steps:
|
|
2551
|
+
- Complete testing
|
|
2552
|
+
- Deploy to production" }
|
|
2482
2553
|
]
|
|
2483
2554
|
|
|
2484
2555
|
## YOUR TASK
|
|
@@ -2671,13 +2742,15 @@ ${existingResult.output}`;
|
|
|
2671
2742
|
const [site, ...questionParts] = params.split("|");
|
|
2672
2743
|
const question = questionParts.join("|");
|
|
2673
2744
|
const sites = {
|
|
2674
|
-
perplexity: { url: "https://www.perplexity.ai",
|
|
2675
|
-
chatgpt: { url: "https://chat.openai.com",
|
|
2676
|
-
claude: { url: "https://claude.ai",
|
|
2677
|
-
google: { url: "https://www.google.com",
|
|
2678
|
-
bing: { url: "https://www.bing.com",
|
|
2745
|
+
perplexity: { url: "https://www.perplexity.ai", loadTime: 3, responseTime: 10 },
|
|
2746
|
+
chatgpt: { url: "https://chat.openai.com", loadTime: 4, responseTime: 15 },
|
|
2747
|
+
claude: { url: "https://claude.ai", loadTime: 4, responseTime: 15 },
|
|
2748
|
+
google: { url: "https://www.google.com", loadTime: 2, responseTime: 3 },
|
|
2749
|
+
bing: { url: "https://www.bing.com", loadTime: 2, responseTime: 3 },
|
|
2750
|
+
bard: { url: "https://bard.google.com", loadTime: 3, responseTime: 12 },
|
|
2751
|
+
copilot: { url: "https://copilot.microsoft.com", loadTime: 3, responseTime: 12 }
|
|
2679
2752
|
};
|
|
2680
|
-
const siteConfig = sites[site.toLowerCase()] || { url: `https://${site}`,
|
|
2753
|
+
const siteConfig = sites[site.toLowerCase()] || { url: `https://${site}`, loadTime: 3, responseTime: 10 };
|
|
2681
2754
|
if (process.platform === "win32") {
|
|
2682
2755
|
await runCommand(`start "" "${siteConfig.url}"`, 5e3);
|
|
2683
2756
|
} else if (process.platform === "darwin") {
|
|
@@ -2685,17 +2758,310 @@ ${existingResult.output}`;
|
|
|
2685
2758
|
} else {
|
|
2686
2759
|
await runCommand(`xdg-open "${siteConfig.url}"`, 5e3);
|
|
2687
2760
|
}
|
|
2688
|
-
await sleep(siteConfig.
|
|
2761
|
+
await sleep(siteConfig.loadTime * 1e3);
|
|
2689
2762
|
await typeText(question);
|
|
2690
2763
|
await sleep(300);
|
|
2691
2764
|
await pressKey("Return");
|
|
2692
|
-
|
|
2765
|
+
await sleep(siteConfig.responseTime * 1e3);
|
|
2766
|
+
const extractedParts = [];
|
|
2767
|
+
const maxScrolls = 5;
|
|
2768
|
+
for (let scrollIndex = 0; scrollIndex < maxScrolls; scrollIndex++) {
|
|
2769
|
+
const screenResult = await describeScreen();
|
|
2770
|
+
const extractPrompt = `You are looking at screenshot ${scrollIndex + 1} of ${site}. The user asked: "${question}"
|
|
2771
|
+
|
|
2772
|
+
Extract ONLY the AI's response/answer text visible on screen. Do NOT include:
|
|
2773
|
+
- The user's question
|
|
2774
|
+
- Any UI elements, buttons, navigation, or headers
|
|
2775
|
+
- Any disclaimers, suggestions, or "related questions"
|
|
2776
|
+
- Any "Sources" or citation links
|
|
2777
|
+
- Any text you already extracted (avoid duplicates)
|
|
2778
|
+
|
|
2779
|
+
${scrollIndex > 0 ? `Previous parts already extracted:
|
|
2780
|
+
${extractedParts.join("\n---\n")}
|
|
2781
|
+
|
|
2782
|
+
Only extract NEW text that continues from where we left off.` : ""}
|
|
2783
|
+
|
|
2784
|
+
Just give me the actual answer text, word for word as it appears. If there's no more response text visible, respond with exactly: "END_OF_RESPONSE"`;
|
|
2785
|
+
const extractResponse = await chat([{ role: "user", content: extractPrompt }]);
|
|
2786
|
+
const extracted = extractResponse.content.trim();
|
|
2787
|
+
if (extracted === "END_OF_RESPONSE" || extracted.includes("END_OF_RESPONSE")) {
|
|
2788
|
+
break;
|
|
2789
|
+
}
|
|
2790
|
+
if (extracted.toLowerCase().includes("response not ready") || extracted.toLowerCase().includes("no response visible") || extracted.toLowerCase().includes("no additional text")) {
|
|
2791
|
+
if (scrollIndex === 0) {
|
|
2792
|
+
extractedParts.push("Response not ready yet or page still loading.");
|
|
2793
|
+
}
|
|
2794
|
+
break;
|
|
2795
|
+
}
|
|
2796
|
+
extractedParts.push(extracted);
|
|
2797
|
+
await scrollMouse(-5);
|
|
2798
|
+
await sleep(1e3);
|
|
2799
|
+
}
|
|
2800
|
+
const fullResponse = extractedParts.join("\n\n");
|
|
2801
|
+
step.result = `\u{1F4DD} ${site.charAt(0).toUpperCase() + site.slice(1)} says:
|
|
2802
|
+
|
|
2803
|
+
${fullResponse}`;
|
|
2693
2804
|
break;
|
|
2694
2805
|
}
|
|
2695
2806
|
case "screenshot":
|
|
2696
2807
|
const vision = await describeScreen();
|
|
2697
2808
|
step.result = vision.description;
|
|
2698
2809
|
break;
|
|
2810
|
+
case "web_search": {
|
|
2811
|
+
await keyCombo(["meta", "r"]);
|
|
2812
|
+
await sleep(500);
|
|
2813
|
+
await typeText("chrome");
|
|
2814
|
+
await pressKey("Return");
|
|
2815
|
+
await sleep(2e3);
|
|
2816
|
+
await keyCombo(["control", "l"]);
|
|
2817
|
+
await sleep(300);
|
|
2818
|
+
await typeText("google.com");
|
|
2819
|
+
await pressKey("Return");
|
|
2820
|
+
await sleep(2e3);
|
|
2821
|
+
await typeText(params);
|
|
2822
|
+
await sleep(300);
|
|
2823
|
+
await pressKey("Return");
|
|
2824
|
+
await sleep(3e3);
|
|
2825
|
+
const searchScreen = await describeScreen();
|
|
2826
|
+
const searchExtract = await chat([{
|
|
2827
|
+
role: "user",
|
|
2828
|
+
content: `Extract the top search results from this Google search page. For each result, include:
|
|
2829
|
+
- Title
|
|
2830
|
+
- Brief snippet/description
|
|
2831
|
+
- URL if visible
|
|
2832
|
+
|
|
2833
|
+
Format as a numbered list. Be concise.`
|
|
2834
|
+
}]);
|
|
2835
|
+
step.result = `\u{1F50D} Search results for "${params}":
|
|
2836
|
+
|
|
2837
|
+
${searchExtract.content}`;
|
|
2838
|
+
break;
|
|
2839
|
+
}
|
|
2840
|
+
case "send_email": {
|
|
2841
|
+
const [provider, to, subject, ...bodyParts] = params.split("|");
|
|
2842
|
+
const body = bodyParts.join("|");
|
|
2843
|
+
await keyCombo(["meta", "r"]);
|
|
2844
|
+
await sleep(500);
|
|
2845
|
+
await typeText("chrome");
|
|
2846
|
+
await pressKey("Return");
|
|
2847
|
+
await sleep(2e3);
|
|
2848
|
+
await keyCombo(["control", "l"]);
|
|
2849
|
+
await sleep(300);
|
|
2850
|
+
if (provider.toLowerCase() === "gmail") {
|
|
2851
|
+
await typeText("mail.google.com");
|
|
2852
|
+
await pressKey("Return");
|
|
2853
|
+
await sleep(4e3);
|
|
2854
|
+
await typeText("c");
|
|
2855
|
+
await sleep(2e3);
|
|
2856
|
+
await typeText(to);
|
|
2857
|
+
await sleep(300);
|
|
2858
|
+
await pressKey("Tab");
|
|
2859
|
+
await sleep(200);
|
|
2860
|
+
await typeText(subject);
|
|
2861
|
+
await sleep(300);
|
|
2862
|
+
await pressKey("Tab");
|
|
2863
|
+
await sleep(200);
|
|
2864
|
+
await typeText(body);
|
|
2865
|
+
await sleep(500);
|
|
2866
|
+
await keyCombo(["control", "Return"]);
|
|
2867
|
+
} else if (provider.toLowerCase() === "outlook") {
|
|
2868
|
+
await typeText("outlook.live.com");
|
|
2869
|
+
await pressKey("Return");
|
|
2870
|
+
await sleep(4e3);
|
|
2871
|
+
await typeText("n");
|
|
2872
|
+
await sleep(2e3);
|
|
2873
|
+
await typeText(to);
|
|
2874
|
+
await sleep(300);
|
|
2875
|
+
await pressKey("Tab");
|
|
2876
|
+
await sleep(200);
|
|
2877
|
+
await typeText(subject);
|
|
2878
|
+
await sleep(300);
|
|
2879
|
+
await pressKey("Tab");
|
|
2880
|
+
await sleep(200);
|
|
2881
|
+
await typeText(body);
|
|
2882
|
+
await sleep(500);
|
|
2883
|
+
await keyCombo(["control", "Return"]);
|
|
2884
|
+
} else {
|
|
2885
|
+
throw new Error(`Unsupported email provider: ${provider}. Use gmail or outlook.`);
|
|
2886
|
+
}
|
|
2887
|
+
await sleep(2e3);
|
|
2888
|
+
step.result = `\u{1F4E7} Email sent via ${provider} to ${to}`;
|
|
2889
|
+
break;
|
|
2890
|
+
}
|
|
2891
|
+
case "google_sheets": {
|
|
2892
|
+
const [sheetCmd, ...sheetArgs] = params.split("|");
|
|
2893
|
+
switch (sheetCmd.toLowerCase()) {
|
|
2894
|
+
case "new": {
|
|
2895
|
+
const sheetName = sheetArgs[0] || "Untitled spreadsheet";
|
|
2896
|
+
await keyCombo(["meta", "r"]);
|
|
2897
|
+
await sleep(500);
|
|
2898
|
+
await typeText("chrome");
|
|
2899
|
+
await pressKey("Return");
|
|
2900
|
+
await sleep(2e3);
|
|
2901
|
+
await keyCombo(["control", "l"]);
|
|
2902
|
+
await sleep(300);
|
|
2903
|
+
await typeText("sheets.google.com");
|
|
2904
|
+
await pressKey("Return");
|
|
2905
|
+
await sleep(3e3);
|
|
2906
|
+
await pressKey("Tab");
|
|
2907
|
+
await pressKey("Tab");
|
|
2908
|
+
await pressKey("Return");
|
|
2909
|
+
await sleep(3e3);
|
|
2910
|
+
await keyCombo(["alt", "f"]);
|
|
2911
|
+
await sleep(500);
|
|
2912
|
+
await typeText("r");
|
|
2913
|
+
await sleep(500);
|
|
2914
|
+
await keyCombo(["control", "a"]);
|
|
2915
|
+
await typeText(sheetName);
|
|
2916
|
+
await pressKey("Return");
|
|
2917
|
+
await sleep(500);
|
|
2918
|
+
await pressKey("Escape");
|
|
2919
|
+
step.result = `\u{1F4CA} Created Google Sheet: ${sheetName}`;
|
|
2920
|
+
break;
|
|
2921
|
+
}
|
|
2922
|
+
case "type": {
|
|
2923
|
+
const cell = sheetArgs[0] || "A1";
|
|
2924
|
+
const cellValue = sheetArgs.slice(1).join("|");
|
|
2925
|
+
await keyCombo(["control", "g"]);
|
|
2926
|
+
await sleep(500);
|
|
2927
|
+
await typeText(cell);
|
|
2928
|
+
await pressKey("Return");
|
|
2929
|
+
await sleep(300);
|
|
2930
|
+
await typeText(cellValue);
|
|
2931
|
+
await pressKey("Return");
|
|
2932
|
+
await sleep(200);
|
|
2933
|
+
step.result = `\u{1F4CA} Typed "${cellValue}" in cell ${cell}`;
|
|
2934
|
+
break;
|
|
2935
|
+
}
|
|
2936
|
+
case "read": {
|
|
2937
|
+
const readScreen = await describeScreen();
|
|
2938
|
+
step.result = `\u{1F4CA} Current sheet view:
|
|
2939
|
+
${readScreen.description}`;
|
|
2940
|
+
break;
|
|
2941
|
+
}
|
|
2942
|
+
default:
|
|
2943
|
+
throw new Error(`Unknown google_sheets command: ${sheetCmd}`);
|
|
2944
|
+
}
|
|
2945
|
+
break;
|
|
2946
|
+
}
|
|
2947
|
+
case "google_docs": {
|
|
2948
|
+
const [docCmd, ...docArgs] = params.split("|");
|
|
2949
|
+
switch (docCmd.toLowerCase()) {
|
|
2950
|
+
case "new": {
|
|
2951
|
+
const docName = docArgs[0] || "Untitled document";
|
|
2952
|
+
await keyCombo(["meta", "r"]);
|
|
2953
|
+
await sleep(500);
|
|
2954
|
+
await typeText("chrome");
|
|
2955
|
+
await pressKey("Return");
|
|
2956
|
+
await sleep(2e3);
|
|
2957
|
+
await keyCombo(["control", "l"]);
|
|
2958
|
+
await sleep(300);
|
|
2959
|
+
await typeText("docs.google.com");
|
|
2960
|
+
await pressKey("Return");
|
|
2961
|
+
await sleep(3e3);
|
|
2962
|
+
await pressKey("Tab");
|
|
2963
|
+
await pressKey("Tab");
|
|
2964
|
+
await pressKey("Return");
|
|
2965
|
+
await sleep(3e3);
|
|
2966
|
+
await keyCombo(["alt", "f"]);
|
|
2967
|
+
await sleep(500);
|
|
2968
|
+
await typeText("r");
|
|
2969
|
+
await sleep(500);
|
|
2970
|
+
await keyCombo(["control", "a"]);
|
|
2971
|
+
await typeText(docName);
|
|
2972
|
+
await pressKey("Return");
|
|
2973
|
+
await sleep(500);
|
|
2974
|
+
await pressKey("Escape");
|
|
2975
|
+
step.result = `\u{1F4C4} Created Google Doc: ${docName}`;
|
|
2976
|
+
break;
|
|
2977
|
+
}
|
|
2978
|
+
case "type": {
|
|
2979
|
+
const docText = docArgs.join("|");
|
|
2980
|
+
await typeText(docText);
|
|
2981
|
+
step.result = `\u{1F4C4} Typed content in Google Doc`;
|
|
2982
|
+
break;
|
|
2983
|
+
}
|
|
2984
|
+
default:
|
|
2985
|
+
throw new Error(`Unknown google_docs command: ${docCmd}`);
|
|
2986
|
+
}
|
|
2987
|
+
break;
|
|
2988
|
+
}
|
|
2989
|
+
case "research": {
|
|
2990
|
+
const researchQuery = params;
|
|
2991
|
+
const researchResults = [];
|
|
2992
|
+
await keyCombo(["meta", "r"]);
|
|
2993
|
+
await sleep(500);
|
|
2994
|
+
await typeText("chrome");
|
|
2995
|
+
await pressKey("Return");
|
|
2996
|
+
await sleep(2e3);
|
|
2997
|
+
await keyCombo(["control", "l"]);
|
|
2998
|
+
await sleep(300);
|
|
2999
|
+
await typeText("google.com");
|
|
3000
|
+
await pressKey("Return");
|
|
3001
|
+
await sleep(2e3);
|
|
3002
|
+
await typeText(researchQuery);
|
|
3003
|
+
await pressKey("Return");
|
|
3004
|
+
await sleep(3e3);
|
|
3005
|
+
let searchScreen = await describeScreen();
|
|
3006
|
+
const initialResults = await chat([{
|
|
3007
|
+
role: "user",
|
|
3008
|
+
content: `Extract the key information from these Google search results about: "${researchQuery}"
|
|
3009
|
+
Include any relevant facts, numbers, dates, or key points visible. Be thorough but concise.`
|
|
3010
|
+
}]);
|
|
3011
|
+
researchResults.push(`Search Results:
|
|
3012
|
+
${initialResults.content}`);
|
|
3013
|
+
await pressKey("Tab");
|
|
3014
|
+
await sleep(200);
|
|
3015
|
+
await pressKey("Tab");
|
|
3016
|
+
await sleep(200);
|
|
3017
|
+
await pressKey("Return");
|
|
3018
|
+
await sleep(4e3);
|
|
3019
|
+
searchScreen = await describeScreen();
|
|
3020
|
+
const pageContent = await chat([{
|
|
3021
|
+
role: "user",
|
|
3022
|
+
content: `Extract the main content and key information from this webpage about: "${researchQuery}"
|
|
3023
|
+
Ignore ads, navigation, footers. Focus on the actual article/content.`
|
|
3024
|
+
}]);
|
|
3025
|
+
researchResults.push(`
|
|
3026
|
+
Source 1 Content:
|
|
3027
|
+
${pageContent.content}`);
|
|
3028
|
+
await keyCombo(["alt", "Left"]);
|
|
3029
|
+
await sleep(2e3);
|
|
3030
|
+
await scrollMouse(-3);
|
|
3031
|
+
await sleep(500);
|
|
3032
|
+
await pressKey("Tab");
|
|
3033
|
+
await pressKey("Tab");
|
|
3034
|
+
await pressKey("Tab");
|
|
3035
|
+
await pressKey("Return");
|
|
3036
|
+
await sleep(4e3);
|
|
3037
|
+
searchScreen = await describeScreen();
|
|
3038
|
+
const pageContent2 = await chat([{
|
|
3039
|
+
role: "user",
|
|
3040
|
+
content: `Extract additional information from this webpage about: "${researchQuery}"
|
|
3041
|
+
Look for details not covered in the previous source.`
|
|
3042
|
+
}]);
|
|
3043
|
+
researchResults.push(`
|
|
3044
|
+
Source 2 Content:
|
|
3045
|
+
${pageContent2.content}`);
|
|
3046
|
+
const synthesis = await chat([{
|
|
3047
|
+
role: "user",
|
|
3048
|
+
content: `Based on the following research gathered about "${researchQuery}", provide a comprehensive summary:
|
|
3049
|
+
|
|
3050
|
+
${researchResults.join("\n\n")}
|
|
3051
|
+
|
|
3052
|
+
Create a well-organized summary with:
|
|
3053
|
+
1. Key findings
|
|
3054
|
+
2. Important details
|
|
3055
|
+
3. Any notable facts or statistics
|
|
3056
|
+
4. Conclusion
|
|
3057
|
+
|
|
3058
|
+
Be thorough but concise.`
|
|
3059
|
+
}]);
|
|
3060
|
+
step.result = `\u{1F52C} Research Summary: ${researchQuery}
|
|
3061
|
+
|
|
3062
|
+
${synthesis.content}`;
|
|
3063
|
+
break;
|
|
3064
|
+
}
|
|
2699
3065
|
case "chat":
|
|
2700
3066
|
step.result = `Task noted: ${params}`;
|
|
2701
3067
|
break;
|
package/package.json
CHANGED
package/src/lib/tasks.ts
CHANGED
|
@@ -211,7 +211,21 @@ Before outputting steps, THINK through these questions:
|
|
|
211
211
|
### Web Browsing
|
|
212
212
|
- open_url: Open URL in default browser (e.g., "open_url:https://perplexity.ai")
|
|
213
213
|
- browse_and_ask: Open AI website, type question, wait for response (e.g., "browse_and_ask:perplexity|What is the capital of France?")
|
|
214
|
-
- browse_and_ask: Supports: perplexity, chatgpt, claude, google
|
|
214
|
+
- browse_and_ask: Supports: perplexity, chatgpt, claude, google, copilot, bard
|
|
215
|
+
- web_search: Search Google and extract results (e.g., "web_search:best restaurants in NYC")
|
|
216
|
+
|
|
217
|
+
### Email
|
|
218
|
+
- send_email: Send email via Gmail or Outlook web (e.g., "send_email:gmail|to@email.com|Subject|Body text here")
|
|
219
|
+
- send_email: Supports: gmail, outlook
|
|
220
|
+
|
|
221
|
+
### Google Apps (via browser)
|
|
222
|
+
- google_sheets: Interact with Google Sheets (e.g., "google_sheets:new|My Spreadsheet" or "google_sheets:type|A1|Hello World")
|
|
223
|
+
- google_sheets: Commands: new (create), open (open existing), type (type in cell), read (screenshot current view)
|
|
224
|
+
- google_docs: Interact with Google Docs (e.g., "google_docs:new|My Document" or "google_docs:type|Hello World")
|
|
225
|
+
- google_docs: Commands: new (create), open (open existing), type (type text)
|
|
226
|
+
|
|
227
|
+
### Research
|
|
228
|
+
- research: Multi-step web research - searches, gathers info, summarizes (e.g., "research:What are the latest AI trends in 2024?")
|
|
215
229
|
|
|
216
230
|
### Utility
|
|
217
231
|
- wait: Wait N seconds (e.g., "wait:2" - use 1-3s for app loads)
|
|
@@ -302,14 +316,62 @@ Output:
|
|
|
302
316
|
### Example 7: "search google for weather today"
|
|
303
317
|
Thinking:
|
|
304
318
|
- Goal: Open Google and search for something
|
|
305
|
-
- How: Use
|
|
306
|
-
- Sequence:
|
|
319
|
+
- How: Use web_search for quick results extraction
|
|
320
|
+
- Sequence: Search and get results
|
|
321
|
+
|
|
322
|
+
Output:
|
|
323
|
+
[
|
|
324
|
+
{ "description": "Search Google for weather", "action": "web_search:weather today" }
|
|
325
|
+
]
|
|
326
|
+
|
|
327
|
+
### Example 8: "send an email to john@example.com about the meeting tomorrow"
|
|
328
|
+
Thinking:
|
|
329
|
+
- Goal: Compose and send an email via Gmail
|
|
330
|
+
- How: Use send_email with gmail, recipient, subject, body
|
|
331
|
+
- Sequence: Open Gmail, compose, fill fields, send
|
|
332
|
+
|
|
333
|
+
Output:
|
|
334
|
+
[
|
|
335
|
+
{ "description": "Send email via Gmail", "action": "send_email:gmail|john@example.com|Meeting Tomorrow|Hi John, this is a reminder about our meeting tomorrow. Please let me know if you have any questions." }
|
|
336
|
+
]
|
|
337
|
+
|
|
338
|
+
### Example 9: "create a new google sheet called Sales Report and add headers"
|
|
339
|
+
Thinking:
|
|
340
|
+
- Goal: Create a new Google Sheet and add content
|
|
341
|
+
- How: Use google_sheets to create new, then type in cells
|
|
342
|
+
- Sequence: Create sheet -> Navigate to cells -> Type headers
|
|
343
|
+
|
|
344
|
+
Output:
|
|
345
|
+
[
|
|
346
|
+
{ "description": "Create new Google Sheet", "action": "google_sheets:new|Sales Report" },
|
|
347
|
+
{ "description": "Wait for sheet to load", "action": "wait:3" },
|
|
348
|
+
{ "description": "Type header in A1", "action": "google_sheets:type|A1|Product" },
|
|
349
|
+
{ "description": "Type header in B1", "action": "google_sheets:type|B1|Quantity" },
|
|
350
|
+
{ "description": "Type header in C1", "action": "google_sheets:type|C1|Price" }
|
|
351
|
+
]
|
|
352
|
+
|
|
353
|
+
### Example 10: "research the latest news about AI regulations"
|
|
354
|
+
Thinking:
|
|
355
|
+
- Goal: Do multi-step research on a topic
|
|
356
|
+
- How: Use research action which handles searching, gathering, summarizing
|
|
357
|
+
- Sequence: Single research action does it all
|
|
358
|
+
|
|
359
|
+
Output:
|
|
360
|
+
[
|
|
361
|
+
{ "description": "Research AI regulations news", "action": "research:latest news about AI regulations 2024" }
|
|
362
|
+
]
|
|
363
|
+
|
|
364
|
+
### Example 11: "write a document in google docs about project status"
|
|
365
|
+
Thinking:
|
|
366
|
+
- Goal: Create a Google Doc and write content
|
|
367
|
+
- How: Use google_docs to create and type
|
|
368
|
+
- Sequence: Create doc -> Type content
|
|
307
369
|
|
|
308
370
|
Output:
|
|
309
371
|
[
|
|
310
|
-
{ "description": "
|
|
311
|
-
{ "description": "Wait for
|
|
312
|
-
{ "description": "
|
|
372
|
+
{ "description": "Create new Google Doc", "action": "google_docs:new|Project Status Report" },
|
|
373
|
+
{ "description": "Wait for doc to load", "action": "wait:3" },
|
|
374
|
+
{ "description": "Type the content", "action": "google_docs:type|Project Status Report\n\nDate: Today\n\nSummary:\nThe project is on track. All milestones have been met.\n\nNext Steps:\n- Complete testing\n- Deploy to production" }
|
|
313
375
|
]
|
|
314
376
|
|
|
315
377
|
## YOUR TASK
|
|
@@ -557,16 +619,18 @@ ${existingResult.output}`;
|
|
|
557
619
|
const [site, ...questionParts] = params.split('|');
|
|
558
620
|
const question = questionParts.join('|');
|
|
559
621
|
|
|
560
|
-
// Site-specific URLs and
|
|
561
|
-
const sites: Record<string, { url: string;
|
|
562
|
-
perplexity: { url: 'https://www.perplexity.ai',
|
|
563
|
-
chatgpt: { url: 'https://chat.openai.com',
|
|
564
|
-
claude: { url: 'https://claude.ai',
|
|
565
|
-
google: { url: 'https://www.google.com',
|
|
566
|
-
bing: { url: 'https://www.bing.com',
|
|
622
|
+
// Site-specific URLs and response wait times
|
|
623
|
+
const sites: Record<string, { url: string; loadTime: number; responseTime: number }> = {
|
|
624
|
+
perplexity: { url: 'https://www.perplexity.ai', loadTime: 3, responseTime: 10 },
|
|
625
|
+
chatgpt: { url: 'https://chat.openai.com', loadTime: 4, responseTime: 15 },
|
|
626
|
+
claude: { url: 'https://claude.ai', loadTime: 4, responseTime: 15 },
|
|
627
|
+
google: { url: 'https://www.google.com', loadTime: 2, responseTime: 3 },
|
|
628
|
+
bing: { url: 'https://www.bing.com', loadTime: 2, responseTime: 3 },
|
|
629
|
+
bard: { url: 'https://bard.google.com', loadTime: 3, responseTime: 12 },
|
|
630
|
+
copilot: { url: 'https://copilot.microsoft.com', loadTime: 3, responseTime: 12 },
|
|
567
631
|
};
|
|
568
632
|
|
|
569
|
-
const siteConfig = sites[site.toLowerCase()] || { url: `https://${site}`,
|
|
633
|
+
const siteConfig = sites[site.toLowerCase()] || { url: `https://${site}`, loadTime: 3, responseTime: 10 };
|
|
570
634
|
|
|
571
635
|
// Open the site
|
|
572
636
|
if (process.platform === 'win32') {
|
|
@@ -578,7 +642,7 @@ ${existingResult.output}`;
|
|
|
578
642
|
}
|
|
579
643
|
|
|
580
644
|
// Wait for page to load
|
|
581
|
-
await sleep(siteConfig.
|
|
645
|
+
await sleep(siteConfig.loadTime * 1000);
|
|
582
646
|
|
|
583
647
|
// Type the question (most sites have autofocus on search/input)
|
|
584
648
|
await computer.typeText(question);
|
|
@@ -587,7 +651,59 @@ ${existingResult.output}`;
|
|
|
587
651
|
// Press Enter to submit
|
|
588
652
|
await computer.pressKey('Return');
|
|
589
653
|
|
|
590
|
-
|
|
654
|
+
// Wait for AI to generate response
|
|
655
|
+
await sleep(siteConfig.responseTime * 1000);
|
|
656
|
+
|
|
657
|
+
// Capture multiple screenshots by scrolling to get full response
|
|
658
|
+
const extractedParts: string[] = [];
|
|
659
|
+
const maxScrolls = 5; // Maximum number of scroll captures
|
|
660
|
+
|
|
661
|
+
for (let scrollIndex = 0; scrollIndex < maxScrolls; scrollIndex++) {
|
|
662
|
+
// Capture current view
|
|
663
|
+
const screenResult = await describeScreen();
|
|
664
|
+
|
|
665
|
+
// Ask AI to extract just the response text from what it sees
|
|
666
|
+
const extractPrompt = `You are looking at screenshot ${scrollIndex + 1} of ${site}. The user asked: "${question}"
|
|
667
|
+
|
|
668
|
+
Extract ONLY the AI's response/answer text visible on screen. Do NOT include:
|
|
669
|
+
- The user's question
|
|
670
|
+
- Any UI elements, buttons, navigation, or headers
|
|
671
|
+
- Any disclaimers, suggestions, or "related questions"
|
|
672
|
+
- Any "Sources" or citation links
|
|
673
|
+
- Any text you already extracted (avoid duplicates)
|
|
674
|
+
|
|
675
|
+
${scrollIndex > 0 ? `Previous parts already extracted:\n${extractedParts.join('\n---\n')}\n\nOnly extract NEW text that continues from where we left off.` : ''}
|
|
676
|
+
|
|
677
|
+
Just give me the actual answer text, word for word as it appears. If there's no more response text visible, respond with exactly: "END_OF_RESPONSE"`;
|
|
678
|
+
|
|
679
|
+
const extractResponse = await chat([{ role: 'user', content: extractPrompt }]);
|
|
680
|
+
const extracted = extractResponse.content.trim();
|
|
681
|
+
|
|
682
|
+
// Check if we've reached the end
|
|
683
|
+
if (extracted === 'END_OF_RESPONSE' || extracted.includes('END_OF_RESPONSE')) {
|
|
684
|
+
break;
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
// Check for "no response" indicators
|
|
688
|
+
if (extracted.toLowerCase().includes('response not ready') ||
|
|
689
|
+
extracted.toLowerCase().includes('no response visible') ||
|
|
690
|
+
extracted.toLowerCase().includes('no additional text')) {
|
|
691
|
+
if (scrollIndex === 0) {
|
|
692
|
+
extractedParts.push('Response not ready yet or page still loading.');
|
|
693
|
+
}
|
|
694
|
+
break;
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
extractedParts.push(extracted);
|
|
698
|
+
|
|
699
|
+
// Scroll down to see more content
|
|
700
|
+
await computer.scrollMouse(-5); // Scroll down
|
|
701
|
+
await sleep(1000); // Wait for scroll animation
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
// Combine all extracted parts
|
|
705
|
+
const fullResponse = extractedParts.join('\n\n');
|
|
706
|
+
step.result = `📝 ${site.charAt(0).toUpperCase() + site.slice(1)} says:\n\n${fullResponse}`;
|
|
591
707
|
break;
|
|
592
708
|
}
|
|
593
709
|
|
|
@@ -596,6 +712,337 @@ ${existingResult.output}`;
|
|
|
596
712
|
step.result = vision.description;
|
|
597
713
|
break;
|
|
598
714
|
|
|
715
|
+
case 'web_search': {
|
|
716
|
+
// Human-like Google search: open browser, go to google, type, search
|
|
717
|
+
// Open the browser through the Windows Run dialog (Win+R, then "chrome"); this path is Windows-only and has no Edge or other fallback
|
|
718
|
+
await computer.keyCombo(['meta', 'r']); // Win+R
|
|
719
|
+
await sleep(500);
|
|
720
|
+
await computer.typeText('chrome'); // Try Chrome first
|
|
721
|
+
await computer.pressKey('Return');
|
|
722
|
+
await sleep(2000);
|
|
723
|
+
|
|
724
|
+
// Go to Google (Ctrl+L to focus address bar)
|
|
725
|
+
await computer.keyCombo(['control', 'l']);
|
|
726
|
+
await sleep(300);
|
|
727
|
+
await computer.typeText('google.com');
|
|
728
|
+
await computer.pressKey('Return');
|
|
729
|
+
await sleep(2000);
|
|
730
|
+
|
|
731
|
+
// Type search query (Google search box should be focused)
|
|
732
|
+
await computer.typeText(params);
|
|
733
|
+
await sleep(300);
|
|
734
|
+
await computer.pressKey('Return');
|
|
735
|
+
await sleep(3000); // Wait for results
|
|
736
|
+
|
|
737
|
+
// Capture and extract search results
|
|
738
|
+
const searchScreen = await describeScreen();
|
|
739
|
+
const searchExtract = await chat([{
|
|
740
|
+
role: 'user',
|
|
741
|
+
content: `Extract the top search results from this Google search page. For each result, include:
|
|
742
|
+
- Title
|
|
743
|
+
- Brief snippet/description
|
|
744
|
+
- URL if visible
|
|
745
|
+
|
|
746
|
+
Format as a numbered list. Be concise.`
|
|
747
|
+
}]);
|
|
748
|
+
|
|
749
|
+
step.result = `🔍 Search results for "${params}":\n\n${searchExtract.content}`;
|
|
750
|
+
break;
|
|
751
|
+
}
|
|
752
|
+
|
|
753
|
+
case 'send_email': {
|
|
754
|
+
// Human-like email: open browser, navigate to Gmail/Outlook, compose
|
|
755
|
+
// Format: send_email:provider|to|subject|body
|
|
756
|
+
const [provider, to, subject, ...bodyParts] = params.split('|');
|
|
757
|
+
const body = bodyParts.join('|');
|
|
758
|
+
|
|
759
|
+
// Open browser
|
|
760
|
+
await computer.keyCombo(['meta', 'r']); // Win+R
|
|
761
|
+
await sleep(500);
|
|
762
|
+
await computer.typeText('chrome');
|
|
763
|
+
await computer.pressKey('Return');
|
|
764
|
+
await sleep(2000);
|
|
765
|
+
|
|
766
|
+
// Navigate to email service
|
|
767
|
+
await computer.keyCombo(['control', 'l']); // Focus address bar
|
|
768
|
+
await sleep(300);
|
|
769
|
+
|
|
770
|
+
if (provider.toLowerCase() === 'gmail') {
|
|
771
|
+
await computer.typeText('mail.google.com');
|
|
772
|
+
await computer.pressKey('Return');
|
|
773
|
+
await sleep(4000); // Wait for Gmail to load
|
|
774
|
+
|
|
775
|
+
// Click Compose button (use keyboard shortcut 'c')
|
|
776
|
+
await computer.typeText('c'); // Gmail shortcut for compose
|
|
777
|
+
await sleep(2000); // Wait for compose window
|
|
778
|
+
|
|
779
|
+
// Fill in fields
|
|
780
|
+
await computer.typeText(to); // To field is focused
|
|
781
|
+
await sleep(300);
|
|
782
|
+
await computer.pressKey('Tab'); // Move to subject
|
|
783
|
+
await sleep(200);
|
|
784
|
+
await computer.typeText(subject);
|
|
785
|
+
await sleep(300);
|
|
786
|
+
await computer.pressKey('Tab'); // Move to body
|
|
787
|
+
await sleep(200);
|
|
788
|
+
await computer.typeText(body);
|
|
789
|
+
await sleep(500);
|
|
790
|
+
|
|
791
|
+
// Send with Ctrl+Enter
|
|
792
|
+
await computer.keyCombo(['control', 'Return']);
|
|
793
|
+
|
|
794
|
+
} else if (provider.toLowerCase() === 'outlook') {
|
|
795
|
+
await computer.typeText('outlook.live.com');
|
|
796
|
+
await computer.pressKey('Return');
|
|
797
|
+
await sleep(4000); // Wait for Outlook to load
|
|
798
|
+
|
|
799
|
+
// Click New mail (use keyboard shortcut 'n')
|
|
800
|
+
await computer.typeText('n'); // Outlook shortcut for new mail
|
|
801
|
+
await sleep(2000);
|
|
802
|
+
|
|
803
|
+
// Fill in fields
|
|
804
|
+
await computer.typeText(to);
|
|
805
|
+
await sleep(300);
|
|
806
|
+
await computer.pressKey('Tab');
|
|
807
|
+
await sleep(200);
|
|
808
|
+
await computer.typeText(subject);
|
|
809
|
+
await sleep(300);
|
|
810
|
+
await computer.pressKey('Tab');
|
|
811
|
+
await sleep(200);
|
|
812
|
+
await computer.typeText(body);
|
|
813
|
+
await sleep(500);
|
|
814
|
+
|
|
815
|
+
// Send with Ctrl+Enter
|
|
816
|
+
await computer.keyCombo(['control', 'Return']);
|
|
817
|
+
} else {
|
|
818
|
+
throw new Error(`Unsupported email provider: ${provider}. Use gmail or outlook.`);
|
|
819
|
+
}
|
|
820
|
+
|
|
821
|
+
await sleep(2000);
|
|
822
|
+
step.result = `📧 Email sent via ${provider} to ${to}`;
|
|
823
|
+
break;
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
case 'google_sheets': {
|
|
827
|
+
// Human-like: open browser, go to sheets, interact
|
|
828
|
+
// Format: google_sheets:command|arg1|arg2...
|
|
829
|
+
const [sheetCmd, ...sheetArgs] = params.split('|');
|
|
830
|
+
|
|
831
|
+
switch (sheetCmd.toLowerCase()) {
|
|
832
|
+
case 'new': {
|
|
833
|
+
const sheetName = sheetArgs[0] || 'Untitled spreadsheet';
|
|
834
|
+
|
|
835
|
+
// Open browser and go to Google Sheets
|
|
836
|
+
await computer.keyCombo(['meta', 'r']);
|
|
837
|
+
await sleep(500);
|
|
838
|
+
await computer.typeText('chrome');
|
|
839
|
+
await computer.pressKey('Return');
|
|
840
|
+
await sleep(2000);
|
|
841
|
+
|
|
842
|
+
await computer.keyCombo(['control', 'l']);
|
|
843
|
+
await sleep(300);
|
|
844
|
+
await computer.typeText('sheets.google.com');
|
|
845
|
+
await computer.pressKey('Return');
|
|
846
|
+
await sleep(3000);
|
|
847
|
+
|
|
848
|
+
// Click "Blank" to create new (or use keyboard)
|
|
849
|
+
// Usually there's a + or Blank option, let's try clicking near top
|
|
850
|
+
await computer.pressKey('Tab'); // Navigate
|
|
851
|
+
await computer.pressKey('Tab');
|
|
852
|
+
await computer.pressKey('Return'); // Create blank
|
|
853
|
+
await sleep(3000);
|
|
854
|
+
|
|
855
|
+
// Rename: click on title or use File > Rename
|
|
856
|
+
await computer.keyCombo(['alt', 'f']); // File menu
|
|
857
|
+
await sleep(500);
|
|
858
|
+
await computer.typeText('r'); // Rename option
|
|
859
|
+
await sleep(500);
|
|
860
|
+
await computer.keyCombo(['control', 'a']); // Select all
|
|
861
|
+
await computer.typeText(sheetName);
|
|
862
|
+
await computer.pressKey('Return');
|
|
863
|
+
await sleep(500);
|
|
864
|
+
await computer.pressKey('Escape'); // Close any dialog
|
|
865
|
+
|
|
866
|
+
step.result = `📊 Created Google Sheet: ${sheetName}`;
|
|
867
|
+
break;
|
|
868
|
+
}
|
|
869
|
+
case 'type': {
|
|
870
|
+
const cell = sheetArgs[0] || 'A1';
|
|
871
|
+
const cellValue = sheetArgs.slice(1).join('|');
|
|
872
|
+
|
|
873
|
+
// Navigate to cell using Ctrl+G or F5 (Go to)
|
|
874
|
+
await computer.keyCombo(['control', 'g']); // Go to cell dialog
|
|
875
|
+
await sleep(500);
|
|
876
|
+
await computer.typeText(cell);
|
|
877
|
+
await computer.pressKey('Return');
|
|
878
|
+
await sleep(300);
|
|
879
|
+
|
|
880
|
+
// Type the value
|
|
881
|
+
await computer.typeText(cellValue);
|
|
882
|
+
await computer.pressKey('Return'); // Confirm and move down
|
|
883
|
+
await sleep(200);
|
|
884
|
+
|
|
885
|
+
step.result = `📊 Typed "${cellValue}" in cell ${cell}`;
|
|
886
|
+
break;
|
|
887
|
+
}
|
|
888
|
+
case 'read': {
|
|
889
|
+
const readScreen = await describeScreen();
|
|
890
|
+
step.result = `📊 Current sheet view:\n${readScreen.description}`;
|
|
891
|
+
break;
|
|
892
|
+
}
|
|
893
|
+
default:
|
|
894
|
+
throw new Error(`Unknown google_sheets command: ${sheetCmd}`);
|
|
895
|
+
}
|
|
896
|
+
break;
|
|
897
|
+
}
|
|
898
|
+
|
|
899
|
+
case 'google_docs': {
|
|
900
|
+
// Human-like: open browser, go to docs, interact
|
|
901
|
+
// Format: google_docs:command|arg1|arg2...
|
|
902
|
+
const [docCmd, ...docArgs] = params.split('|');
|
|
903
|
+
|
|
904
|
+
switch (docCmd.toLowerCase()) {
|
|
905
|
+
case 'new': {
|
|
906
|
+
const docName = docArgs[0] || 'Untitled document';
|
|
907
|
+
|
|
908
|
+
// Open browser and go to Google Docs
|
|
909
|
+
await computer.keyCombo(['meta', 'r']);
|
|
910
|
+
await sleep(500);
|
|
911
|
+
await computer.typeText('chrome');
|
|
912
|
+
await computer.pressKey('Return');
|
|
913
|
+
await sleep(2000);
|
|
914
|
+
|
|
915
|
+
await computer.keyCombo(['control', 'l']);
|
|
916
|
+
await sleep(300);
|
|
917
|
+
await computer.typeText('docs.google.com');
|
|
918
|
+
await computer.pressKey('Return');
|
|
919
|
+
await sleep(3000);
|
|
920
|
+
|
|
921
|
+
// Click "Blank" to create new
|
|
922
|
+
await computer.pressKey('Tab');
|
|
923
|
+
await computer.pressKey('Tab');
|
|
924
|
+
await computer.pressKey('Return');
|
|
925
|
+
await sleep(3000);
|
|
926
|
+
|
|
927
|
+
// Rename using File > Rename
|
|
928
|
+
await computer.keyCombo(['alt', 'f']); // File menu
|
|
929
|
+
await sleep(500);
|
|
930
|
+
await computer.typeText('r'); // Rename
|
|
931
|
+
await sleep(500);
|
|
932
|
+
await computer.keyCombo(['control', 'a']); // Select all
|
|
933
|
+
await computer.typeText(docName);
|
|
934
|
+
await computer.pressKey('Return');
|
|
935
|
+
await sleep(500);
|
|
936
|
+
await computer.pressKey('Escape'); // Close dialog, focus doc
|
|
937
|
+
|
|
938
|
+
step.result = `📄 Created Google Doc: ${docName}`;
|
|
939
|
+
break;
|
|
940
|
+
}
|
|
941
|
+
case 'type': {
|
|
942
|
+
const docText = docArgs.join('|');
|
|
943
|
+
// Just type - cursor should be in document
|
|
944
|
+
await computer.typeText(docText);
|
|
945
|
+
step.result = `📄 Typed content in Google Doc`;
|
|
946
|
+
break;
|
|
947
|
+
}
|
|
948
|
+
default:
|
|
949
|
+
throw new Error(`Unknown google_docs command: ${docCmd}`);
|
|
950
|
+
}
|
|
951
|
+
break;
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
case 'research': {
|
|
955
|
+
// Human-like multi-step research: open browser, search, click results, gather info
|
|
956
|
+
const researchQuery = params;
|
|
957
|
+
const researchResults: string[] = [];
|
|
958
|
+
|
|
959
|
+
// Step 1: Open browser and go to Google
|
|
960
|
+
await computer.keyCombo(['meta', 'r']); // Win+R
|
|
961
|
+
await sleep(500);
|
|
962
|
+
await computer.typeText('chrome');
|
|
963
|
+
await computer.pressKey('Return');
|
|
964
|
+
await sleep(2000);
|
|
965
|
+
|
|
966
|
+
await computer.keyCombo(['control', 'l']); // Focus address bar
|
|
967
|
+
await sleep(300);
|
|
968
|
+
await computer.typeText('google.com');
|
|
969
|
+
await computer.pressKey('Return');
|
|
970
|
+
await sleep(2000);
|
|
971
|
+
|
|
972
|
+
// Type search query
|
|
973
|
+
await computer.typeText(researchQuery);
|
|
974
|
+
await computer.pressKey('Return');
|
|
975
|
+
await sleep(3000);
|
|
976
|
+
|
|
977
|
+
// Capture initial search results
|
|
978
|
+
let searchScreen = await describeScreen();
|
|
979
|
+
const initialResults = await chat([{
|
|
980
|
+
role: 'user',
|
|
981
|
+
content: `Extract the key information from these Google search results about: "${researchQuery}"
|
|
982
|
+
Include any relevant facts, numbers, dates, or key points visible. Be thorough but concise.`
|
|
983
|
+
}]);
|
|
984
|
+
researchResults.push(`Search Results:\n${initialResults.content}`);
|
|
985
|
+
|
|
986
|
+
// Step 2: Click on first result (Tab to navigate, Enter to click)
|
|
987
|
+
await computer.pressKey('Tab');
|
|
988
|
+
await sleep(200);
|
|
989
|
+
await computer.pressKey('Tab');
|
|
990
|
+
await sleep(200);
|
|
991
|
+
await computer.pressKey('Return'); // Click first result
|
|
992
|
+
await sleep(4000); // Wait for page load
|
|
993
|
+
|
|
994
|
+
// Extract content from the page
|
|
995
|
+
searchScreen = await describeScreen();
|
|
996
|
+
const pageContent = await chat([{
|
|
997
|
+
role: 'user',
|
|
998
|
+
content: `Extract the main content and key information from this webpage about: "${researchQuery}"
|
|
999
|
+
Ignore ads, navigation, footers. Focus on the actual article/content.`
|
|
1000
|
+
}]);
|
|
1001
|
+
researchResults.push(`\nSource 1 Content:\n${pageContent.content}`);
|
|
1002
|
+
|
|
1003
|
+
// Step 3: Go back (Alt+Left) and check another source
|
|
1004
|
+
await computer.keyCombo(['alt', 'Left']); // Browser back
|
|
1005
|
+
await sleep(2000);
|
|
1006
|
+
|
|
1007
|
+
// Scroll down a bit to see more results
|
|
1008
|
+
await computer.scrollMouse(-3);
|
|
1009
|
+
await sleep(500);
|
|
1010
|
+
|
|
1011
|
+
// Navigate to second result
|
|
1012
|
+
await computer.pressKey('Tab');
|
|
1013
|
+
await computer.pressKey('Tab');
|
|
1014
|
+
await computer.pressKey('Tab');
|
|
1015
|
+
await computer.pressKey('Return');
|
|
1016
|
+
await sleep(4000);
|
|
1017
|
+
|
|
1018
|
+
searchScreen = await describeScreen();
|
|
1019
|
+
const pageContent2 = await chat([{
|
|
1020
|
+
role: 'user',
|
|
1021
|
+
content: `Extract additional information from this webpage about: "${researchQuery}"
|
|
1022
|
+
Look for details not covered in the previous source.`
|
|
1023
|
+
}]);
|
|
1024
|
+
researchResults.push(`\nSource 2 Content:\n${pageContent2.content}`);
|
|
1025
|
+
|
|
1026
|
+
// Step 4: Synthesize all gathered information
|
|
1027
|
+
const synthesis = await chat([{
|
|
1028
|
+
role: 'user',
|
|
1029
|
+
content: `Based on the following research gathered about "${researchQuery}", provide a comprehensive summary:
|
|
1030
|
+
|
|
1031
|
+
${researchResults.join('\n\n')}
|
|
1032
|
+
|
|
1033
|
+
Create a well-organized summary with:
|
|
1034
|
+
1. Key findings
|
|
1035
|
+
2. Important details
|
|
1036
|
+
3. Any notable facts or statistics
|
|
1037
|
+
4. Conclusion
|
|
1038
|
+
|
|
1039
|
+
Be thorough but concise.`
|
|
1040
|
+
}]);
|
|
1041
|
+
|
|
1042
|
+
step.result = `🔬 Research Summary: ${researchQuery}\n\n${synthesis.content}`;
|
|
1043
|
+
break;
|
|
1044
|
+
}
|
|
1045
|
+
|
|
599
1046
|
case 'chat':
|
|
600
1047
|
// This is a fallback - just describe what user wants
|
|
601
1048
|
step.result = `Task noted: ${params}`;
|