@eko-ai/eko 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +66 -23
- package/dist/extension/content/index.d.ts +1 -0
- package/dist/extension/tools/browser.d.ts +2 -1
- package/dist/extension/tools/index.d.ts +2 -1
- package/dist/extension/tools/request_login.d.ts +10 -0
- package/dist/extension/utils.d.ts +1 -0
- package/dist/extension.cjs.js +137 -20
- package/dist/extension.esm.js +137 -20
- package/dist/extension_content_script.js +129 -2
- package/dist/index.cjs.js +85 -5
- package/dist/index.esm.js +85 -5
- package/dist/models/action.d.ts +1 -0
- package/dist/nodejs/script/build_dom_tree.d.ts +1 -0
- package/dist/nodejs/tools/browser_use.d.ts +28 -0
- package/dist/nodejs/tools/index.d.ts +1 -0
- package/dist/nodejs.cjs.js +71428 -11
- package/dist/nodejs.esm.js +71422 -5
- package/dist/web/tools/browser.d.ts +2 -1
- package/dist/web.cjs.js +29 -17
- package/dist/web.esm.js +29 -17
- package/package.json +5 -7
package/README.md
CHANGED
|
@@ -1,17 +1,31 @@
|
|
|
1
|
-
# Eko
|
|
2
1
|
|
|
3
|
-
[](LICENSE) [](https://example.com/build-status) [](https://eko.fellou.ai/docs/release/versions/)
|
|
4
2
|
|
|
5
|
-
**Eko** is a revolutionary framework designed to empower developers and users alike to program their browser and operating system using natural language. With seamless integration of browser APIs, OS-level capabilities, and cutting-edge AI tools like Claude 3.5, Eko redefines how we interact with technology, making it intuitive, powerful, and accessible.
|
|
6
3
|
|
|
7
|
-
|
|
4
|
+
<h1 align="center">
|
|
5
|
+
<a href="https://github.com/FellouAI/eko" target="_blank">
|
|
6
|
+
<img src="https://github.com/user-attachments/assets/55dbdd6c-2b08-4e5f-a841-8fea7c2a0b92" alt="eko-logo" width="200" height="200">
|
|
7
|
+
</a>
|
|
8
|
+
<br>
|
|
9
|
+
<small>Eko - Build Production-ready Agentic Workflow with Natural Language</small>
|
|
10
|
+
</h1>
|
|
8
11
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
-
|
|
12
|
-
|
|
13
|
-
- **
|
|
14
|
-
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
[](LICENSE) [](https://example.com/build-status) [](https://eko.fellou.ai/docs/release/versions/)
|
|
15
|
+
|
|
16
|
+
Eko (pronounced like ‘echo’) is a production-ready JavaScript framework that enables developers to create reliable agents, **from simple commands to complex workflows**. It provides a unified interface for running agents in both **computer and browser environments**.
|
|
17
|
+
|
|
18
|
+
# Framework Comparison
|
|
19
|
+
|
|
20
|
+
| Feature | Eko | Langchain | Browser-use | Dify.ai | Coze |
|
|
21
|
+
|--------------------------------------|-------|------------|--------------|----------|--------|
|
|
22
|
+
| **Supported Platform** | **All platforms** | Server side | Browser | Web | Web |
|
|
23
|
+
| **One sentence to multi-step workflow** | ✅ | ❌ | ✅ | ❌ | ❌ |
|
|
24
|
+
| **Intervenability** | ✅ | ✅ | ❌ | ❌ | ❌ |
|
|
25
|
+
| **Development Efficiency** | **High** | Low | Middle | Middle | Low |
|
|
26
|
+
| **Task Complexity** | High | High | Low | Middle | Middle |
|
|
27
|
+
| **Open-source** | ✅ | ✅ | ✅ | ✅ | ❌ |
|
|
28
|
+
| **Access to private web resources** | ✅ **(Coming soon)** | ❌ | ❌ | ❌ | ❌ |
|
|
15
29
|
|
|
16
30
|
## Quickstart
|
|
17
31
|
|
|
@@ -19,7 +33,7 @@
|
|
|
19
33
|
npm install @eko-ai/eko
|
|
20
34
|
```
|
|
21
35
|
|
|
22
|
-
>
|
|
36
|
+
> For detailed usage, please refer to the [Eko Quickstart guide](https://eko.fellou.ai/docs/getting-started/quickstart/).
|
|
23
37
|
|
|
24
38
|
```typescript
|
|
25
39
|
import { Eko } from '@eko-ai/eko';
|
|
@@ -38,6 +52,46 @@ await eko.execute(sysWorkflow);
|
|
|
38
52
|
|
|
39
53
|
```
|
|
40
54
|
|
|
55
|
+
## Demos
|
|
56
|
+
|
|
57
|
+
**Prompt:** `Collect the latest NASDAQ data on Yahoo Finance, including price changes, market capitalization, trading volume of major stocks, analyze the data and generate visualization reports`.
|
|
58
|
+
|
|
59
|
+
https://github.com/user-attachments/assets/4087b370-8eb8-4346-a549-c4ce4d1efec3
|
|
60
|
+
|
|
61
|
+
Click [here](https://github.com/FellouAI/eko-demos/tree/main/browser-extension-stock) to get the source code.
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
**Prompt:** `Based on the README of FellouAI/eko on github, search for competitors, highlight the key contributions of Eko, write a blog post advertising Eko, and post it on Write.as.`
|
|
66
|
+
|
|
67
|
+
https://github.com/user-attachments/assets/6feaea86-2fb9-4e5c-b510-479c2473d810
|
|
68
|
+
|
|
69
|
+
Click [here](https://github.com/FellouAI/eko-demos/tree/main/browser-extension-blog) to get the source code.
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
**Prompt:** `Clean up all files in the current directory larger than 1MB`
|
|
74
|
+
|
|
75
|
+
https://github.com/user-attachments/assets/ef7feb58-3ddd-4296-a1de-bb8b6c66e48b
|
|
76
|
+
|
|
77
|
+
Click [here](https://eko.fellou.ai/docs/computeruse/computer-node/#example-file-cleanup-workflow) to learn more.
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
**Prompt:** Automatic software testing
|
|
82
|
+
```
|
|
83
|
+
Current login page automation test:
|
|
84
|
+
1. Correct account and password are: admin / 666666
|
|
85
|
+
2. Please randomly combine usernames and passwords for testing to verify if login validation works properly, such as: username cannot be empty, password cannot be empty, incorrect username, incorrect password
|
|
86
|
+
3. Finally, try to login with the correct account and password to verify if login is successful
|
|
87
|
+
4. Generate test report and export
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
https://github.com/user-attachments/assets/7716300a-c51d-41f1-8d4f-e3f593c1b6d5
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
Click [here](https://eko.fellou.ai/docs/browseruse/browser-web#example-login-automation-testing) to learn more.
|
|
94
|
+
|
|
41
95
|
## Use Cases
|
|
42
96
|
|
|
43
97
|
- Browser automation and web scraping
|
|
@@ -64,24 +118,13 @@ Eko can be used in multiple environments:
|
|
|
64
118
|
- Browser Extension
|
|
65
119
|
- Web Applications
|
|
66
120
|
- Node.js Applications
|
|
67
|
-
- [Fellou AI Browser](https://fellou.ai)
|
|
68
121
|
|
|
69
122
|
## Community and Support
|
|
70
123
|
|
|
71
124
|
- Report issues on [GitHub Issues](https://github.com/FellouAI/eko/issues)
|
|
125
|
+
- Join our [Slack community discussions](https://join.slack.com/t/eko-ai/shared_invite/zt-2xhvkudv9-nHvD1g8Smp227sM51x_Meg)
|
|
72
126
|
- Contribute tools and improvements
|
|
73
127
|
- Share your use cases and feedback
|
|
74
|
-
- Join our community discussions
|
|
75
|
-
|
|
76
|
-
## Contributing
|
|
77
|
-
|
|
78
|
-
We welcome contributions! See our [Contributing Guide](CONTRIBUTING.md) for details on:
|
|
79
|
-
|
|
80
|
-
- Setting up the development environment
|
|
81
|
-
- Code style guidelines
|
|
82
|
-
- Submission process
|
|
83
|
-
- Tool development
|
|
84
|
-
- Use case optimization
|
|
85
128
|
|
|
86
129
|
## License
|
|
87
130
|
|
|
@@ -10,7 +10,8 @@ export declare function right_click(tabId: number, coordinate?: [number, number]
|
|
|
10
10
|
export declare function right_click_by(tabId: number, xpath?: string, highlightIndex?: number): Promise<any>;
|
|
11
11
|
export declare function double_click(tabId: number, coordinate?: [number, number]): Promise<any>;
|
|
12
12
|
export declare function double_click_by(tabId: number, xpath?: string, highlightIndex?: number): Promise<any>;
|
|
13
|
-
export declare function screenshot(windowId: number): Promise<ScreenshotResult>;
|
|
13
|
+
export declare function screenshot(windowId: number, compress?: boolean): Promise<ScreenshotResult>;
|
|
14
|
+
export declare function compress_image(dataUrl: string, scale?: number, quality?: number): Promise<string>;
|
|
14
15
|
export declare function scroll_to(tabId: number, coordinate: [number, number]): Promise<any>;
|
|
15
16
|
export declare function scroll_to_by(tabId: number, xpath?: string, highlightIndex?: number): Promise<any>;
|
|
16
17
|
export declare function get_dropdown_options(tabId: number, xpath?: string, highlightIndex?: number): Promise<any>;
|
|
@@ -7,4 +7,5 @@ import { OpenUrl } from './open_url';
|
|
|
7
7
|
import { Screenshot } from './screenshot';
|
|
8
8
|
import { TabManagement } from './tab_management';
|
|
9
9
|
import { WebSearch } from './web_search';
|
|
10
|
-
|
|
10
|
+
import { RequestLogin } from './request_login';
|
|
11
|
+
export { BrowserUse, ElementClick, ExportFile, ExtractContent, FindElementPosition, OpenUrl, Screenshot, TabManagement, WebSearch, RequestLogin, };
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { Tool, InputSchema, ExecutionContext } from '../../types/action.types';
|
|
2
|
+
export declare class RequestLogin implements Tool<any, any> {
|
|
3
|
+
name: string;
|
|
4
|
+
description: string;
|
|
5
|
+
input_schema: InputSchema;
|
|
6
|
+
constructor();
|
|
7
|
+
execute(context: ExecutionContext, params: any): Promise<any>;
|
|
8
|
+
awaitLogin(tabId: number, task_id: string): Promise<boolean>;
|
|
9
|
+
isLoginIn(context: ExecutionContext): Promise<boolean>;
|
|
10
|
+
}
|
|
@@ -5,6 +5,7 @@ export declare function getCurrentTabId(windowId?: number | undefined): Promise<
|
|
|
5
5
|
export declare function open_new_tab(url: string, newWindow: boolean, windowId?: number): Promise<chrome.tabs.Tab>;
|
|
6
6
|
export declare function executeScript(tabId: number, func: any, args: any[]): Promise<any>;
|
|
7
7
|
export declare function waitForTabComplete(tabId: number, timeout?: number): Promise<chrome.tabs.Tab>;
|
|
8
|
+
export declare function doesTabExists(tabId: number): Promise<unknown>;
|
|
8
9
|
export declare function getPageSize(tabId?: number): Promise<[number, number]>;
|
|
9
10
|
export declare function sleep(time: number): Promise<void>;
|
|
10
11
|
export declare function injectScript(tabId: number, filename?: string): Promise<void>;
|
package/dist/extension.cjs.js
CHANGED
|
@@ -138,6 +138,19 @@ async function waitForTabComplete(tabId, timeout = 15000) {
|
|
|
138
138
|
chrome.tabs.onUpdated.addListener(listener);
|
|
139
139
|
});
|
|
140
140
|
}
|
|
141
|
+
async function doesTabExists(tabId) {
|
|
142
|
+
const tabExists = await new Promise((resolve) => {
|
|
143
|
+
chrome.tabs.get(tabId, (tab) => {
|
|
144
|
+
if (chrome.runtime.lastError) {
|
|
145
|
+
resolve(false);
|
|
146
|
+
}
|
|
147
|
+
else {
|
|
148
|
+
resolve(true);
|
|
149
|
+
}
|
|
150
|
+
});
|
|
151
|
+
});
|
|
152
|
+
return tabExists;
|
|
153
|
+
}
|
|
141
154
|
async function getPageSize(tabId) {
|
|
142
155
|
if (!tabId) {
|
|
143
156
|
tabId = await getCurrentTabId();
|
|
@@ -235,6 +248,7 @@ var utils = /*#__PURE__*/Object.freeze({
|
|
|
235
248
|
__proto__: null,
|
|
236
249
|
CountDownLatch: CountDownLatch,
|
|
237
250
|
MsgEvent: MsgEvent,
|
|
251
|
+
doesTabExists: doesTabExists,
|
|
238
252
|
executeScript: executeScript,
|
|
239
253
|
getCurrentTabId: getCurrentTabId,
|
|
240
254
|
getPageSize: getPageSize,
|
|
@@ -339,11 +353,21 @@ async function double_click_by(tabId, xpath, highlightIndex) {
|
|
|
339
353
|
highlightIndex,
|
|
340
354
|
});
|
|
341
355
|
}
|
|
342
|
-
async function screenshot(windowId) {
|
|
343
|
-
let dataUrl
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
356
|
+
async function screenshot(windowId, compress) {
|
|
357
|
+
let dataUrl;
|
|
358
|
+
if (compress) {
|
|
359
|
+
dataUrl = await chrome.tabs.captureVisibleTab(windowId, {
|
|
360
|
+
format: 'jpeg',
|
|
361
|
+
quality: 60, // 0-100
|
|
362
|
+
});
|
|
363
|
+
dataUrl = await compress_image(dataUrl, 0.7, 1);
|
|
364
|
+
}
|
|
365
|
+
else {
|
|
366
|
+
dataUrl = await chrome.tabs.captureVisibleTab(windowId, {
|
|
367
|
+
format: 'jpeg',
|
|
368
|
+
quality: 50,
|
|
369
|
+
});
|
|
370
|
+
}
|
|
347
371
|
let data = dataUrl.substring(dataUrl.indexOf('base64,') + 7);
|
|
348
372
|
return {
|
|
349
373
|
image: {
|
|
@@ -353,6 +377,23 @@ async function screenshot(windowId) {
|
|
|
353
377
|
},
|
|
354
378
|
};
|
|
355
379
|
}
|
|
380
|
+
async function compress_image(dataUrl, scale = 0.8, quality = 0.8) {
|
|
381
|
+
const bitmap = await createImageBitmap(await (await fetch(dataUrl)).blob());
|
|
382
|
+
let width = bitmap.width * scale;
|
|
383
|
+
let height = bitmap.height * scale;
|
|
384
|
+
const canvas = new OffscreenCanvas(width, height);
|
|
385
|
+
const ctx = canvas.getContext('2d');
|
|
386
|
+
ctx.drawImage(bitmap, 0, 0, width, height);
|
|
387
|
+
const blob = await canvas.convertToBlob({
|
|
388
|
+
type: 'image/jpeg',
|
|
389
|
+
quality: quality,
|
|
390
|
+
});
|
|
391
|
+
return new Promise((resolve) => {
|
|
392
|
+
const reader = new FileReader();
|
|
393
|
+
reader.onloadend = () => resolve(reader.result);
|
|
394
|
+
reader.readAsDataURL(blob);
|
|
395
|
+
});
|
|
396
|
+
}
|
|
356
397
|
async function scroll_to(tabId, coordinate) {
|
|
357
398
|
let from_coordinate = (await cursor_position(tabId)).coordinate;
|
|
358
399
|
return await chrome.tabs.sendMessage(tabId, {
|
|
@@ -397,6 +438,7 @@ var browser = /*#__PURE__*/Object.freeze({
|
|
|
397
438
|
__proto__: null,
|
|
398
439
|
clear_input: clear_input,
|
|
399
440
|
clear_input_by: clear_input_by,
|
|
441
|
+
compress_image: compress_image,
|
|
400
442
|
cursor_position: cursor_position,
|
|
401
443
|
double_click: double_click,
|
|
402
444
|
double_click_by: double_click_by,
|
|
@@ -423,7 +465,6 @@ class BrowserUse {
|
|
|
423
465
|
this.name = 'browser_use';
|
|
424
466
|
this.description = `Use structured commands to interact with the browser, manipulating page elements through screenshots and webpage element extraction.
|
|
425
467
|
* This is a browser GUI interface where you need to analyze webpages by taking screenshots and extracting page element structures, and specify action sequences to complete designated tasks.
|
|
426
|
-
* Some operations may need time to process, so you might need to wait and continuously take screenshots and extract element structures to check the operation results.
|
|
427
468
|
* Before any operation, you must first call the \`screenshot_extract_element\` command, which will return the browser page screenshot and structured element information, both specially processed.
|
|
428
469
|
* ELEMENT INTERACTION:
|
|
429
470
|
- Only use indexes that exist in the provided element list
|
|
@@ -433,17 +474,7 @@ class BrowserUse {
|
|
|
433
474
|
- If no suitable elements exist, use other functions to complete the task
|
|
434
475
|
- If stuck, try alternative approaches
|
|
435
476
|
- Handle popups/cookies by accepting or closing them
|
|
436
|
-
- Use scroll to find elements you are looking for
|
|
437
|
-
* Form filling:
|
|
438
|
-
- If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
|
|
439
|
-
* ACTION SEQUENCING:
|
|
440
|
-
- Actions are executed in the order they appear in the list
|
|
441
|
-
- Each action should logically follow from the previous one
|
|
442
|
-
- If the page changes after an action, the sequence is interrupted and you get the new state.
|
|
443
|
-
- If content only disappears the sequence continues.
|
|
444
|
-
- Only provide the action sequence until you think the page will change.
|
|
445
|
-
- Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
|
|
446
|
-
- only use multiple actions if it makes sense.`;
|
|
477
|
+
- Use scroll to find elements you are looking for`;
|
|
447
478
|
this.input_schema = {
|
|
448
479
|
type: 'object',
|
|
449
480
|
properties: {
|
|
@@ -585,7 +616,7 @@ class BrowserUse {
|
|
|
585
616
|
return window.get_clickable_elements(true);
|
|
586
617
|
}, []);
|
|
587
618
|
context.selector_map = element_result.selector_map;
|
|
588
|
-
let screenshot$1 = await screenshot(windowId);
|
|
619
|
+
let screenshot$1 = await screenshot(windowId, true);
|
|
589
620
|
await executeScript(tabId, () => {
|
|
590
621
|
return window.remove_highlight();
|
|
591
622
|
}, []);
|
|
@@ -817,7 +848,7 @@ ${pseudoHtml}
|
|
|
817
848
|
async function executeWithBrowserUse$1(context, task_prompt) {
|
|
818
849
|
let tabId = await getTabId(context);
|
|
819
850
|
let windowId = await getWindowId(context);
|
|
820
|
-
let screenshot_result = await screenshot(windowId);
|
|
851
|
+
let screenshot_result = await screenshot(windowId, false);
|
|
821
852
|
let messages = [
|
|
822
853
|
{
|
|
823
854
|
role: 'user',
|
|
@@ -1065,7 +1096,7 @@ ${pseudoHtml}
|
|
|
1065
1096
|
async function executeWithBrowserUse(context, task_prompt) {
|
|
1066
1097
|
await getTabId(context);
|
|
1067
1098
|
let windowId = await getWindowId(context);
|
|
1068
|
-
let screenshot_result = await screenshot(windowId);
|
|
1099
|
+
let screenshot_result = await screenshot(windowId, false);
|
|
1069
1100
|
let messages = [
|
|
1070
1101
|
{
|
|
1071
1102
|
role: 'user',
|
|
@@ -1643,6 +1674,91 @@ async function doPageContent(taskId, detailLinkGroups, window) {
|
|
|
1643
1674
|
return searchInfo;
|
|
1644
1675
|
}
|
|
1645
1676
|
|
|
1677
|
+
class RequestLogin {
|
|
1678
|
+
constructor() {
|
|
1679
|
+
this.name = 'request_login';
|
|
1680
|
+
this.description =
|
|
1681
|
+
'Login to this website, assist with identity verification when manual intervention is needed, guide users through the login process, and wait for their confirmation of successful login.';
|
|
1682
|
+
this.input_schema = {
|
|
1683
|
+
type: 'object',
|
|
1684
|
+
properties: {},
|
|
1685
|
+
};
|
|
1686
|
+
}
|
|
1687
|
+
async execute(context, params) {
|
|
1688
|
+
if (!params.force && await this.isLoginIn(context)) {
|
|
1689
|
+
return true;
|
|
1690
|
+
}
|
|
1691
|
+
let tabId = await getTabId(context);
|
|
1692
|
+
let task_id = 'login_required_' + tabId;
|
|
1693
|
+
const request_user_help = async () => {
|
|
1694
|
+
await chrome.tabs.sendMessage(tabId, {
|
|
1695
|
+
type: 'request_user_help',
|
|
1696
|
+
task_id,
|
|
1697
|
+
failure_type: 'login_required',
|
|
1698
|
+
failure_message: 'Access page require user authentication.',
|
|
1699
|
+
});
|
|
1700
|
+
};
|
|
1701
|
+
const login_interval = setInterval(async () => {
|
|
1702
|
+
try {
|
|
1703
|
+
request_user_help();
|
|
1704
|
+
}
|
|
1705
|
+
catch (e) {
|
|
1706
|
+
clearInterval(login_interval);
|
|
1707
|
+
}
|
|
1708
|
+
}, 2000);
|
|
1709
|
+
try {
|
|
1710
|
+
return await this.awaitLogin(tabId, task_id);
|
|
1711
|
+
}
|
|
1712
|
+
finally {
|
|
1713
|
+
clearInterval(login_interval);
|
|
1714
|
+
}
|
|
1715
|
+
}
|
|
1716
|
+
async awaitLogin(tabId, task_id) {
|
|
1717
|
+
return new Promise((resolve) => {
|
|
1718
|
+
const checkTabClosedInterval = setInterval(async () => {
|
|
1719
|
+
const tabExists = await doesTabExists(tabId);
|
|
1720
|
+
if (!tabExists) {
|
|
1721
|
+
clearInterval(checkTabClosedInterval);
|
|
1722
|
+
resolve(false);
|
|
1723
|
+
chrome.runtime.onMessage.removeListener(listener);
|
|
1724
|
+
}
|
|
1725
|
+
}, 1000);
|
|
1726
|
+
const listener = (message) => {
|
|
1727
|
+
if (message.type === 'issue_resolved' && message.task_id === task_id) {
|
|
1728
|
+
resolve(true);
|
|
1729
|
+
clearInterval(checkTabClosedInterval);
|
|
1730
|
+
}
|
|
1731
|
+
};
|
|
1732
|
+
chrome.runtime.onMessage.addListener(listener);
|
|
1733
|
+
});
|
|
1734
|
+
}
|
|
1735
|
+
async isLoginIn(context) {
|
|
1736
|
+
let windowId = await getWindowId(context);
|
|
1737
|
+
let screenshot_result = await screenshot(windowId, true);
|
|
1738
|
+
let messages = [
|
|
1739
|
+
{
|
|
1740
|
+
role: 'user',
|
|
1741
|
+
content: [
|
|
1742
|
+
{
|
|
1743
|
+
type: 'image',
|
|
1744
|
+
source: screenshot_result.image,
|
|
1745
|
+
},
|
|
1746
|
+
{
|
|
1747
|
+
type: 'text',
|
|
1748
|
+
text: 'Check if the current website is logged in. If not logged in, output `NOT_LOGIN`. If logged in, output `LOGGED_IN`. Output directly without explanation.',
|
|
1749
|
+
},
|
|
1750
|
+
],
|
|
1751
|
+
},
|
|
1752
|
+
];
|
|
1753
|
+
let response = await context.llmProvider.generateText(messages, { maxTokens: 256 });
|
|
1754
|
+
let text = response.textContent;
|
|
1755
|
+
if (!text) {
|
|
1756
|
+
text = JSON.stringify(response.content);
|
|
1757
|
+
}
|
|
1758
|
+
return text.indexOf('LOGGED_IN') > -1;
|
|
1759
|
+
}
|
|
1760
|
+
}
|
|
1761
|
+
|
|
1646
1762
|
var tools = /*#__PURE__*/Object.freeze({
|
|
1647
1763
|
__proto__: null,
|
|
1648
1764
|
BrowserUse: BrowserUse,
|
|
@@ -1651,6 +1767,7 @@ var tools = /*#__PURE__*/Object.freeze({
|
|
|
1651
1767
|
ExtractContent: ExtractContent,
|
|
1652
1768
|
FindElementPosition: FindElementPosition,
|
|
1653
1769
|
OpenUrl: OpenUrl,
|
|
1770
|
+
RequestLogin: RequestLogin,
|
|
1654
1771
|
Screenshot: Screenshot,
|
|
1655
1772
|
TabManagement: TabManagement,
|
|
1656
1773
|
WebSearch: WebSearch
|
package/dist/extension.esm.js
CHANGED
|
@@ -136,6 +136,19 @@ async function waitForTabComplete(tabId, timeout = 15000) {
|
|
|
136
136
|
chrome.tabs.onUpdated.addListener(listener);
|
|
137
137
|
});
|
|
138
138
|
}
|
|
139
|
+
async function doesTabExists(tabId) {
|
|
140
|
+
const tabExists = await new Promise((resolve) => {
|
|
141
|
+
chrome.tabs.get(tabId, (tab) => {
|
|
142
|
+
if (chrome.runtime.lastError) {
|
|
143
|
+
resolve(false);
|
|
144
|
+
}
|
|
145
|
+
else {
|
|
146
|
+
resolve(true);
|
|
147
|
+
}
|
|
148
|
+
});
|
|
149
|
+
});
|
|
150
|
+
return tabExists;
|
|
151
|
+
}
|
|
139
152
|
async function getPageSize(tabId) {
|
|
140
153
|
if (!tabId) {
|
|
141
154
|
tabId = await getCurrentTabId();
|
|
@@ -233,6 +246,7 @@ var utils = /*#__PURE__*/Object.freeze({
|
|
|
233
246
|
__proto__: null,
|
|
234
247
|
CountDownLatch: CountDownLatch,
|
|
235
248
|
MsgEvent: MsgEvent,
|
|
249
|
+
doesTabExists: doesTabExists,
|
|
236
250
|
executeScript: executeScript,
|
|
237
251
|
getCurrentTabId: getCurrentTabId,
|
|
238
252
|
getPageSize: getPageSize,
|
|
@@ -337,11 +351,21 @@ async function double_click_by(tabId, xpath, highlightIndex) {
|
|
|
337
351
|
highlightIndex,
|
|
338
352
|
});
|
|
339
353
|
}
|
|
340
|
-
async function screenshot(windowId) {
|
|
341
|
-
let dataUrl
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
354
|
+
async function screenshot(windowId, compress) {
|
|
355
|
+
let dataUrl;
|
|
356
|
+
if (compress) {
|
|
357
|
+
dataUrl = await chrome.tabs.captureVisibleTab(windowId, {
|
|
358
|
+
format: 'jpeg',
|
|
359
|
+
quality: 60, // 0-100
|
|
360
|
+
});
|
|
361
|
+
dataUrl = await compress_image(dataUrl, 0.7, 1);
|
|
362
|
+
}
|
|
363
|
+
else {
|
|
364
|
+
dataUrl = await chrome.tabs.captureVisibleTab(windowId, {
|
|
365
|
+
format: 'jpeg',
|
|
366
|
+
quality: 50,
|
|
367
|
+
});
|
|
368
|
+
}
|
|
345
369
|
let data = dataUrl.substring(dataUrl.indexOf('base64,') + 7);
|
|
346
370
|
return {
|
|
347
371
|
image: {
|
|
@@ -351,6 +375,23 @@ async function screenshot(windowId) {
|
|
|
351
375
|
},
|
|
352
376
|
};
|
|
353
377
|
}
|
|
378
|
+
async function compress_image(dataUrl, scale = 0.8, quality = 0.8) {
|
|
379
|
+
const bitmap = await createImageBitmap(await (await fetch(dataUrl)).blob());
|
|
380
|
+
let width = bitmap.width * scale;
|
|
381
|
+
let height = bitmap.height * scale;
|
|
382
|
+
const canvas = new OffscreenCanvas(width, height);
|
|
383
|
+
const ctx = canvas.getContext('2d');
|
|
384
|
+
ctx.drawImage(bitmap, 0, 0, width, height);
|
|
385
|
+
const blob = await canvas.convertToBlob({
|
|
386
|
+
type: 'image/jpeg',
|
|
387
|
+
quality: quality,
|
|
388
|
+
});
|
|
389
|
+
return new Promise((resolve) => {
|
|
390
|
+
const reader = new FileReader();
|
|
391
|
+
reader.onloadend = () => resolve(reader.result);
|
|
392
|
+
reader.readAsDataURL(blob);
|
|
393
|
+
});
|
|
394
|
+
}
|
|
354
395
|
async function scroll_to(tabId, coordinate) {
|
|
355
396
|
let from_coordinate = (await cursor_position(tabId)).coordinate;
|
|
356
397
|
return await chrome.tabs.sendMessage(tabId, {
|
|
@@ -395,6 +436,7 @@ var browser = /*#__PURE__*/Object.freeze({
|
|
|
395
436
|
__proto__: null,
|
|
396
437
|
clear_input: clear_input,
|
|
397
438
|
clear_input_by: clear_input_by,
|
|
439
|
+
compress_image: compress_image,
|
|
398
440
|
cursor_position: cursor_position,
|
|
399
441
|
double_click: double_click,
|
|
400
442
|
double_click_by: double_click_by,
|
|
@@ -421,7 +463,6 @@ class BrowserUse {
|
|
|
421
463
|
this.name = 'browser_use';
|
|
422
464
|
this.description = `Use structured commands to interact with the browser, manipulating page elements through screenshots and webpage element extraction.
|
|
423
465
|
* This is a browser GUI interface where you need to analyze webpages by taking screenshots and extracting page element structures, and specify action sequences to complete designated tasks.
|
|
424
|
-
* Some operations may need time to process, so you might need to wait and continuously take screenshots and extract element structures to check the operation results.
|
|
425
466
|
* Before any operation, you must first call the \`screenshot_extract_element\` command, which will return the browser page screenshot and structured element information, both specially processed.
|
|
426
467
|
* ELEMENT INTERACTION:
|
|
427
468
|
- Only use indexes that exist in the provided element list
|
|
@@ -431,17 +472,7 @@ class BrowserUse {
|
|
|
431
472
|
- If no suitable elements exist, use other functions to complete the task
|
|
432
473
|
- If stuck, try alternative approaches
|
|
433
474
|
- Handle popups/cookies by accepting or closing them
|
|
434
|
-
- Use scroll to find elements you are looking for
|
|
435
|
-
* Form filling:
|
|
436
|
-
- If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
|
|
437
|
-
* ACTION SEQUENCING:
|
|
438
|
-
- Actions are executed in the order they appear in the list
|
|
439
|
-
- Each action should logically follow from the previous one
|
|
440
|
-
- If the page changes after an action, the sequence is interrupted and you get the new state.
|
|
441
|
-
- If content only disappears the sequence continues.
|
|
442
|
-
- Only provide the action sequence until you think the page will change.
|
|
443
|
-
- Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
|
|
444
|
-
- only use multiple actions if it makes sense.`;
|
|
475
|
+
- Use scroll to find elements you are looking for`;
|
|
445
476
|
this.input_schema = {
|
|
446
477
|
type: 'object',
|
|
447
478
|
properties: {
|
|
@@ -583,7 +614,7 @@ class BrowserUse {
|
|
|
583
614
|
return window.get_clickable_elements(true);
|
|
584
615
|
}, []);
|
|
585
616
|
context.selector_map = element_result.selector_map;
|
|
586
|
-
let screenshot$1 = await screenshot(windowId);
|
|
617
|
+
let screenshot$1 = await screenshot(windowId, true);
|
|
587
618
|
await executeScript(tabId, () => {
|
|
588
619
|
return window.remove_highlight();
|
|
589
620
|
}, []);
|
|
@@ -815,7 +846,7 @@ ${pseudoHtml}
|
|
|
815
846
|
async function executeWithBrowserUse$1(context, task_prompt) {
|
|
816
847
|
let tabId = await getTabId(context);
|
|
817
848
|
let windowId = await getWindowId(context);
|
|
818
|
-
let screenshot_result = await screenshot(windowId);
|
|
849
|
+
let screenshot_result = await screenshot(windowId, false);
|
|
819
850
|
let messages = [
|
|
820
851
|
{
|
|
821
852
|
role: 'user',
|
|
@@ -1063,7 +1094,7 @@ ${pseudoHtml}
|
|
|
1063
1094
|
async function executeWithBrowserUse(context, task_prompt) {
|
|
1064
1095
|
await getTabId(context);
|
|
1065
1096
|
let windowId = await getWindowId(context);
|
|
1066
|
-
let screenshot_result = await screenshot(windowId);
|
|
1097
|
+
let screenshot_result = await screenshot(windowId, false);
|
|
1067
1098
|
let messages = [
|
|
1068
1099
|
{
|
|
1069
1100
|
role: 'user',
|
|
@@ -1641,6 +1672,91 @@ async function doPageContent(taskId, detailLinkGroups, window) {
|
|
|
1641
1672
|
return searchInfo;
|
|
1642
1673
|
}
|
|
1643
1674
|
|
|
1675
|
+
class RequestLogin {
|
|
1676
|
+
constructor() {
|
|
1677
|
+
this.name = 'request_login';
|
|
1678
|
+
this.description =
|
|
1679
|
+
'Login to this website, assist with identity verification when manual intervention is needed, guide users through the login process, and wait for their confirmation of successful login.';
|
|
1680
|
+
this.input_schema = {
|
|
1681
|
+
type: 'object',
|
|
1682
|
+
properties: {},
|
|
1683
|
+
};
|
|
1684
|
+
}
|
|
1685
|
+
async execute(context, params) {
|
|
1686
|
+
if (!params.force && await this.isLoginIn(context)) {
|
|
1687
|
+
return true;
|
|
1688
|
+
}
|
|
1689
|
+
let tabId = await getTabId(context);
|
|
1690
|
+
let task_id = 'login_required_' + tabId;
|
|
1691
|
+
const request_user_help = async () => {
|
|
1692
|
+
await chrome.tabs.sendMessage(tabId, {
|
|
1693
|
+
type: 'request_user_help',
|
|
1694
|
+
task_id,
|
|
1695
|
+
failure_type: 'login_required',
|
|
1696
|
+
failure_message: 'Access page require user authentication.',
|
|
1697
|
+
});
|
|
1698
|
+
};
|
|
1699
|
+
const login_interval = setInterval(async () => {
|
|
1700
|
+
try {
|
|
1701
|
+
request_user_help();
|
|
1702
|
+
}
|
|
1703
|
+
catch (e) {
|
|
1704
|
+
clearInterval(login_interval);
|
|
1705
|
+
}
|
|
1706
|
+
}, 2000);
|
|
1707
|
+
try {
|
|
1708
|
+
return await this.awaitLogin(tabId, task_id);
|
|
1709
|
+
}
|
|
1710
|
+
finally {
|
|
1711
|
+
clearInterval(login_interval);
|
|
1712
|
+
}
|
|
1713
|
+
}
|
|
1714
|
+
async awaitLogin(tabId, task_id) {
|
|
1715
|
+
return new Promise((resolve) => {
|
|
1716
|
+
const checkTabClosedInterval = setInterval(async () => {
|
|
1717
|
+
const tabExists = await doesTabExists(tabId);
|
|
1718
|
+
if (!tabExists) {
|
|
1719
|
+
clearInterval(checkTabClosedInterval);
|
|
1720
|
+
resolve(false);
|
|
1721
|
+
chrome.runtime.onMessage.removeListener(listener);
|
|
1722
|
+
}
|
|
1723
|
+
}, 1000);
|
|
1724
|
+
const listener = (message) => {
|
|
1725
|
+
if (message.type === 'issue_resolved' && message.task_id === task_id) {
|
|
1726
|
+
resolve(true);
|
|
1727
|
+
clearInterval(checkTabClosedInterval);
|
|
1728
|
+
}
|
|
1729
|
+
};
|
|
1730
|
+
chrome.runtime.onMessage.addListener(listener);
|
|
1731
|
+
});
|
|
1732
|
+
}
|
|
1733
|
+
async isLoginIn(context) {
|
|
1734
|
+
let windowId = await getWindowId(context);
|
|
1735
|
+
let screenshot_result = await screenshot(windowId, true);
|
|
1736
|
+
let messages = [
|
|
1737
|
+
{
|
|
1738
|
+
role: 'user',
|
|
1739
|
+
content: [
|
|
1740
|
+
{
|
|
1741
|
+
type: 'image',
|
|
1742
|
+
source: screenshot_result.image,
|
|
1743
|
+
},
|
|
1744
|
+
{
|
|
1745
|
+
type: 'text',
|
|
1746
|
+
text: 'Check if the current website is logged in. If not logged in, output `NOT_LOGIN`. If logged in, output `LOGGED_IN`. Output directly without explanation.',
|
|
1747
|
+
},
|
|
1748
|
+
],
|
|
1749
|
+
},
|
|
1750
|
+
];
|
|
1751
|
+
let response = await context.llmProvider.generateText(messages, { maxTokens: 256 });
|
|
1752
|
+
let text = response.textContent;
|
|
1753
|
+
if (!text) {
|
|
1754
|
+
text = JSON.stringify(response.content);
|
|
1755
|
+
}
|
|
1756
|
+
return text.indexOf('LOGGED_IN') > -1;
|
|
1757
|
+
}
|
|
1758
|
+
}
|
|
1759
|
+
|
|
1644
1760
|
var tools = /*#__PURE__*/Object.freeze({
|
|
1645
1761
|
__proto__: null,
|
|
1646
1762
|
BrowserUse: BrowserUse,
|
|
@@ -1649,6 +1765,7 @@ var tools = /*#__PURE__*/Object.freeze({
|
|
|
1649
1765
|
ExtractContent: ExtractContent,
|
|
1650
1766
|
FindElementPosition: FindElementPosition,
|
|
1651
1767
|
OpenUrl: OpenUrl,
|
|
1768
|
+
RequestLogin: RequestLogin,
|
|
1652
1769
|
Screenshot: Screenshot,
|
|
1653
1770
|
TabManagement: TabManagement,
|
|
1654
1771
|
WebSearch: WebSearch
|