@eko-ai/eko 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +72 -21
  2. package/dist/core/eko.d.ts +3 -2
  3. package/dist/extension/content/index.d.ts +1 -0
  4. package/dist/extension/tools/browser.d.ts +2 -1
  5. package/dist/extension/tools/get_all_tabs.d.ts +9 -0
  6. package/dist/extension/tools/index.d.ts +4 -1
  7. package/dist/extension/tools/request_login.d.ts +10 -0
  8. package/dist/extension/tools/tab_management.d.ts +1 -1
  9. package/dist/extension/utils.d.ts +2 -1
  10. package/dist/extension.cjs.js +797 -209
  11. package/dist/extension.esm.js +797 -209
  12. package/dist/extension_content_script.js +129 -2
  13. package/dist/index.cjs.js +518 -114
  14. package/dist/index.d.ts +2 -1
  15. package/dist/index.esm.js +518 -115
  16. package/dist/models/action.d.ts +9 -4
  17. package/dist/models/workflow.d.ts +8 -3
  18. package/dist/nodejs/script/build_dom_tree.d.ts +1 -0
  19. package/dist/nodejs/tools/browser_use.d.ts +28 -0
  20. package/dist/nodejs/tools/index.d.ts +2 -0
  21. package/dist/nodejs.cjs.js +71638 -12
  22. package/dist/nodejs.esm.js +71632 -6
  23. package/dist/schemas/workflow.schema.d.ts +2 -13
  24. package/dist/services/llm/claude-provider.d.ts +2 -1
  25. package/dist/services/llm/openai-provider.d.ts +2 -1
  26. package/dist/services/parser/workflow-parser.d.ts +0 -7
  27. package/dist/types/action.types.d.ts +8 -3
  28. package/dist/types/tools.types.d.ts +44 -1
  29. package/dist/types/workflow.types.d.ts +22 -9
  30. package/dist/universal_tools/cancel_workflow.d.ts +9 -0
  31. package/dist/universal_tools/human.d.ts +30 -0
  32. package/dist/universal_tools/index.d.ts +4 -0
  33. package/dist/universal_tools/summary_workflow.d.ts +9 -0
  34. package/dist/utils/execution-logger.d.ts +69 -0
  35. package/dist/web/tools/browser.d.ts +2 -1
  36. package/dist/web.cjs.js +29 -17
  37. package/dist/web.esm.js +29 -17
  38. package/package.json +6 -9
@@ -30,24 +30,13 @@ export declare const workflowSchema: {
30
30
  type: string;
31
31
  };
32
32
  };
33
- input: {
34
- type: string;
35
- properties: {
36
- type: {
37
- type: string;
38
- };
39
- schema: {
40
- type: string;
41
- };
42
- };
43
- };
44
33
  output: {
45
34
  type: string;
46
35
  properties: {
47
- type: {
36
+ name: {
48
37
  type: string;
49
38
  };
50
- schema: {
39
+ description: {
51
40
  type: string;
52
41
  };
53
42
  };
@@ -1,8 +1,9 @@
1
- import { ClientOptions } from '@anthropic-ai/sdk';
1
+ import Anthropic, { ClientOptions } from '@anthropic-ai/sdk';
2
2
  import { LLMProvider, LLMParameters, LLMResponse, Message, LLMStreamHandler } from '../../types/llm.types';
3
3
  export declare class ClaudeProvider implements LLMProvider {
4
4
  private client;
5
5
  private defaultModel;
6
+ constructor(options: Anthropic, defaultModel?: string);
6
7
  constructor(options: ClientOptions, defaultModel?: string);
7
8
  constructor(apiKey: string, defaultModel?: string | null, options?: ClientOptions);
8
9
  private processResponse;
@@ -1,8 +1,9 @@
1
- import { ClientOptions } from 'openai';
1
+ import OpenAI, { ClientOptions } from 'openai';
2
2
  import { LLMProvider, LLMParameters, LLMResponse, Message, LLMStreamHandler } from '../../types/llm.types';
3
3
  export declare class OpenaiProvider implements LLMProvider {
4
4
  private client;
5
5
  private defaultModel;
6
+ constructor(client: OpenAI, defaultModel?: string);
6
7
  constructor(options: ClientOptions, defaultModel?: string);
7
8
  constructor(apiKey: string, defaultModel?: string | null, options?: ClientOptions);
8
9
  private buildParams;
@@ -14,16 +14,9 @@ export declare class WorkflowParser {
14
14
  * Validate workflow JSON structure against schema
15
15
  */
16
16
  static validate(json: unknown): ValidationResult;
17
- /**
18
- * Convert parsed JSON to runtime Workflow object
19
- */
20
17
  private static toRuntime;
21
18
  /**
22
19
  * Convert runtime Workflow object to JSON structure
23
20
  */
24
21
  private static fromRuntime;
25
- /**
26
- * Helper to convert IO definitions
27
- */
28
- private static convertIO;
29
22
  }
@@ -1,5 +1,7 @@
1
+ import { Workflow } from "./workflow.types";
1
2
  import { LLMProvider } from "./llm.types";
2
- import { WorkflowCallback } from "./workflow.types";
3
+ import { NodeOutput, WorkflowCallback } from "./workflow.types";
4
+ import { NodeInput } from "./workflow.types";
3
5
  export interface Tool<T, R> {
4
6
  name: string;
5
7
  description: string;
@@ -25,14 +27,17 @@ export interface Property {
25
27
  export interface ExecutionContext {
26
28
  llmProvider: LLMProvider;
27
29
  variables: Map<string, unknown>;
30
+ workflow?: Workflow;
28
31
  tools?: Map<string, Tool<any, any>>;
29
32
  callback?: WorkflowCallback;
33
+ signal?: AbortSignal;
30
34
  [key: string]: any;
31
35
  }
32
36
  export interface Action {
33
37
  type: 'prompt' | 'script' | 'hybrid';
34
38
  name: string;
35
39
  description: string;
36
- execute: (input: unknown, context: ExecutionContext) => Promise<unknown>;
37
- tools: Tool<any, any>[];
40
+ execute: (input: NodeInput, output: NodeOutput, context: ExecutionContext) => Promise<unknown>;
41
+ tools: Array<Tool<any, any>>;
42
+ llmProvider?: LLMProvider;
38
43
  }
@@ -41,6 +41,13 @@ export interface OpenUrlResult {
41
41
  windowId: number;
42
42
  title?: string;
43
43
  }
44
+ export interface BrowserTab {
45
+ id: number;
46
+ url?: string;
47
+ title?: string;
48
+ content: string;
49
+ description: string;
50
+ }
44
51
  export interface ScreenshotResult {
45
52
  image: ScreenshotImage;
46
53
  }
@@ -50,7 +57,7 @@ export interface ScreenshotImage {
50
57
  data: string;
51
58
  }
52
59
  export interface TabManagementParam {
53
- commond: string;
60
+ command: string;
54
61
  }
55
62
  export type TabManagementResult = TabInfo | CloseTabInfo | TabInfo[];
56
63
  export interface TabInfo {
@@ -86,3 +93,39 @@ export interface ElementRect {
86
93
  width?: number;
87
94
  height?: number;
88
95
  }
96
+ export interface CancelWorkflowInput {
97
+ reason: string;
98
+ }
99
+ export interface HumanInputTextInput {
100
+ question: string;
101
+ }
102
+ export interface HumanInputTextResult {
103
+ status: string;
104
+ answer: string;
105
+ }
106
+ export interface HumanInputSingleChoiceInput {
107
+ question: string;
108
+ choices: string[];
109
+ }
110
+ export interface HumanInputSingleChoiceResult {
111
+ status: string;
112
+ answer: string;
113
+ }
114
+ export interface HumanInputMultipleChoiceInput {
115
+ question: string;
116
+ choices: string[];
117
+ }
118
+ export interface HumanInputMultipleChoiceResult {
119
+ status: string;
120
+ answer: string[];
121
+ }
122
+ export interface HumanOperateInput {
123
+ reason: string;
124
+ }
125
+ export interface HumanOperateResult {
126
+ status: string;
127
+ userOperation: string;
128
+ }
129
+ export interface SummaryWorkflowInput {
130
+ summary: string;
131
+ }
@@ -1,18 +1,22 @@
1
1
  import { Action, ExecutionContext, Tool } from "./action.types";
2
2
  import { LLMProvider } from "./llm.types";
3
+ import { ExecutionLogger } from "@/utils/execution-logger";
4
+ export interface NodeOutput {
5
+ name: string;
6
+ description: string;
7
+ value?: unknown;
8
+ }
9
+ export interface NodeInput {
10
+ items: NodeOutput[];
11
+ }
3
12
  export interface WorkflowNode {
4
13
  id: string;
5
14
  name: string;
6
15
  description?: string;
7
- input: NodeIO;
8
- output: NodeIO;
9
- action: Action;
10
16
  dependencies: string[];
11
- }
12
- export interface NodeIO {
13
- type: string;
14
- schema: object;
15
- value: unknown;
17
+ action: Action;
18
+ input: NodeInput;
19
+ output: NodeOutput;
16
20
  }
17
21
  export interface Workflow {
18
22
  id: string;
@@ -21,7 +25,9 @@ export interface Workflow {
21
25
  nodes: WorkflowNode[];
22
26
  variables: Map<string, any>;
23
27
  llmProvider?: LLMProvider;
24
- execute(callback?: WorkflowCallback): Promise<void>;
28
+ setLogger(logger: ExecutionLogger): void;
29
+ execute(callback?: WorkflowCallback): Promise<NodeOutput[]>;
30
+ cancel(): Promise<void>;
25
31
  addNode(node: WorkflowNode): void;
26
32
  removeNode(nodeId: string): void;
27
33
  getNode(nodeId: string): WorkflowNode;
@@ -35,5 +41,12 @@ export interface WorkflowCallback {
35
41
  afterToolUse?: (tool: Tool<any, any>, context: ExecutionContext, result: any) => Promise<any>;
36
42
  afterSubtask?: (subtask: WorkflowNode, context: ExecutionContext, result: any) => Promise<void>;
37
43
  afterWorkflow?: (workflow: Workflow, variables: Map<string, unknown>) => Promise<void>;
44
+ onTabCreated?: (tabId: number) => Promise<void>;
45
+ onLlmMessage?: (textContent: string) => Promise<void>;
46
+ onHumanInputText?: (question: string) => Promise<string>;
47
+ onHumanInputSingleChoice?: (question: string, choices: string[]) => Promise<string>;
48
+ onHumanInputMultipleChoice?: (question: string, choices: string[]) => Promise<string[]>;
49
+ onHumanOperate?: (reason: string) => Promise<string>;
50
+ onSummaryWorkflow?: (summary: string) => Promise<void>;
38
51
  };
39
52
  }
@@ -0,0 +1,9 @@
1
+ import { CancelWorkflowInput } from '../types/tools.types';
2
+ import { Tool, InputSchema, ExecutionContext } from '../types/action.types';
3
+ export declare class CancelWorkflow implements Tool<CancelWorkflowInput, void> {
4
+ name: string;
5
+ description: string;
6
+ input_schema: InputSchema;
7
+ constructor();
8
+ execute(context: ExecutionContext, params: CancelWorkflowInput): Promise<void>;
9
+ }
@@ -0,0 +1,30 @@
1
+ import { HumanInputTextInput, HumanInputTextResult, HumanInputSingleChoiceInput, HumanInputSingleChoiceResult, HumanInputMultipleChoiceInput, HumanInputMultipleChoiceResult, HumanOperateInput, HumanOperateResult } from '../types/tools.types';
2
+ import { Tool, InputSchema, ExecutionContext } from '../types/action.types';
3
+ export declare class HumanInputText implements Tool<HumanInputTextInput, HumanInputTextResult> {
4
+ name: string;
5
+ description: string;
6
+ input_schema: InputSchema;
7
+ constructor();
8
+ execute(context: ExecutionContext, params: HumanInputTextInput): Promise<HumanInputTextResult>;
9
+ }
10
+ export declare class HumanInputSingleChoice implements Tool<HumanInputSingleChoiceInput, HumanInputSingleChoiceResult> {
11
+ name: string;
12
+ description: string;
13
+ input_schema: InputSchema;
14
+ constructor();
15
+ execute(context: ExecutionContext, params: HumanInputSingleChoiceInput): Promise<HumanInputSingleChoiceResult>;
16
+ }
17
+ export declare class HumanInputMultipleChoice implements Tool<HumanInputMultipleChoiceInput, HumanInputMultipleChoiceResult> {
18
+ name: string;
19
+ description: string;
20
+ input_schema: InputSchema;
21
+ constructor();
22
+ execute(context: ExecutionContext, params: HumanInputMultipleChoiceInput): Promise<HumanInputMultipleChoiceResult>;
23
+ }
24
+ export declare class HumanOperate implements Tool<HumanOperateInput, HumanOperateResult> {
25
+ name: string;
26
+ description: string;
27
+ input_schema: InputSchema;
28
+ constructor();
29
+ execute(context: ExecutionContext, params: HumanOperateInput): Promise<HumanOperateResult>;
30
+ }
@@ -0,0 +1,4 @@
1
+ import { CancelWorkflow } from "./cancel_workflow";
2
+ import { HumanInputText, HumanInputSingleChoice, HumanInputMultipleChoice, HumanOperate } from "./human";
3
+ import { SummaryWorkflow } from "./summary_workflow";
4
+ export { CancelWorkflow, HumanInputText, HumanInputSingleChoice, HumanInputMultipleChoice, HumanOperate, SummaryWorkflow, };
@@ -0,0 +1,9 @@
1
+ import { SummaryWorkflowInput } from '../types/tools.types';
2
+ import { Tool, InputSchema, ExecutionContext } from '../types/action.types';
3
+ export declare class SummaryWorkflow implements Tool<SummaryWorkflowInput, any> {
4
+ name: string;
5
+ description: string;
6
+ input_schema: InputSchema;
7
+ constructor();
8
+ execute(context: ExecutionContext, params: SummaryWorkflowInput): Promise<any>;
9
+ }
@@ -0,0 +1,69 @@
1
+ import { Message } from '../types/llm.types';
2
+ import { ExecutionContext } from '../types/action.types';
3
+ interface ImageData {
4
+ type: 'base64';
5
+ media_type: string;
6
+ data: string;
7
+ }
8
+ export interface LogOptions {
9
+ maxHistoryLength?: number;
10
+ logLevel?: 'debug' | 'info' | 'warn' | 'error';
11
+ includeTimestamp?: boolean;
12
+ debugImagePath?: string;
13
+ imageSaver?: (imageData: ImageData, filename: string) => Promise<string>;
14
+ }
15
+ /**
16
+ * Manages logging for action execution, providing a cleaner view of the execution
17
+ * flow while maintaining important context and history.
18
+ */
19
+ export declare class ExecutionLogger {
20
+ private history;
21
+ private readonly maxHistoryLength;
22
+ private readonly logLevel;
23
+ private readonly includeTimestamp;
24
+ private readonly debugImagePath?;
25
+ private readonly imageSaver?;
26
+ private readonly isNode;
27
+ constructor(options?: LogOptions);
28
+ /**
29
+ * Logs a message with execution context
30
+ */
31
+ log(level: string, message: string, context?: ExecutionContext): void;
32
+ /**
33
+ * Updates conversation history while maintaining size limit
34
+ */
35
+ updateHistory(messages: Message[]): void;
36
+ /**
37
+ * Gets current conversation history
38
+ */
39
+ getHistory(): Message[];
40
+ /**
41
+ * Summarizes the execution context for logging
42
+ */
43
+ private summarizeContext;
44
+ /**
45
+ * Checks if message should be logged based on log level
46
+ */
47
+ private shouldLog;
48
+ /**
49
+ * Logs the start of an action execution
50
+ */
51
+ logActionStart(actionName: string, input: unknown, context?: ExecutionContext): void;
52
+ /**
53
+ * Logs the completion of an action execution
54
+ */
55
+ logActionComplete(actionName: string, result: unknown, context?: ExecutionContext): void;
56
+ /**
57
+ * Logs a tool execution
58
+ */
59
+ logToolExecution(toolName: string, input: unknown, context?: ExecutionContext): void;
60
+ /**
61
+ * Logs an error that occurred during execution
62
+ */
63
+ logError(error: Error, context?: ExecutionContext): void;
64
+ private extractFromDataUrl;
65
+ private saveDebugImage;
66
+ private formatToolResult;
67
+ logToolResult(toolName: string, result: unknown, context?: ExecutionContext): Promise<void>;
68
+ }
69
+ export {};
@@ -4,7 +4,8 @@ export declare function clear_input(xpath?: string, highlightIndex?: number): bo
4
4
  export declare function left_click(xpath?: string, highlightIndex?: number): boolean;
5
5
  export declare function right_click(xpath?: string, highlightIndex?: number): boolean;
6
6
  export declare function double_click(xpath?: string, highlightIndex?: number): boolean;
7
- export declare function screenshot(): Promise<ScreenshotResult>;
7
+ export declare function screenshot(compress?: boolean): Promise<ScreenshotResult>;
8
+ export declare function compress_image(dataUrl: string, scale?: number, quality?: number): Promise<string>;
8
9
  export declare function scroll_to(xpath?: string, highlightIndex?: number): boolean;
9
10
  export declare function get_dropdown_options(xpath?: string, highlightIndex?: number): {
10
11
  options: Array<{
package/dist/web.cjs.js CHANGED
@@ -8419,7 +8419,7 @@ function right_click(xpath, highlightIndex) {
8419
8419
  function double_click(xpath, highlightIndex) {
8420
8420
  return simulateMouseEvent(['mousedown', 'mouseup', 'click', 'mousedown', 'mouseup', 'click', 'dblclick'], 0, xpath, highlightIndex);
8421
8421
  }
8422
- async function screenshot() {
8422
+ async function screenshot(compress) {
8423
8423
  const [width, height] = size();
8424
8424
  const scrollX = window.scrollX || window.pageXOffset;
8425
8425
  const scrollY = window.scrollY || window.pageYOffset;
@@ -8437,7 +8437,10 @@ async function screenshot() {
8437
8437
  // backgroundColor: 'white',
8438
8438
  // scale: window.devicePixelRatio || 1,
8439
8439
  });
8440
- const dataUrl = canvas.toDataURL('image/png');
8440
+ let dataUrl = canvas.toDataURL('image/png');
8441
+ if (compress) {
8442
+ dataUrl = await compress_image(dataUrl, 0.7, 0.8);
8443
+ }
8441
8444
  let data = dataUrl.substring(dataUrl.indexOf('base64,') + 7);
8442
8445
  return {
8443
8446
  image: {
@@ -8447,6 +8450,25 @@ async function screenshot() {
8447
8450
  },
8448
8451
  };
8449
8452
  }
8453
+ function compress_image(dataUrl, scale = 0.8, quality = 0.8) {
8454
+ return new Promise((resolve) => {
8455
+ const img = new Image();
8456
+ img.onload = function () {
8457
+ const canvas = document.createElement('canvas');
8458
+ const ctx = canvas.getContext('2d');
8459
+ let width = img.width * scale;
8460
+ let height = img.height * scale;
8461
+ canvas.width = width;
8462
+ canvas.height = height;
8463
+ ctx.drawImage(img, 0, 0, width, height);
8464
+ resolve(canvas.toDataURL('image/jpeg', quality));
8465
+ };
8466
+ img.onerror = function () {
8467
+ resolve(dataUrl);
8468
+ };
8469
+ img.src = dataUrl;
8470
+ });
8471
+ }
8450
8472
  function scroll_to(xpath, highlightIndex) {
8451
8473
  let element = null;
8452
8474
  if (highlightIndex != null) {
@@ -8633,6 +8655,7 @@ function simulateMouseEvent(eventTypes, button, xpath, highlightIndex) {
8633
8655
  var browser = /*#__PURE__*/Object.freeze({
8634
8656
  __proto__: null,
8635
8657
  clear_input: clear_input,
8658
+ compress_image: compress_image,
8636
8659
  double_click: double_click,
8637
8660
  extractHtmlContent: extractHtmlContent,
8638
8661
  get_dropdown_options: get_dropdown_options,
@@ -8653,7 +8676,6 @@ class BrowserUse {
8653
8676
  this.name = 'browser_use';
8654
8677
  this.description = `Use structured commands to interact with the browser, manipulating page elements through screenshots and webpage element extraction.
8655
8678
  * This is a browser GUI interface where you need to analyze webpages by taking screenshots and extracting page element structures, and specify action sequences to complete designated tasks.
8656
- * Some operations may need time to process, so you might need to wait and continuously take screenshots and extract element structures to check the operation results.
8657
8679
  * Before any operation, you must first call the \`screenshot_extract_element\` command, which will return the browser page screenshot and structured element information, both specially processed.
8658
8680
  * ELEMENT INTERACTION:
8659
8681
  - Only use indexes that exist in the provided element list
@@ -8663,17 +8685,7 @@ class BrowserUse {
8663
8685
  - If no suitable elements exist, use other functions to complete the task
8664
8686
  - If stuck, try alternative approaches
8665
8687
  - Handle popups/cookies by accepting or closing them
8666
- - Use scroll to find elements you are looking for
8667
- * Form filling:
8668
- - If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
8669
- * ACTION SEQUENCING:
8670
- - Actions are executed in the order they appear in the list
8671
- - Each action should logically follow from the previous one
8672
- - If the page changes after an action, the sequence is interrupted and you get the new state.
8673
- - If content only disappears the sequence continues.
8674
- - Only provide the action sequence until you think the page will change.
8675
- - Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
8676
- - only use multiple actions if it makes sense.`;
8688
+ - Use scroll to find elements you are looking for`;
8677
8689
  this.input_schema = {
8678
8690
  type: 'object',
8679
8691
  properties: {
@@ -8804,7 +8816,7 @@ class BrowserUse {
8804
8816
  await sleep(100);
8805
8817
  let element_result = get_clickable_elements(true, null);
8806
8818
  context.selector_map = element_result.selector_map;
8807
- let screenshot$1 = await screenshot();
8819
+ let screenshot$1 = await screenshot(true);
8808
8820
  remove_highlight();
8809
8821
  result = { image: screenshot$1.image, text: element_result.element_str };
8810
8822
  break;
@@ -9074,7 +9086,7 @@ ${pseudoHtml}
9074
9086
  return false;
9075
9087
  }
9076
9088
  async function executeWithBrowserUse$1(context, task_prompt) {
9077
- let screenshot_result = await screenshot();
9089
+ let screenshot_result = await screenshot(false);
9078
9090
  let messages = [
9079
9091
  {
9080
9092
  role: 'user',
@@ -9289,7 +9301,7 @@ ${pseudoHtml}
9289
9301
  return null;
9290
9302
  }
9291
9303
  async function executeWithBrowserUse(context, task_prompt) {
9292
- let screenshot_result = await screenshot();
9304
+ let screenshot_result = await screenshot(false);
9293
9305
  let messages = [
9294
9306
  {
9295
9307
  role: 'user',
package/dist/web.esm.js CHANGED
@@ -8417,7 +8417,7 @@ function right_click(xpath, highlightIndex) {
8417
8417
  function double_click(xpath, highlightIndex) {
8418
8418
  return simulateMouseEvent(['mousedown', 'mouseup', 'click', 'mousedown', 'mouseup', 'click', 'dblclick'], 0, xpath, highlightIndex);
8419
8419
  }
8420
- async function screenshot() {
8420
+ async function screenshot(compress) {
8421
8421
  const [width, height] = size();
8422
8422
  const scrollX = window.scrollX || window.pageXOffset;
8423
8423
  const scrollY = window.scrollY || window.pageYOffset;
@@ -8435,7 +8435,10 @@ async function screenshot() {
8435
8435
  // backgroundColor: 'white',
8436
8436
  // scale: window.devicePixelRatio || 1,
8437
8437
  });
8438
- const dataUrl = canvas.toDataURL('image/png');
8438
+ let dataUrl = canvas.toDataURL('image/png');
8439
+ if (compress) {
8440
+ dataUrl = await compress_image(dataUrl, 0.7, 0.8);
8441
+ }
8439
8442
  let data = dataUrl.substring(dataUrl.indexOf('base64,') + 7);
8440
8443
  return {
8441
8444
  image: {
@@ -8445,6 +8448,25 @@ async function screenshot() {
8445
8448
  },
8446
8449
  };
8447
8450
  }
8451
+ function compress_image(dataUrl, scale = 0.8, quality = 0.8) {
8452
+ return new Promise((resolve) => {
8453
+ const img = new Image();
8454
+ img.onload = function () {
8455
+ const canvas = document.createElement('canvas');
8456
+ const ctx = canvas.getContext('2d');
8457
+ let width = img.width * scale;
8458
+ let height = img.height * scale;
8459
+ canvas.width = width;
8460
+ canvas.height = height;
8461
+ ctx.drawImage(img, 0, 0, width, height);
8462
+ resolve(canvas.toDataURL('image/jpeg', quality));
8463
+ };
8464
+ img.onerror = function () {
8465
+ resolve(dataUrl);
8466
+ };
8467
+ img.src = dataUrl;
8468
+ });
8469
+ }
8448
8470
  function scroll_to(xpath, highlightIndex) {
8449
8471
  let element = null;
8450
8472
  if (highlightIndex != null) {
@@ -8631,6 +8653,7 @@ function simulateMouseEvent(eventTypes, button, xpath, highlightIndex) {
8631
8653
  var browser = /*#__PURE__*/Object.freeze({
8632
8654
  __proto__: null,
8633
8655
  clear_input: clear_input,
8656
+ compress_image: compress_image,
8634
8657
  double_click: double_click,
8635
8658
  extractHtmlContent: extractHtmlContent,
8636
8659
  get_dropdown_options: get_dropdown_options,
@@ -8651,7 +8674,6 @@ class BrowserUse {
8651
8674
  this.name = 'browser_use';
8652
8675
  this.description = `Use structured commands to interact with the browser, manipulating page elements through screenshots and webpage element extraction.
8653
8676
  * This is a browser GUI interface where you need to analyze webpages by taking screenshots and extracting page element structures, and specify action sequences to complete designated tasks.
8654
- * Some operations may need time to process, so you might need to wait and continuously take screenshots and extract element structures to check the operation results.
8655
8677
  * Before any operation, you must first call the \`screenshot_extract_element\` command, which will return the browser page screenshot and structured element information, both specially processed.
8656
8678
  * ELEMENT INTERACTION:
8657
8679
  - Only use indexes that exist in the provided element list
@@ -8661,17 +8683,7 @@ class BrowserUse {
8661
8683
  - If no suitable elements exist, use other functions to complete the task
8662
8684
  - If stuck, try alternative approaches
8663
8685
  - Handle popups/cookies by accepting or closing them
8664
- - Use scroll to find elements you are looking for
8665
- * Form filling:
8666
- - If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
8667
- * ACTION SEQUENCING:
8668
- - Actions are executed in the order they appear in the list
8669
- - Each action should logically follow from the previous one
8670
- - If the page changes after an action, the sequence is interrupted and you get the new state.
8671
- - If content only disappears the sequence continues.
8672
- - Only provide the action sequence until you think the page will change.
8673
- - Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
8674
- - only use multiple actions if it makes sense.`;
8686
+ - Use scroll to find elements you are looking for`;
8675
8687
  this.input_schema = {
8676
8688
  type: 'object',
8677
8689
  properties: {
@@ -8802,7 +8814,7 @@ class BrowserUse {
8802
8814
  await sleep(100);
8803
8815
  let element_result = get_clickable_elements(true, null);
8804
8816
  context.selector_map = element_result.selector_map;
8805
- let screenshot$1 = await screenshot();
8817
+ let screenshot$1 = await screenshot(true);
8806
8818
  remove_highlight();
8807
8819
  result = { image: screenshot$1.image, text: element_result.element_str };
8808
8820
  break;
@@ -9072,7 +9084,7 @@ ${pseudoHtml}
9072
9084
  return false;
9073
9085
  }
9074
9086
  async function executeWithBrowserUse$1(context, task_prompt) {
9075
- let screenshot_result = await screenshot();
9087
+ let screenshot_result = await screenshot(false);
9076
9088
  let messages = [
9077
9089
  {
9078
9090
  role: 'user',
@@ -9287,7 +9299,7 @@ ${pseudoHtml}
9287
9299
  return null;
9288
9300
  }
9289
9301
  async function executeWithBrowserUse(context, task_prompt) {
9290
- let screenshot_result = await screenshot();
9302
+ let screenshot_result = await screenshot(false);
9291
9303
  let messages = [
9292
9304
  {
9293
9305
  role: 'user',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@eko-ai/eko",
3
- "version": "1.0.7",
3
+ "version": "1.0.9",
4
4
  "description": "Empowering language to transform human words into action.",
5
5
  "main": "dist/index.cjs.js",
6
6
  "module": "dist/index.esm.js",
@@ -51,12 +51,6 @@
51
51
  "dev": "tsc --watch",
52
52
  "clean": "rimraf dist",
53
53
  "test": "jest",
54
- "test:watch": "jest --watch",
55
- "test:coverage": "jest --coverage",
56
- "lint": "eslint src/**/*.ts",
57
- "lint:fix": "eslint src/**/*.ts --fix",
58
- "format": "prettier --write \"src/**/*.ts\"",
59
- "prepublishOnly": "npm run build",
60
54
  "docs": "typedoc"
61
55
  },
62
56
  "author": "FellouAI",
@@ -75,15 +69,19 @@
75
69
  },
76
70
  "dependencies": {
77
71
  "@anthropic-ai/sdk": "^0.33.0",
72
+ "chromium-bidi": "^0.12.0",
78
73
  "dotenv": "^16.0.0",
79
74
  "html2canvas": "^1.4.1",
80
75
  "openai": "^4.77.0",
76
+ "playwright": "^1.49.1",
81
77
  "uuid": "^11.0.3",
82
78
  "zod": "^3.22.4"
83
79
  },
84
80
  "devDependencies": {
85
81
  "@rollup/plugin-commonjs": "^28.0.2",
82
+ "@rollup/plugin-json": "^6.1.0",
86
83
  "@rollup/plugin-node-resolve": "^16.0.0",
84
+ "@rollup/plugin-replace": "^6.0.2",
87
85
  "@rollup/plugin-typescript": "^12.1.2",
88
86
  "@types/chrome": "0.0.158",
89
87
  "@types/jest": "^29.5.12",
@@ -104,6 +102,5 @@
104
102
  },
105
103
  "engines": {
106
104
  "node": ">=18.0.0"
107
- },
108
- "packageManager": "yarn@1.22.22+sha512.a6b2f7906b721bba3d67d4aff083df04dad64c399707841b7acf00f6b133b7ac24255f2652fa22ae3534329dc6180534e98d17432037ff6fd140556e2bb3137e"
105
+ }
109
106
  }