@eko-ai/eko 2.1.2 → 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.esm.js CHANGED
@@ -6,7 +6,7 @@ const config = {
6
6
  compressThreshold: 80,
7
7
  largeTextLength: 5000,
8
8
  fileTextMaxLength: 20000,
9
- maxDialogueImgFileNum: 2,
9
+ maxDialogueImgFileNum: 1,
10
10
  };
11
11
 
12
12
  var LogLevel;
@@ -283,6 +283,9 @@ function sub(str, maxLength, appendPoint = true) {
283
283
  return str;
284
284
  }
285
285
  function fixXmlTag(code) {
286
+ if (code.indexOf('&') > -1) {
287
+ code = code.replace(/&(?![a-zA-Z0-9#]+;)/g, '&amp;');
288
+ }
286
289
  function fixDoubleChar(code) {
287
290
  const stack = [];
288
291
  for (let i = 0; i < code.length; i++) {
@@ -17895,7 +17898,7 @@ class HumanInteractTool {
17895
17898
  confirm: Ask the user to confirm whether to execute an operation, especially when performing dangerous actions such as deleting system files, users will choose Yes or No.
17896
17899
  input: Prompt the user to enter text; for example, when a task is ambiguous, the AI can choose to ask the user for details, and the user can respond by inputting.
17897
17900
  select: Allow the user to make a choice; in situations that require selection, the AI can ask the user to make a decision.
17898
- request_help: Request assistance from the user; for instance, when an operation is blocked, the AI can ask the user for help, such as needing to log into a website or solve a CAPTCHA.`;
17901
+ request_help: Request assistance from the user; for instance, when an operation is blocked, the AI can ask the user for help, such as needing to log into a website or solve a CAPTCHA or Scan the QR code.`;
17899
17902
  this.parameters = {
17900
17903
  type: "object",
17901
17904
  properties: {
@@ -18157,6 +18160,22 @@ class VariableStorageTool {
18157
18160
  }
18158
18161
 
18159
18162
  const TOOL_NAME = "watch_trigger";
18163
+ const watch_system_prompt = `You are a tool for detecting element changes. Given a task description, compare two images to determine whether the changes described in the task have occurred.
18164
+ If the changes have occurred, return an json with \`changed\` set to true and \`changeInfo\` containing a description of the changes. If no changes have occurred, return an object with \`changed\` set to false.
18165
+
18166
+ ## Example
18167
+ User: Monitor new messages in group chat
18168
+ ### No changes detected
18169
+ Output:
18170
+ {
18171
+ "changed": false
18172
+ }
18173
+ ### Change detected
18174
+ Output:
18175
+ {
18176
+ "changed": true,
18177
+ "changeInfo": "New message received in the group chat. The message content is: 'Hello, how are you?'"
18178
+ }`;
18160
18179
  class WatchTriggerTool {
18161
18180
  constructor() {
18162
18181
  this.name = TOOL_NAME;
@@ -18221,19 +18240,22 @@ class WatchTriggerTool {
18221
18240
  ],
18222
18241
  };
18223
18242
  }
18224
- const screenshot = agentContext.agent["screenshot"];
18225
- const image1Result = (await screenshot.call(agentContext.agent, agentContext));
18226
- const image1 = toImage(image1Result.imageBase64);
18243
+ await this.init_eko_observer(agentContext);
18244
+ const image1 = await this.get_screenshot(agentContext);
18227
18245
  const start = new Date().getTime();
18228
18246
  const timeout = (args.timeout || 5) * 60000;
18229
- const frequency = Math.max(500, (args.frequency = args.frequency || 1) * 1000);
18247
+ const frequency = Math.max(500, (args.frequency || 1) * 1000);
18230
18248
  let rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms);
18231
18249
  while (new Date().getTime() - start < timeout) {
18232
18250
  await agentContext.context.checkAborted();
18233
18251
  await new Promise((resolve) => setTimeout(resolve, frequency));
18234
- const image2Result = (await screenshot.call(agentContext.agent, agentContext));
18235
- const image2 = toImage(image2Result.imageBase64);
18236
- const changeResult = await this.is_dom_change(agentContext, rlm, image1, image1Result.imageType, image2, image2Result.imageType, task_description);
18252
+ let changed = await this.has_eko_changed(agentContext);
18253
+ if (changed == "false") {
18254
+ continue;
18255
+ }
18256
+ await this.init_eko_observer(agentContext);
18257
+ const image2 = await this.get_screenshot(agentContext);
18258
+ const changeResult = await this.is_dom_change(agentContext, rlm, image1, image2, task_description);
18237
18259
  if (changeResult.changed) {
18238
18260
  return {
18239
18261
  content: [
@@ -18254,41 +18276,73 @@ class WatchTriggerTool {
18254
18276
  ],
18255
18277
  };
18256
18278
  }
18257
- async is_dom_change(agentContext, rlm, image1, image1Type, image2, image2Type, task_description) {
18279
+ async get_screenshot(agentContext) {
18280
+ const screenshot = agentContext.agent["screenshot"];
18281
+ const imageResult = (await screenshot.call(agentContext.agent, agentContext));
18282
+ const image = toImage(imageResult.imageBase64);
18283
+ return {
18284
+ image: image,
18285
+ imageType: imageResult.imageType,
18286
+ };
18287
+ }
18288
+ async init_eko_observer(agentContext) {
18289
+ try {
18290
+ const screenshot = agentContext.agent["execute_script"];
18291
+ await screenshot.call(agentContext.agent, agentContext, () => {
18292
+ let _window = window;
18293
+ _window.has_eko_changed = false;
18294
+ _window.eko_observer && _window.eko_observer.disconnect();
18295
+ let eko_observer = new MutationObserver(function (mutations) {
18296
+ _window.has_eko_changed = true;
18297
+ });
18298
+ eko_observer.observe(document.body, {
18299
+ childList: true,
18300
+ subtree: true,
18301
+ attributes: true,
18302
+ attributeOldValue: true,
18303
+ characterData: true,
18304
+ characterDataOldValue: true,
18305
+ });
18306
+ _window.eko_observer = eko_observer;
18307
+ }, []);
18308
+ }
18309
+ catch (error) {
18310
+ console.error("Error initializing Eko observer:", error);
18311
+ }
18312
+ }
18313
+ async has_eko_changed(agentContext) {
18314
+ try {
18315
+ const screenshot = agentContext.agent["execute_script"];
18316
+ let result = (await screenshot.call(agentContext.agent, agentContext, () => {
18317
+ return window.has_eko_changed + "";
18318
+ }, []));
18319
+ return result;
18320
+ }
18321
+ catch (e) {
18322
+ console.error("Error checking Eko change:", e);
18323
+ return "undefined";
18324
+ }
18325
+ }
18326
+ async is_dom_change(agentContext, rlm, image1, image2, task_description) {
18258
18327
  try {
18259
18328
  let request = {
18260
18329
  messages: [
18261
18330
  {
18262
18331
  role: "system",
18263
- content: `You are a tool for detecting element changes. Given a task description, compare two images to determine whether the changes described in the task have occurred.
18264
- If the changes have occurred, return an json with \`changed\` set to true and \`changeInfo\` containing a description of the changes. If no changes have occurred, return an object with \`changed\` set to false.
18265
-
18266
- ## Example
18267
- User: Monitor new messages in group chat
18268
- ### No changes detected
18269
- Output:
18270
- {
18271
- "changed": false
18272
- }
18273
- ### Change detected
18274
- Output:
18275
- {
18276
- "changed": true,
18277
- "changeInfo": "New message received in the group chat. The message content is: 'Hello, how are you?'"
18278
- }`,
18332
+ content: watch_system_prompt,
18279
18333
  },
18280
18334
  {
18281
18335
  role: "user",
18282
18336
  content: [
18283
18337
  {
18284
18338
  type: "image",
18285
- image: image1,
18286
- mimeType: image1Type,
18339
+ image: image1.image,
18340
+ mimeType: image1.imageType,
18287
18341
  },
18288
18342
  {
18289
18343
  type: "image",
18290
- image: image2,
18291
- mimeType: image2Type,
18344
+ image: image2.image,
18345
+ mimeType: image2.imageType,
18292
18346
  },
18293
18347
  {
18294
18348
  type: "text",
@@ -18352,9 +18406,8 @@ const HUMAN_PROMPT = `
18352
18406
  * HUMAN INTERACT
18353
18407
  During the task execution process, you can use the \`${TOOL_NAME$3}\` tool to interact with humans, please call it in the following situations:
18354
18408
  - When performing dangerous operations such as deleting files, confirmation from humans is required.
18355
- - When encountering obstacles while visiting a website, such as requiring user login or captcha, you need to request for manual assistance.
18356
- - When requesting login, please only call the function when a login dialog box is clearly displayed.
18357
- - Try to minimize the use of \`${TOOL_NAME$3}\` tool.
18409
+ - When encountering obstacles while accessing websites, such as requiring user login, captcha verification, QR code scanning, or human verification, you need to request manual assistance.
18410
+ - Please do not use the \`${TOOL_NAME$3}\` tool frequently.
18358
18411
  `;
18359
18412
  const VARIABLE_PROMPT = `
18360
18413
  * VARIABLE STORAGE
@@ -19336,6 +19389,9 @@ class Eko {
19336
19389
  if (!context) {
19337
19390
  throw new Error("The task does not exist");
19338
19391
  }
19392
+ if (context.paused) {
19393
+ context.paused = false;
19394
+ }
19339
19395
  if (context.controller.signal.aborted) {
19340
19396
  context.controller = new AbortController();
19341
19397
  }
@@ -20409,7 +20465,8 @@ function run_build_dom_tree() {
20409
20465
  return { element_str, selector_map };
20410
20466
  }
20411
20467
  function get_highlight_element(highlightIndex) {
20412
- return window.clickable_elements[highlightIndex];
20468
+ let element = document.querySelector(`[eko-user-highlight-id="eko-highlight-${highlightIndex}"]`);
20469
+ return element || window.clickable_elements[highlightIndex];
20413
20470
  }
20414
20471
  function remove_highlight() {
20415
20472
  let highlight = document.getElementById('eko-highlight-container');
@@ -21000,6 +21057,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21000
21057
  - Only use indexes that exist in the provided element list
21001
21058
  - Each element has a unique index number (e.g., "[33]:<button>")
21002
21059
  - Elements marked with "[]:" are non-interactive (for context only)
21060
+ - Use the latest element index, do not rely on historical outdated element indexes
21003
21061
  * ERROR HANDLING:
21004
21062
  - If no suitable elements exist, use other functions to complete the task
21005
21063
  - If stuck, try alternative approaches, don't refuse tasks
@@ -21057,7 +21115,8 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21057
21115
  }
21058
21116
  if (extract_page_content) {
21059
21117
  let page_content = await this.extract_page_content(agentContext);
21060
- return "The current page content has been extracted, latest page content:\n" + page_content;
21118
+ return ("The current page content has been extracted, latest page content:\n" +
21119
+ page_content);
21061
21120
  }
21062
21121
  }
21063
21122
  async hover_to_element(agentContext, index) {
@@ -21413,27 +21472,34 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21413
21472
  ],
21414
21473
  });
21415
21474
  }
21416
- if (messages.length > 10) {
21417
- // compressed pseudoHtml
21418
- for (let i = 2; i < messages.length - 3; i++) {
21419
- let message = messages[i];
21420
- if (message.role == "user" && message.content.length == 2) {
21421
- let content = message.content;
21422
- for (let j = 0; j < content.length; j++) {
21423
- let _content = content[j];
21424
- if (_content.type == "text" &&
21425
- _content.text.startsWith(pseudoHtmlDescription)) {
21426
- _content.text = this.removePseudoHtmlAttr(_content.text, [
21427
- "class",
21428
- "src",
21429
- "href",
21430
- ]);
21431
- }
21475
+ super.handleMessages(agentContext, messages, tools);
21476
+ this.handlePseudoHtmlText(messages, pseudoHtmlDescription);
21477
+ }
21478
+ handlePseudoHtmlText(messages, pseudoHtmlDescription) {
21479
+ for (let i = 0; i < messages.length; i++) {
21480
+ let message = messages[i];
21481
+ if (message.role !== "user" || message.content.length <= 1) {
21482
+ continue;
21483
+ }
21484
+ let content = message.content;
21485
+ for (let j = 0; j < content.length; j++) {
21486
+ let _content = content[j];
21487
+ if (_content.type == "text" &&
21488
+ _content.text.startsWith(pseudoHtmlDescription)) {
21489
+ if (i >= 2 && i < messages.length - 3) {
21490
+ _content.text = this.removePseudoHtmlAttr(_content.text, [
21491
+ "class",
21492
+ "src",
21493
+ "href",
21494
+ ]);
21432
21495
  }
21433
21496
  }
21434
21497
  }
21498
+ if (content[0].text == "[image]" &&
21499
+ content[1].text == "[image]") {
21500
+ content.splice(0, 1);
21501
+ }
21435
21502
  }
21436
- super.handleMessages(agentContext, messages, tools);
21437
21503
  }
21438
21504
  removePseudoHtmlAttr(pseudoHtml, remove_attrs) {
21439
21505
  return pseudoHtml
@@ -21442,6 +21508,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21442
21508
  if (!line.startsWith("[") || line.indexOf("]:<") == -1) {
21443
21509
  return line;
21444
21510
  }
21511
+ line = line.substring(line.indexOf("]:<") + 2);
21445
21512
  for (let i = 0; i < remove_attrs.length; i++) {
21446
21513
  let sIdx = line.indexOf(remove_attrs[i] + '="');
21447
21514
  if (sIdx == -1) {
@@ -21453,12 +21520,9 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21453
21520
  }
21454
21521
  line =
21455
21522
  line.substring(0, sIdx) +
21456
- line
21457
- .substring(eIdx + 1)
21458
- .trim()
21459
- .replace('" >', '">');
21523
+ line.substring(eIdx + 1).trim();
21460
21524
  }
21461
- return line;
21525
+ return line.replace('" >', '">').replace(" >", ">");
21462
21526
  })
21463
21527
  .join("\n");
21464
21528
  }