@eko-ai/eko 2.1.1 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs.js CHANGED
@@ -7,10 +7,11 @@ var buffer = require('buffer');
7
7
  const config = {
8
8
  name: "Eko",
9
9
  platform: "mac",
10
- maxReactNum: 200,
10
+ maxReactNum: 500,
11
11
  maxTokens: 16000,
12
12
  compressThreshold: 80,
13
13
  largeTextLength: 5000,
14
+ fileTextMaxLength: 20000,
14
15
  maxDialogueImgFileNum: 2,
15
16
  };
16
17
 
@@ -232,6 +233,7 @@ function mergeTools(tools1, tools2) {
232
233
  map[tool.name] = tool;
233
234
  return map;
234
235
  }, {});
236
+ let names = [];
235
237
  for (let i = 0; i < tools1.length; i++) {
236
238
  let tool1 = tools1[i];
237
239
  let tool2 = toolMap2[tool1.name];
@@ -245,8 +247,9 @@ function mergeTools(tools1, tools2) {
245
247
  }
246
248
  for (let i = 0; i < tools2.length; i++) {
247
249
  let tool2 = tools2[i];
248
- if (toolMap2[tool2.name]) {
250
+ if (toolMap2[tool2.name] && names.indexOf(tool2.name) === -1) {
249
251
  tools.push(tool2);
252
+ names.push(tool2.name);
250
253
  }
251
254
  }
252
255
  return tools;
@@ -17519,10 +17522,17 @@ function extractAgentXmlNode(agentXml, nodeId) {
17519
17522
  let nodesNode = doc.getElementsByTagName("nodes");
17520
17523
  if (nodesNode.length > 0) {
17521
17524
  let nodes = nodesNode[0].childNodes;
17525
+ let _nodeId = 0;
17522
17526
  for (let i = 0; i < nodes.length; i++) {
17523
17527
  let node = nodes[i];
17524
- if (node.nodeType == 1 && node.getAttribute("id") == nodeId + "") {
17525
- return node;
17528
+ if (node.nodeType == 1) {
17529
+ if (node.getAttribute("id") == null || node.getAttribute("id") == "") {
17530
+ node.setAttribute("id", _nodeId + "");
17531
+ }
17532
+ _nodeId++;
17533
+ if (node.getAttribute("id") == nodeId + "") {
17534
+ return node;
17535
+ }
17526
17536
  }
17527
17537
  }
17528
17538
  }
@@ -17856,7 +17866,7 @@ const TOOL_NAME$4 = "foreach_task";
17856
17866
  class ForeachTaskTool {
17857
17867
  constructor() {
17858
17868
  this.name = TOOL_NAME$4;
17859
- this.description = `When executing the \`forEach\` node, please use it to complete the tasks corresponding to that forEach node, which will complete all tasks under the entire forEach node.`;
17869
+ this.description = `When executing the \`forEach\` node, please use the current tool for counting to ensure tasks are executed sequentially, the tool needs to be called with each loop iteration.`;
17860
17870
  this.parameters = {
17861
17871
  type: "object",
17862
17872
  properties: {
@@ -17864,12 +17874,19 @@ class ForeachTaskTool {
17864
17874
  type: "number",
17865
17875
  description: "forEach node ID.",
17866
17876
  },
17877
+ progress: {
17878
+ type: "string",
17879
+ description: "Current execution progress.",
17880
+ },
17881
+ next_step: {
17882
+ type: "string",
17883
+ description: "Next task description.",
17884
+ },
17867
17885
  },
17868
- required: ["nodeId"],
17886
+ required: ["nodeId", "progress", "next_step"],
17869
17887
  };
17870
17888
  }
17871
17889
  async execute(args, agentContext) {
17872
- // 调用 forEach Agent 单独逻辑, 根据上下文判断并循环执行
17873
17890
  let nodeId = args.nodeId;
17874
17891
  let agentXml = agentContext.agentChain.agent.xml;
17875
17892
  let node = extractAgentXmlNode(agentXml, nodeId);
@@ -17880,10 +17897,27 @@ class ForeachTaskTool {
17880
17897
  throw new Error("Node ID is not a forEach node: " + nodeId);
17881
17898
  }
17882
17899
  let items = node.getAttribute("items");
17900
+ let varValue = null;
17901
+ let resultText = "Recorded";
17883
17902
  if (items && items != "list") {
17884
- agentContext.context.variables.get(items.trim());
17903
+ varValue = agentContext.context.variables.get(items.trim());
17904
+ if (varValue) {
17905
+ let key = "foreach_" + nodeId;
17906
+ let loop_count = agentContext.variables.get(key) || 0;
17907
+ if (loop_count % 5 == 0) {
17908
+ resultText = `Variable information associated with the current loop task.\nvariable_name: ${items.trim()}\nvariable_value: ${varValue}`;
17909
+ }
17910
+ agentContext.variables.set(key, ++loop_count);
17911
+ }
17885
17912
  }
17886
- return null;
17913
+ return {
17914
+ content: [
17915
+ {
17916
+ type: "text",
17917
+ text: resultText,
17918
+ },
17919
+ ],
17920
+ };
17887
17921
  }
17888
17922
  }
17889
17923
 
@@ -17955,6 +17989,11 @@ request_help: Request assistance from the user; for instance, when an operation
17955
17989
  break;
17956
17990
  case "request_help":
17957
17991
  if (callback.onHumanHelp) {
17992
+ if (args.helpType == "request_login" &&
17993
+ (await this.checkIsLogined(agentContext))) {
17994
+ resultText = "Already logged in";
17995
+ break;
17996
+ }
17958
17997
  let result = await callback.onHumanHelp(agentContext, (args.helpType || "request_assistance"), args.prompt);
17959
17998
  resultText = `request_help result: ${result ? "Solved" : "Unresolved"}`;
17960
17999
  }
@@ -17983,6 +18022,42 @@ request_help: Request assistance from the user; for instance, when an operation
17983
18022
  };
17984
18023
  }
17985
18024
  }
18025
+ async checkIsLogined(agentContext) {
18026
+ let screenshot = agentContext.agent["screenshot"];
18027
+ if (!screenshot) {
18028
+ return false;
18029
+ }
18030
+ try {
18031
+ let imageResult = (await screenshot.call(agentContext.agent, agentContext));
18032
+ let rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms);
18033
+ let image = toImage(imageResult.imageBase64);
18034
+ let request = {
18035
+ messages: [
18036
+ {
18037
+ role: "user",
18038
+ content: [
18039
+ {
18040
+ type: "image",
18041
+ image: image,
18042
+ mimeType: imageResult.imageType,
18043
+ },
18044
+ {
18045
+ type: "text",
18046
+ text: "Check if the current website is logged in. If not logged in, output `NOT_LOGIN`. If logged in, output `LOGGED_IN`. Output directly without explanation.",
18047
+ },
18048
+ ],
18049
+ },
18050
+ ],
18051
+ abortSignal: agentContext.context.controller.signal,
18052
+ };
18053
+ let result = await rlm.call(request);
18054
+ return result.text && result.text.indexOf("LOGGED_IN") > -1;
18055
+ }
18056
+ catch (error) {
18057
+ console.error("Error auto checking login status:", error);
18058
+ return false;
18059
+ }
18060
+ }
17986
18061
  }
17987
18062
 
17988
18063
  const TOOL_NAME$2 = "task_node_status";
@@ -18120,21 +18195,156 @@ const TOOL_NAME = "watch_trigger";
18120
18195
  class WatchTriggerTool {
18121
18196
  constructor() {
18122
18197
  this.name = TOOL_NAME;
18123
- this.description = `When executing the \`watch\` node, please use it to complete the tasks corresponding to that watch node. It will complete all tasks under the entire watch node.`;
18198
+ this.description = `When executing the \`watch\` node, please use it to monitor DOM element changes, it will block the listener until the element changes or times out.`;
18124
18199
  this.parameters = {
18125
18200
  type: "object",
18126
18201
  properties: {
18127
18202
  nodeId: {
18128
18203
  type: "number",
18129
- description: "forEach node ID."
18204
+ description: "watch node ID.",
18205
+ },
18206
+ watch_area: {
18207
+ type: "array",
18208
+ description: "Element changes in monitoring area, eg: [x, y, width, height].",
18209
+ items: {
18210
+ type: "number",
18211
+ },
18212
+ },
18213
+ watch_index: {
18214
+ type: "array",
18215
+ description: "The index of elements to be monitoring multiple elements simultaneously.",
18216
+ items: {
18217
+ type: "number",
18218
+ },
18219
+ },
18220
+ frequency: {
18221
+ type: "number",
18222
+ description: "Check frequency, how many seconds between each check, default 1 seconds.",
18223
+ default: 1,
18224
+ minimum: 0.5,
18225
+ maximum: 30,
18226
+ },
18227
+ timeout: {
18228
+ type: "number",
18229
+ description: "Timeout in minute, default 5 minutes.",
18230
+ default: 5,
18231
+ minimum: 1,
18232
+ maximum: 30,
18130
18233
  },
18131
18234
  },
18132
18235
  required: ["nodeId"],
18133
18236
  };
18134
18237
  }
18135
18238
  async execute(args, agentContext) {
18136
- // TODO Listen for changes to the DOM or file, and execute nodes
18137
- return null;
18239
+ let nodeId = args.nodeId;
18240
+ let agentXml = agentContext.agentChain.agent.xml;
18241
+ let node = extractAgentXmlNode(agentXml, nodeId);
18242
+ if (node == null) {
18243
+ throw new Error("Node ID does not exist: " + nodeId);
18244
+ }
18245
+ if (node.tagName !== "watch") {
18246
+ throw new Error("Node ID is not a watch node: " + nodeId);
18247
+ }
18248
+ let task_description = node.getElementsByTagName("description")[0]?.textContent || "";
18249
+ if (!task_description) {
18250
+ return {
18251
+ content: [
18252
+ {
18253
+ type: "text",
18254
+ text: "The watch node does not have a description, skip.",
18255
+ },
18256
+ ],
18257
+ };
18258
+ }
18259
+ const screenshot = agentContext.agent["screenshot"];
18260
+ const image1Result = (await screenshot.call(agentContext.agent, agentContext));
18261
+ const image1 = toImage(image1Result.imageBase64);
18262
+ const start = new Date().getTime();
18263
+ const timeout = (args.timeout || 5) * 60000;
18264
+ const frequency = Math.max(500, (args.frequency = args.frequency || 1) * 1000);
18265
+ let rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms);
18266
+ while (new Date().getTime() - start < timeout) {
18267
+ await agentContext.context.checkAborted();
18268
+ await new Promise((resolve) => setTimeout(resolve, frequency));
18269
+ const image2Result = (await screenshot.call(agentContext.agent, agentContext));
18270
+ const image2 = toImage(image2Result.imageBase64);
18271
+ const changeResult = await this.is_dom_change(agentContext, rlm, image1, image1Result.imageType, image2, image2Result.imageType, task_description);
18272
+ if (changeResult.changed) {
18273
+ return {
18274
+ content: [
18275
+ {
18276
+ type: "text",
18277
+ text: changeResult.changeInfo || "DOM change detected.",
18278
+ },
18279
+ ],
18280
+ };
18281
+ }
18282
+ }
18283
+ return {
18284
+ content: [
18285
+ {
18286
+ type: "text",
18287
+ text: "Timeout reached, no DOM changes detected.",
18288
+ },
18289
+ ],
18290
+ };
18291
+ }
18292
+ async is_dom_change(agentContext, rlm, image1, image1Type, image2, image2Type, task_description) {
18293
+ try {
18294
+ let request = {
18295
+ messages: [
18296
+ {
18297
+ role: "system",
18298
+ content: `You are a tool for detecting element changes. Given a task description, compare two images to determine whether the changes described in the task have occurred.
18299
+ If the changes have occurred, return an json with \`changed\` set to true and \`changeInfo\` containing a description of the changes. If no changes have occurred, return an object with \`changed\` set to false.
18300
+
18301
+ ## Example
18302
+ User: Monitor new messages in group chat
18303
+ ### No changes detected
18304
+ Output:
18305
+ {
18306
+ "changed": false
18307
+ }
18308
+ ### Change detected
18309
+ Output:
18310
+ {
18311
+ "changed": true,
18312
+ "changeInfo": "New message received in the group chat. The message content is: 'Hello, how are you?'"
18313
+ }`,
18314
+ },
18315
+ {
18316
+ role: "user",
18317
+ content: [
18318
+ {
18319
+ type: "image",
18320
+ image: image1,
18321
+ mimeType: image1Type,
18322
+ },
18323
+ {
18324
+ type: "image",
18325
+ image: image2,
18326
+ mimeType: image2Type,
18327
+ },
18328
+ {
18329
+ type: "text",
18330
+ text: task_description,
18331
+ },
18332
+ ],
18333
+ },
18334
+ ],
18335
+ abortSignal: agentContext.context.controller.signal,
18336
+ };
18337
+ const result = await rlm.call(request);
18338
+ let resultText = result.text || "{}";
18339
+ resultText = resultText.substring(resultText.indexOf("{"), resultText.lastIndexOf("}") + 1);
18340
+ return JSON.parse(resultText);
18341
+ }
18342
+ catch (error) {
18343
+ Log.error("Error in is_dom_change:", error);
18344
+ }
18345
+ return {
18346
+ changed: false,
18347
+ };
18138
18348
  }
18139
18349
  }
18140
18350
 
@@ -18196,7 +18406,7 @@ repetitive tasks, when executing to the forEach node, require the use of the \`$
18196
18406
  `;
18197
18407
  const WATCH_NODE = `
18198
18408
  <!-- monitor task node, the loop attribute specifies whether to listen in a loop or listen once -->
18199
- <watch event="dom or file" loop="true">
18409
+ <watch event="dom" loop="true">
18200
18410
  <description>Monitor task description</description>
18201
18411
  <trigger>
18202
18412
  <node>Trigger step node</node>
@@ -18205,7 +18415,7 @@ const WATCH_NODE = `
18205
18415
  </watch>`;
18206
18416
  const WATCH_PROMPT = `
18207
18417
  * watch node
18208
- monitor changes in webpage DOM or file content, when executing to the watch node, require the use of the \`${TOOL_NAME}\` tool.
18418
+ monitor changes in webpage DOM elements, when executing to the watch node, require the use of the \`${TOOL_NAME}\` tool.
18209
18419
  `;
18210
18420
  function getAgentSystemPrompt(agent, agentNode, context, tools, extSysPrompt) {
18211
18421
  let prompt = "";
@@ -18416,6 +18626,14 @@ class Agent {
18416
18626
  if (hasVariable) {
18417
18627
  tools.push(new VariableStorageTool());
18418
18628
  }
18629
+ let hasForeach = agentNodeXml.indexOf("</forEach>") > -1;
18630
+ if (hasForeach) {
18631
+ tools.push(new ForeachTaskTool());
18632
+ }
18633
+ let hasWatch = agentNodeXml.indexOf("</watch>") > -1;
18634
+ if (hasWatch) {
18635
+ tools.push(new WatchTriggerTool());
18636
+ }
18419
18637
  let toolNames = this.tools.map((tool) => tool.name);
18420
18638
  return tools.filter((tool) => toolNames.indexOf(tool.name) == -1);
18421
18639
  }
@@ -18582,6 +18800,9 @@ class Agent {
18582
18800
  addTool(tool) {
18583
18801
  this.tools.push(tool);
18584
18802
  }
18803
+ get Llms() {
18804
+ return this.llms;
18805
+ }
18585
18806
  get Name() {
18586
18807
  return this.name;
18587
18808
  }
@@ -18822,8 +19043,8 @@ Your task is to understand the user's requirements, dynamically plan the user's
18822
19043
  <forEach items="list or variable name">
18823
19044
  <node>forEach step node</node>
18824
19045
  </forEach>
18825
- <!-- When you need to monitor changes in webpage DOM or file content, you can use \`Watch\`, the loop attribute specifies whether to listen in a loop or listen once. -->
18826
- <watch event="dom or file" loop="true">
19046
+ <!-- When you need to monitor changes in webpage DOM elements, you can use \`Watch\`, the loop attribute specifies whether to listen in a loop or listen once. -->
19047
+ <watch event="dom" loop="true">
18827
19048
  <description>Monitor task description</description>
18828
19049
  <trigger>
18829
19050
  <node>Trigger step node</node>
@@ -19498,6 +19719,9 @@ class BaseFileAgent extends Agent {
19498
19719
  }
19499
19720
  async do_file_read(agentContext, path, write_variable) {
19500
19721
  let file_context = await this.file_read(agentContext, path);
19722
+ if (file_context && file_context.length > config.fileTextMaxLength) {
19723
+ file_context = file_context.substring(0, config.fileTextMaxLength) + "...";
19724
+ }
19501
19725
  if (write_variable) {
19502
19726
  agentContext.context.variables.set(write_variable, file_context);
19503
19727
  }
@@ -19993,7 +20217,7 @@ This is a computer GUI interface, observe the execution through screenshots, and
19993
20217
  description: "Duration in millisecond",
19994
20218
  default: 500,
19995
20219
  minimum: 200,
19996
- maximum: 2000,
20220
+ maximum: 10000,
19997
20221
  },
19998
20222
  },
19999
20223
  required: ["duration"],
@@ -20856,17 +21080,19 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20856
21080
  await sleep(200);
20857
21081
  if (!extract_page_content) {
20858
21082
  const tools = this.toolUseNames(agentContext.agentChain.agentRequest?.messages);
20859
- if (tools.length > 3 &&
20860
- tools[tools.length - 1] == "scroll_mouse_wheel" &&
20861
- tools[tools.length - 2] == "scroll_mouse_wheel" &&
20862
- tools[tools.length - 3] == "scroll_mouse_wheel") {
20863
- let page_content = await this.extract_page_content(agentContext);
20864
- return "The current page content has been extracted, latest page content:\n" + page_content;
21083
+ let scroll_count = 0;
21084
+ for (let i = tools.length - 1; i >= Math.max(tools.length - 8, 0); i--) {
21085
+ if (tools[i] == "scroll_mouse_wheel") {
21086
+ scroll_count++;
21087
+ }
21088
+ }
21089
+ if (scroll_count >= 3) {
21090
+ extract_page_content = true;
20865
21091
  }
20866
21092
  }
20867
21093
  if (extract_page_content) {
20868
21094
  let page_content = await this.extract_page_content(agentContext);
20869
- return "This is the latest page content:\n" + page_content;
21095
+ return "The current page content has been extracted, latest page content:\n" + page_content;
20870
21096
  }
20871
21097
  }
20872
21098
  async hover_to_element(agentContext, index) {
@@ -21011,23 +21237,30 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21011
21237
  return await this.callInnerTool(() => this.click_element(agentContext, args.index, (args.num_clicks || 1), (args.button || "left")));
21012
21238
  },
21013
21239
  },
21240
+ /*
21014
21241
  {
21015
- name: "scroll_to_element",
21016
- description: "Scroll to the element",
21017
- parameters: {
21018
- type: "object",
21019
- properties: {
21020
- index: {
21021
- type: "number",
21022
- description: "The index of the element to input text into",
21023
- },
21024
- },
21025
- required: ["index"],
21026
- },
21027
- execute: async (args, agentContext) => {
21028
- return await this.callInnerTool(() => this.scroll_to_element(agentContext, args.index));
21242
+ name: "scroll_to_element",
21243
+ description: "Scroll to the element",
21244
+ parameters: {
21245
+ type: "object",
21246
+ properties: {
21247
+ index: {
21248
+ type: "number",
21249
+ description: "The index of the element to input text into",
21250
+ },
21029
21251
  },
21252
+ required: ["index"],
21253
+ },
21254
+ execute: async (
21255
+ args: Record<string, unknown>,
21256
+ agentContext: AgentContext
21257
+ ): Promise<ToolResult> => {
21258
+ return await this.callInnerTool(() =>
21259
+ this.scroll_to_element(agentContext, args.index as number)
21260
+ );
21261
+ },
21030
21262
  },
21263
+ */
21031
21264
  {
21032
21265
  name: "scroll_mouse_wheel",
21033
21266
  description: "Scroll the mouse wheel at current position, only scroll when you need to load more content",
@@ -21165,7 +21398,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21165
21398
  description: "Duration in millisecond",
21166
21399
  default: 500,
21167
21400
  minimum: 200,
21168
- maximum: 2000,
21401
+ maximum: 10000,
21169
21402
  },
21170
21403
  },
21171
21404
  required: ["duration"],
@@ -21176,6 +21409,9 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21176
21409
  },
21177
21410
  ];
21178
21411
  }
21412
+ async double_screenshots(agentContext, messages, tools) {
21413
+ return true;
21414
+ }
21179
21415
  async handleMessages(agentContext, messages, tools) {
21180
21416
  const pseudoHtmlDescription = "This is the latest screenshot and page element information.\nindex and element:\n";
21181
21417
  let lastTool = this.lastToolResult(messages);
@@ -21184,16 +21420,27 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21184
21420
  lastTool.toolName !== "get_all_tabs" &&
21185
21421
  lastTool.toolName !== "variable_storage") {
21186
21422
  await sleep(300);
21423
+ let image_contents = [];
21424
+ if (await this.double_screenshots(agentContext, messages, tools)) {
21425
+ let imageResult = await this.screenshot(agentContext);
21426
+ let image = toImage(imageResult.imageBase64);
21427
+ image_contents.push({
21428
+ type: "image",
21429
+ image: image,
21430
+ mimeType: imageResult.imageType,
21431
+ });
21432
+ }
21187
21433
  let result = await this.screenshot_and_html(agentContext);
21188
21434
  let image = toImage(result.imageBase64);
21435
+ image_contents.push({
21436
+ type: "image",
21437
+ image: image,
21438
+ mimeType: result.imageType,
21439
+ });
21189
21440
  messages.push({
21190
21441
  role: "user",
21191
21442
  content: [
21192
- {
21193
- type: "image",
21194
- image: image,
21195
- mimeType: result.imageType,
21196
- },
21443
+ ...image_contents,
21197
21444
  {
21198
21445
  type: "text",
21199
21446
  text: pseudoHtmlDescription + result.pseudoHtml,
@@ -21239,7 +21486,12 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21239
21486
  if (eIdx == -1) {
21240
21487
  continue;
21241
21488
  }
21242
- line = line.substring(0, sIdx) + line.substring(eIdx + 1).trim().replace('" >', '">');
21489
+ line =
21490
+ line.substring(0, sIdx) +
21491
+ line
21492
+ .substring(eIdx + 1)
21493
+ .trim()
21494
+ .replace('" >', '">');
21243
21495
  }
21244
21496
  return line;
21245
21497
  })
@@ -21406,7 +21658,7 @@ function scroll_by(params) {
21406
21658
  }
21407
21659
  function findNodes(element = document, nodes = []) {
21408
21660
  for (const node of Array.from(element.querySelectorAll("*"))) {
21409
- if (node.tagName === 'IFRAME' && node.contentDocument) {
21661
+ if (node.tagName === "IFRAME" && node.contentDocument) {
21410
21662
  findNodes(node.contentDocument, nodes);
21411
21663
  }
21412
21664
  else {
@@ -21427,7 +21679,9 @@ function scroll_by(params) {
21427
21679
  elements = allElements.filter((el) => {
21428
21680
  const style = window.getComputedStyle(el);
21429
21681
  const overflowY = style.getPropertyValue("overflow-y");
21430
- return (overflowY === "auto" || overflowY === "scroll" || el.scrollHeight > el.clientHeight);
21682
+ return (overflowY === "auto" ||
21683
+ overflowY === "scroll" ||
21684
+ el.scrollHeight > el.clientHeight);
21431
21685
  });
21432
21686
  }
21433
21687
  return elements;
@@ -21730,7 +21984,7 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
21730
21984
  description: "Duration in millisecond",
21731
21985
  default: 500,
21732
21986
  minimum: 200,
21733
- maximum: 2000,
21987
+ maximum: 10000,
21734
21988
  },
21735
21989
  },
21736
21990
  required: ["duration"],