@eko-ai/eko 2.0.2 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs.js CHANGED
@@ -7,7 +7,8 @@ var buffer = require('buffer');
7
7
  const config = {
8
8
  name: "Fellou",
9
9
  platform: "mac",
10
- maxReactNum: 100
10
+ maxReactNum: 100,
11
+ maxTokens: 16000
11
12
  };
12
13
 
13
14
  var LogLevel;
@@ -8562,6 +8563,15 @@ function mergeAgents(agents1, agents2) {
8562
8563
  }
8563
8564
  return tools;
8564
8565
  }
8566
+ function sub(str, maxLength, appendPoint = true) {
8567
+ if (!str) {
8568
+ return "";
8569
+ }
8570
+ if (str.length > maxLength) {
8571
+ return str.substring(0, maxLength) + (appendPoint ? "..." : "");
8572
+ }
8573
+ return str;
8574
+ }
8565
8575
  function fixXmlTag(code) {
8566
8576
  function fixDoubleChar(code) {
8567
8577
  const stack = [];
@@ -12763,7 +12773,7 @@ class RetryLanguageModel {
12763
12773
  constructor(llms, names, stream_first_timeout) {
12764
12774
  this.llms = llms;
12765
12775
  this.names = names || [];
12766
- this.stream_first_timeout = stream_first_timeout || 20000;
12776
+ this.stream_first_timeout = stream_first_timeout || 30000;
12767
12777
  if (this.names.indexOf("default") == -1) {
12768
12778
  this.names.push("default");
12769
12779
  }
@@ -12777,7 +12787,7 @@ class RetryLanguageModel {
12777
12787
  toolChoice: request.toolChoice,
12778
12788
  },
12779
12789
  prompt: request.messages,
12780
- maxTokens: request.maxTokens,
12790
+ maxTokens: request.maxTokens || config.maxTokens,
12781
12791
  temperature: request.temperature,
12782
12792
  topP: request.topP,
12783
12793
  topK: request.topK,
@@ -12800,6 +12810,9 @@ class RetryLanguageModel {
12800
12810
  return result;
12801
12811
  }
12802
12812
  catch (e) {
12813
+ if (e?.name === "AbortError") {
12814
+ throw e;
12815
+ }
12803
12816
  if (Log.isEnableInfo()) {
12804
12817
  Log.info(`LLM nonstream request, name: ${name} => `, {
12805
12818
  tools: options.mode?.tools,
@@ -12820,7 +12833,7 @@ class RetryLanguageModel {
12820
12833
  toolChoice: request.toolChoice,
12821
12834
  },
12822
12835
  prompt: request.messages,
12823
- maxTokens: request.maxTokens,
12836
+ maxTokens: request.maxTokens || config.maxTokens,
12824
12837
  temperature: request.temperature,
12825
12838
  topP: request.topP,
12826
12839
  topK: request.topK,
@@ -12836,12 +12849,19 @@ class RetryLanguageModel {
12836
12849
  continue;
12837
12850
  }
12838
12851
  try {
12839
- const result = await call_timeout(async () => await llm.doStream(options), this.stream_first_timeout);
12852
+ const controller = new AbortController();
12853
+ const signal = options.abortSignal
12854
+ ? AbortSignal.any([options.abortSignal, controller.signal])
12855
+ : controller.signal;
12856
+ const result = await call_timeout(async () => await llm.doStream({ ...options, abortSignal: signal }), this.stream_first_timeout, (e) => {
12857
+ controller.abort();
12858
+ });
12840
12859
  const stream = result.stream;
12841
12860
  const reader = stream.getReader();
12842
12861
  const { done, value } = await call_timeout(async () => await reader.read(), this.stream_first_timeout, (e) => {
12843
12862
  reader.cancel();
12844
12863
  reader.releaseLock();
12864
+ controller.abort();
12845
12865
  });
12846
12866
  if (done) {
12847
12867
  Log.warn(`LLM stream done, name: ${name} => `, { done, value });
@@ -12861,6 +12881,9 @@ class RetryLanguageModel {
12861
12881
  return result;
12862
12882
  }
12863
12883
  catch (e) {
12884
+ if (e?.name === "AbortError") {
12885
+ throw e;
12886
+ }
12864
12887
  if (Log.isEnableInfo()) {
12865
12888
  Log.info(`LLM stream request, name: ${name} => `, {
12866
12889
  tools: options.mode?.tools,
@@ -15659,6 +15682,7 @@ const TOOL_NAME$3 = "human_interact";
15659
15682
  class HumanInteractTool {
15660
15683
  constructor() {
15661
15684
  this.name = TOOL_NAME$3;
15685
+ this.noPlan = true;
15662
15686
  this.description = `AI interacts with humans:
15663
15687
  confirm: Ask the user to confirm whether to execute an operation, especially when performing dangerous actions such as deleting system files.
15664
15688
  input: Prompt the user to enter text; for example, when a task is ambiguous, the AI can choose to ask the user for details, and the user can respond by inputting.
@@ -15672,93 +15696,57 @@ request_help: Request assistance from the user; for instance, when an operation
15672
15696
  description: "The type of interaction with users.",
15673
15697
  enum: ["confirm", "input", "select", "request_help"],
15674
15698
  },
15675
- confirm: {
15676
- type: "object",
15677
- properties: {
15678
- prompt: {
15679
- type: "string",
15680
- description: "Display prompts to users",
15681
- },
15682
- },
15683
- required: ["prompt"],
15699
+ prompt: {
15700
+ type: "string",
15701
+ description: "Display prompts to users",
15684
15702
  },
15685
- input: {
15686
- type: "object",
15687
- properties: {
15688
- prompt: {
15689
- type: "string",
15690
- description: "Display prompts to users",
15691
- },
15703
+ selectOptions: {
15704
+ type: "array",
15705
+ description: "Options provided to users, this parameter is required when interactType is select.",
15706
+ items: {
15707
+ type: "string",
15692
15708
  },
15693
- required: ["prompt"],
15694
15709
  },
15695
- select: {
15696
- type: "object",
15697
- properties: {
15698
- prompt: {
15699
- type: "string",
15700
- description: "Display prompts to users",
15701
- },
15702
- options: {
15703
- type: "array",
15704
- description: "Options provided to the user",
15705
- items: {
15706
- type: "string",
15707
- },
15708
- },
15709
- multiple: {
15710
- type: "boolean",
15711
- },
15712
- },
15713
- required: ["prompt", "options"],
15710
+ selectMultiple: {
15711
+ type: "boolean",
15712
+ description: "isMultiple, used when interactType is select",
15714
15713
  },
15715
- request_help: {
15716
- type: "object",
15717
- properties: {
15718
- helpType: {
15719
- type: "string",
15720
- description: "Display prompts to users",
15721
- enum: ["request_login", "request_assistance"],
15722
- },
15723
- prompt: {
15724
- type: "string",
15725
- description: "Display prompts to users",
15726
- },
15727
- },
15728
- required: ["helpType", "prompt"],
15714
+ helpType: {
15715
+ type: "string",
15716
+ description: "Help type, required when interactType is request_help.",
15717
+ enum: ["request_login", "request_assistance"],
15729
15718
  },
15730
15719
  },
15731
- required: ["interactType"],
15720
+ required: ["interactType", "prompt"],
15732
15721
  };
15733
15722
  }
15734
15723
  async execute(args, agentContext) {
15735
15724
  let interactType = args.interactType;
15736
- let interact = args[interactType];
15737
15725
  let callback = agentContext.context.config.callback;
15738
15726
  let resultText = "";
15739
15727
  if (callback) {
15740
15728
  switch (interactType) {
15741
15729
  case "confirm":
15742
15730
  if (callback.onHumanConfirm) {
15743
- let result = await callback.onHumanConfirm(agentContext, interact.prompt);
15731
+ let result = await callback.onHumanConfirm(agentContext, args.prompt);
15744
15732
  resultText = `confirm result: ${result ? "Yes" : "No"}`;
15745
15733
  }
15746
15734
  break;
15747
15735
  case "input":
15748
15736
  if (callback.onHumanInput) {
15749
- let result = await callback.onHumanInput(agentContext, interact.prompt);
15737
+ let result = await callback.onHumanInput(agentContext, args.prompt);
15750
15738
  resultText = `input result: ${result}`;
15751
15739
  }
15752
15740
  break;
15753
15741
  case "select":
15754
15742
  if (callback.onHumanSelect) {
15755
- let result = await callback.onHumanSelect(agentContext, interact.prompt, interact.options, interact.multiple);
15743
+ let result = await callback.onHumanSelect(agentContext, args.prompt, (args.selectOptions || []), (args.selectMultiple || false));
15756
15744
  resultText = `select result: ${JSON.stringify(result)}`;
15757
15745
  }
15758
15746
  break;
15759
15747
  case "request_help":
15760
15748
  if (callback.onHumanHelp) {
15761
- let result = await callback.onHumanHelp(agentContext, interact.helpType, interact.prompt);
15749
+ let result = await callback.onHumanHelp(agentContext, (args.helpType || "request_assistance"), args.prompt);
15762
15750
  resultText = `request_help result: ${result ? "Solved" : "Unresolved"}`;
15763
15751
  }
15764
15752
  break;
@@ -15932,7 +15920,7 @@ class WatchTriggerTool {
15932
15920
  };
15933
15921
  }
15934
15922
  async execute(args, agentContext) {
15935
- // TODO 监听 dom 文件 改变,执行节点
15923
+ // TODO Listen for changes to the DOM or file, and execute nodes
15936
15924
  return null;
15937
15925
  }
15938
15926
  }
@@ -15971,11 +15959,15 @@ UTC datetime: {datetime}
15971
15959
  </root>
15972
15960
  `;
15973
15961
  const HUMAN_PROMPT = `
15974
- During the task execution process, you can use the \`${TOOL_NAME$3}\` tool to interact with humans. Please do not abuse this tool to harass humans. Please call it in the following situations:
15962
+ * HUMAN INTERACT
15963
+ During the task execution process, you can use the \`${TOOL_NAME$3}\` tool to interact with humans, please call it in the following situations:
15975
15964
  - When performing dangerous operations such as deleting files, confirmation from humans is required
15976
15965
  - When encountering obstacles while accessing websites, such as requiring user login, you need to request human assistance
15966
+ - When requesting login, please only call the function when a login dialog box is clearly displayed.
15967
+ - Try not to use the \`${TOOL_NAME$3}\` tool
15977
15968
  `;
15978
15969
  const VARIABLE_PROMPT = `
15970
+ * VARIABLE STORAGE
15979
15971
  If you need to read and write the input/output variables in the node, require the use of the \`${TOOL_NAME$1}\` tool.
15980
15972
  `;
15981
15973
  const FOR_EACH_NODE = `
@@ -16000,14 +15992,18 @@ const WATCH_NODE = `
16000
15992
  const WATCH_PROMPT = `
16001
15993
  \`watch\`: monitor changes in webpage DOM or file content, when executing to the watch node, require the use of the \`${TOOL_NAME}\` tool.
16002
15994
  `;
16003
- function getAgentSystemPrompt(agent, agentNode, context, systemPrompt) {
16004
- let prompt = "";
15995
+ function getAgentSystemPrompt(agent, agentNode, context, tools, extSysPrompt) {
15996
+ let prompt = extSysPrompt || "";
16005
15997
  let nodePrompt = "";
16006
15998
  let agentNodeXml = agentNode.xml;
16007
- let hasForEach = agentNodeXml.indexOf("</forEach>") > -1;
16008
15999
  let hasWatch = agentNodeXml.indexOf("</watch>") > -1;
16009
- let hasVariable = agentNodeXml.indexOf(" input=") > -1 || agentNodeXml.indexOf(" output=") > -1;
16010
- let hasHumanTool = agent.Tools.filter((tool) => tool.name == TOOL_NAME$3).length > 0;
16000
+ let hasForEach = agentNodeXml.indexOf("</forEach>") > -1;
16001
+ let hasHumanTool = (tools || agent.Tools).filter((tool) => tool.name == TOOL_NAME$3)
16002
+ .length > 0;
16003
+ let hasVariable = agentNodeXml.indexOf("input=") > -1 ||
16004
+ agentNodeXml.indexOf("output=") > -1 ||
16005
+ (tools || agent.Tools).filter((tool) => tool.name == TOOL_NAME$1)
16006
+ .length > 0;
16011
16007
  if (hasHumanTool) {
16012
16008
  prompt += HUMAN_PROMPT;
16013
16009
  }
@@ -16022,8 +16018,17 @@ function getAgentSystemPrompt(agent, agentNode, context, systemPrompt) {
16022
16018
  prompt += WATCH_PROMPT;
16023
16019
  nodePrompt += WATCH_NODE;
16024
16020
  }
16025
- return (systemPrompt || AGENT_SYSTEM_TEMPLATE)
16026
- .replace("{name}", config.name)
16021
+ if (context.chain.agents.length > 1) {
16022
+ prompt += "\n Main task: " + context.chain.taskPrompt;
16023
+ prompt += "\n# Pre-task execution results";
16024
+ for (let i = 0; i < context.chain.agents.length; i++) {
16025
+ let agentChain = context.chain.agents[i];
16026
+ if (agentChain.agentResult) {
16027
+ prompt += `\n## ${agentChain.agent.task || agentChain.agent.name}\n${sub(agentChain.agentResult, 500)}`;
16028
+ }
16029
+ }
16030
+ }
16031
+ return AGENT_SYSTEM_TEMPLATE.replace("{name}", config.name)
16027
16032
  .replace("{agent}", agent.Name)
16028
16033
  .replace("{description}", agent.Description)
16029
16034
  .replace("{datetime}", new Date().toISOString())
@@ -16031,8 +16036,9 @@ function getAgentSystemPrompt(agent, agentNode, context, systemPrompt) {
16031
16036
  .replace("{nodePrompt}", nodePrompt)
16032
16037
  .trim();
16033
16038
  }
16034
- function getAgentUserPrompt(agent, agentNode, context) {
16035
- let hasTaskNodeStatusTool = agent.Tools.filter((tool) => tool.name == TOOL_NAME$2).length > 0;
16039
+ function getAgentUserPrompt(agent, agentNode, context, tools) {
16040
+ let hasTaskNodeStatusTool = (tools || agent.Tools).filter((tool) => tool.name == TOOL_NAME$2)
16041
+ .length > 0;
16036
16042
  return buildAgentRootXml(agentNode.xml, context.chain.taskPrompt, (nodeId, node) => {
16037
16043
  if (hasTaskNodeStatusTool) {
16038
16044
  node.setAttribute("status", "todo");
@@ -16065,9 +16071,10 @@ class Agent {
16065
16071
  let loopNum = 0;
16066
16072
  let context = agentContext.context;
16067
16073
  let agentNode = agentContext.agentChain.agent;
16068
- let messages = this.initMessages(agentContext);
16074
+ const tools = [...this.tools, ...this.system_auto_tools(agentNode)];
16075
+ let messages = await this.initMessages(agentContext, tools);
16069
16076
  let rlm = new RetryLanguageModel(context.config.llms, this.llms);
16070
- let agentTools = [...this.tools, ...this.system_auto_tools(agentNode)];
16077
+ let agentTools = tools;
16071
16078
  while (loopNum < maxReactNum) {
16072
16079
  context.checkAborted();
16073
16080
  if (mcpClient) {
@@ -16075,7 +16082,7 @@ class Agent {
16075
16082
  if (controlMcp.mcpTools) {
16076
16083
  let mcpTools = await this.listTools(agentNode, context, mcpClient, controlMcp.mcpParams);
16077
16084
  let usedTools = this.extractUsedTool(messages, agentTools);
16078
- let _agentTools = mergeTools(this.tools, usedTools);
16085
+ let _agentTools = mergeTools(tools, usedTools);
16079
16086
  agentTools = mergeTools(_agentTools, mcpTools);
16080
16087
  }
16081
16088
  }
@@ -16087,7 +16094,7 @@ class Agent {
16087
16094
  }
16088
16095
  loopNum++;
16089
16096
  }
16090
- return null;
16097
+ return "Unfinished";
16091
16098
  }
16092
16099
  async handleResult(agentContext, messages, agentTools, results) {
16093
16100
  let text = null;
@@ -16167,8 +16174,8 @@ class Agent {
16167
16174
  system_auto_tools(agentNode) {
16168
16175
  let tools = [];
16169
16176
  let agentNodeXml = agentNode.xml;
16170
- let hasVariable = agentNodeXml.indexOf(" input=") > -1 ||
16171
- agentNodeXml.indexOf(" output=") > -1;
16177
+ let hasVariable = agentNodeXml.indexOf("input=") > -1 ||
16178
+ agentNodeXml.indexOf("output=") > -1;
16172
16179
  if (hasVariable) {
16173
16180
  tools.push(new VariableStorageTool());
16174
16181
  }
@@ -16197,24 +16204,27 @@ class Agent {
16197
16204
  }
16198
16205
  return _results;
16199
16206
  }
16200
- initMessages(agentContext) {
16207
+ async initMessages(agentContext, tools) {
16201
16208
  let messages = [
16202
16209
  {
16203
16210
  role: "system",
16204
- content: getAgentSystemPrompt(this, agentContext.agentChain.agent, agentContext.context),
16211
+ content: getAgentSystemPrompt(this, agentContext.agentChain.agent, agentContext.context, tools, await this.extSysPrompt(agentContext)),
16205
16212
  },
16206
16213
  {
16207
16214
  role: "user",
16208
16215
  content: [
16209
16216
  {
16210
16217
  type: "text",
16211
- text: getAgentUserPrompt(this, agentContext.agentChain.agent, agentContext.context),
16218
+ text: getAgentUserPrompt(this, agentContext.agentChain.agent, agentContext.context, tools),
16212
16219
  },
16213
16220
  ],
16214
16221
  },
16215
16222
  ];
16216
16223
  return messages;
16217
16224
  }
16225
+ async extSysPrompt(agentContext) {
16226
+ return "";
16227
+ }
16218
16228
  async listTools(agentNode, context, mcpClient, mcpParams) {
16219
16229
  let list = await mcpClient.listTools({
16220
16230
  taskId: context.taskId,
@@ -16250,7 +16260,7 @@ class Agent {
16250
16260
  nodeId: agentContext.agentChain.agent.id,
16251
16261
  environment: config.platform,
16252
16262
  agent_name: agentContext.agent.Name,
16253
- }
16263
+ },
16254
16264
  });
16255
16265
  },
16256
16266
  };
@@ -16278,7 +16288,7 @@ class Agent {
16278
16288
  let message = messages[i];
16279
16289
  if (message.role == "tool") {
16280
16290
  for (let j = 0; j < message.content.length; j++) {
16281
- let toolName = message.content[i].toolName;
16291
+ let toolName = message.content[j].toolName;
16282
16292
  if (toolNames.indexOf(toolName) > -1) {
16283
16293
  continue;
16284
16294
  }
@@ -16322,12 +16332,17 @@ class Agent {
16322
16332
  else if (!isError && text.length == 0) {
16323
16333
  text = "Successful";
16324
16334
  }
16325
- let result = { result: text };
16335
+ let contentText = {
16336
+ type: "text",
16337
+ text: text,
16338
+ };
16339
+ let result = text;
16326
16340
  if (text &&
16327
16341
  ((text.startsWith("{") && text.endsWith("}")) ||
16328
16342
  (text.startsWith("[") && text.endsWith("]")))) {
16329
16343
  try {
16330
16344
  result = JSON.parse(text);
16345
+ contentText = null;
16331
16346
  }
16332
16347
  catch (e) { }
16333
16348
  }
@@ -16336,6 +16351,7 @@ class Agent {
16336
16351
  toolCallId: toolUse.toolCallId,
16337
16352
  toolName: toolUse.toolName,
16338
16353
  result: result,
16354
+ content: contentText ? [contentText] : undefined,
16339
16355
  isError: isError,
16340
16356
  };
16341
16357
  }
@@ -16610,7 +16626,7 @@ Your task is to understand the user's requirements, dynamically plan the user's
16610
16626
  2. Analyze the Agents that need to be used based on the user's requirements.
16611
16627
  3. Generate the Agent calling plan based on the analysis results.
16612
16628
  4. About agent name, please do not arbitrarily fabricate non-existent agent names.
16613
- 5. You only need to provide the steps to complete the user's task, steps are simple and straightforward, no need for too many specific details.
16629
+ 5. You only need to provide the steps to complete the user's task, key steps only, no need to be too detailed.
16614
16630
  6. Please strictly follow the output format and example output.
16615
16631
  7. The output language should follow the language corresponding to the user's task.
16616
16632
 
@@ -16761,15 +16777,22 @@ const PLAN_USER_TEMPLATE = `
16761
16777
  User Platform: {platform}
16762
16778
  Task Description: {taskPrompt}
16763
16779
  `;
16780
+ const PLAN_USER_TASK_WEBSITE_TEMPLATE = `
16781
+ User Platform: {platform}
16782
+ Task Website: {task_website}
16783
+ Task Description: {taskPrompt}
16784
+ `;
16764
16785
  function getPlanSystemPrompt(agents) {
16765
16786
  let agents_prompt = agents
16766
16787
  .map((agent) => {
16767
16788
  return (`<agent name="${agent.Name}">\n` +
16768
16789
  `Description: ${agent.PlanDescription || agent.Description}\nTools:\n` +
16769
- agent.Tools.map((tool) => `- ${tool.name}: ${tool.description || ""}`).join("\n") +
16790
+ agent.Tools.filter((tool) => !tool.noPlan)
16791
+ .map((tool) => `- ${tool.name}: ${tool.planDescription || tool.description || ""}`)
16792
+ .join("\n") +
16770
16793
  `\n</agent>`);
16771
16794
  })
16772
- .join("\n");
16795
+ .join("\n\n");
16773
16796
  let example_prompt = "";
16774
16797
  let hasChatAgent = agents.filter((a) => a.Name == AGENT_NAME$4).length > 0;
16775
16798
  const example_list = hasChatAgent
@@ -16784,10 +16807,18 @@ function getPlanSystemPrompt(agents) {
16784
16807
  .replace("{example_prompt}", example_prompt)
16785
16808
  .trim();
16786
16809
  }
16787
- function getPlanUserPrompt(taskPrompt) {
16788
- return PLAN_USER_TEMPLATE.replace("{taskPrompt}", taskPrompt)
16789
- .replace("{platform}", config.platform)
16790
- .trim();
16810
+ function getPlanUserPrompt(taskPrompt, task_website) {
16811
+ if (task_website) {
16812
+ return PLAN_USER_TASK_WEBSITE_TEMPLATE.replace("{taskPrompt}", taskPrompt)
16813
+ .replace("{platform}", config.platform)
16814
+ .replace("{task_website}", task_website)
16815
+ .trim();
16816
+ }
16817
+ else {
16818
+ return PLAN_USER_TEMPLATE.replace("{taskPrompt}", taskPrompt)
16819
+ .replace("{platform}", config.platform)
16820
+ .trim();
16821
+ }
16791
16822
  }
16792
16823
 
16793
16824
  class Planner {
@@ -16816,7 +16847,7 @@ class Planner {
16816
16847
  {
16817
16848
  role: "user",
16818
16849
  content: [{ type: "text", text: taskPrompt }],
16819
- }
16850
+ },
16820
16851
  ];
16821
16852
  }
16822
16853
  else {
@@ -16824,12 +16855,17 @@ class Planner {
16824
16855
  { role: "system", content: getPlanSystemPrompt(this.context.agents) },
16825
16856
  {
16826
16857
  role: "user",
16827
- content: [{ type: "text", text: getPlanUserPrompt(taskPrompt) }],
16858
+ content: [
16859
+ {
16860
+ type: "text",
16861
+ text: getPlanUserPrompt(taskPrompt, this.context.variables.get("task_website")),
16862
+ },
16863
+ ],
16828
16864
  },
16829
16865
  ];
16830
16866
  }
16831
16867
  let request = {
16832
- maxTokens: 1024,
16868
+ maxTokens: 4096,
16833
16869
  temperature: 0.7,
16834
16870
  messages: messages,
16835
16871
  abortSignal: this.context.controller.signal,
@@ -16933,30 +16969,33 @@ class Eko {
16933
16969
  throw new Error("The task does not exist");
16934
16970
  }
16935
16971
  try {
16936
- return this.doRunWorkflow(context);
16972
+ return await this.doRunWorkflow(context);
16937
16973
  }
16938
16974
  catch (e) {
16939
16975
  return {
16976
+ taskId,
16940
16977
  success: false,
16941
16978
  stopReason: e?.name == "AbortError" ? "abort" : "error",
16942
16979
  result: e,
16943
16980
  };
16944
16981
  }
16945
- finally {
16946
- this.deleteTask(taskId);
16947
- }
16948
16982
  }
16949
16983
  async run(taskPrompt, taskId = uuidv4(), contextParams) {
16950
16984
  await this.generate(taskPrompt, taskId, contextParams);
16951
16985
  return await this.execute(taskId);
16952
16986
  }
16953
16987
  async initContext(workflow, contextParams) {
16954
- const agents = [...(this.config.agents || [])];
16988
+ const agents = this.config.agents || [];
16955
16989
  let chain = new Chain(workflow.taskPrompt || workflow.name);
16956
16990
  let context = new Context(workflow.taskId, this.config, agents, chain);
16991
+ if (this.config.a2aClient) {
16992
+ let a2aList = await this.config.a2aClient.listAgents(workflow.taskPrompt || workflow.name);
16993
+ context.agents = mergeAgents(context.agents, a2aList);
16994
+ }
16957
16995
  if (contextParams) {
16958
16996
  Object.keys(contextParams).forEach((key) => context.variables.set(key, contextParams[key]));
16959
16997
  }
16998
+ context.workflow = workflow;
16960
16999
  this.taskMap.set(workflow.taskId, context);
16961
17000
  return context;
16962
17001
  }
@@ -16970,7 +17009,7 @@ class Eko {
16970
17009
  map[item.Name] = item;
16971
17010
  return map;
16972
17011
  }, {});
16973
- let lastResult;
17012
+ let results = [];
16974
17013
  for (let i = 0; i < workflow.agents.length; i++) {
16975
17014
  context.checkAborted();
16976
17015
  let agentNode = workflow.agents[i];
@@ -16981,18 +17020,21 @@ class Eko {
16981
17020
  let agentChain = new AgentChain(agentNode);
16982
17021
  context.chain.push(agentChain);
16983
17022
  agent.result = await agent.run(context, agentChain);
16984
- lastResult = agent.result;
17023
+ results.push(agent.result);
16985
17024
  }
16986
- // TODO 超过2个Agent时需要summary输出结果。
16987
17025
  return {
16988
17026
  success: true,
16989
17027
  stopReason: "done",
16990
- result: lastResult,
17028
+ result: results[results.length - 1],
17029
+ taskId: context.taskId,
16991
17030
  };
16992
17031
  }
16993
17032
  getTask(taskId) {
16994
17033
  return this.taskMap.get(taskId);
16995
17034
  }
17035
+ getAllTaskId() {
17036
+ return [...this.taskMap.keys()];
17037
+ }
16996
17038
  deleteTask(taskId) {
16997
17039
  return this.taskMap.delete(taskId);
16998
17040
  }
@@ -17237,18 +17279,17 @@ function parseChunk(chunk) {
17237
17279
 
17238
17280
  const AGENT_NAME$3 = "File";
17239
17281
  class BaseFileAgent extends Agent {
17240
- constructor(work_path, llms, ext_tools, mcpClient) {
17282
+ constructor(work_path, llms, ext_tools, mcpClient, planDescription) {
17241
17283
  const _tools_ = [];
17242
- const prompt = work_path
17243
- ? `Your default working path is: ${work_path}`
17244
- : "";
17284
+ const prompt = work_path ? `Your default working path is: ${work_path}` : "";
17245
17285
  super({
17246
17286
  name: AGENT_NAME$3,
17247
17287
  description: `You are a file agent, handling file-related tasks such as creating, finding, reading, modifying files, etc.${prompt}`,
17248
17288
  tools: _tools_,
17249
17289
  llms: llms,
17250
17290
  mcpClient: mcpClient,
17251
- planDescription: "File operation agent, handling file-related tasks such as creating, finding, reading, modifying files, etc.",
17291
+ planDescription: planDescription ||
17292
+ "File operation agent, handling file-related tasks such as creating, finding, reading, modifying files, etc, only text file writing is supported.",
17252
17293
  });
17253
17294
  let init_tools = this.buildInitTools();
17254
17295
  if (ext_tools && ext_tools.length > 0) {
@@ -17294,7 +17335,7 @@ class BaseFileAgent extends Agent {
17294
17335
  },
17295
17336
  {
17296
17337
  name: "file_write",
17297
- description: "Overwrite or append content to a file. Use for creating new files, appending content, or modifying existing files.",
17338
+ description: "Overwrite or append content to a file. Use for creating new files, appending content, or modifying existing files, only supports txt/md/csv or other text formats.",
17298
17339
  parameters: {
17299
17340
  type: "object",
17300
17341
  properties: {
@@ -17370,7 +17411,7 @@ class BaseFileAgent extends Agent {
17370
17411
 
17371
17412
  const AGENT_NAME$2 = "Shell";
17372
17413
  class BaseShellAgent extends Agent {
17373
- constructor(llms, ext_tools, mcpClient) {
17414
+ constructor(llms, ext_tools, mcpClient, planDescription) {
17374
17415
  const _tools_ = [];
17375
17416
  super({
17376
17417
  name: AGENT_NAME$2,
@@ -17378,7 +17419,7 @@ class BaseShellAgent extends Agent {
17378
17419
  tools: _tools_,
17379
17420
  llms: llms,
17380
17421
  mcpClient: mcpClient,
17381
- planDescription: "Shell command agent, use to execute shell commands.",
17422
+ planDescription: planDescription || "Shell command agent, use to execute shell commands.",
17382
17423
  });
17383
17424
  let init_tools = this.buildInitTools();
17384
17425
  if (ext_tools && ext_tools.length > 0) {
@@ -17452,34 +17493,55 @@ class BaseTimerAgent extends Agent {
17452
17493
 
17453
17494
  const AGENT_NAME$1 = "Computer";
17454
17495
  class BaseComputerAgent extends Agent {
17455
- constructor(llms, ext_tools, mcpClient) {
17496
+ constructor(llms, ext_tools, mcpClient, keyboardKeys) {
17456
17497
  const _tools_ = [];
17457
17498
  super({
17458
17499
  name: AGENT_NAME$1,
17459
- description: "You are a computer operation agent, who interacts with the computer using mouse and keyboard, completing specified tasks step by step based on the given tasks and screenshots. After each of your operations, you will receive the latest computer screenshot to evaluate the task execution status.",
17500
+ description: `You are a computer operation agent, who interacts with the computer using mouse and keyboard, completing specified tasks step by step based on the given tasks and screenshots. After each of your operations, you will receive the latest computer screenshot to evaluate the task execution status.
17501
+ This is a computer GUI interface, observe the execution through screenshots, and specify action sequences to complete designated tasks.
17502
+ * COMPUTER OPERATIONS:
17503
+ - You can operate the application using shortcuts.
17504
+ - If stuck, try alternative approaches`,
17460
17505
  tools: _tools_,
17461
17506
  llms: llms,
17462
17507
  mcpClient: mcpClient,
17463
- planDescription: "Computer operation agent, interact with the computer using the mouse and keyboard."
17508
+ planDescription: "Computer operation agent, interact with the computer using the mouse and keyboard, operation application."
17464
17509
  });
17465
- this.keyboardKeys = [
17466
- 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
17467
- 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
17468
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
17469
- 'enter', 'esc', 'backspace', 'tab', 'space', 'delete',
17470
- 'ctrl', 'alt', 'shift', 'win',
17471
- 'up', 'down', 'left', 'right',
17472
- 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12',
17473
- 'ctrl+c', 'ctrl+v', 'ctrl+x', 'ctrl+z', 'ctrl+a', 'ctrl+s',
17474
- 'alt+tab', 'alt+f4', 'ctrl+alt+delete'
17475
- ];
17476
- let init_tools = this.buildInitTools();
17510
+ if (!keyboardKeys) {
17511
+ if (config.platform == "windows") {
17512
+ keyboardKeys = [
17513
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
17514
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
17515
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
17516
+ 'enter', 'esc', 'backspace', 'tab', 'space', 'delete',
17517
+ 'ctrl', 'alt', 'shift', 'win',
17518
+ 'up', 'down', 'left', 'right',
17519
+ 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12',
17520
+ 'ctrl+c', 'ctrl+v', 'ctrl+x', 'ctrl+z', 'ctrl+a', 'ctrl+s',
17521
+ 'alt+tab', 'alt+f4', 'ctrl+alt+delete'
17522
+ ];
17523
+ }
17524
+ else {
17525
+ keyboardKeys = [
17526
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
17527
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
17528
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
17529
+ 'enter', 'esc', 'backspace', 'tab', 'space', 'delete',
17530
+ 'command', 'option', 'shift', 'control',
17531
+ 'up', 'down', 'left', 'right',
17532
+ 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12',
17533
+ 'command+c', 'command+v', 'command+x', 'command+z', 'command+a', 'command+s',
17534
+ 'command+tab', 'command+q', 'command+escape'
17535
+ ];
17536
+ }
17537
+ }
17538
+ let init_tools = this.buildInitTools(keyboardKeys);
17477
17539
  if (ext_tools && ext_tools.length > 0) {
17478
17540
  init_tools = mergeTools(init_tools, ext_tools);
17479
17541
  }
17480
17542
  init_tools.forEach((tool) => _tools_.push(tool));
17481
17543
  }
17482
- buildInitTools() {
17544
+ buildInitTools(keyboardKeys) {
17483
17545
  return [
17484
17546
  {
17485
17547
  name: "typing",
@@ -17560,15 +17622,22 @@ class BaseComputerAgent extends Agent {
17560
17622
  properties: {
17561
17623
  amount: {
17562
17624
  type: "number",
17563
- description: "Scroll amount (positive for up, negative for down)",
17564
- minimum: -10,
17625
+ description: "Scroll amount (up / down)",
17626
+ minimum: 1,
17565
17627
  maximum: 10,
17566
17628
  },
17629
+ direction: {
17630
+ type: "string",
17631
+ enum: ["up", "down"],
17632
+ },
17567
17633
  },
17568
- required: ["amount"],
17634
+ required: ["amount", "direction"],
17569
17635
  },
17570
17636
  execute: async (args, agentContext) => {
17571
- return await this.callInnerTool(() => this.scroll(agentContext, args.amount));
17637
+ return await this.callInnerTool(async () => {
17638
+ let amount = args.amount;
17639
+ await this.scroll(agentContext, args.direction == "up" ? -amount : amount);
17640
+ });
17572
17641
  },
17573
17642
  },
17574
17643
  {
@@ -17580,7 +17649,7 @@ class BaseComputerAgent extends Agent {
17580
17649
  key: {
17581
17650
  type: "string",
17582
17651
  description: "Key to press",
17583
- enum: this.keyboardKeys,
17652
+ enum: keyboardKeys,
17584
17653
  },
17585
17654
  },
17586
17655
  required: ["key"],
@@ -17598,7 +17667,7 @@ class BaseComputerAgent extends Agent {
17598
17667
  keys: {
17599
17668
  type: "string",
17600
17669
  description: "Key combination to press",
17601
- enum: this.keyboardKeys,
17670
+ enum: keyboardKeys,
17602
17671
  },
17603
17672
  },
17604
17673
  required: ["keys"],
@@ -17638,20 +17707,23 @@ class BaseComputerAgent extends Agent {
17638
17707
  },
17639
17708
  {
17640
17709
  name: "wait",
17710
+ noPlan: true,
17641
17711
  description: "Wait for specified duration",
17642
17712
  parameters: {
17643
17713
  type: "object",
17644
17714
  properties: {
17645
17715
  duration: {
17646
17716
  type: "number",
17647
- description: "Duration in seconds",
17648
- default: 0.5,
17717
+ description: "Duration in millisecond",
17718
+ default: 500,
17719
+ minimum: 200,
17720
+ maximum: 2000,
17649
17721
  },
17650
17722
  },
17651
17723
  required: ["duration"],
17652
17724
  },
17653
17725
  execute: async (args, agentContext) => {
17654
- return await this.callInnerTool(() => sleep((args.duration || 0.5) * 1000));
17726
+ return await this.callInnerTool(() => sleep((args.duration || 200)));
17655
17727
  },
17656
17728
  },
17657
17729
  ];
@@ -17660,7 +17732,7 @@ class BaseComputerAgent extends Agent {
17660
17732
  let lastMessage = messages[messages.length - 1];
17661
17733
  if (lastMessage.role == "tool" &&
17662
17734
  lastMessage.content.filter((t) => t.type == "tool-result").length > 0) {
17663
- await sleep(200);
17735
+ await sleep(300);
17664
17736
  let result = await this.screenshot(agentContext);
17665
17737
  let image = toImage(result.imageBase64);
17666
17738
  messages.push({
@@ -17695,12 +17767,15 @@ class BaseComputerAgent extends Agent {
17695
17767
 
17696
17768
  class BaseBrowserAgent extends Agent {
17697
17769
  async go_back(agentContext) {
17698
- await this.execute_script(agentContext, () => {
17699
- return window.navigation.back();
17700
- }, []);
17701
- await sleep(200);
17770
+ try {
17771
+ await this.execute_script(agentContext, () => {
17772
+ window.navigation.back();
17773
+ }, []);
17774
+ await sleep(100);
17775
+ }
17776
+ catch (e) { }
17702
17777
  }
17703
- async extract_content(agentContext) {
17778
+ async extract_content(agentContext, variable_name) {
17704
17779
  let content = await this.execute_script(agentContext, () => {
17705
17780
  return window.document.body.innerText
17706
17781
  .replaceAll(/\n+/g, "\n")
@@ -17708,19 +17783,37 @@ class BaseBrowserAgent extends Agent {
17708
17783
  .trim();
17709
17784
  }, []);
17710
17785
  let pageInfo = await this.get_current_page(agentContext);
17711
- return `title: ${pageInfo.title}\npage_url: ${pageInfo.url}\npage_content: \n${content}`;
17786
+ let result = `title: ${pageInfo.title}\npage_url: ${pageInfo.url}\npage_content: \n${content}`;
17787
+ if (variable_name) {
17788
+ agentContext.context.variables.set(variable_name, result);
17789
+ }
17790
+ return result;
17712
17791
  }
17713
17792
  async controlMcpTools(agentContext, messages, loopNum) {
17714
- let url = (await this.get_current_page(agentContext)).url;
17715
- let lastUrl = agentContext.variables.get("lastUrl");
17716
- agentContext.variables.set("lastUrl", url);
17717
- return {
17718
- mcpTools: loopNum == 0 || url != lastUrl,
17719
- mcpParams: {
17720
- environment: "browser",
17721
- browser_url: url,
17722
- },
17723
- };
17793
+ if (loopNum > 0) {
17794
+ let url = null;
17795
+ try {
17796
+ url = (await this.get_current_page(agentContext)).url;
17797
+ }
17798
+ catch (e) { }
17799
+ let lastUrl = agentContext.variables.get("lastUrl");
17800
+ agentContext.variables.set("lastUrl", url);
17801
+ return {
17802
+ mcpTools: loopNum == 0 || url != lastUrl,
17803
+ mcpParams: {
17804
+ environment: "browser",
17805
+ browser_url: url,
17806
+ },
17807
+ };
17808
+ }
17809
+ else {
17810
+ return {
17811
+ mcpTools: true,
17812
+ mcpParams: {
17813
+ environment: "browser",
17814
+ },
17815
+ };
17816
+ }
17724
17817
  }
17725
17818
  toolExecuter(mcpClient, name) {
17726
17819
  return {
@@ -17734,7 +17827,7 @@ class BaseBrowserAgent extends Agent {
17734
17827
  environment: "browser",
17735
17828
  agent_name: agentContext.agent.Name,
17736
17829
  browser_url: agentContext.variables.get("lastUrl"),
17737
- }
17830
+ },
17738
17831
  });
17739
17832
  if (result.extInfo &&
17740
17833
  result.extInfo["javascript"] &&
@@ -17774,6 +17867,42 @@ class BaseBrowserAgent extends Agent {
17774
17867
  };
17775
17868
  }, []);
17776
17869
  }
17870
+ lastToolResult(messages) {
17871
+ let lastMessage = messages[messages.length - 1];
17872
+ if (lastMessage.role != "tool") {
17873
+ return null;
17874
+ }
17875
+ let toolResult = lastMessage.content.filter((t) => t.type == "tool-result")[0];
17876
+ if (!toolResult) {
17877
+ return null;
17878
+ }
17879
+ let result = toolResult.result;
17880
+ let isError = toolResult.isError;
17881
+ for (let i = messages.length - 2; i > 0; i--) {
17882
+ if (messages[i].role !== "assistant" ||
17883
+ typeof messages[i].content == "string") {
17884
+ continue;
17885
+ }
17886
+ for (let j = 0; j < messages[i].content.length; j++) {
17887
+ let content = messages[i].content[j];
17888
+ if (typeof content !== "string" && content.type !== "tool-call") {
17889
+ continue;
17890
+ }
17891
+ let toolUse = content;
17892
+ if (toolResult.toolCallId != toolUse.toolCallId) {
17893
+ continue;
17894
+ }
17895
+ return {
17896
+ id: toolResult.toolCallId,
17897
+ toolName: toolUse.toolName,
17898
+ args: toolUse.args,
17899
+ result,
17900
+ isError,
17901
+ };
17902
+ }
17903
+ }
17904
+ return null;
17905
+ }
17777
17906
  async execute_mcp_script(agentContext, script) {
17778
17907
  return;
17779
17908
  }
@@ -17800,7 +17929,7 @@ function run_build_dom_tree() {
17800
17929
  return window.clickable_elements[highlightIndex];
17801
17930
  }
17802
17931
  function remove_highlight() {
17803
- let highlight = document.getElementById('playwright-highlight-container');
17932
+ let highlight = document.getElementById('eko-highlight-container');
17804
17933
  if (highlight) {
17805
17934
  highlight.remove();
17806
17935
  }
@@ -17860,6 +17989,10 @@ function run_build_dom_tree() {
17860
17989
  for (let i = 0; i < includeAttributes.length; i++) {
17861
17990
  let key = includeAttributes[i];
17862
17991
  let value = node.attributes[key];
17992
+ if (key == "class" && value && value.length > 30) {
17993
+ let classList = value.split(" ").slice(0, 3);
17994
+ value = classList.join(" ");
17995
+ }
17863
17996
  if (key && value) {
17864
17997
  attributes_str += ` ${key}="${value}"`;
17865
17998
  }
@@ -17938,10 +18071,10 @@ function run_build_dom_tree() {
17938
18071
  let highlightIndex = 0; // Reset highlight index
17939
18072
  function highlightElement(element, index, parentIframe = null) {
17940
18073
  // Create or get highlight container
17941
- let container = document.getElementById('playwright-highlight-container');
18074
+ let container = document.getElementById('eko-highlight-container');
17942
18075
  if (!container) {
17943
18076
  container = document.createElement('div');
17944
- container.id = 'playwright-highlight-container';
18077
+ container.id = 'eko-highlight-container';
17945
18078
  container.style.position = 'fixed';
17946
18079
  container.style.pointerEvents = 'none';
17947
18080
  container.style.top = '0';
@@ -17994,7 +18127,7 @@ function run_build_dom_tree() {
17994
18127
  overlay.style.height = `${rect.height}px`;
17995
18128
  // Create label
17996
18129
  const label = document.createElement('div');
17997
- label.className = 'playwright-highlight-label';
18130
+ label.className = 'eko-highlight-label';
17998
18131
  label.style.position = 'absolute';
17999
18132
  label.style.background = baseColor;
18000
18133
  label.style.color = 'white';
@@ -18028,7 +18161,7 @@ function run_build_dom_tree() {
18028
18161
  container.appendChild(overlay);
18029
18162
  container.appendChild(label);
18030
18163
  // Store reference for cleanup
18031
- element.setAttribute('browser-user-highlight-id', `playwright-highlight-${index}`);
18164
+ element.setAttribute('eko-user-highlight-id', `eko-highlight-${index}`);
18032
18165
  return index + 1;
18033
18166
  }
18034
18167
  // Helper function to generate XPath as a tree
@@ -18373,15 +18506,17 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18373
18506
  - Screenshot help verify element positions and relationships. Labels may sometimes overlap, so extracted elements are used to verify the correct elements.
18374
18507
  - In addition to screenshot, simplified information about interactive elements is returned, with element indexes corresponding to those in the screenshot.
18375
18508
  - This tool can ONLY screenshot the VISIBLE content. If a complete content is required, use 'extract_content' instead.
18509
+ - If the webpage content hasn't loaded, please use the \`wait\` tool to allow time for the content to load.
18376
18510
  * ELEMENT INTERACTION:
18377
18511
  - Only use indexes that exist in the provided element list
18378
18512
  - Each element has a unique index number (e.g., "[33]:<button>")
18379
18513
  - Elements marked with "[]:" are non-interactive (for context only)
18380
- * NAVIGATION & ERROR HANDLING:
18514
+ * ERROR HANDLING:
18381
18515
  - If no suitable elements exist, use other functions to complete the task
18382
- - If stuck, try alternative approaches
18516
+ - If stuck, try alternative approaches, don't refuse tasks
18383
18517
  - Handle popups/cookies by accepting or closing them
18384
- - Use scroll to find elements you are looking for`;
18518
+ - Use scroll to find elements you are looking for
18519
+ - When extracting content, prioritize using extract_content, only scroll when you need to load more content`;
18385
18520
  const _tools_ = [];
18386
18521
  super({
18387
18522
  name: AGENT_NAME,
@@ -18399,6 +18534,9 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18399
18534
  }
18400
18535
  async input_text(agentContext, index, text, enter) {
18401
18536
  await this.execute_script(agentContext, typing, [{ index, text, enter }]);
18537
+ if (enter) {
18538
+ await sleep(200);
18539
+ }
18402
18540
  }
18403
18541
  async click_element(agentContext, index, num_clicks, button) {
18404
18542
  await this.execute_script(agentContext, do_click, [
@@ -18415,18 +18553,32 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18415
18553
  }
18416
18554
  async scroll_mouse_wheel(agentContext, amount) {
18417
18555
  await this.execute_script(agentContext, (amount) => {
18418
- window.scrollBy(0, amount * 50);
18556
+ let viewportHeight = window.innerHeight ||
18557
+ document.documentElement.clientHeight ||
18558
+ document.body.clientHeight;
18559
+ let y = Math.max(20, Math.min(viewportHeight / 10, 200));
18560
+ window.scrollBy(0, y * amount);
18419
18561
  }, [amount]);
18420
18562
  await sleep(200);
18421
18563
  }
18422
18564
  async hover_to_element(agentContext, index) {
18423
18565
  await this.execute_script(agentContext, hover_to, [{ index }]);
18424
18566
  }
18567
+ async get_select_options(agentContext, index) {
18568
+ return await this.execute_script(agentContext, get_select_options, [
18569
+ { index },
18570
+ ]);
18571
+ }
18572
+ async select_option(agentContext, index, option) {
18573
+ return await this.execute_script(agentContext, select_option, [
18574
+ { index, option },
18575
+ ]);
18576
+ }
18425
18577
  async screenshot_and_html(agentContext) {
18426
18578
  try {
18427
18579
  let element_result = null;
18428
18580
  for (let i = 0; i < 5; i++) {
18429
- await sleep(300);
18581
+ await sleep(200);
18430
18582
  await this.execute_script(agentContext, run_build_dom_tree, []);
18431
18583
  element_result = (await this.execute_script(agentContext, () => {
18432
18584
  return window.get_clickable_elements(true);
@@ -18435,7 +18587,9 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18435
18587
  break;
18436
18588
  }
18437
18589
  }
18590
+ await sleep(50);
18438
18591
  let screenshot = await this.screenshot(agentContext);
18592
+ // agentContext.variables.set("selector_map", element_result.selector_map);
18439
18593
  let pseudoHtml = element_result.element_str;
18440
18594
  return {
18441
18595
  imageBase64: screenshot.imageBase64,
@@ -18573,15 +18727,22 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18573
18727
  properties: {
18574
18728
  amount: {
18575
18729
  type: "number",
18576
- description: "Scroll amount (positive for up, negative for down)",
18577
- minimum: -10,
18730
+ description: "Scroll amount (up / down)",
18731
+ minimum: 1,
18578
18732
  maximum: 10,
18579
18733
  },
18734
+ direction: {
18735
+ type: "string",
18736
+ enum: ["up", "down"],
18737
+ },
18580
18738
  },
18581
- required: ["amount"],
18739
+ required: ["amount", "direction"],
18582
18740
  },
18583
18741
  execute: async (args, agentContext) => {
18584
- return await this.callInnerTool(() => this.scroll_mouse_wheel(agentContext, args.amount));
18742
+ return await this.callInnerTool(async () => {
18743
+ let amount = args.amount;
18744
+ await this.scroll_mouse_wheel(agentContext, args.direction == "up" ? -amount : amount);
18745
+ });
18585
18746
  },
18586
18747
  },
18587
18748
  {
@@ -18603,7 +18764,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18603
18764
  },
18604
18765
  {
18605
18766
  name: "extract_content",
18606
- description: "Extract the text content of the current webpage.",
18767
+ description: "Extract the text content of the current webpage, obtain webpage data through this tool.",
18607
18768
  parameters: {
18608
18769
  type: "object",
18609
18770
  properties: {},
@@ -18612,31 +18773,102 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18612
18773
  return await this.callInnerTool(() => this.extract_content(agentContext));
18613
18774
  },
18614
18775
  },
18776
+ {
18777
+ name: "get_select_options",
18778
+ description: "Get all options from a native dropdown element",
18779
+ parameters: {
18780
+ type: "object",
18781
+ properties: {
18782
+ index: {
18783
+ type: "number",
18784
+ description: "The index of the element to select",
18785
+ },
18786
+ },
18787
+ required: ["index"],
18788
+ },
18789
+ execute: async (args, agentContext) => {
18790
+ return await this.callInnerTool(() => this.get_select_options(agentContext, args.index));
18791
+ },
18792
+ },
18793
+ {
18794
+ name: "select_option",
18795
+ description: "Select the native dropdown option",
18796
+ parameters: {
18797
+ type: "object",
18798
+ properties: {
18799
+ index: {
18800
+ type: "number",
18801
+ description: "The index of the element to select",
18802
+ },
18803
+ option: {
18804
+ type: "string",
18805
+ description: "Text option",
18806
+ },
18807
+ },
18808
+ required: ["index", "option"],
18809
+ },
18810
+ execute: async (args, agentContext) => {
18811
+ return await this.callInnerTool(() => this.select_option(agentContext, args.index, args.option));
18812
+ },
18813
+ },
18814
+ {
18815
+ name: "get_all_tabs",
18816
+ description: "Get all tabs of the current browser",
18817
+ parameters: {
18818
+ type: "object",
18819
+ properties: {},
18820
+ },
18821
+ execute: async (args, agentContext) => {
18822
+ return await this.callInnerTool(() => this.get_all_tabs(agentContext));
18823
+ },
18824
+ },
18825
+ {
18826
+ name: "switch_tab",
18827
+ description: "Switch to the specified tab page",
18828
+ parameters: {
18829
+ type: "object",
18830
+ properties: {
18831
+ tabId: {
18832
+ type: "number",
18833
+ description: "Tab ID, obtained through get_all_tabs",
18834
+ },
18835
+ },
18836
+ required: ["tabId"],
18837
+ },
18838
+ execute: async (args, agentContext) => {
18839
+ return await this.callInnerTool(() => this.switch_tab(agentContext, args.tabId));
18840
+ },
18841
+ },
18615
18842
  {
18616
18843
  name: "wait",
18844
+ noPlan: true,
18617
18845
  description: "Wait for specified duration",
18618
18846
  parameters: {
18619
18847
  type: "object",
18620
18848
  properties: {
18621
18849
  duration: {
18622
18850
  type: "number",
18623
- description: "Duration in seconds",
18624
- default: 0.5,
18851
+ description: "Duration in millisecond",
18852
+ default: 500,
18853
+ minimum: 200,
18854
+ maximum: 2000,
18625
18855
  },
18626
18856
  },
18627
18857
  required: ["duration"],
18628
18858
  },
18629
18859
  execute: async (args, agentContext) => {
18630
- return await this.callInnerTool(() => sleep((args.duration || 0.5) * 1000));
18860
+ return await this.callInnerTool(() => sleep((args.duration || 200)));
18631
18861
  },
18632
18862
  },
18633
18863
  ];
18634
18864
  }
18635
18865
  async handleMessages(agentContext, messages) {
18636
- let lastMessage = messages[messages.length - 1];
18637
- if (lastMessage.role == "tool" &&
18638
- lastMessage.content.filter((t) => t.type == "tool-result").length > 0) {
18639
- await sleep(200);
18866
+ let lastTool = this.lastToolResult(messages);
18867
+ if (lastTool &&
18868
+ lastTool.toolName !== "extract_content" &&
18869
+ lastTool.toolName !== "get_all_tabs" &&
18870
+ lastTool.toolName !== "variable_storage") {
18871
+ await sleep(300);
18640
18872
  let result = await this.screenshot_and_html(agentContext);
18641
18873
  let image = toImage(result.imageBase64);
18642
18874
  messages.push({
@@ -18693,6 +18925,10 @@ function typing(params) {
18693
18925
  }
18694
18926
  else {
18695
18927
  input.value = text;
18928
+ if (input.__proto__) {
18929
+ let value_setter = Object.getOwnPropertyDescriptor(input.__proto__, "value")?.set;
18930
+ value_setter && value_setter.call(input, text);
18931
+ }
18696
18932
  }
18697
18933
  input.dispatchEvent(new Event("input", { bubbles: true }));
18698
18934
  if (enter) {
@@ -18752,6 +18988,45 @@ function hover_to(params) {
18752
18988
  element.dispatchEvent(event);
18753
18989
  return true;
18754
18990
  }
18991
/**
 * List the options of the element that `window.get_highlight_element` returns
 * for `params.index`.
 *
 * @param {{index: number}} params - `index` is forwarded to `window.get_highlight_element`.
 * @returns {string | {options: Array<{index: *, text: string, value: *}>, name: *}}
 *   The string "Error: Not a select element" when no element is found or its
 *   tag is not SELECT; otherwise the element's name plus one entry per option
 *   (text is trimmed).
 */
function get_select_options(params) {
    const target = window.get_highlight_element(params.index);
    const isSelect = Boolean(target) && target.tagName.toUpperCase() === "SELECT";
    if (!isSelect) {
        return "Error: Not a select element";
    }
    const options = [];
    for (const entry of Array.from(target.options)) {
        options.push({
            index: entry.index,
            text: entry.text.trim(),
            value: entry.value,
        });
    }
    return { options, name: target.name };
}
19005
/**
 * Select an option of the <select> element that `window.get_highlight_element`
 * returns for `params.index`.
 *
 * The requested option is matched against each option's trimmed text first and,
 * failing that, against its trimmed value.
 *
 * @param {{index: number, option: (string|number)}} params
 *   `index` is forwarded to `window.get_highlight_element`; `option` is the
 *   text (or value) to select and is coerced to a string before matching.
 * @returns {string | {success: boolean, error?: string, availableOptions?: string[],
 *   selectedValue?: string, selectedText?: string}}
 *   "Error: Not a select element" when the target is missing or not a SELECT;
 *   a `{ success: false, ... }` object listing the available option texts when
 *   nothing matches; otherwise `{ success: true, selectedValue, selectedText }`.
 */
function select_option(params) {
    const element = window.get_highlight_element(params.index);
    if (!element || element.tagName.toUpperCase() !== "SELECT") {
        return "Error: Not a select element";
    }
    // Coerce first: callers may pass a number (e.g. a year), which has no .trim().
    const wanted = String(params.option ?? "").trim();
    const options = Array.from(element.options);
    const option = options.find((opt) => opt.text.trim() === wanted) ||
        options.find((opt) => opt.value.trim() === wanted);
    if (!option) {
        return {
            success: false,
            error: "Select Option not found",
            availableOptions: options.map((o) => o.text.trim()),
        };
    }
    element.value = option.value;
    // A user-driven selection fires bubbling "input" and "change" events; events
    // created without { bubbles: true } never reach listeners attached to an
    // ancestor, so delegated handlers would miss the selection.
    element.dispatchEvent(new Event("input", { bubbles: true }));
    element.dispatchEvent(new Event("change", { bubbles: true }));
    return {
        success: true,
        selectedValue: option.value,
        selectedText: option.text.trim(),
    };
}
18755
19030
 
18756
19031
  class BaseBrowserScreenAgent extends BaseBrowserAgent {
18757
19032
  constructor(llms, ext_tools, mcpClient) {
@@ -18773,7 +19048,7 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
18773
19048
  tools: _tools_,
18774
19049
  llms: llms,
18775
19050
  mcpClient: mcpClient,
18776
- planDescription: "Browser operation agent, interact with the browser using the mouse and keyboard."
19051
+ planDescription: "Browser operation agent, interact with the browser using the mouse and keyboard.",
18777
19052
  });
18778
19053
  let init_tools = this.buildInitTools();
18779
19054
  if (ext_tools && ext_tools.length > 0) {
@@ -18901,20 +19176,27 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
18901
19176
  properties: {
18902
19177
  amount: {
18903
19178
  type: "number",
18904
- description: "Scroll amount (positive for up, negative for down)",
18905
- minimum: -10,
19179
+ description: "Scroll amount (up / down)",
19180
+ minimum: 1,
18906
19181
  maximum: 10,
18907
19182
  },
19183
+ direction: {
19184
+ type: "string",
19185
+ enum: ["up", "down"],
19186
+ },
18908
19187
  },
18909
- required: ["amount"],
19188
+ required: ["amount", "direction"],
18910
19189
  },
18911
19190
  execute: async (args, agentContext) => {
18912
- return await this.callInnerTool(() => this.scroll(agentContext, args.amount));
19191
+ return await this.callInnerTool(async () => {
19192
+ let amount = args.amount;
19193
+ await this.scroll(agentContext, args.direction == "up" ? -amount : amount);
19194
+ });
18913
19195
  },
18914
19196
  },
18915
19197
  {
18916
19198
  name: "extract_content",
18917
- description: "Extract the text content of the current webpage.",
19199
+ description: "Extract the text content of the current webpage, obtain webpage data through this tool.",
18918
19200
  parameters: {
18919
19201
  type: "object",
18920
19202
  properties: {},
@@ -18970,31 +19252,64 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
18970
19252
  return await this.callInnerTool(() => this.drag_and_drop(agentContext, args.x1, args.y1, args.x2, args.y2));
18971
19253
  },
18972
19254
  },
19255
+ {
19256
+ name: "get_all_tabs",
19257
+ description: "Get all tabs of the current browser",
19258
+ parameters: {
19259
+ type: "object",
19260
+ properties: {},
19261
+ },
19262
+ execute: async (args, agentContext) => {
19263
+ return await this.callInnerTool(() => this.get_all_tabs(agentContext));
19264
+ },
19265
+ },
19266
+ {
19267
+ name: "switch_tab",
19268
+ description: "Switch to the specified tab page",
19269
+ parameters: {
19270
+ type: "object",
19271
+ properties: {
19272
+ tabId: {
19273
+ type: "number",
19274
+ description: "Tab ID, obtained through get_all_tabs",
19275
+ },
19276
+ },
19277
+ required: ["tabId"],
19278
+ },
19279
+ execute: async (args, agentContext) => {
19280
+ return await this.callInnerTool(() => this.switch_tab(agentContext, args.tabId));
19281
+ },
19282
+ },
18973
19283
  {
18974
19284
  name: "wait",
19285
+ noPlan: true,
18975
19286
  description: "Wait for specified duration",
18976
19287
  parameters: {
18977
19288
  type: "object",
18978
19289
  properties: {
18979
19290
  duration: {
18980
19291
  type: "number",
18981
- description: "Duration in seconds",
18982
- default: 0.5,
19292
+ description: "Duration in millisecond",
19293
+ default: 500,
19294
+ minimum: 200,
19295
+ maximum: 2000,
18983
19296
  },
18984
19297
  },
18985
19298
  required: ["duration"],
18986
19299
  },
18987
19300
  execute: async (args, agentContext) => {
18988
- return await this.callInnerTool(() => sleep((args.duration || 0.5) * 1000));
19301
+ return await this.callInnerTool(() => sleep((args.duration || 200)));
18989
19302
  },
18990
19303
  },
18991
19304
  ];
18992
19305
  }
18993
19306
  async handleMessages(agentContext, messages) {
18994
- let lastMessage = messages[messages.length - 1];
18995
- if (lastMessage.role == "tool" &&
18996
- lastMessage.content.filter((t) => t.type == "tool-result").length > 0) {
18997
- await sleep(200);
19307
+ let lastTool = this.lastToolResult(messages);
19308
+ if (lastTool &&
19309
+ lastTool.toolName !== "extract_content" &&
19310
+ lastTool.toolName !== "get_all_tabs" &&
19311
+ lastTool.toolName !== "variable_storage") {
19312
+ await sleep(300);
18998
19313
  let result = await this.screenshot(agentContext);
18999
19314
  let image = toImage(result.imageBase64);
19000
19315
  messages.push({