@eko-ai/eko 2.0.2 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.esm.js CHANGED
@@ -1,7 +1,8 @@
1
1
  const config = {
2
2
  name: "Fellou",
3
3
  platform: "mac",
4
- maxReactNum: 100
4
+ maxReactNum: 100,
5
+ maxTokens: 16000
5
6
  };
6
7
 
7
8
  var LogLevel;
@@ -8556,6 +8557,15 @@ function mergeAgents(agents1, agents2) {
8556
8557
  }
8557
8558
  return tools;
8558
8559
  }
8560
+ function sub(str, maxLength, appendPoint = true) {
8561
+ if (!str) {
8562
+ return "";
8563
+ }
8564
+ if (str.length > maxLength) {
8565
+ return str.substring(0, maxLength) + (appendPoint ? "..." : "");
8566
+ }
8567
+ return str;
8568
+ }
8559
8569
  function fixXmlTag(code) {
8560
8570
  function fixDoubleChar(code) {
8561
8571
  const stack = [];
@@ -12728,7 +12738,7 @@ class RetryLanguageModel {
12728
12738
  constructor(llms, names, stream_first_timeout) {
12729
12739
  this.llms = llms;
12730
12740
  this.names = names || [];
12731
- this.stream_first_timeout = stream_first_timeout || 20000;
12741
+ this.stream_first_timeout = stream_first_timeout || 30000;
12732
12742
  if (this.names.indexOf("default") == -1) {
12733
12743
  this.names.push("default");
12734
12744
  }
@@ -12742,7 +12752,7 @@ class RetryLanguageModel {
12742
12752
  toolChoice: request.toolChoice,
12743
12753
  },
12744
12754
  prompt: request.messages,
12745
- maxTokens: request.maxTokens,
12755
+ maxTokens: request.maxTokens || config.maxTokens,
12746
12756
  temperature: request.temperature,
12747
12757
  topP: request.topP,
12748
12758
  topK: request.topK,
@@ -12765,6 +12775,9 @@ class RetryLanguageModel {
12765
12775
  return result;
12766
12776
  }
12767
12777
  catch (e) {
12778
+ if (e?.name === "AbortError") {
12779
+ throw e;
12780
+ }
12768
12781
  if (Log.isEnableInfo()) {
12769
12782
  Log.info(`LLM nonstream request, name: ${name} => `, {
12770
12783
  tools: options.mode?.tools,
@@ -12785,7 +12798,7 @@ class RetryLanguageModel {
12785
12798
  toolChoice: request.toolChoice,
12786
12799
  },
12787
12800
  prompt: request.messages,
12788
- maxTokens: request.maxTokens,
12801
+ maxTokens: request.maxTokens || config.maxTokens,
12789
12802
  temperature: request.temperature,
12790
12803
  topP: request.topP,
12791
12804
  topK: request.topK,
@@ -12801,12 +12814,19 @@ class RetryLanguageModel {
12801
12814
  continue;
12802
12815
  }
12803
12816
  try {
12804
- const result = await call_timeout(async () => await llm.doStream(options), this.stream_first_timeout);
12817
+ const controller = new AbortController();
12818
+ const signal = options.abortSignal
12819
+ ? AbortSignal.any([options.abortSignal, controller.signal])
12820
+ : controller.signal;
12821
+ const result = await call_timeout(async () => await llm.doStream({ ...options, abortSignal: signal }), this.stream_first_timeout, (e) => {
12822
+ controller.abort();
12823
+ });
12805
12824
  const stream = result.stream;
12806
12825
  const reader = stream.getReader();
12807
12826
  const { done, value } = await call_timeout(async () => await reader.read(), this.stream_first_timeout, (e) => {
12808
12827
  reader.cancel();
12809
12828
  reader.releaseLock();
12829
+ controller.abort();
12810
12830
  });
12811
12831
  if (done) {
12812
12832
  Log.warn(`LLM stream done, name: ${name} => `, { done, value });
@@ -12826,6 +12846,9 @@ class RetryLanguageModel {
12826
12846
  return result;
12827
12847
  }
12828
12848
  catch (e) {
12849
+ if (e?.name === "AbortError") {
12850
+ throw e;
12851
+ }
12829
12852
  if (Log.isEnableInfo()) {
12830
12853
  Log.info(`LLM stream request, name: ${name} => `, {
12831
12854
  tools: options.mode?.tools,
@@ -15624,6 +15647,7 @@ const TOOL_NAME$3 = "human_interact";
15624
15647
  class HumanInteractTool {
15625
15648
  constructor() {
15626
15649
  this.name = TOOL_NAME$3;
15650
+ this.noPlan = true;
15627
15651
  this.description = `AI interacts with humans:
15628
15652
  confirm: Ask the user to confirm whether to execute an operation, especially when performing dangerous actions such as deleting system files.
15629
15653
  input: Prompt the user to enter text; for example, when a task is ambiguous, the AI can choose to ask the user for details, and the user can respond by inputting.
@@ -15637,93 +15661,57 @@ request_help: Request assistance from the user; for instance, when an operation
15637
15661
  description: "The type of interaction with users.",
15638
15662
  enum: ["confirm", "input", "select", "request_help"],
15639
15663
  },
15640
- confirm: {
15641
- type: "object",
15642
- properties: {
15643
- prompt: {
15644
- type: "string",
15645
- description: "Display prompts to users",
15646
- },
15647
- },
15648
- required: ["prompt"],
15664
+ prompt: {
15665
+ type: "string",
15666
+ description: "Display prompts to users",
15649
15667
  },
15650
- input: {
15651
- type: "object",
15652
- properties: {
15653
- prompt: {
15654
- type: "string",
15655
- description: "Display prompts to users",
15656
- },
15668
+ selectOptions: {
15669
+ type: "array",
15670
+ description: "Options provided to users, this parameter is required when interactType is select.",
15671
+ items: {
15672
+ type: "string",
15657
15673
  },
15658
- required: ["prompt"],
15659
15674
  },
15660
- select: {
15661
- type: "object",
15662
- properties: {
15663
- prompt: {
15664
- type: "string",
15665
- description: "Display prompts to users",
15666
- },
15667
- options: {
15668
- type: "array",
15669
- description: "Options provided to the user",
15670
- items: {
15671
- type: "string",
15672
- },
15673
- },
15674
- multiple: {
15675
- type: "boolean",
15676
- },
15677
- },
15678
- required: ["prompt", "options"],
15675
+ selectMultiple: {
15676
+ type: "boolean",
15677
+ description: "isMultiple, used when interactType is select",
15679
15678
  },
15680
- request_help: {
15681
- type: "object",
15682
- properties: {
15683
- helpType: {
15684
- type: "string",
15685
- description: "Display prompts to users",
15686
- enum: ["request_login", "request_assistance"],
15687
- },
15688
- prompt: {
15689
- type: "string",
15690
- description: "Display prompts to users",
15691
- },
15692
- },
15693
- required: ["helpType", "prompt"],
15679
+ helpType: {
15680
+ type: "string",
15681
+ description: "Help type, required when interactType is request_help.",
15682
+ enum: ["request_login", "request_assistance"],
15694
15683
  },
15695
15684
  },
15696
- required: ["interactType"],
15685
+ required: ["interactType", "prompt"],
15697
15686
  };
15698
15687
  }
15699
15688
  async execute(args, agentContext) {
15700
15689
  let interactType = args.interactType;
15701
- let interact = args[interactType];
15702
15690
  let callback = agentContext.context.config.callback;
15703
15691
  let resultText = "";
15704
15692
  if (callback) {
15705
15693
  switch (interactType) {
15706
15694
  case "confirm":
15707
15695
  if (callback.onHumanConfirm) {
15708
- let result = await callback.onHumanConfirm(agentContext, interact.prompt);
15696
+ let result = await callback.onHumanConfirm(agentContext, args.prompt);
15709
15697
  resultText = `confirm result: ${result ? "Yes" : "No"}`;
15710
15698
  }
15711
15699
  break;
15712
15700
  case "input":
15713
15701
  if (callback.onHumanInput) {
15714
- let result = await callback.onHumanInput(agentContext, interact.prompt);
15702
+ let result = await callback.onHumanInput(agentContext, args.prompt);
15715
15703
  resultText = `input result: ${result}`;
15716
15704
  }
15717
15705
  break;
15718
15706
  case "select":
15719
15707
  if (callback.onHumanSelect) {
15720
- let result = await callback.onHumanSelect(agentContext, interact.prompt, interact.options, interact.multiple);
15708
+ let result = await callback.onHumanSelect(agentContext, args.prompt, (args.selectOptions || []), (args.selectMultiple || false));
15721
15709
  resultText = `select result: ${JSON.stringify(result)}`;
15722
15710
  }
15723
15711
  break;
15724
15712
  case "request_help":
15725
15713
  if (callback.onHumanHelp) {
15726
- let result = await callback.onHumanHelp(agentContext, interact.helpType, interact.prompt);
15714
+ let result = await callback.onHumanHelp(agentContext, (args.helpType || "request_assistance"), args.prompt);
15727
15715
  resultText = `request_help result: ${result ? "Solved" : "Unresolved"}`;
15728
15716
  }
15729
15717
  break;
@@ -15897,7 +15885,7 @@ class WatchTriggerTool {
15897
15885
  };
15898
15886
  }
15899
15887
  async execute(args, agentContext) {
15900
- // TODO 监听 dom 文件 改变,执行节点
15888
+ // TODO Listen for changes to the DOM or file, and execute nodes
15901
15889
  return null;
15902
15890
  }
15903
15891
  }
@@ -15936,11 +15924,15 @@ UTC datetime: {datetime}
15936
15924
  </root>
15937
15925
  `;
15938
15926
  const HUMAN_PROMPT = `
15939
- During the task execution process, you can use the \`${TOOL_NAME$3}\` tool to interact with humans. Please do not abuse this tool to harass humans. Please call it in the following situations:
15927
+ * HUMAN INTERACT
15928
+ During the task execution process, you can use the \`${TOOL_NAME$3}\` tool to interact with humans, please call it in the following situations:
15940
15929
  - When performing dangerous operations such as deleting files, confirmation from humans is required
15941
15930
  - When encountering obstacles while accessing websites, such as requiring user login, you need to request human assistance
15931
+ - When requesting login, please only call the function when a login dialog box is clearly displayed.
15932
+ - Try not to use the \`${TOOL_NAME$3}\` tool
15942
15933
  `;
15943
15934
  const VARIABLE_PROMPT = `
15935
+ * VARIABLE STORAGE
15944
15936
  If you need to read and write the input/output variables in the node, require the use of the \`${TOOL_NAME$1}\` tool.
15945
15937
  `;
15946
15938
  const FOR_EACH_NODE = `
@@ -15965,14 +15957,18 @@ const WATCH_NODE = `
15965
15957
  const WATCH_PROMPT = `
15966
15958
  \`watch\`: monitor changes in webpage DOM or file content, when executing to the watch node, require the use of the \`${TOOL_NAME}\` tool.
15967
15959
  `;
15968
- function getAgentSystemPrompt(agent, agentNode, context, systemPrompt) {
15969
- let prompt = "";
15960
+ function getAgentSystemPrompt(agent, agentNode, context, tools, extSysPrompt) {
15961
+ let prompt = extSysPrompt || "";
15970
15962
  let nodePrompt = "";
15971
15963
  let agentNodeXml = agentNode.xml;
15972
- let hasForEach = agentNodeXml.indexOf("</forEach>") > -1;
15973
15964
  let hasWatch = agentNodeXml.indexOf("</watch>") > -1;
15974
- let hasVariable = agentNodeXml.indexOf(" input=") > -1 || agentNodeXml.indexOf(" output=") > -1;
15975
- let hasHumanTool = agent.Tools.filter((tool) => tool.name == TOOL_NAME$3).length > 0;
15965
+ let hasForEach = agentNodeXml.indexOf("</forEach>") > -1;
15966
+ let hasHumanTool = (tools || agent.Tools).filter((tool) => tool.name == TOOL_NAME$3)
15967
+ .length > 0;
15968
+ let hasVariable = agentNodeXml.indexOf("input=") > -1 ||
15969
+ agentNodeXml.indexOf("output=") > -1 ||
15970
+ (tools || agent.Tools).filter((tool) => tool.name == TOOL_NAME$1)
15971
+ .length > 0;
15976
15972
  if (hasHumanTool) {
15977
15973
  prompt += HUMAN_PROMPT;
15978
15974
  }
@@ -15987,8 +15983,17 @@ function getAgentSystemPrompt(agent, agentNode, context, systemPrompt) {
15987
15983
  prompt += WATCH_PROMPT;
15988
15984
  nodePrompt += WATCH_NODE;
15989
15985
  }
15990
- return (systemPrompt || AGENT_SYSTEM_TEMPLATE)
15991
- .replace("{name}", config.name)
15986
+ if (context.chain.agents.length > 1) {
15987
+ prompt += "\n Main task: " + context.chain.taskPrompt;
15988
+ prompt += "\n# Pre-task execution results";
15989
+ for (let i = 0; i < context.chain.agents.length; i++) {
15990
+ let agentChain = context.chain.agents[i];
15991
+ if (agentChain.agentResult) {
15992
+ prompt += `\n## ${agentChain.agent.task || agentChain.agent.name}\n${sub(agentChain.agentResult, 500)}`;
15993
+ }
15994
+ }
15995
+ }
15996
+ return AGENT_SYSTEM_TEMPLATE.replace("{name}", config.name)
15992
15997
  .replace("{agent}", agent.Name)
15993
15998
  .replace("{description}", agent.Description)
15994
15999
  .replace("{datetime}", new Date().toISOString())
@@ -15996,8 +16001,9 @@ function getAgentSystemPrompt(agent, agentNode, context, systemPrompt) {
15996
16001
  .replace("{nodePrompt}", nodePrompt)
15997
16002
  .trim();
15998
16003
  }
15999
- function getAgentUserPrompt(agent, agentNode, context) {
16000
- let hasTaskNodeStatusTool = agent.Tools.filter((tool) => tool.name == TOOL_NAME$2).length > 0;
16004
+ function getAgentUserPrompt(agent, agentNode, context, tools) {
16005
+ let hasTaskNodeStatusTool = (tools || agent.Tools).filter((tool) => tool.name == TOOL_NAME$2)
16006
+ .length > 0;
16001
16007
  return buildAgentRootXml(agentNode.xml, context.chain.taskPrompt, (nodeId, node) => {
16002
16008
  if (hasTaskNodeStatusTool) {
16003
16009
  node.setAttribute("status", "todo");
@@ -16030,9 +16036,10 @@ class Agent {
16030
16036
  let loopNum = 0;
16031
16037
  let context = agentContext.context;
16032
16038
  let agentNode = agentContext.agentChain.agent;
16033
- let messages = this.initMessages(agentContext);
16039
+ const tools = [...this.tools, ...this.system_auto_tools(agentNode)];
16040
+ let messages = await this.initMessages(agentContext, tools);
16034
16041
  let rlm = new RetryLanguageModel(context.config.llms, this.llms);
16035
- let agentTools = [...this.tools, ...this.system_auto_tools(agentNode)];
16042
+ let agentTools = tools;
16036
16043
  while (loopNum < maxReactNum) {
16037
16044
  context.checkAborted();
16038
16045
  if (mcpClient) {
@@ -16040,7 +16047,7 @@ class Agent {
16040
16047
  if (controlMcp.mcpTools) {
16041
16048
  let mcpTools = await this.listTools(agentNode, context, mcpClient, controlMcp.mcpParams);
16042
16049
  let usedTools = this.extractUsedTool(messages, agentTools);
16043
- let _agentTools = mergeTools(this.tools, usedTools);
16050
+ let _agentTools = mergeTools(tools, usedTools);
16044
16051
  agentTools = mergeTools(_agentTools, mcpTools);
16045
16052
  }
16046
16053
  }
@@ -16052,7 +16059,7 @@ class Agent {
16052
16059
  }
16053
16060
  loopNum++;
16054
16061
  }
16055
- return null;
16062
+ return "Unfinished";
16056
16063
  }
16057
16064
  async handleResult(agentContext, messages, agentTools, results) {
16058
16065
  let text = null;
@@ -16132,8 +16139,8 @@ class Agent {
16132
16139
  system_auto_tools(agentNode) {
16133
16140
  let tools = [];
16134
16141
  let agentNodeXml = agentNode.xml;
16135
- let hasVariable = agentNodeXml.indexOf(" input=") > -1 ||
16136
- agentNodeXml.indexOf(" output=") > -1;
16142
+ let hasVariable = agentNodeXml.indexOf("input=") > -1 ||
16143
+ agentNodeXml.indexOf("output=") > -1;
16137
16144
  if (hasVariable) {
16138
16145
  tools.push(new VariableStorageTool());
16139
16146
  }
@@ -16162,24 +16169,27 @@ class Agent {
16162
16169
  }
16163
16170
  return _results;
16164
16171
  }
16165
- initMessages(agentContext) {
16172
+ async initMessages(agentContext, tools) {
16166
16173
  let messages = [
16167
16174
  {
16168
16175
  role: "system",
16169
- content: getAgentSystemPrompt(this, agentContext.agentChain.agent, agentContext.context),
16176
+ content: getAgentSystemPrompt(this, agentContext.agentChain.agent, agentContext.context, tools, await this.extSysPrompt(agentContext)),
16170
16177
  },
16171
16178
  {
16172
16179
  role: "user",
16173
16180
  content: [
16174
16181
  {
16175
16182
  type: "text",
16176
- text: getAgentUserPrompt(this, agentContext.agentChain.agent, agentContext.context),
16183
+ text: getAgentUserPrompt(this, agentContext.agentChain.agent, agentContext.context, tools),
16177
16184
  },
16178
16185
  ],
16179
16186
  },
16180
16187
  ];
16181
16188
  return messages;
16182
16189
  }
16190
+ async extSysPrompt(agentContext) {
16191
+ return "";
16192
+ }
16183
16193
  async listTools(agentNode, context, mcpClient, mcpParams) {
16184
16194
  let list = await mcpClient.listTools({
16185
16195
  taskId: context.taskId,
@@ -16215,7 +16225,7 @@ class Agent {
16215
16225
  nodeId: agentContext.agentChain.agent.id,
16216
16226
  environment: config.platform,
16217
16227
  agent_name: agentContext.agent.Name,
16218
- }
16228
+ },
16219
16229
  });
16220
16230
  },
16221
16231
  };
@@ -16243,7 +16253,7 @@ class Agent {
16243
16253
  let message = messages[i];
16244
16254
  if (message.role == "tool") {
16245
16255
  for (let j = 0; j < message.content.length; j++) {
16246
- let toolName = message.content[i].toolName;
16256
+ let toolName = message.content[j].toolName;
16247
16257
  if (toolNames.indexOf(toolName) > -1) {
16248
16258
  continue;
16249
16259
  }
@@ -16287,12 +16297,17 @@ class Agent {
16287
16297
  else if (!isError && text.length == 0) {
16288
16298
  text = "Successful";
16289
16299
  }
16290
- let result = { result: text };
16300
+ let contentText = {
16301
+ type: "text",
16302
+ text: text,
16303
+ };
16304
+ let result = text;
16291
16305
  if (text &&
16292
16306
  ((text.startsWith("{") && text.endsWith("}")) ||
16293
16307
  (text.startsWith("[") && text.endsWith("]")))) {
16294
16308
  try {
16295
16309
  result = JSON.parse(text);
16310
+ contentText = null;
16296
16311
  }
16297
16312
  catch (e) { }
16298
16313
  }
@@ -16301,6 +16316,7 @@ class Agent {
16301
16316
  toolCallId: toolUse.toolCallId,
16302
16317
  toolName: toolUse.toolName,
16303
16318
  result: result,
16319
+ content: contentText ? [contentText] : undefined,
16304
16320
  isError: isError,
16305
16321
  };
16306
16322
  }
@@ -16575,7 +16591,7 @@ Your task is to understand the user's requirements, dynamically plan the user's
16575
16591
  2. Analyze the Agents that need to be used based on the user's requirements.
16576
16592
  3. Generate the Agent calling plan based on the analysis results.
16577
16593
  4. About agent name, please do not arbitrarily fabricate non-existent agent names.
16578
- 5. You only need to provide the steps to complete the user's task, steps are simple and straightforward, no need for too many specific details.
16594
+ 5. You only need to provide the steps to complete the user's task, key steps only, no need to be too detailed.
16579
16595
  6. Please strictly follow the output format and example output.
16580
16596
  7. The output language should follow the language corresponding to the user's task.
16581
16597
 
@@ -16726,15 +16742,22 @@ const PLAN_USER_TEMPLATE = `
16726
16742
  User Platform: {platform}
16727
16743
  Task Description: {taskPrompt}
16728
16744
  `;
16745
+ const PLAN_USER_TASK_WEBSITE_TEMPLATE = `
16746
+ User Platform: {platform}
16747
+ Task Website: {task_website}
16748
+ Task Description: {taskPrompt}
16749
+ `;
16729
16750
  function getPlanSystemPrompt(agents) {
16730
16751
  let agents_prompt = agents
16731
16752
  .map((agent) => {
16732
16753
  return (`<agent name="${agent.Name}">\n` +
16733
16754
  `Description: ${agent.PlanDescription || agent.Description}\nTools:\n` +
16734
- agent.Tools.map((tool) => `- ${tool.name}: ${tool.description || ""}`).join("\n") +
16755
+ agent.Tools.filter((tool) => !tool.noPlan)
16756
+ .map((tool) => `- ${tool.name}: ${tool.planDescription || tool.description || ""}`)
16757
+ .join("\n") +
16735
16758
  `\n</agent>`);
16736
16759
  })
16737
- .join("\n");
16760
+ .join("\n\n");
16738
16761
  let example_prompt = "";
16739
16762
  let hasChatAgent = agents.filter((a) => a.Name == AGENT_NAME$4).length > 0;
16740
16763
  const example_list = hasChatAgent
@@ -16749,10 +16772,18 @@ function getPlanSystemPrompt(agents) {
16749
16772
  .replace("{example_prompt}", example_prompt)
16750
16773
  .trim();
16751
16774
  }
16752
- function getPlanUserPrompt(taskPrompt) {
16753
- return PLAN_USER_TEMPLATE.replace("{taskPrompt}", taskPrompt)
16754
- .replace("{platform}", config.platform)
16755
- .trim();
16775
+ function getPlanUserPrompt(taskPrompt, task_website) {
16776
+ if (task_website) {
16777
+ return PLAN_USER_TASK_WEBSITE_TEMPLATE.replace("{taskPrompt}", taskPrompt)
16778
+ .replace("{platform}", config.platform)
16779
+ .replace("{task_website}", task_website)
16780
+ .trim();
16781
+ }
16782
+ else {
16783
+ return PLAN_USER_TEMPLATE.replace("{taskPrompt}", taskPrompt)
16784
+ .replace("{platform}", config.platform)
16785
+ .trim();
16786
+ }
16756
16787
  }
16757
16788
 
16758
16789
  class Planner {
@@ -16781,7 +16812,7 @@ class Planner {
16781
16812
  {
16782
16813
  role: "user",
16783
16814
  content: [{ type: "text", text: taskPrompt }],
16784
- }
16815
+ },
16785
16816
  ];
16786
16817
  }
16787
16818
  else {
@@ -16789,12 +16820,17 @@ class Planner {
16789
16820
  { role: "system", content: getPlanSystemPrompt(this.context.agents) },
16790
16821
  {
16791
16822
  role: "user",
16792
- content: [{ type: "text", text: getPlanUserPrompt(taskPrompt) }],
16823
+ content: [
16824
+ {
16825
+ type: "text",
16826
+ text: getPlanUserPrompt(taskPrompt, this.context.variables.get("task_website")),
16827
+ },
16828
+ ],
16793
16829
  },
16794
16830
  ];
16795
16831
  }
16796
16832
  let request = {
16797
- maxTokens: 1024,
16833
+ maxTokens: 4096,
16798
16834
  temperature: 0.7,
16799
16835
  messages: messages,
16800
16836
  abortSignal: this.context.controller.signal,
@@ -16898,30 +16934,33 @@ class Eko {
16898
16934
  throw new Error("The task does not exist");
16899
16935
  }
16900
16936
  try {
16901
- return this.doRunWorkflow(context);
16937
+ return await this.doRunWorkflow(context);
16902
16938
  }
16903
16939
  catch (e) {
16904
16940
  return {
16941
+ taskId,
16905
16942
  success: false,
16906
16943
  stopReason: e?.name == "AbortError" ? "abort" : "error",
16907
16944
  result: e,
16908
16945
  };
16909
16946
  }
16910
- finally {
16911
- this.deleteTask(taskId);
16912
- }
16913
16947
  }
16914
16948
  async run(taskPrompt, taskId = uuidv4(), contextParams) {
16915
16949
  await this.generate(taskPrompt, taskId, contextParams);
16916
16950
  return await this.execute(taskId);
16917
16951
  }
16918
16952
  async initContext(workflow, contextParams) {
16919
- const agents = [...(this.config.agents || [])];
16953
+ const agents = this.config.agents || [];
16920
16954
  let chain = new Chain(workflow.taskPrompt || workflow.name);
16921
16955
  let context = new Context(workflow.taskId, this.config, agents, chain);
16956
+ if (this.config.a2aClient) {
16957
+ let a2aList = await this.config.a2aClient.listAgents(workflow.taskPrompt || workflow.name);
16958
+ context.agents = mergeAgents(context.agents, a2aList);
16959
+ }
16922
16960
  if (contextParams) {
16923
16961
  Object.keys(contextParams).forEach((key) => context.variables.set(key, contextParams[key]));
16924
16962
  }
16963
+ context.workflow = workflow;
16925
16964
  this.taskMap.set(workflow.taskId, context);
16926
16965
  return context;
16927
16966
  }
@@ -16935,7 +16974,7 @@ class Eko {
16935
16974
  map[item.Name] = item;
16936
16975
  return map;
16937
16976
  }, {});
16938
- let lastResult;
16977
+ let results = [];
16939
16978
  for (let i = 0; i < workflow.agents.length; i++) {
16940
16979
  context.checkAborted();
16941
16980
  let agentNode = workflow.agents[i];
@@ -16946,18 +16985,21 @@ class Eko {
16946
16985
  let agentChain = new AgentChain(agentNode);
16947
16986
  context.chain.push(agentChain);
16948
16987
  agent.result = await agent.run(context, agentChain);
16949
- lastResult = agent.result;
16988
+ results.push(agent.result);
16950
16989
  }
16951
- // TODO 超过2个Agent时需要summary输出结果。
16952
16990
  return {
16953
16991
  success: true,
16954
16992
  stopReason: "done",
16955
- result: lastResult,
16993
+ result: results[results.length - 1],
16994
+ taskId: context.taskId,
16956
16995
  };
16957
16996
  }
16958
16997
  getTask(taskId) {
16959
16998
  return this.taskMap.get(taskId);
16960
16999
  }
17000
+ getAllTaskId() {
17001
+ return [...this.taskMap.keys()];
17002
+ }
16961
17003
  deleteTask(taskId) {
16962
17004
  return this.taskMap.delete(taskId);
16963
17005
  }
@@ -17202,18 +17244,17 @@ function parseChunk(chunk) {
17202
17244
 
17203
17245
  const AGENT_NAME$3 = "File";
17204
17246
  class BaseFileAgent extends Agent {
17205
- constructor(work_path, llms, ext_tools, mcpClient) {
17247
+ constructor(work_path, llms, ext_tools, mcpClient, planDescription) {
17206
17248
  const _tools_ = [];
17207
- const prompt = work_path
17208
- ? `Your default working path is: ${work_path}`
17209
- : "";
17249
+ const prompt = work_path ? `Your default working path is: ${work_path}` : "";
17210
17250
  super({
17211
17251
  name: AGENT_NAME$3,
17212
17252
  description: `You are a file agent, handling file-related tasks such as creating, finding, reading, modifying files, etc.${prompt}`,
17213
17253
  tools: _tools_,
17214
17254
  llms: llms,
17215
17255
  mcpClient: mcpClient,
17216
- planDescription: "File operation agent, handling file-related tasks such as creating, finding, reading, modifying files, etc.",
17256
+ planDescription: planDescription ||
17257
+ "File operation agent, handling file-related tasks such as creating, finding, reading, modifying files, etc, only text file writing is supported.",
17217
17258
  });
17218
17259
  let init_tools = this.buildInitTools();
17219
17260
  if (ext_tools && ext_tools.length > 0) {
@@ -17259,7 +17300,7 @@ class BaseFileAgent extends Agent {
17259
17300
  },
17260
17301
  {
17261
17302
  name: "file_write",
17262
- description: "Overwrite or append content to a file. Use for creating new files, appending content, or modifying existing files.",
17303
+ description: "Overwrite or append content to a file. Use for creating new files, appending content, or modifying existing files, only supports txt/md/csv or other text formats.",
17263
17304
  parameters: {
17264
17305
  type: "object",
17265
17306
  properties: {
@@ -17335,7 +17376,7 @@ class BaseFileAgent extends Agent {
17335
17376
 
17336
17377
  const AGENT_NAME$2 = "Shell";
17337
17378
  class BaseShellAgent extends Agent {
17338
- constructor(llms, ext_tools, mcpClient) {
17379
+ constructor(llms, ext_tools, mcpClient, planDescription) {
17339
17380
  const _tools_ = [];
17340
17381
  super({
17341
17382
  name: AGENT_NAME$2,
@@ -17343,7 +17384,7 @@ class BaseShellAgent extends Agent {
17343
17384
  tools: _tools_,
17344
17385
  llms: llms,
17345
17386
  mcpClient: mcpClient,
17346
- planDescription: "Shell command agent, use to execute shell commands.",
17387
+ planDescription: planDescription || "Shell command agent, use to execute shell commands.",
17347
17388
  });
17348
17389
  let init_tools = this.buildInitTools();
17349
17390
  if (ext_tools && ext_tools.length > 0) {
@@ -17417,34 +17458,55 @@ class BaseTimerAgent extends Agent {
17417
17458
 
17418
17459
  const AGENT_NAME$1 = "Computer";
17419
17460
  class BaseComputerAgent extends Agent {
17420
- constructor(llms, ext_tools, mcpClient) {
17461
+ constructor(llms, ext_tools, mcpClient, keyboardKeys) {
17421
17462
  const _tools_ = [];
17422
17463
  super({
17423
17464
  name: AGENT_NAME$1,
17424
- description: "You are a computer operation agent, who interacts with the computer using mouse and keyboard, completing specified tasks step by step based on the given tasks and screenshots. After each of your operations, you will receive the latest computer screenshot to evaluate the task execution status.",
17465
+ description: `You are a computer operation agent, who interacts with the computer using mouse and keyboard, completing specified tasks step by step based on the given tasks and screenshots. After each of your operations, you will receive the latest computer screenshot to evaluate the task execution status.
17466
+ This is a computer GUI interface, observe the execution through screenshots, and specify action sequences to complete designated tasks.
17467
+ * COMPUTER OPERATIONS:
17468
+ - You can operate the application using shortcuts.
17469
+ - If stuck, try alternative approaches`,
17425
17470
  tools: _tools_,
17426
17471
  llms: llms,
17427
17472
  mcpClient: mcpClient,
17428
- planDescription: "Computer operation agent, interact with the computer using the mouse and keyboard."
17473
+ planDescription: "Computer operation agent, interact with the computer using the mouse and keyboard, operation application."
17429
17474
  });
17430
- this.keyboardKeys = [
17431
- 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
17432
- 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
17433
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
17434
- 'enter', 'esc', 'backspace', 'tab', 'space', 'delete',
17435
- 'ctrl', 'alt', 'shift', 'win',
17436
- 'up', 'down', 'left', 'right',
17437
- 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12',
17438
- 'ctrl+c', 'ctrl+v', 'ctrl+x', 'ctrl+z', 'ctrl+a', 'ctrl+s',
17439
- 'alt+tab', 'alt+f4', 'ctrl+alt+delete'
17440
- ];
17441
- let init_tools = this.buildInitTools();
17475
+ if (!keyboardKeys) {
17476
+ if (config.platform == "windows") {
17477
+ keyboardKeys = [
17478
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
17479
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
17480
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
17481
+ 'enter', 'esc', 'backspace', 'tab', 'space', 'delete',
17482
+ 'ctrl', 'alt', 'shift', 'win',
17483
+ 'up', 'down', 'left', 'right',
17484
+ 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12',
17485
+ 'ctrl+c', 'ctrl+v', 'ctrl+x', 'ctrl+z', 'ctrl+a', 'ctrl+s',
17486
+ 'alt+tab', 'alt+f4', 'ctrl+alt+delete'
17487
+ ];
17488
+ }
17489
+ else {
17490
+ keyboardKeys = [
17491
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
17492
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
17493
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
17494
+ 'enter', 'esc', 'backspace', 'tab', 'space', 'delete',
17495
+ 'command', 'option', 'shift', 'control',
17496
+ 'up', 'down', 'left', 'right',
17497
+ 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12',
17498
+ 'command+c', 'command+v', 'command+x', 'command+z', 'command+a', 'command+s',
17499
+ 'command+tab', 'command+q', 'command+escape'
17500
+ ];
17501
+ }
17502
+ }
17503
+ let init_tools = this.buildInitTools(keyboardKeys);
17442
17504
  if (ext_tools && ext_tools.length > 0) {
17443
17505
  init_tools = mergeTools(init_tools, ext_tools);
17444
17506
  }
17445
17507
  init_tools.forEach((tool) => _tools_.push(tool));
17446
17508
  }
17447
- buildInitTools() {
17509
+ buildInitTools(keyboardKeys) {
17448
17510
  return [
17449
17511
  {
17450
17512
  name: "typing",
@@ -17525,15 +17587,22 @@ class BaseComputerAgent extends Agent {
17525
17587
  properties: {
17526
17588
  amount: {
17527
17589
  type: "number",
17528
- description: "Scroll amount (positive for up, negative for down)",
17529
- minimum: -10,
17590
+ description: "Scroll amount (up / down)",
17591
+ minimum: 1,
17530
17592
  maximum: 10,
17531
17593
  },
17594
+ direction: {
17595
+ type: "string",
17596
+ enum: ["up", "down"],
17597
+ },
17532
17598
  },
17533
- required: ["amount"],
17599
+ required: ["amount", "direction"],
17534
17600
  },
17535
17601
  execute: async (args, agentContext) => {
17536
- return await this.callInnerTool(() => this.scroll(agentContext, args.amount));
17602
+ return await this.callInnerTool(async () => {
17603
+ let amount = args.amount;
17604
+ await this.scroll(agentContext, args.direction == "up" ? -amount : amount);
17605
+ });
17537
17606
  },
17538
17607
  },
17539
17608
  {
@@ -17545,7 +17614,7 @@ class BaseComputerAgent extends Agent {
17545
17614
  key: {
17546
17615
  type: "string",
17547
17616
  description: "Key to press",
17548
- enum: this.keyboardKeys,
17617
+ enum: keyboardKeys,
17549
17618
  },
17550
17619
  },
17551
17620
  required: ["key"],
@@ -17563,7 +17632,7 @@ class BaseComputerAgent extends Agent {
17563
17632
  keys: {
17564
17633
  type: "string",
17565
17634
  description: "Key combination to press",
17566
- enum: this.keyboardKeys,
17635
+ enum: keyboardKeys,
17567
17636
  },
17568
17637
  },
17569
17638
  required: ["keys"],
@@ -17603,20 +17672,23 @@ class BaseComputerAgent extends Agent {
17603
17672
  },
17604
17673
  {
17605
17674
  name: "wait",
17675
+ noPlan: true,
17606
17676
  description: "Wait for specified duration",
17607
17677
  parameters: {
17608
17678
  type: "object",
17609
17679
  properties: {
17610
17680
  duration: {
17611
17681
  type: "number",
17612
- description: "Duration in seconds",
17613
- default: 0.5,
17682
+ description: "Duration in millisecond",
17683
+ default: 500,
17684
+ minimum: 200,
17685
+ maximum: 2000,
17614
17686
  },
17615
17687
  },
17616
17688
  required: ["duration"],
17617
17689
  },
17618
17690
  execute: async (args, agentContext) => {
17619
- return await this.callInnerTool(() => sleep((args.duration || 0.5) * 1000));
17691
+ return await this.callInnerTool(() => sleep((args.duration || 200)));
17620
17692
  },
17621
17693
  },
17622
17694
  ];
@@ -17625,7 +17697,7 @@ class BaseComputerAgent extends Agent {
17625
17697
  let lastMessage = messages[messages.length - 1];
17626
17698
  if (lastMessage.role == "tool" &&
17627
17699
  lastMessage.content.filter((t) => t.type == "tool-result").length > 0) {
17628
- await sleep(200);
17700
+ await sleep(300);
17629
17701
  let result = await this.screenshot(agentContext);
17630
17702
  let image = toImage(result.imageBase64);
17631
17703
  messages.push({
@@ -17660,12 +17732,15 @@ class BaseComputerAgent extends Agent {
17660
17732
 
17661
17733
  class BaseBrowserAgent extends Agent {
17662
17734
  async go_back(agentContext) {
17663
- await this.execute_script(agentContext, () => {
17664
- return window.navigation.back();
17665
- }, []);
17666
- await sleep(200);
17735
+ try {
17736
+ await this.execute_script(agentContext, () => {
17737
+ window.navigation.back();
17738
+ }, []);
17739
+ await sleep(100);
17740
+ }
17741
+ catch (e) { }
17667
17742
  }
17668
- async extract_content(agentContext) {
17743
+ async extract_content(agentContext, variable_name) {
17669
17744
  let content = await this.execute_script(agentContext, () => {
17670
17745
  return window.document.body.innerText
17671
17746
  .replaceAll(/\n+/g, "\n")
@@ -17673,19 +17748,37 @@ class BaseBrowserAgent extends Agent {
17673
17748
  .trim();
17674
17749
  }, []);
17675
17750
  let pageInfo = await this.get_current_page(agentContext);
17676
- return `title: ${pageInfo.title}\npage_url: ${pageInfo.url}\npage_content: \n${content}`;
17751
+ let result = `title: ${pageInfo.title}\npage_url: ${pageInfo.url}\npage_content: \n${content}`;
17752
+ if (variable_name) {
17753
+ agentContext.context.variables.set(variable_name, result);
17754
+ }
17755
+ return result;
17677
17756
  }
17678
17757
  async controlMcpTools(agentContext, messages, loopNum) {
17679
- let url = (await this.get_current_page(agentContext)).url;
17680
- let lastUrl = agentContext.variables.get("lastUrl");
17681
- agentContext.variables.set("lastUrl", url);
17682
- return {
17683
- mcpTools: loopNum == 0 || url != lastUrl,
17684
- mcpParams: {
17685
- environment: "browser",
17686
- browser_url: url,
17687
- },
17688
- };
17758
+ if (loopNum > 0) {
17759
+ let url = null;
17760
+ try {
17761
+ url = (await this.get_current_page(agentContext)).url;
17762
+ }
17763
+ catch (e) { }
17764
+ let lastUrl = agentContext.variables.get("lastUrl");
17765
+ agentContext.variables.set("lastUrl", url);
17766
+ return {
17767
+ mcpTools: loopNum == 0 || url != lastUrl,
17768
+ mcpParams: {
17769
+ environment: "browser",
17770
+ browser_url: url,
17771
+ },
17772
+ };
17773
+ }
17774
+ else {
17775
+ return {
17776
+ mcpTools: true,
17777
+ mcpParams: {
17778
+ environment: "browser",
17779
+ },
17780
+ };
17781
+ }
17689
17782
  }
17690
17783
  toolExecuter(mcpClient, name) {
17691
17784
  return {
@@ -17699,7 +17792,7 @@ class BaseBrowserAgent extends Agent {
17699
17792
  environment: "browser",
17700
17793
  agent_name: agentContext.agent.Name,
17701
17794
  browser_url: agentContext.variables.get("lastUrl"),
17702
- }
17795
+ },
17703
17796
  });
17704
17797
  if (result.extInfo &&
17705
17798
  result.extInfo["javascript"] &&
@@ -17739,6 +17832,42 @@ class BaseBrowserAgent extends Agent {
17739
17832
  };
17740
17833
  }, []);
17741
17834
  }
17835
+ lastToolResult(messages) {
17836
+ let lastMessage = messages[messages.length - 1];
17837
+ if (lastMessage.role != "tool") {
17838
+ return null;
17839
+ }
17840
+ let toolResult = lastMessage.content.filter((t) => t.type == "tool-result")[0];
17841
+ if (!toolResult) {
17842
+ return null;
17843
+ }
17844
+ let result = toolResult.result;
17845
+ let isError = toolResult.isError;
17846
+ for (let i = messages.length - 2; i > 0; i--) {
17847
+ if (messages[i].role !== "assistant" ||
17848
+ typeof messages[i].content == "string") {
17849
+ continue;
17850
+ }
17851
+ for (let j = 0; j < messages[i].content.length; j++) {
17852
+ let content = messages[i].content[j];
17853
+ if (typeof content !== "string" && content.type !== "tool-call") {
17854
+ continue;
17855
+ }
17856
+ let toolUse = content;
17857
+ if (toolResult.toolCallId != toolUse.toolCallId) {
17858
+ continue;
17859
+ }
17860
+ return {
17861
+ id: toolResult.toolCallId,
17862
+ toolName: toolUse.toolName,
17863
+ args: toolUse.args,
17864
+ result,
17865
+ isError,
17866
+ };
17867
+ }
17868
+ }
17869
+ return null;
17870
+ }
17742
17871
  async execute_mcp_script(agentContext, script) {
17743
17872
  return;
17744
17873
  }
@@ -17765,7 +17894,7 @@ function run_build_dom_tree() {
17765
17894
  return window.clickable_elements[highlightIndex];
17766
17895
  }
17767
17896
  function remove_highlight() {
17768
- let highlight = document.getElementById('playwright-highlight-container');
17897
+ let highlight = document.getElementById('eko-highlight-container');
17769
17898
  if (highlight) {
17770
17899
  highlight.remove();
17771
17900
  }
@@ -17825,6 +17954,10 @@ function run_build_dom_tree() {
17825
17954
  for (let i = 0; i < includeAttributes.length; i++) {
17826
17955
  let key = includeAttributes[i];
17827
17956
  let value = node.attributes[key];
17957
+ if (key == "class" && value && value.length > 30) {
17958
+ let classList = value.split(" ").slice(0, 3);
17959
+ value = classList.join(" ");
17960
+ }
17828
17961
  if (key && value) {
17829
17962
  attributes_str += ` ${key}="${value}"`;
17830
17963
  }
@@ -17903,10 +18036,10 @@ function run_build_dom_tree() {
17903
18036
  let highlightIndex = 0; // Reset highlight index
17904
18037
  function highlightElement(element, index, parentIframe = null) {
17905
18038
  // Create or get highlight container
17906
- let container = document.getElementById('playwright-highlight-container');
18039
+ let container = document.getElementById('eko-highlight-container');
17907
18040
  if (!container) {
17908
18041
  container = document.createElement('div');
17909
- container.id = 'playwright-highlight-container';
18042
+ container.id = 'eko-highlight-container';
17910
18043
  container.style.position = 'fixed';
17911
18044
  container.style.pointerEvents = 'none';
17912
18045
  container.style.top = '0';
@@ -17959,7 +18092,7 @@ function run_build_dom_tree() {
17959
18092
  overlay.style.height = `${rect.height}px`;
17960
18093
  // Create label
17961
18094
  const label = document.createElement('div');
17962
- label.className = 'playwright-highlight-label';
18095
+ label.className = 'eko-highlight-label';
17963
18096
  label.style.position = 'absolute';
17964
18097
  label.style.background = baseColor;
17965
18098
  label.style.color = 'white';
@@ -17993,7 +18126,7 @@ function run_build_dom_tree() {
17993
18126
  container.appendChild(overlay);
17994
18127
  container.appendChild(label);
17995
18128
  // Store reference for cleanup
17996
- element.setAttribute('browser-user-highlight-id', `playwright-highlight-${index}`);
18129
+ element.setAttribute('eko-user-highlight-id', `eko-highlight-${index}`);
17997
18130
  return index + 1;
17998
18131
  }
17999
18132
  // Helper function to generate XPath as a tree
@@ -18338,15 +18471,17 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18338
18471
  - Screenshot help verify element positions and relationships. Labels may sometimes overlap, so extracted elements are used to verify the correct elements.
18339
18472
  - In addition to screenshot, simplified information about interactive elements is returned, with element indexes corresponding to those in the screenshot.
18340
18473
  - This tool can ONLY screenshot the VISIBLE content. If a complete content is required, use 'extract_content' instead.
18474
+ - If the webpage content hasn't loaded, please use the \`wait\` tool to allow time for the content to load.
18341
18475
  * ELEMENT INTERACTION:
18342
18476
  - Only use indexes that exist in the provided element list
18343
18477
  - Each element has a unique index number (e.g., "[33]:<button>")
18344
18478
  - Elements marked with "[]:" are non-interactive (for context only)
18345
- * NAVIGATION & ERROR HANDLING:
18479
+ * ERROR HANDLING:
18346
18480
  - If no suitable elements exist, use other functions to complete the task
18347
- - If stuck, try alternative approaches
18481
+ - If stuck, try alternative approaches, don't refuse tasks
18348
18482
  - Handle popups/cookies by accepting or closing them
18349
- - Use scroll to find elements you are looking for`;
18483
+ - Use scroll to find elements you are looking for
18484
+ - When extracting content, prioritize using extract_content, only scroll when you need to load more content`;
18350
18485
  const _tools_ = [];
18351
18486
  super({
18352
18487
  name: AGENT_NAME,
@@ -18364,6 +18499,9 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18364
18499
  }
18365
18500
  async input_text(agentContext, index, text, enter) {
18366
18501
  await this.execute_script(agentContext, typing, [{ index, text, enter }]);
18502
+ if (enter) {
18503
+ await sleep(200);
18504
+ }
18367
18505
  }
18368
18506
  async click_element(agentContext, index, num_clicks, button) {
18369
18507
  await this.execute_script(agentContext, do_click, [
@@ -18380,18 +18518,32 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18380
18518
  }
18381
18519
  async scroll_mouse_wheel(agentContext, amount) {
18382
18520
  await this.execute_script(agentContext, (amount) => {
18383
- window.scrollBy(0, amount * 50);
18521
+ let viewportHeight = window.innerHeight ||
18522
+ document.documentElement.clientHeight ||
18523
+ document.body.clientHeight;
18524
+ let y = Math.max(20, Math.min(viewportHeight / 10, 200));
18525
+ window.scrollBy(0, y * amount);
18384
18526
  }, [amount]);
18385
18527
  await sleep(200);
18386
18528
  }
18387
18529
  async hover_to_element(agentContext, index) {
18388
18530
  await this.execute_script(agentContext, hover_to, [{ index }]);
18389
18531
  }
18532
+ async get_select_options(agentContext, index) {
18533
+ return await this.execute_script(agentContext, get_select_options, [
18534
+ { index },
18535
+ ]);
18536
+ }
18537
+ async select_option(agentContext, index, option) {
18538
+ return await this.execute_script(agentContext, select_option, [
18539
+ { index, option },
18540
+ ]);
18541
+ }
18390
18542
  async screenshot_and_html(agentContext) {
18391
18543
  try {
18392
18544
  let element_result = null;
18393
18545
  for (let i = 0; i < 5; i++) {
18394
- await sleep(300);
18546
+ await sleep(200);
18395
18547
  await this.execute_script(agentContext, run_build_dom_tree, []);
18396
18548
  element_result = (await this.execute_script(agentContext, () => {
18397
18549
  return window.get_clickable_elements(true);
@@ -18400,7 +18552,9 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18400
18552
  break;
18401
18553
  }
18402
18554
  }
18555
+ await sleep(50);
18403
18556
  let screenshot = await this.screenshot(agentContext);
18557
+ // agentContext.variables.set("selector_map", element_result.selector_map);
18404
18558
  let pseudoHtml = element_result.element_str;
18405
18559
  return {
18406
18560
  imageBase64: screenshot.imageBase64,
@@ -18538,15 +18692,22 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18538
18692
  properties: {
18539
18693
  amount: {
18540
18694
  type: "number",
18541
- description: "Scroll amount (positive for up, negative for down)",
18542
- minimum: -10,
18695
+ description: "Scroll amount (up / down)",
18696
+ minimum: 1,
18543
18697
  maximum: 10,
18544
18698
  },
18699
+ direction: {
18700
+ type: "string",
18701
+ enum: ["up", "down"],
18702
+ },
18545
18703
  },
18546
- required: ["amount"],
18704
+ required: ["amount", "direction"],
18547
18705
  },
18548
18706
  execute: async (args, agentContext) => {
18549
- return await this.callInnerTool(() => this.scroll_mouse_wheel(agentContext, args.amount));
18707
+ return await this.callInnerTool(async () => {
18708
+ let amount = args.amount;
18709
+ await this.scroll_mouse_wheel(agentContext, args.direction == "up" ? -amount : amount);
18710
+ });
18550
18711
  },
18551
18712
  },
18552
18713
  {
@@ -18568,7 +18729,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18568
18729
  },
18569
18730
  {
18570
18731
  name: "extract_content",
18571
- description: "Extract the text content of the current webpage.",
18732
+ description: "Extract the text content of the current webpage, obtain webpage data through this tool.",
18572
18733
  parameters: {
18573
18734
  type: "object",
18574
18735
  properties: {},
@@ -18577,31 +18738,102 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18577
18738
  return await this.callInnerTool(() => this.extract_content(agentContext));
18578
18739
  },
18579
18740
  },
18741
+ {
18742
+ name: "get_select_options",
18743
+ description: "Get all options from a native dropdown element",
18744
+ parameters: {
18745
+ type: "object",
18746
+ properties: {
18747
+ index: {
18748
+ type: "number",
18749
+ description: "The index of the element to select",
18750
+ },
18751
+ },
18752
+ required: ["index"],
18753
+ },
18754
+ execute: async (args, agentContext) => {
18755
+ return await this.callInnerTool(() => this.get_select_options(agentContext, args.index));
18756
+ },
18757
+ },
18758
+ {
18759
+ name: "select_option",
18760
+ description: "Select the native dropdown option",
18761
+ parameters: {
18762
+ type: "object",
18763
+ properties: {
18764
+ index: {
18765
+ type: "number",
18766
+ description: "The index of the element to select",
18767
+ },
18768
+ option: {
18769
+ type: "string",
18770
+ description: "Text option",
18771
+ },
18772
+ },
18773
+ required: ["index", "option"],
18774
+ },
18775
+ execute: async (args, agentContext) => {
18776
+ return await this.callInnerTool(() => this.select_option(agentContext, args.index, args.option));
18777
+ },
18778
+ },
18779
+ {
18780
+ name: "get_all_tabs",
18781
+ description: "Get all tabs of the current browser",
18782
+ parameters: {
18783
+ type: "object",
18784
+ properties: {},
18785
+ },
18786
+ execute: async (args, agentContext) => {
18787
+ return await this.callInnerTool(() => this.get_all_tabs(agentContext));
18788
+ },
18789
+ },
18790
+ {
18791
+ name: "switch_tab",
18792
+ description: "Switch to the specified tab page",
18793
+ parameters: {
18794
+ type: "object",
18795
+ properties: {
18796
+ tabId: {
18797
+ type: "number",
18798
+ description: "Tab ID, obtained through get_all_tabs",
18799
+ },
18800
+ },
18801
+ required: ["tabId"],
18802
+ },
18803
+ execute: async (args, agentContext) => {
18804
+ return await this.callInnerTool(() => this.switch_tab(agentContext, args.tabId));
18805
+ },
18806
+ },
18580
18807
  {
18581
18808
  name: "wait",
18809
+ noPlan: true,
18582
18810
  description: "Wait for specified duration",
18583
18811
  parameters: {
18584
18812
  type: "object",
18585
18813
  properties: {
18586
18814
  duration: {
18587
18815
  type: "number",
18588
- description: "Duration in seconds",
18589
- default: 0.5,
18816
+ description: "Duration in millisecond",
18817
+ default: 500,
18818
+ minimum: 200,
18819
+ maximum: 2000,
18590
18820
  },
18591
18821
  },
18592
18822
  required: ["duration"],
18593
18823
  },
18594
18824
  execute: async (args, agentContext) => {
18595
- return await this.callInnerTool(() => sleep((args.duration || 0.5) * 1000));
18825
+ return await this.callInnerTool(() => sleep((args.duration || 200)));
18596
18826
  },
18597
18827
  },
18598
18828
  ];
18599
18829
  }
18600
18830
  async handleMessages(agentContext, messages) {
18601
- let lastMessage = messages[messages.length - 1];
18602
- if (lastMessage.role == "tool" &&
18603
- lastMessage.content.filter((t) => t.type == "tool-result").length > 0) {
18604
- await sleep(200);
18831
+ let lastTool = this.lastToolResult(messages);
18832
+ if (lastTool &&
18833
+ lastTool.toolName !== "extract_content" &&
18834
+ lastTool.toolName !== "get_all_tabs" &&
18835
+ lastTool.toolName !== "variable_storage") {
18836
+ await sleep(300);
18605
18837
  let result = await this.screenshot_and_html(agentContext);
18606
18838
  let image = toImage(result.imageBase64);
18607
18839
  messages.push({
@@ -18658,6 +18890,10 @@ function typing(params) {
18658
18890
  }
18659
18891
  else {
18660
18892
  input.value = text;
18893
+ if (input.__proto__) {
18894
+ let value_setter = Object.getOwnPropertyDescriptor(input.__proto__, "value")?.set;
18895
+ value_setter && value_setter.call(input, text);
18896
+ }
18661
18897
  }
18662
18898
  input.dispatchEvent(new Event("input", { bubbles: true }));
18663
18899
  if (enter) {
@@ -18717,6 +18953,45 @@ function hover_to(params) {
18717
18953
  element.dispatchEvent(event);
18718
18954
  return true;
18719
18955
  }
18956
/**
 * List the options of the highlighted `<select>` element.
 *
 * @param params `{ index }`; `index` is looked up through `window.get_highlight_element`.
 * @returns `{ options: [{ index, text, value }], name }`, or the string
 *          "Error: Not a select element" when the lookup yields nothing or
 *          the element is not a SELECT.
 */
function get_select_options(params) {
    const target = window.get_highlight_element(params.index);
    const isSelect = Boolean(target) && target.tagName.toUpperCase() === "SELECT";
    if (!isSelect) {
        return "Error: Not a select element";
    }
    const options = [];
    for (const opt of Array.from(target.options)) {
        options.push({
            index: opt.index,
            text: opt.text.trim(),
            value: opt.value,
        });
    }
    return { options, name: target.name };
}
18970
/**
 * Select an option of the highlighted `<select>` element.
 *
 * The requested option is matched against the trimmed option text first and
 * against the trimmed option value second. On a match the element's value is
 * set and bubbling "input" and "change" events are dispatched, so listeners
 * registered on ancestors are notified as well.
 *
 * @param params `{ index, option }`; `index` is looked up through
 *               `window.get_highlight_element`, `option` is the text (or
 *               value) to select and is converted to a string.
 * @returns `{ success: true, selectedValue, selectedText }` on success,
 *          `{ success: false, error, availableOptions }` when no option
 *          matches, or the string "Error: Not a select element".
 */
function select_option(params) {
    let element = window.get_highlight_element(params.index);
    if (!element || element.tagName.toUpperCase() !== "SELECT") {
        return "Error: Not a select element";
    }
    // The caller may pass a number (or nothing) instead of a string.
    let text = String(params.option ?? "").trim();
    let options = Array.from(element.options);
    let option = options.find((opt) => opt.text.trim() === text) ||
        options.find((opt) => opt.value.trim() === text);
    if (!option) {
        return {
            success: false,
            error: "Select Option not found",
            availableOptions: options.map((o) => o.text.trim()),
        };
    }
    element.value = option.value;
    // A non-bubbling event never reaches delegated listeners on ancestors.
    element.dispatchEvent(new Event("input", { bubbles: true }));
    element.dispatchEvent(new Event("change", { bubbles: true }));
    return {
        success: true,
        selectedValue: option.value,
        selectedText: option.text.trim(),
    };
}
18720
18995
 
18721
18996
  class BaseBrowserScreenAgent extends BaseBrowserAgent {
18722
18997
  constructor(llms, ext_tools, mcpClient) {
@@ -18738,7 +19013,7 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
18738
19013
  tools: _tools_,
18739
19014
  llms: llms,
18740
19015
  mcpClient: mcpClient,
18741
- planDescription: "Browser operation agent, interact with the browser using the mouse and keyboard."
19016
+ planDescription: "Browser operation agent, interact with the browser using the mouse and keyboard.",
18742
19017
  });
18743
19018
  let init_tools = this.buildInitTools();
18744
19019
  if (ext_tools && ext_tools.length > 0) {
@@ -18866,20 +19141,27 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
18866
19141
  properties: {
18867
19142
  amount: {
18868
19143
  type: "number",
18869
- description: "Scroll amount (positive for up, negative for down)",
18870
- minimum: -10,
19144
+ description: "Scroll amount (up / down)",
19145
+ minimum: 1,
18871
19146
  maximum: 10,
18872
19147
  },
19148
+ direction: {
19149
+ type: "string",
19150
+ enum: ["up", "down"],
19151
+ },
18873
19152
  },
18874
- required: ["amount"],
19153
+ required: ["amount", "direction"],
18875
19154
  },
18876
19155
  execute: async (args, agentContext) => {
18877
- return await this.callInnerTool(() => this.scroll(agentContext, args.amount));
19156
+ return await this.callInnerTool(async () => {
19157
+ let amount = args.amount;
19158
+ await this.scroll(agentContext, args.direction == "up" ? -amount : amount);
19159
+ });
18878
19160
  },
18879
19161
  },
18880
19162
  {
18881
19163
  name: "extract_content",
18882
- description: "Extract the text content of the current webpage.",
19164
+ description: "Extract the text content of the current webpage, obtain webpage data through this tool.",
18883
19165
  parameters: {
18884
19166
  type: "object",
18885
19167
  properties: {},
@@ -18935,31 +19217,64 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
18935
19217
  return await this.callInnerTool(() => this.drag_and_drop(agentContext, args.x1, args.y1, args.x2, args.y2));
18936
19218
  },
18937
19219
  },
19220
+ {
19221
+ name: "get_all_tabs",
19222
+ description: "Get all tabs of the current browser",
19223
+ parameters: {
19224
+ type: "object",
19225
+ properties: {},
19226
+ },
19227
+ execute: async (args, agentContext) => {
19228
+ return await this.callInnerTool(() => this.get_all_tabs(agentContext));
19229
+ },
19230
+ },
19231
+ {
19232
+ name: "switch_tab",
19233
+ description: "Switch to the specified tab page",
19234
+ parameters: {
19235
+ type: "object",
19236
+ properties: {
19237
+ tabId: {
19238
+ type: "number",
19239
+ description: "Tab ID, obtained through get_all_tabs",
19240
+ },
19241
+ },
19242
+ required: ["tabId"],
19243
+ },
19244
+ execute: async (args, agentContext) => {
19245
+ return await this.callInnerTool(() => this.switch_tab(agentContext, args.tabId));
19246
+ },
19247
+ },
18938
19248
  {
18939
19249
  name: "wait",
19250
+ noPlan: true,
18940
19251
  description: "Wait for specified duration",
18941
19252
  parameters: {
18942
19253
  type: "object",
18943
19254
  properties: {
18944
19255
  duration: {
18945
19256
  type: "number",
18946
- description: "Duration in seconds",
18947
- default: 0.5,
19257
+ description: "Duration in millisecond",
19258
+ default: 500,
19259
+ minimum: 200,
19260
+ maximum: 2000,
18948
19261
  },
18949
19262
  },
18950
19263
  required: ["duration"],
18951
19264
  },
18952
19265
  execute: async (args, agentContext) => {
18953
- return await this.callInnerTool(() => sleep((args.duration || 0.5) * 1000));
19266
+ return await this.callInnerTool(() => sleep((args.duration || 200)));
18954
19267
  },
18955
19268
  },
18956
19269
  ];
18957
19270
  }
18958
19271
  async handleMessages(agentContext, messages) {
18959
- let lastMessage = messages[messages.length - 1];
18960
- if (lastMessage.role == "tool" &&
18961
- lastMessage.content.filter((t) => t.type == "tool-result").length > 0) {
18962
- await sleep(200);
19272
+ let lastTool = this.lastToolResult(messages);
19273
+ if (lastTool &&
19274
+ lastTool.toolName !== "extract_content" &&
19275
+ lastTool.toolName !== "get_all_tabs" &&
19276
+ lastTool.toolName !== "variable_storage") {
19277
+ await sleep(300);
18963
19278
  let result = await this.screenshot(agentContext);
18964
19279
  let image = toImage(result.imageBase64);
18965
19280
  messages.push({