@eko-ai/eko 2.0.2-alpha.9 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.esm.js CHANGED
@@ -1,8 +1,7 @@
1
1
  const config = {
2
2
  name: "Fellou",
3
3
  platform: "mac",
4
- maxReactNum: 100,
5
- maxTokens: 16000
4
+ maxReactNum: 100
6
5
  };
7
6
 
8
7
  var LogLevel;
@@ -8557,15 +8556,6 @@ function mergeAgents(agents1, agents2) {
8557
8556
  }
8558
8557
  return tools;
8559
8558
  }
8560
- function sub(str, maxLength, appendPoint = true) {
8561
- if (!str) {
8562
- return "";
8563
- }
8564
- if (str.length > maxLength) {
8565
- return str.substring(0, maxLength) + (appendPoint ? "..." : "");
8566
- }
8567
- return str;
8568
- }
8569
8559
  function fixXmlTag(code) {
8570
8560
  function fixDoubleChar(code) {
8571
8561
  const stack = [];
@@ -12738,7 +12728,7 @@ class RetryLanguageModel {
12738
12728
  constructor(llms, names, stream_first_timeout) {
12739
12729
  this.llms = llms;
12740
12730
  this.names = names || [];
12741
- this.stream_first_timeout = stream_first_timeout || 30000;
12731
+ this.stream_first_timeout = stream_first_timeout || 20000;
12742
12732
  if (this.names.indexOf("default") == -1) {
12743
12733
  this.names.push("default");
12744
12734
  }
@@ -12752,7 +12742,7 @@ class RetryLanguageModel {
12752
12742
  toolChoice: request.toolChoice,
12753
12743
  },
12754
12744
  prompt: request.messages,
12755
- maxTokens: request.maxTokens || config.maxTokens,
12745
+ maxTokens: request.maxTokens,
12756
12746
  temperature: request.temperature,
12757
12747
  topP: request.topP,
12758
12748
  topK: request.topK,
@@ -12775,9 +12765,6 @@ class RetryLanguageModel {
12775
12765
  return result;
12776
12766
  }
12777
12767
  catch (e) {
12778
- if (e?.name === "AbortError") {
12779
- throw e;
12780
- }
12781
12768
  if (Log.isEnableInfo()) {
12782
12769
  Log.info(`LLM nonstream request, name: ${name} => `, {
12783
12770
  tools: options.mode?.tools,
@@ -12798,7 +12785,7 @@ class RetryLanguageModel {
12798
12785
  toolChoice: request.toolChoice,
12799
12786
  },
12800
12787
  prompt: request.messages,
12801
- maxTokens: request.maxTokens || config.maxTokens,
12788
+ maxTokens: request.maxTokens,
12802
12789
  temperature: request.temperature,
12803
12790
  topP: request.topP,
12804
12791
  topK: request.topK,
@@ -12814,19 +12801,12 @@ class RetryLanguageModel {
12814
12801
  continue;
12815
12802
  }
12816
12803
  try {
12817
- const controller = new AbortController();
12818
- const signal = options.abortSignal
12819
- ? AbortSignal.any([options.abortSignal, controller.signal])
12820
- : controller.signal;
12821
- const result = await call_timeout(async () => await llm.doStream({ ...options, abortSignal: signal }), this.stream_first_timeout, (e) => {
12822
- controller.abort();
12823
- });
12804
+ const result = await call_timeout(async () => await llm.doStream(options), this.stream_first_timeout);
12824
12805
  const stream = result.stream;
12825
12806
  const reader = stream.getReader();
12826
12807
  const { done, value } = await call_timeout(async () => await reader.read(), this.stream_first_timeout, (e) => {
12827
12808
  reader.cancel();
12828
12809
  reader.releaseLock();
12829
- controller.abort();
12830
12810
  });
12831
12811
  if (done) {
12832
12812
  Log.warn(`LLM stream done, name: ${name} => `, { done, value });
@@ -12846,9 +12826,6 @@ class RetryLanguageModel {
12846
12826
  return result;
12847
12827
  }
12848
12828
  catch (e) {
12849
- if (e?.name === "AbortError") {
12850
- throw e;
12851
- }
12852
12829
  if (Log.isEnableInfo()) {
12853
12830
  Log.info(`LLM stream request, name: ${name} => `, {
12854
12831
  tools: options.mode?.tools,
@@ -15647,7 +15624,6 @@ const TOOL_NAME$3 = "human_interact";
15647
15624
  class HumanInteractTool {
15648
15625
  constructor() {
15649
15626
  this.name = TOOL_NAME$3;
15650
- this.noPlan = true;
15651
15627
  this.description = `AI interacts with humans:
15652
15628
  confirm: Ask the user to confirm whether to execute an operation, especially when performing dangerous actions such as deleting system files.
15653
15629
  input: Prompt the user to enter text; for example, when a task is ambiguous, the AI can choose to ask the user for details, and the user can respond by inputting.
@@ -15661,57 +15637,93 @@ request_help: Request assistance from the user; for instance, when an operation
15661
15637
  description: "The type of interaction with users.",
15662
15638
  enum: ["confirm", "input", "select", "request_help"],
15663
15639
  },
15664
- prompt: {
15665
- type: "string",
15666
- description: "Display prompts to users",
15640
+ confirm: {
15641
+ type: "object",
15642
+ properties: {
15643
+ prompt: {
15644
+ type: "string",
15645
+ description: "Display prompts to users",
15646
+ },
15647
+ },
15648
+ required: ["prompt"],
15667
15649
  },
15668
- selectOptions: {
15669
- type: "array",
15670
- description: "Options provided to users, this parameter is required when interactType is select.",
15671
- items: {
15672
- type: "string",
15650
+ input: {
15651
+ type: "object",
15652
+ properties: {
15653
+ prompt: {
15654
+ type: "string",
15655
+ description: "Display prompts to users",
15656
+ },
15673
15657
  },
15658
+ required: ["prompt"],
15674
15659
  },
15675
- selectMultiple: {
15676
- type: "boolean",
15677
- description: "isMultiple, used when interactType is select",
15660
+ select: {
15661
+ type: "object",
15662
+ properties: {
15663
+ prompt: {
15664
+ type: "string",
15665
+ description: "Display prompts to users",
15666
+ },
15667
+ options: {
15668
+ type: "array",
15669
+ description: "Options provided to the user",
15670
+ items: {
15671
+ type: "string",
15672
+ },
15673
+ },
15674
+ multiple: {
15675
+ type: "boolean",
15676
+ },
15677
+ },
15678
+ required: ["prompt", "options"],
15678
15679
  },
15679
- helpType: {
15680
- type: "string",
15681
- description: "Help type, required when interactType is request_help.",
15682
- enum: ["request_login", "request_assistance"],
15680
+ request_help: {
15681
+ type: "object",
15682
+ properties: {
15683
+ helpType: {
15684
+ type: "string",
15685
+ description: "Display prompts to users",
15686
+ enum: ["request_login", "request_assistance"],
15687
+ },
15688
+ prompt: {
15689
+ type: "string",
15690
+ description: "Display prompts to users",
15691
+ },
15692
+ },
15693
+ required: ["helpType", "prompt"],
15683
15694
  },
15684
15695
  },
15685
- required: ["interactType", "prompt"],
15696
+ required: ["interactType"],
15686
15697
  };
15687
15698
  }
15688
15699
  async execute(args, agentContext) {
15689
15700
  let interactType = args.interactType;
15701
+ let interact = args[interactType];
15690
15702
  let callback = agentContext.context.config.callback;
15691
15703
  let resultText = "";
15692
15704
  if (callback) {
15693
15705
  switch (interactType) {
15694
15706
  case "confirm":
15695
15707
  if (callback.onHumanConfirm) {
15696
- let result = await callback.onHumanConfirm(agentContext, args.prompt);
15708
+ let result = await callback.onHumanConfirm(agentContext, interact.prompt);
15697
15709
  resultText = `confirm result: ${result ? "Yes" : "No"}`;
15698
15710
  }
15699
15711
  break;
15700
15712
  case "input":
15701
15713
  if (callback.onHumanInput) {
15702
- let result = await callback.onHumanInput(agentContext, args.prompt);
15714
+ let result = await callback.onHumanInput(agentContext, interact.prompt);
15703
15715
  resultText = `input result: ${result}`;
15704
15716
  }
15705
15717
  break;
15706
15718
  case "select":
15707
15719
  if (callback.onHumanSelect) {
15708
- let result = await callback.onHumanSelect(agentContext, args.prompt, (args.selectOptions || []), (args.selectMultiple || false));
15720
+ let result = await callback.onHumanSelect(agentContext, interact.prompt, interact.options, interact.multiple);
15709
15721
  resultText = `select result: ${JSON.stringify(result)}`;
15710
15722
  }
15711
15723
  break;
15712
15724
  case "request_help":
15713
15725
  if (callback.onHumanHelp) {
15714
- let result = await callback.onHumanHelp(agentContext, (args.helpType || "request_assistance"), args.prompt);
15726
+ let result = await callback.onHumanHelp(agentContext, interact.helpType, interact.prompt);
15715
15727
  resultText = `request_help result: ${result ? "Solved" : "Unresolved"}`;
15716
15728
  }
15717
15729
  break;
@@ -15885,7 +15897,7 @@ class WatchTriggerTool {
15885
15897
  };
15886
15898
  }
15887
15899
  async execute(args, agentContext) {
15888
- // TODO Listen for changes to the DOM or file, and execute nodes
15900
+ // TODO 监听 dom 文件 改变,执行节点
15889
15901
  return null;
15890
15902
  }
15891
15903
  }
@@ -15924,15 +15936,11 @@ UTC datetime: {datetime}
15924
15936
  </root>
15925
15937
  `;
15926
15938
  const HUMAN_PROMPT = `
15927
- * HUMAN INTERACT
15928
- During the task execution process, you can use the \`${TOOL_NAME$3}\` tool to interact with humans, please call it in the following situations:
15939
+ During the task execution process, you can use the \`${TOOL_NAME$3}\` tool to interact with humans. Please do not abuse this tool to harass humans. Please call it in the following situations:
15929
15940
  - When performing dangerous operations such as deleting files, confirmation from humans is required
15930
15941
  - When encountering obstacles while accessing websites, such as requiring user login, you need to request human assistance
15931
- - When requesting login, please only call the function when a login dialog box is clearly displayed.
15932
- - Try not to use the \`${TOOL_NAME$3}\` tool
15933
15942
  `;
15934
15943
  const VARIABLE_PROMPT = `
15935
- * VARIABLE STORAGE
15936
15944
  If you need to read and write the input/output variables in the node, require the use of the \`${TOOL_NAME$1}\` tool.
15937
15945
  `;
15938
15946
  const FOR_EACH_NODE = `
@@ -15957,18 +15965,14 @@ const WATCH_NODE = `
15957
15965
  const WATCH_PROMPT = `
15958
15966
  \`watch\`: monitor changes in webpage DOM or file content, when executing to the watch node, require the use of the \`${TOOL_NAME}\` tool.
15959
15967
  `;
15960
- function getAgentSystemPrompt(agent, agentNode, context, tools, extSysPrompt) {
15961
- let prompt = extSysPrompt || "";
15968
+ function getAgentSystemPrompt(agent, agentNode, context, systemPrompt) {
15969
+ let prompt = "";
15962
15970
  let nodePrompt = "";
15963
15971
  let agentNodeXml = agentNode.xml;
15964
- let hasWatch = agentNodeXml.indexOf("</watch>") > -1;
15965
15972
  let hasForEach = agentNodeXml.indexOf("</forEach>") > -1;
15966
- let hasHumanTool = (tools || agent.Tools).filter((tool) => tool.name == TOOL_NAME$3)
15967
- .length > 0;
15968
- let hasVariable = agentNodeXml.indexOf("input=") > -1 ||
15969
- agentNodeXml.indexOf("output=") > -1 ||
15970
- (tools || agent.Tools).filter((tool) => tool.name == TOOL_NAME$1)
15971
- .length > 0;
15973
+ let hasWatch = agentNodeXml.indexOf("</watch>") > -1;
15974
+ let hasVariable = agentNodeXml.indexOf(" input=") > -1 || agentNodeXml.indexOf(" output=") > -1;
15975
+ let hasHumanTool = agent.Tools.filter((tool) => tool.name == TOOL_NAME$3).length > 0;
15972
15976
  if (hasHumanTool) {
15973
15977
  prompt += HUMAN_PROMPT;
15974
15978
  }
@@ -15983,17 +15987,8 @@ function getAgentSystemPrompt(agent, agentNode, context, tools, extSysPrompt) {
15983
15987
  prompt += WATCH_PROMPT;
15984
15988
  nodePrompt += WATCH_NODE;
15985
15989
  }
15986
- if (context.chain.agents.length > 1) {
15987
- prompt += "\n Main task: " + context.chain.taskPrompt;
15988
- prompt += "\n# Pre-task execution results";
15989
- for (let i = 0; i < context.chain.agents.length; i++) {
15990
- let agentChain = context.chain.agents[i];
15991
- if (agentChain.agentResult) {
15992
- prompt += `\n## ${agentChain.agent.task || agentChain.agent.name}\n${sub(agentChain.agentResult, 500)}`;
15993
- }
15994
- }
15995
- }
15996
- return AGENT_SYSTEM_TEMPLATE.replace("{name}", config.name)
15990
+ return (systemPrompt || AGENT_SYSTEM_TEMPLATE)
15991
+ .replace("{name}", config.name)
15997
15992
  .replace("{agent}", agent.Name)
15998
15993
  .replace("{description}", agent.Description)
15999
15994
  .replace("{datetime}", new Date().toISOString())
@@ -16001,9 +15996,8 @@ function getAgentSystemPrompt(agent, agentNode, context, tools, extSysPrompt) {
16001
15996
  .replace("{nodePrompt}", nodePrompt)
16002
15997
  .trim();
16003
15998
  }
16004
- function getAgentUserPrompt(agent, agentNode, context, tools) {
16005
- let hasTaskNodeStatusTool = (tools || agent.Tools).filter((tool) => tool.name == TOOL_NAME$2)
16006
- .length > 0;
15999
+ function getAgentUserPrompt(agent, agentNode, context) {
16000
+ let hasTaskNodeStatusTool = agent.Tools.filter((tool) => tool.name == TOOL_NAME$2).length > 0;
16007
16001
  return buildAgentRootXml(agentNode.xml, context.chain.taskPrompt, (nodeId, node) => {
16008
16002
  if (hasTaskNodeStatusTool) {
16009
16003
  node.setAttribute("status", "todo");
@@ -16036,10 +16030,9 @@ class Agent {
16036
16030
  let loopNum = 0;
16037
16031
  let context = agentContext.context;
16038
16032
  let agentNode = agentContext.agentChain.agent;
16039
- const tools = [...this.tools, ...this.system_auto_tools(agentNode)];
16040
- let messages = await this.initMessages(agentContext, tools);
16033
+ let messages = this.initMessages(agentContext);
16041
16034
  let rlm = new RetryLanguageModel(context.config.llms, this.llms);
16042
- let agentTools = tools;
16035
+ let agentTools = [...this.tools, ...this.system_auto_tools(agentNode)];
16043
16036
  while (loopNum < maxReactNum) {
16044
16037
  context.checkAborted();
16045
16038
  if (mcpClient) {
@@ -16047,7 +16040,7 @@ class Agent {
16047
16040
  if (controlMcp.mcpTools) {
16048
16041
  let mcpTools = await this.listTools(agentNode, context, mcpClient, controlMcp.mcpParams);
16049
16042
  let usedTools = this.extractUsedTool(messages, agentTools);
16050
- let _agentTools = mergeTools(tools, usedTools);
16043
+ let _agentTools = mergeTools(this.tools, usedTools);
16051
16044
  agentTools = mergeTools(_agentTools, mcpTools);
16052
16045
  }
16053
16046
  }
@@ -16059,7 +16052,7 @@ class Agent {
16059
16052
  }
16060
16053
  loopNum++;
16061
16054
  }
16062
- return "Unfinished";
16055
+ return null;
16063
16056
  }
16064
16057
  async handleResult(agentContext, messages, agentTools, results) {
16065
16058
  let text = null;
@@ -16139,8 +16132,8 @@ class Agent {
16139
16132
  system_auto_tools(agentNode) {
16140
16133
  let tools = [];
16141
16134
  let agentNodeXml = agentNode.xml;
16142
- let hasVariable = agentNodeXml.indexOf("input=") > -1 ||
16143
- agentNodeXml.indexOf("output=") > -1;
16135
+ let hasVariable = agentNodeXml.indexOf(" input=") > -1 ||
16136
+ agentNodeXml.indexOf(" output=") > -1;
16144
16137
  if (hasVariable) {
16145
16138
  tools.push(new VariableStorageTool());
16146
16139
  }
@@ -16169,27 +16162,24 @@ class Agent {
16169
16162
  }
16170
16163
  return _results;
16171
16164
  }
16172
- async initMessages(agentContext, tools) {
16165
+ initMessages(agentContext) {
16173
16166
  let messages = [
16174
16167
  {
16175
16168
  role: "system",
16176
- content: getAgentSystemPrompt(this, agentContext.agentChain.agent, agentContext.context, tools, await this.extSysPrompt()),
16169
+ content: getAgentSystemPrompt(this, agentContext.agentChain.agent, agentContext.context),
16177
16170
  },
16178
16171
  {
16179
16172
  role: "user",
16180
16173
  content: [
16181
16174
  {
16182
16175
  type: "text",
16183
- text: getAgentUserPrompt(this, agentContext.agentChain.agent, agentContext.context, tools),
16176
+ text: getAgentUserPrompt(this, agentContext.agentChain.agent, agentContext.context),
16184
16177
  },
16185
16178
  ],
16186
16179
  },
16187
16180
  ];
16188
16181
  return messages;
16189
16182
  }
16190
- async extSysPrompt() {
16191
- return "";
16192
- }
16193
16183
  async listTools(agentNode, context, mcpClient, mcpParams) {
16194
16184
  let list = await mcpClient.listTools({
16195
16185
  taskId: context.taskId,
@@ -16225,7 +16215,7 @@ class Agent {
16225
16215
  nodeId: agentContext.agentChain.agent.id,
16226
16216
  environment: config.platform,
16227
16217
  agent_name: agentContext.agent.Name,
16228
- },
16218
+ }
16229
16219
  });
16230
16220
  },
16231
16221
  };
@@ -16253,7 +16243,7 @@ class Agent {
16253
16243
  let message = messages[i];
16254
16244
  if (message.role == "tool") {
16255
16245
  for (let j = 0; j < message.content.length; j++) {
16256
- let toolName = message.content[j].toolName;
16246
+ let toolName = message.content[i].toolName;
16257
16247
  if (toolNames.indexOf(toolName) > -1) {
16258
16248
  continue;
16259
16249
  }
@@ -16297,17 +16287,12 @@ class Agent {
16297
16287
  else if (!isError && text.length == 0) {
16298
16288
  text = "Successful";
16299
16289
  }
16300
- let contentText = {
16301
- type: "text",
16302
- text: text,
16303
- };
16304
- let result = text;
16290
+ let result = { result: text };
16305
16291
  if (text &&
16306
16292
  ((text.startsWith("{") && text.endsWith("}")) ||
16307
16293
  (text.startsWith("[") && text.endsWith("]")))) {
16308
16294
  try {
16309
16295
  result = JSON.parse(text);
16310
- contentText = null;
16311
16296
  }
16312
16297
  catch (e) { }
16313
16298
  }
@@ -16316,7 +16301,6 @@ class Agent {
16316
16301
  toolCallId: toolUse.toolCallId,
16317
16302
  toolName: toolUse.toolName,
16318
16303
  result: result,
16319
- content: contentText ? [contentText] : undefined,
16320
16304
  isError: isError,
16321
16305
  };
16322
16306
  }
@@ -16591,7 +16575,7 @@ Your task is to understand the user's requirements, dynamically plan the user's
16591
16575
  2. Analyze the Agents that need to be used based on the user's requirements.
16592
16576
  3. Generate the Agent calling plan based on the analysis results.
16593
16577
  4. About agent name, please do not arbitrarily fabricate non-existent agent names.
16594
- 5. You only need to provide the steps to complete the user's task, key steps only, no need to be too detailed.
16578
+ 5. You only need to provide the steps to complete the user's task, steps are simple and straightforward, no need for too many specific details.
16595
16579
  6. Please strictly follow the output format and example output.
16596
16580
  7. The output language should follow the language corresponding to the user's task.
16597
16581
 
@@ -16742,22 +16726,15 @@ const PLAN_USER_TEMPLATE = `
16742
16726
  User Platform: {platform}
16743
16727
  Task Description: {taskPrompt}
16744
16728
  `;
16745
- const PLAN_USER_TASK_WEBSITE_TEMPLATE = `
16746
- User Platform: {platform}
16747
- Task Website: {task_website}
16748
- Task Description: {taskPrompt}
16749
- `;
16750
16729
  function getPlanSystemPrompt(agents) {
16751
16730
  let agents_prompt = agents
16752
16731
  .map((agent) => {
16753
16732
  return (`<agent name="${agent.Name}">\n` +
16754
16733
  `Description: ${agent.PlanDescription || agent.Description}\nTools:\n` +
16755
- agent.Tools.filter((tool) => !tool.noPlan)
16756
- .map((tool) => `- ${tool.name}: ${tool.planDescription || tool.description || ""}`)
16757
- .join("\n") +
16734
+ agent.Tools.map((tool) => `- ${tool.name}: ${tool.description || ""}`).join("\n") +
16758
16735
  `\n</agent>`);
16759
16736
  })
16760
- .join("\n\n");
16737
+ .join("\n");
16761
16738
  let example_prompt = "";
16762
16739
  let hasChatAgent = agents.filter((a) => a.Name == AGENT_NAME$4).length > 0;
16763
16740
  const example_list = hasChatAgent
@@ -16772,18 +16749,10 @@ function getPlanSystemPrompt(agents) {
16772
16749
  .replace("{example_prompt}", example_prompt)
16773
16750
  .trim();
16774
16751
  }
16775
- function getPlanUserPrompt(taskPrompt, task_website) {
16776
- if (task_website) {
16777
- return PLAN_USER_TASK_WEBSITE_TEMPLATE.replace("{taskPrompt}", taskPrompt)
16778
- .replace("{platform}", config.platform)
16779
- .replace("{task_website}", task_website)
16780
- .trim();
16781
- }
16782
- else {
16783
- return PLAN_USER_TEMPLATE.replace("{taskPrompt}", taskPrompt)
16784
- .replace("{platform}", config.platform)
16785
- .trim();
16786
- }
16752
+ function getPlanUserPrompt(taskPrompt) {
16753
+ return PLAN_USER_TEMPLATE.replace("{taskPrompt}", taskPrompt)
16754
+ .replace("{platform}", config.platform)
16755
+ .trim();
16787
16756
  }
16788
16757
 
16789
16758
  class Planner {
@@ -16812,7 +16781,7 @@ class Planner {
16812
16781
  {
16813
16782
  role: "user",
16814
16783
  content: [{ type: "text", text: taskPrompt }],
16815
- },
16784
+ }
16816
16785
  ];
16817
16786
  }
16818
16787
  else {
@@ -16820,17 +16789,12 @@ class Planner {
16820
16789
  { role: "system", content: getPlanSystemPrompt(this.context.agents) },
16821
16790
  {
16822
16791
  role: "user",
16823
- content: [
16824
- {
16825
- type: "text",
16826
- text: getPlanUserPrompt(taskPrompt, this.context.variables.get("task_website")),
16827
- },
16828
- ],
16792
+ content: [{ type: "text", text: getPlanUserPrompt(taskPrompt) }],
16829
16793
  },
16830
16794
  ];
16831
16795
  }
16832
16796
  let request = {
16833
- maxTokens: 4096,
16797
+ maxTokens: 1024,
16834
16798
  temperature: 0.7,
16835
16799
  messages: messages,
16836
16800
  abortSignal: this.context.controller.signal,
@@ -16934,33 +16898,30 @@ class Eko {
16934
16898
  throw new Error("The task does not exist");
16935
16899
  }
16936
16900
  try {
16937
- return await this.doRunWorkflow(context);
16901
+ return this.doRunWorkflow(context);
16938
16902
  }
16939
16903
  catch (e) {
16940
16904
  return {
16941
- taskId,
16942
16905
  success: false,
16943
16906
  stopReason: e?.name == "AbortError" ? "abort" : "error",
16944
16907
  result: e,
16945
16908
  };
16946
16909
  }
16910
+ finally {
16911
+ this.deleteTask(taskId);
16912
+ }
16947
16913
  }
16948
16914
  async run(taskPrompt, taskId = uuidv4(), contextParams) {
16949
16915
  await this.generate(taskPrompt, taskId, contextParams);
16950
16916
  return await this.execute(taskId);
16951
16917
  }
16952
16918
  async initContext(workflow, contextParams) {
16953
- const agents = this.config.agents || [];
16919
+ const agents = [...(this.config.agents || [])];
16954
16920
  let chain = new Chain(workflow.taskPrompt || workflow.name);
16955
16921
  let context = new Context(workflow.taskId, this.config, agents, chain);
16956
- if (this.config.a2aClient) {
16957
- let a2aList = await this.config.a2aClient.listAgents(workflow.taskPrompt || workflow.name);
16958
- context.agents = mergeAgents(context.agents, a2aList);
16959
- }
16960
16922
  if (contextParams) {
16961
16923
  Object.keys(contextParams).forEach((key) => context.variables.set(key, contextParams[key]));
16962
16924
  }
16963
- context.workflow = workflow;
16964
16925
  this.taskMap.set(workflow.taskId, context);
16965
16926
  return context;
16966
16927
  }
@@ -16974,7 +16935,7 @@ class Eko {
16974
16935
  map[item.Name] = item;
16975
16936
  return map;
16976
16937
  }, {});
16977
- let results = [];
16938
+ let lastResult;
16978
16939
  for (let i = 0; i < workflow.agents.length; i++) {
16979
16940
  context.checkAborted();
16980
16941
  let agentNode = workflow.agents[i];
@@ -16985,21 +16946,18 @@ class Eko {
16985
16946
  let agentChain = new AgentChain(agentNode);
16986
16947
  context.chain.push(agentChain);
16987
16948
  agent.result = await agent.run(context, agentChain);
16988
- results.push(agent.result);
16949
+ lastResult = agent.result;
16989
16950
  }
16951
+ // TODO 超过2个Agent时需要summary输出结果。
16990
16952
  return {
16991
16953
  success: true,
16992
16954
  stopReason: "done",
16993
- result: results[results.length - 1],
16994
- taskId: context.taskId,
16955
+ result: lastResult,
16995
16956
  };
16996
16957
  }
16997
16958
  getTask(taskId) {
16998
16959
  return this.taskMap.get(taskId);
16999
16960
  }
17000
- getAllTaskId() {
17001
- return [...this.taskMap.keys()];
17002
- }
17003
16961
  deleteTask(taskId) {
17004
16962
  return this.taskMap.delete(taskId);
17005
16963
  }
@@ -17244,17 +17202,18 @@ function parseChunk(chunk) {
17244
17202
 
17245
17203
  const AGENT_NAME$3 = "File";
17246
17204
  class BaseFileAgent extends Agent {
17247
- constructor(work_path, llms, ext_tools, mcpClient, planDescription) {
17205
+ constructor(work_path, llms, ext_tools, mcpClient) {
17248
17206
  const _tools_ = [];
17249
- const prompt = work_path ? `Your default working path is: ${work_path}` : "";
17207
+ const prompt = work_path
17208
+ ? `Your default working path is: ${work_path}`
17209
+ : "";
17250
17210
  super({
17251
17211
  name: AGENT_NAME$3,
17252
17212
  description: `You are a file agent, handling file-related tasks such as creating, finding, reading, modifying files, etc.${prompt}`,
17253
17213
  tools: _tools_,
17254
17214
  llms: llms,
17255
17215
  mcpClient: mcpClient,
17256
- planDescription: planDescription ||
17257
- "File operation agent, handling file-related tasks such as creating, finding, reading, modifying files, etc, only text file writing is supported.",
17216
+ planDescription: "File operation agent, handling file-related tasks such as creating, finding, reading, modifying files, etc.",
17258
17217
  });
17259
17218
  let init_tools = this.buildInitTools();
17260
17219
  if (ext_tools && ext_tools.length > 0) {
@@ -17300,7 +17259,7 @@ class BaseFileAgent extends Agent {
17300
17259
  },
17301
17260
  {
17302
17261
  name: "file_write",
17303
- description: "Overwrite or append content to a file. Use for creating new files, appending content, or modifying existing files, only supports txt/md/csv or other text formats.",
17262
+ description: "Overwrite or append content to a file. Use for creating new files, appending content, or modifying existing files.",
17304
17263
  parameters: {
17305
17264
  type: "object",
17306
17265
  properties: {
@@ -17376,7 +17335,7 @@ class BaseFileAgent extends Agent {
17376
17335
 
17377
17336
  const AGENT_NAME$2 = "Shell";
17378
17337
  class BaseShellAgent extends Agent {
17379
- constructor(llms, ext_tools, mcpClient, planDescription) {
17338
+ constructor(llms, ext_tools, mcpClient) {
17380
17339
  const _tools_ = [];
17381
17340
  super({
17382
17341
  name: AGENT_NAME$2,
@@ -17384,7 +17343,7 @@ class BaseShellAgent extends Agent {
17384
17343
  tools: _tools_,
17385
17344
  llms: llms,
17386
17345
  mcpClient: mcpClient,
17387
- planDescription: planDescription || "Shell command agent, use to execute shell commands.",
17346
+ planDescription: "Shell command agent, use to execute shell commands.",
17388
17347
  });
17389
17348
  let init_tools = this.buildInitTools();
17390
17349
  if (ext_tools && ext_tools.length > 0) {
@@ -17458,55 +17417,34 @@ class BaseTimerAgent extends Agent {
17458
17417
 
17459
17418
  const AGENT_NAME$1 = "Computer";
17460
17419
  class BaseComputerAgent extends Agent {
17461
- constructor(llms, ext_tools, mcpClient, keyboardKeys) {
17420
+ constructor(llms, ext_tools, mcpClient) {
17462
17421
  const _tools_ = [];
17463
17422
  super({
17464
17423
  name: AGENT_NAME$1,
17465
- description: `You are a computer operation agent, who interacts with the computer using mouse and keyboard, completing specified tasks step by step based on the given tasks and screenshots. After each of your operations, you will receive the latest computer screenshot to evaluate the task execution status.
17466
- This is a computer GUI interface, observe the execution through screenshots, and specify action sequences to complete designated tasks.
17467
- * COMPUTER OPERATIONS:
17468
- - You can operate the application using shortcuts.
17469
- - If stuck, try alternative approaches`,
17424
+ description: "You are a computer operation agent, who interacts with the computer using mouse and keyboard, completing specified tasks step by step based on the given tasks and screenshots. After each of your operations, you will receive the latest computer screenshot to evaluate the task execution status.",
17470
17425
  tools: _tools_,
17471
17426
  llms: llms,
17472
17427
  mcpClient: mcpClient,
17473
- planDescription: "Computer operation agent, interact with the computer using the mouse and keyboard, operation application."
17428
+ planDescription: "Computer operation agent, interact with the computer using the mouse and keyboard."
17474
17429
  });
17475
- if (!keyboardKeys) {
17476
- if (config.platform == "windows") {
17477
- keyboardKeys = [
17478
- 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
17479
- 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
17480
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
17481
- 'enter', 'esc', 'backspace', 'tab', 'space', 'delete',
17482
- 'ctrl', 'alt', 'shift', 'win',
17483
- 'up', 'down', 'left', 'right',
17484
- 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12',
17485
- 'ctrl+c', 'ctrl+v', 'ctrl+x', 'ctrl+z', 'ctrl+a', 'ctrl+s',
17486
- 'alt+tab', 'alt+f4', 'ctrl+alt+delete'
17487
- ];
17488
- }
17489
- else {
17490
- keyboardKeys = [
17491
- 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
17492
- 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
17493
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
17494
- 'enter', 'esc', 'backspace', 'tab', 'space', 'delete',
17495
- 'command', 'option', 'shift', 'control',
17496
- 'up', 'down', 'left', 'right',
17497
- 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12',
17498
- 'command+c', 'command+v', 'command+x', 'command+z', 'command+a', 'command+s',
17499
- 'command+tab', 'command+q', 'command+escape'
17500
- ];
17501
- }
17502
- }
17503
- let init_tools = this.buildInitTools(keyboardKeys);
17430
+ this.keyboardKeys = [
17431
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
17432
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
17433
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
17434
+ 'enter', 'esc', 'backspace', 'tab', 'space', 'delete',
17435
+ 'ctrl', 'alt', 'shift', 'win',
17436
+ 'up', 'down', 'left', 'right',
17437
+ 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12',
17438
+ 'ctrl+c', 'ctrl+v', 'ctrl+x', 'ctrl+z', 'ctrl+a', 'ctrl+s',
17439
+ 'alt+tab', 'alt+f4', 'ctrl+alt+delete'
17440
+ ];
17441
+ let init_tools = this.buildInitTools();
17504
17442
  if (ext_tools && ext_tools.length > 0) {
17505
17443
  init_tools = mergeTools(init_tools, ext_tools);
17506
17444
  }
17507
17445
  init_tools.forEach((tool) => _tools_.push(tool));
17508
17446
  }
17509
- buildInitTools(keyboardKeys) {
17447
+ buildInitTools() {
17510
17448
  return [
17511
17449
  {
17512
17450
  name: "typing",
@@ -17587,22 +17525,15 @@ This is a computer GUI interface, observe the execution through screenshots, and
17587
17525
  properties: {
17588
17526
  amount: {
17589
17527
  type: "number",
17590
- description: "Scroll amount (up / down)",
17591
- minimum: 1,
17528
+ description: "Scroll amount (positive for up, negative for down)",
17529
+ minimum: -10,
17592
17530
  maximum: 10,
17593
17531
  },
17594
- direction: {
17595
- type: "string",
17596
- enum: ["up", "down"],
17597
- },
17598
17532
  },
17599
- required: ["amount", "direction"],
17533
+ required: ["amount"],
17600
17534
  },
17601
17535
  execute: async (args, agentContext) => {
17602
- return await this.callInnerTool(async () => {
17603
- let amount = args.amount;
17604
- await this.scroll(agentContext, args.direction == "up" ? -amount : amount);
17605
- });
17536
+ return await this.callInnerTool(() => this.scroll(agentContext, args.amount));
17606
17537
  },
17607
17538
  },
17608
17539
  {
@@ -17614,7 +17545,7 @@ This is a computer GUI interface, observe the execution through screenshots, and
17614
17545
  key: {
17615
17546
  type: "string",
17616
17547
  description: "Key to press",
17617
- enum: keyboardKeys,
17548
+ enum: this.keyboardKeys,
17618
17549
  },
17619
17550
  },
17620
17551
  required: ["key"],
@@ -17632,7 +17563,7 @@ This is a computer GUI interface, observe the execution through screenshots, and
17632
17563
  keys: {
17633
17564
  type: "string",
17634
17565
  description: "Key combination to press",
17635
- enum: keyboardKeys,
17566
+ enum: this.keyboardKeys,
17636
17567
  },
17637
17568
  },
17638
17569
  required: ["keys"],
@@ -17672,23 +17603,20 @@ This is a computer GUI interface, observe the execution through screenshots, and
17672
17603
  },
17673
17604
  {
17674
17605
  name: "wait",
17675
- noPlan: true,
17676
17606
  description: "Wait for specified duration",
17677
17607
  parameters: {
17678
17608
  type: "object",
17679
17609
  properties: {
17680
17610
  duration: {
17681
17611
  type: "number",
17682
- description: "Duration in millisecond",
17683
- default: 500,
17684
- minimum: 200,
17685
- maximum: 2000,
17612
+ description: "Duration in seconds",
17613
+ default: 0.5,
17686
17614
  },
17687
17615
  },
17688
17616
  required: ["duration"],
17689
17617
  },
17690
17618
  execute: async (args, agentContext) => {
17691
- return await this.callInnerTool(() => sleep((args.duration || 200)));
17619
+ return await this.callInnerTool(() => sleep((args.duration || 0.5) * 1000));
17692
17620
  },
17693
17621
  },
17694
17622
  ];
@@ -17697,7 +17625,7 @@ This is a computer GUI interface, observe the execution through screenshots, and
17697
17625
  let lastMessage = messages[messages.length - 1];
17698
17626
  if (lastMessage.role == "tool" &&
17699
17627
  lastMessage.content.filter((t) => t.type == "tool-result").length > 0) {
17700
- await sleep(300);
17628
+ await sleep(200);
17701
17629
  let result = await this.screenshot(agentContext);
17702
17630
  let image = toImage(result.imageBase64);
17703
17631
  messages.push({
@@ -17732,15 +17660,12 @@ This is a computer GUI interface, observe the execution through screenshots, and
17732
17660
 
17733
17661
  class BaseBrowserAgent extends Agent {
17734
17662
  async go_back(agentContext) {
17735
- try {
17736
- await this.execute_script(agentContext, () => {
17737
- window.navigation.back();
17738
- }, []);
17739
- await sleep(100);
17740
- }
17741
- catch (e) { }
17663
+ await this.execute_script(agentContext, () => {
17664
+ return window.navigation.back();
17665
+ }, []);
17666
+ await sleep(200);
17742
17667
  }
17743
- async extract_content(agentContext, variable_name) {
17668
+ async extract_content(agentContext) {
17744
17669
  let content = await this.execute_script(agentContext, () => {
17745
17670
  return window.document.body.innerText
17746
17671
  .replaceAll(/\n+/g, "\n")
@@ -17748,37 +17673,19 @@ class BaseBrowserAgent extends Agent {
17748
17673
  .trim();
17749
17674
  }, []);
17750
17675
  let pageInfo = await this.get_current_page(agentContext);
17751
- let result = `title: ${pageInfo.title}\npage_url: ${pageInfo.url}\npage_content: \n${content}`;
17752
- if (variable_name) {
17753
- agentContext.context.variables.set(variable_name, result);
17754
- }
17755
- return result;
17676
+ return `title: ${pageInfo.title}\npage_url: ${pageInfo.url}\npage_content: \n${content}`;
17756
17677
  }
17757
17678
  async controlMcpTools(agentContext, messages, loopNum) {
17758
- if (loopNum > 0) {
17759
- let url = null;
17760
- try {
17761
- url = (await this.get_current_page(agentContext)).url;
17762
- }
17763
- catch (e) { }
17764
- let lastUrl = agentContext.variables.get("lastUrl");
17765
- agentContext.variables.set("lastUrl", url);
17766
- return {
17767
- mcpTools: loopNum == 0 || url != lastUrl,
17768
- mcpParams: {
17769
- environment: "browser",
17770
- browser_url: url,
17771
- },
17772
- };
17773
- }
17774
- else {
17775
- return {
17776
- mcpTools: true,
17777
- mcpParams: {
17778
- environment: "browser",
17779
- },
17780
- };
17781
- }
17679
+ let url = (await this.get_current_page(agentContext)).url;
17680
+ let lastUrl = agentContext.variables.get("lastUrl");
17681
+ agentContext.variables.set("lastUrl", url);
17682
+ return {
17683
+ mcpTools: loopNum == 0 || url != lastUrl,
17684
+ mcpParams: {
17685
+ environment: "browser",
17686
+ browser_url: url,
17687
+ },
17688
+ };
17782
17689
  }
17783
17690
  toolExecuter(mcpClient, name) {
17784
17691
  return {
@@ -17792,7 +17699,7 @@ class BaseBrowserAgent extends Agent {
17792
17699
  environment: "browser",
17793
17700
  agent_name: agentContext.agent.Name,
17794
17701
  browser_url: agentContext.variables.get("lastUrl"),
17795
- },
17702
+ }
17796
17703
  });
17797
17704
  if (result.extInfo &&
17798
17705
  result.extInfo["javascript"] &&
@@ -17832,42 +17739,6 @@ class BaseBrowserAgent extends Agent {
17832
17739
  };
17833
17740
  }, []);
17834
17741
  }
17835
- lastToolResult(messages) {
17836
- let lastMessage = messages[messages.length - 1];
17837
- if (lastMessage.role != "tool") {
17838
- return null;
17839
- }
17840
- let toolResult = lastMessage.content.filter((t) => t.type == "tool-result")[0];
17841
- if (!toolResult) {
17842
- return null;
17843
- }
17844
- let result = toolResult.result;
17845
- let isError = toolResult.isError;
17846
- for (let i = messages.length - 2; i > 0; i--) {
17847
- if (messages[i].role !== "assistant" ||
17848
- typeof messages[i].content == "string") {
17849
- continue;
17850
- }
17851
- for (let j = 0; j < messages[i].content.length; j++) {
17852
- let content = messages[i].content[j];
17853
- if (typeof content !== "string" && content.type !== "tool-call") {
17854
- continue;
17855
- }
17856
- let toolUse = content;
17857
- if (toolResult.toolCallId != toolUse.toolCallId) {
17858
- continue;
17859
- }
17860
- return {
17861
- id: toolResult.toolCallId,
17862
- toolName: toolUse.toolName,
17863
- args: toolUse.args,
17864
- result,
17865
- isError,
17866
- };
17867
- }
17868
- }
17869
- return null;
17870
- }
17871
17742
  async execute_mcp_script(agentContext, script) {
17872
17743
  return;
17873
17744
  }
@@ -17894,7 +17765,7 @@ function run_build_dom_tree() {
17894
17765
  return window.clickable_elements[highlightIndex];
17895
17766
  }
17896
17767
  function remove_highlight() {
17897
- let highlight = document.getElementById('eko-highlight-container');
17768
+ let highlight = document.getElementById('playwright-highlight-container');
17898
17769
  if (highlight) {
17899
17770
  highlight.remove();
17900
17771
  }
@@ -17954,10 +17825,6 @@ function run_build_dom_tree() {
17954
17825
  for (let i = 0; i < includeAttributes.length; i++) {
17955
17826
  let key = includeAttributes[i];
17956
17827
  let value = node.attributes[key];
17957
- if (key == "class" && value && value.length > 30) {
17958
- let classList = value.split(" ").slice(0, 3);
17959
- value = classList.join(" ");
17960
- }
17961
17828
  if (key && value) {
17962
17829
  attributes_str += ` ${key}="${value}"`;
17963
17830
  }
@@ -18036,10 +17903,10 @@ function run_build_dom_tree() {
18036
17903
  let highlightIndex = 0; // Reset highlight index
18037
17904
  function highlightElement(element, index, parentIframe = null) {
18038
17905
  // Create or get highlight container
18039
- let container = document.getElementById('eko-highlight-container');
17906
+ let container = document.getElementById('playwright-highlight-container');
18040
17907
  if (!container) {
18041
17908
  container = document.createElement('div');
18042
- container.id = 'eko-highlight-container';
17909
+ container.id = 'playwright-highlight-container';
18043
17910
  container.style.position = 'fixed';
18044
17911
  container.style.pointerEvents = 'none';
18045
17912
  container.style.top = '0';
@@ -18092,7 +17959,7 @@ function run_build_dom_tree() {
18092
17959
  overlay.style.height = `${rect.height}px`;
18093
17960
  // Create label
18094
17961
  const label = document.createElement('div');
18095
- label.className = 'eko-highlight-label';
17962
+ label.className = 'playwright-highlight-label';
18096
17963
  label.style.position = 'absolute';
18097
17964
  label.style.background = baseColor;
18098
17965
  label.style.color = 'white';
@@ -18126,7 +17993,7 @@ function run_build_dom_tree() {
18126
17993
  container.appendChild(overlay);
18127
17994
  container.appendChild(label);
18128
17995
  // Store reference for cleanup
18129
- element.setAttribute('eko-user-highlight-id', `eko-highlight-${index}`);
17996
+ element.setAttribute('browser-user-highlight-id', `playwright-highlight-${index}`);
18130
17997
  return index + 1;
18131
17998
  }
18132
17999
  // Helper function to generate XPath as a tree
@@ -18471,17 +18338,15 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18471
18338
  - Screenshot help verify element positions and relationships. Labels may sometimes overlap, so extracted elements are used to verify the correct elements.
18472
18339
  - In addition to screenshot, simplified information about interactive elements is returned, with element indexes corresponding to those in the screenshot.
18473
18340
  - This tool can ONLY screenshot the VISIBLE content. If a complete content is required, use 'extract_content' instead.
18474
- - If the webpage content hasn't loaded, please use the \`wait\` tool to allow time for the content to load.
18475
18341
  * ELEMENT INTERACTION:
18476
18342
  - Only use indexes that exist in the provided element list
18477
18343
  - Each element has a unique index number (e.g., "[33]:<button>")
18478
18344
  - Elements marked with "[]:" are non-interactive (for context only)
18479
- * ERROR HANDLING:
18345
+ * NAVIGATION & ERROR HANDLING:
18480
18346
  - If no suitable elements exist, use other functions to complete the task
18481
- - If stuck, try alternative approaches, don't refuse tasks
18347
+ - If stuck, try alternative approaches
18482
18348
  - Handle popups/cookies by accepting or closing them
18483
- - Use scroll to find elements you are looking for
18484
- - When extracting content, prioritize using extract_content, only scroll when you need to load more content`;
18349
+ - Use scroll to find elements you are looking for`;
18485
18350
  const _tools_ = [];
18486
18351
  super({
18487
18352
  name: AGENT_NAME,
@@ -18499,9 +18364,6 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18499
18364
  }
18500
18365
  async input_text(agentContext, index, text, enter) {
18501
18366
  await this.execute_script(agentContext, typing, [{ index, text, enter }]);
18502
- if (enter) {
18503
- await sleep(200);
18504
- }
18505
18367
  }
18506
18368
  async click_element(agentContext, index, num_clicks, button) {
18507
18369
  await this.execute_script(agentContext, do_click, [
@@ -18518,32 +18380,18 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18518
18380
  }
18519
18381
  async scroll_mouse_wheel(agentContext, amount) {
18520
18382
  await this.execute_script(agentContext, (amount) => {
18521
- let viewportHeight = window.innerHeight ||
18522
- document.documentElement.clientHeight ||
18523
- document.body.clientHeight;
18524
- let y = Math.max(20, Math.min(viewportHeight / 10, 200));
18525
- window.scrollBy(0, y * amount);
18383
+ window.scrollBy(0, amount * 50);
18526
18384
  }, [amount]);
18527
18385
  await sleep(200);
18528
18386
  }
18529
18387
  async hover_to_element(agentContext, index) {
18530
18388
  await this.execute_script(agentContext, hover_to, [{ index }]);
18531
18389
  }
18532
- async get_select_options(agentContext, index) {
18533
- return await this.execute_script(agentContext, get_select_options, [
18534
- { index },
18535
- ]);
18536
- }
18537
- async select_option(agentContext, index, option) {
18538
- return await this.execute_script(agentContext, select_option, [
18539
- { index, option },
18540
- ]);
18541
- }
18542
18390
  async screenshot_and_html(agentContext) {
18543
18391
  try {
18544
18392
  let element_result = null;
18545
18393
  for (let i = 0; i < 5; i++) {
18546
- await sleep(200);
18394
+ await sleep(300);
18547
18395
  await this.execute_script(agentContext, run_build_dom_tree, []);
18548
18396
  element_result = (await this.execute_script(agentContext, () => {
18549
18397
  return window.get_clickable_elements(true);
@@ -18552,9 +18400,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18552
18400
  break;
18553
18401
  }
18554
18402
  }
18555
- await sleep(50);
18556
18403
  let screenshot = await this.screenshot(agentContext);
18557
- // agentContext.variables.set("selector_map", element_result.selector_map);
18558
18404
  let pseudoHtml = element_result.element_str;
18559
18405
  return {
18560
18406
  imageBase64: screenshot.imageBase64,
@@ -18692,22 +18538,15 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18692
18538
  properties: {
18693
18539
  amount: {
18694
18540
  type: "number",
18695
- description: "Scroll amount (up / down)",
18696
- minimum: 1,
18541
+ description: "Scroll amount (positive for up, negative for down)",
18542
+ minimum: -10,
18697
18543
  maximum: 10,
18698
18544
  },
18699
- direction: {
18700
- type: "string",
18701
- enum: ["up", "down"],
18702
- },
18703
18545
  },
18704
- required: ["amount", "direction"],
18546
+ required: ["amount"],
18705
18547
  },
18706
18548
  execute: async (args, agentContext) => {
18707
- return await this.callInnerTool(async () => {
18708
- let amount = args.amount;
18709
- await this.scroll_mouse_wheel(agentContext, args.direction == "up" ? -amount : amount);
18710
- });
18549
+ return await this.callInnerTool(() => this.scroll_mouse_wheel(agentContext, args.amount));
18711
18550
  },
18712
18551
  },
18713
18552
  {
@@ -18729,7 +18568,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18729
18568
  },
18730
18569
  {
18731
18570
  name: "extract_content",
18732
- description: "Extract the text content of the current webpage, obtain webpage data through this tool.",
18571
+ description: "Extract the text content of the current webpage.",
18733
18572
  parameters: {
18734
18573
  type: "object",
18735
18574
  properties: {},
@@ -18738,102 +18577,31 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
18738
18577
  return await this.callInnerTool(() => this.extract_content(agentContext));
18739
18578
  },
18740
18579
  },
18741
- {
18742
- name: "get_select_options",
18743
- description: "Get all options from a native dropdown element",
18744
- parameters: {
18745
- type: "object",
18746
- properties: {
18747
- index: {
18748
- type: "number",
18749
- description: "The index of the element to select",
18750
- },
18751
- },
18752
- required: ["index"],
18753
- },
18754
- execute: async (args, agentContext) => {
18755
- return await this.callInnerTool(() => this.get_select_options(agentContext, args.index));
18756
- },
18757
- },
18758
- {
18759
- name: "select_option",
18760
- description: "Select the native dropdown option",
18761
- parameters: {
18762
- type: "object",
18763
- properties: {
18764
- index: {
18765
- type: "number",
18766
- description: "The index of the element to select",
18767
- },
18768
- option: {
18769
- type: "string",
18770
- description: "Text option",
18771
- },
18772
- },
18773
- required: ["index", "option"],
18774
- },
18775
- execute: async (args, agentContext) => {
18776
- return await this.callInnerTool(() => this.select_option(agentContext, args.index, args.option));
18777
- },
18778
- },
18779
- {
18780
- name: "get_all_tabs",
18781
- description: "Get all tabs of the current browser",
18782
- parameters: {
18783
- type: "object",
18784
- properties: {},
18785
- },
18786
- execute: async (args, agentContext) => {
18787
- return await this.callInnerTool(() => this.get_all_tabs(agentContext));
18788
- },
18789
- },
18790
- {
18791
- name: "switch_tab",
18792
- description: "Switch to the specified tab page",
18793
- parameters: {
18794
- type: "object",
18795
- properties: {
18796
- tabId: {
18797
- type: "number",
18798
- description: "Tab ID, obtained through get_all_tabs",
18799
- },
18800
- },
18801
- required: ["tabId"],
18802
- },
18803
- execute: async (args, agentContext) => {
18804
- return await this.callInnerTool(() => this.switch_tab(agentContext, args.tabId));
18805
- },
18806
- },
18807
18580
  {
18808
18581
  name: "wait",
18809
- noPlan: true,
18810
18582
  description: "Wait for specified duration",
18811
18583
  parameters: {
18812
18584
  type: "object",
18813
18585
  properties: {
18814
18586
  duration: {
18815
18587
  type: "number",
18816
- description: "Duration in millisecond",
18817
- default: 500,
18818
- minimum: 200,
18819
- maximum: 2000,
18588
+ description: "Duration in seconds",
18589
+ default: 0.5,
18820
18590
  },
18821
18591
  },
18822
18592
  required: ["duration"],
18823
18593
  },
18824
18594
  execute: async (args, agentContext) => {
18825
- return await this.callInnerTool(() => sleep((args.duration || 200)));
18595
+ return await this.callInnerTool(() => sleep((args.duration || 0.5) * 1000));
18826
18596
  },
18827
18597
  },
18828
18598
  ];
18829
18599
  }
18830
18600
  async handleMessages(agentContext, messages) {
18831
- let lastTool = this.lastToolResult(messages);
18832
- if (lastTool &&
18833
- lastTool.toolName !== "extract_content" &&
18834
- lastTool.toolName !== "get_all_tabs" &&
18835
- lastTool.toolName !== "variable_storage") {
18836
- await sleep(300);
18601
+ let lastMessage = messages[messages.length - 1];
18602
+ if (lastMessage.role == "tool" &&
18603
+ lastMessage.content.filter((t) => t.type == "tool-result").length > 0) {
18604
+ await sleep(200);
18837
18605
  let result = await this.screenshot_and_html(agentContext);
18838
18606
  let image = toImage(result.imageBase64);
18839
18607
  messages.push({
@@ -18890,10 +18658,6 @@ function typing(params) {
18890
18658
  }
18891
18659
  else {
18892
18660
  input.value = text;
18893
- if (input.__proto__) {
18894
- let value_setter = Object.getOwnPropertyDescriptor(input.__proto__, "value")?.set;
18895
- value_setter && value_setter.call(input, text);
18896
- }
18897
18661
  }
18898
18662
  input.dispatchEvent(new Event("input", { bubbles: true }));
18899
18663
  if (enter) {
@@ -18953,45 +18717,6 @@ function hover_to(params) {
18953
18717
  element.dispatchEvent(event);
18954
18718
  return true;
18955
18719
  }
18956
- function get_select_options(params) {
18957
- let element = window.get_highlight_element(params.index);
18958
- if (!element || element.tagName.toUpperCase() !== "SELECT") {
18959
- return "Error: Not a select element";
18960
- }
18961
- return {
18962
- options: Array.from(element.options).map((opt) => ({
18963
- index: opt.index,
18964
- text: opt.text.trim(),
18965
- value: opt.value,
18966
- })),
18967
- name: element.name,
18968
- };
18969
- }
18970
- function select_option(params) {
18971
- let element = window.get_highlight_element(params.index);
18972
- if (!element || element.tagName.toUpperCase() !== "SELECT") {
18973
- return "Error: Not a select element";
18974
- }
18975
- let text = params.option.trim();
18976
- let option = Array.from(element.options).find((opt) => opt.text.trim() === text);
18977
- if (!option) {
18978
- option = Array.from(element.options).find((opt) => opt.value.trim() === text);
18979
- }
18980
- if (!option) {
18981
- return {
18982
- success: false,
18983
- error: "Select Option not found",
18984
- availableOptions: Array.from(element.options).map((o) => o.text.trim()),
18985
- };
18986
- }
18987
- element.value = option.value;
18988
- element.dispatchEvent(new Event("change"));
18989
- return {
18990
- success: true,
18991
- selectedValue: option.value,
18992
- selectedText: option.text.trim(),
18993
- };
18994
- }
18995
18720
 
18996
18721
  class BaseBrowserScreenAgent extends BaseBrowserAgent {
18997
18722
  constructor(llms, ext_tools, mcpClient) {
@@ -19013,7 +18738,7 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
19013
18738
  tools: _tools_,
19014
18739
  llms: llms,
19015
18740
  mcpClient: mcpClient,
19016
- planDescription: "Browser operation agent, interact with the browser using the mouse and keyboard.",
18741
+ planDescription: "Browser operation agent, interact with the browser using the mouse and keyboard."
19017
18742
  });
19018
18743
  let init_tools = this.buildInitTools();
19019
18744
  if (ext_tools && ext_tools.length > 0) {
@@ -19141,27 +18866,20 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
19141
18866
  properties: {
19142
18867
  amount: {
19143
18868
  type: "number",
19144
- description: "Scroll amount (up / down)",
19145
- minimum: 1,
18869
+ description: "Scroll amount (positive for up, negative for down)",
18870
+ minimum: -10,
19146
18871
  maximum: 10,
19147
18872
  },
19148
- direction: {
19149
- type: "string",
19150
- enum: ["up", "down"],
19151
- },
19152
18873
  },
19153
- required: ["amount", "direction"],
18874
+ required: ["amount"],
19154
18875
  },
19155
18876
  execute: async (args, agentContext) => {
19156
- return await this.callInnerTool(async () => {
19157
- let amount = args.amount;
19158
- await this.scroll(agentContext, args.direction == "up" ? -amount : amount);
19159
- });
18877
+ return await this.callInnerTool(() => this.scroll(agentContext, args.amount));
19160
18878
  },
19161
18879
  },
19162
18880
  {
19163
18881
  name: "extract_content",
19164
- description: "Extract the text content of the current webpage, obtain webpage data through this tool.",
18882
+ description: "Extract the text content of the current webpage.",
19165
18883
  parameters: {
19166
18884
  type: "object",
19167
18885
  properties: {},
@@ -19217,64 +18935,31 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
19217
18935
  return await this.callInnerTool(() => this.drag_and_drop(agentContext, args.x1, args.y1, args.x2, args.y2));
19218
18936
  },
19219
18937
  },
19220
- {
19221
- name: "get_all_tabs",
19222
- description: "Get all tabs of the current browser",
19223
- parameters: {
19224
- type: "object",
19225
- properties: {},
19226
- },
19227
- execute: async (args, agentContext) => {
19228
- return await this.callInnerTool(() => this.get_all_tabs(agentContext));
19229
- },
19230
- },
19231
- {
19232
- name: "switch_tab",
19233
- description: "Switch to the specified tab page",
19234
- parameters: {
19235
- type: "object",
19236
- properties: {
19237
- tabId: {
19238
- type: "number",
19239
- description: "Tab ID, obtained through get_all_tabs",
19240
- },
19241
- },
19242
- required: ["tabId"],
19243
- },
19244
- execute: async (args, agentContext) => {
19245
- return await this.callInnerTool(() => this.switch_tab(agentContext, args.tabId));
19246
- },
19247
- },
19248
18938
  {
19249
18939
  name: "wait",
19250
- noPlan: true,
19251
18940
  description: "Wait for specified duration",
19252
18941
  parameters: {
19253
18942
  type: "object",
19254
18943
  properties: {
19255
18944
  duration: {
19256
18945
  type: "number",
19257
- description: "Duration in millisecond",
19258
- default: 500,
19259
- minimum: 200,
19260
- maximum: 2000,
18946
+ description: "Duration in seconds",
18947
+ default: 0.5,
19261
18948
  },
19262
18949
  },
19263
18950
  required: ["duration"],
19264
18951
  },
19265
18952
  execute: async (args, agentContext) => {
19266
- return await this.callInnerTool(() => sleep((args.duration || 200)));
18953
+ return await this.callInnerTool(() => sleep((args.duration || 0.5) * 1000));
19267
18954
  },
19268
18955
  },
19269
18956
  ];
19270
18957
  }
19271
18958
  async handleMessages(agentContext, messages) {
19272
- let lastTool = this.lastToolResult(messages);
19273
- if (lastTool &&
19274
- lastTool.toolName !== "extract_content" &&
19275
- lastTool.toolName !== "get_all_tabs" &&
19276
- lastTool.toolName !== "variable_storage") {
19277
- await sleep(300);
18959
+ let lastMessage = messages[messages.length - 1];
18960
+ if (lastMessage.role == "tool" &&
18961
+ lastMessage.content.filter((t) => t.type == "tool-result").length > 0) {
18962
+ await sleep(200);
19278
18963
  let result = await this.screenshot(agentContext);
19279
18964
  let image = toImage(result.imageBase64);
19280
18965
  messages.push({