@jarvis-agent/core 0.1.5 → 0.1.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/dist/index.esm.js CHANGED
@@ -1,4 +1,4 @@
1
- const config$1 = {
1
+ const defaultConfig = {
2
2
  name: "Eko",
3
3
  platform: "mac",
4
4
  maxReactNum: 500,
@@ -14,7 +14,18 @@ const config$1 = {
14
14
  parallelToolCalls: true,
15
15
  expertMode: false,
16
16
  expertModeTodoLoopNum: 10,
17
+ streamFirstTimeout: 30000,
18
+ streamTokenTimeout: 180000,
17
19
  };
20
+ let config$1 = { ...defaultConfig };
21
+ function mergeGlobalConfig(userConfig) {
22
+ if (userConfig) {
23
+ config$1 = { ...defaultConfig, ...userConfig };
24
+ }
25
+ else {
26
+ config$1 = { ...defaultConfig };
27
+ }
28
+ }
18
29
 
19
30
  var LogLevel;
20
31
  (function (LogLevel) {
@@ -23926,6 +23937,7 @@ class Context {
23926
23937
  this.chain = chain;
23927
23938
  this.variables = new Map();
23928
23939
  this.controller = new AbortController();
23940
+ mergeGlobalConfig(config.globalConfig);
23929
23941
  }
23930
23942
  async checkAborted(noCheckPause) {
23931
23943
  if (this.controller.signal.aborted) {
@@ -29806,8 +29818,7 @@ async function compressAgentMessages(agentContext, messages, tools) {
29806
29818
  }
29807
29819
  async function doCompressAgentMessages(agentContext, messages, tools) {
29808
29820
  const ekoConfig = agentContext.context.config;
29809
- const rlm = new RetryLanguageModel(ekoConfig.llms, ekoConfig.compressLlms);
29810
- rlm.setContext(agentContext);
29821
+ const rlm = new RetryLanguageModel(ekoConfig.llms, ekoConfig.compressLlms, ekoConfig.globalConfig?.streamFirstTimeout, ekoConfig.globalConfig?.streamTokenTimeout, agentContext);
29811
29822
  // extract used tool
29812
29823
  const usedTools = extractUsedTool(messages, tools);
29813
29824
  const snapshotTool = new TaskSnapshotTool();
@@ -31225,8 +31236,7 @@ class Planner {
31225
31236
  }
31226
31237
  async doPlan(taskPrompt, messages, saveHistory, retryNum = 0) {
31227
31238
  const config = this.context.config;
31228
- const rlm = new RetryLanguageModel(config.llms, config.planLlms);
31229
- rlm.setContext(this.context);
31239
+ const rlm = new RetryLanguageModel(config.llms, config.planLlms, config.globalConfig?.streamFirstTimeout, config.globalConfig?.streamTokenTimeout, this.context);
31230
31240
  const request = {
31231
31241
  maxTokens: 8192,
31232
31242
  temperature: 0.7,
@@ -31683,8 +31693,7 @@ async function checkTaskReplan(agentContext) {
31683
31693
  if (!chain.planRequest || !chain.planResult) {
31684
31694
  return false;
31685
31695
  }
31686
- const rlm = new RetryLanguageModel(context.config.llms, context.config.planLlms);
31687
- rlm.setContext(agentContext);
31696
+ const rlm = new RetryLanguageModel(context.config.llms, context.config.planLlms, context.config.globalConfig?.streamFirstTimeout, context.config.globalConfig?.streamTokenTimeout, agentContext);
31688
31697
  const agentExecution = getAgentExecutionPrompt(agentContext);
31689
31698
  const prompt = `# Task Execution Status
31690
31699
  ${agentExecution}
@@ -32719,7 +32728,7 @@ class EkoDialogue {
32719
32728
  params.messageId = params.messageId ?? this.memory.genMessageId();
32720
32729
  await this.addUserMessage(params.user, params.messageId);
32721
32730
  }
32722
- const rlm = new RetryLanguageModel(this.config.llms, this.config.chatLlms);
32731
+ const rlm = new RetryLanguageModel(this.config.llms, this.config.chatLlms, this.config.globalConfig?.streamFirstTimeout, this.config.globalConfig?.streamTokenTimeout);
32723
32732
  for (let i = 0; i < 15; i++) {
32724
32733
  const messages = this.memory.buildMessages();
32725
32734
  const chatTools = [...this.buildInnerTools(params), ...this.tools];
@@ -33485,8 +33494,7 @@ request_help: Request assistance from the user; for instance, when an operation
33485
33494
  }
33486
33495
  try {
33487
33496
  let imageResult = (await screenshot.call(agentContext.agent, agentContext));
33488
- let rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms);
33489
- rlm.setContext(agentContext);
33497
+ let rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms, agentContext.context.config.globalConfig?.streamFirstTimeout, agentContext.context.config.globalConfig?.streamTokenTimeout, agentContext);
33490
33498
  let image = toImage(imageResult.imageBase64);
33491
33499
  let request = {
33492
33500
  messages: [
@@ -33734,8 +33742,7 @@ class WatchTriggerTool {
33734
33742
  const start = new Date().getTime();
33735
33743
  const timeout = (args.timeout || 5) * 60000;
33736
33744
  const frequency = Math.max(500, (args.frequency || 1) * 1000);
33737
- const rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms);
33738
- rlm.setContext(agentContext);
33745
+ const rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms, agentContext.context.config.globalConfig?.streamFirstTimeout, agentContext.context.config.globalConfig?.streamTokenTimeout, agentContext);
33739
33746
  while (new Date().getTime() - start < timeout) {
33740
33747
  await agentContext.context.checkAborted();
33741
33748
  await new Promise((resolve) => setTimeout(resolve, frequency));
@@ -34226,25 +34233,31 @@ class Agent {
34226
34233
  this.description = params.description;
34227
34234
  this.tools = params.tools;
34228
34235
  this.llms = params.llms;
34229
- this.mcpClient = params.mcpClient;
34236
+ this.mcpClients = params.mcpClients || (params.mcpClient ? [params.mcpClient] : []);
34230
34237
  this.planDescription = params.planDescription;
34231
34238
  this.requestHandler = params.requestHandler;
34232
34239
  }
34233
34240
  async run(context, agentChain) {
34234
- const mcpClient = this.mcpClient || context.config.defaultMcpClient;
34241
+ const mcpClients = this.mcpClients.length > 0
34242
+ ? this.mcpClients
34243
+ : (context.config.defaultMcpClient ? [context.config.defaultMcpClient] : []);
34235
34244
  const agentContext = new AgentContext(context, this, agentChain);
34236
34245
  try {
34237
34246
  this.agentContext = agentContext;
34238
- mcpClient &&
34239
- !mcpClient.isConnected() &&
34240
- (await mcpClient.connect(context.controller.signal));
34241
- return await this.runWithContext(agentContext, mcpClient, config$1.maxReactNum);
34247
+ for (const client of mcpClients) {
34248
+ if (!client.isConnected()) {
34249
+ await client.connect(context.controller.signal);
34250
+ }
34251
+ }
34252
+ return await this.runWithContext(agentContext, mcpClients, config$1.maxReactNum);
34242
34253
  }
34243
34254
  finally {
34244
- mcpClient && (await mcpClient.close());
34255
+ for (const client of mcpClients) {
34256
+ await client.close();
34257
+ }
34245
34258
  }
34246
34259
  }
34247
- async runWithContext(agentContext, mcpClient, maxReactNum = 100, historyMessages = []) {
34260
+ async runWithContext(agentContext, mcpClients, maxReactNum = 100, historyMessages = []) {
34248
34261
  let loopNum = 0;
34249
34262
  let checkNum = 0;
34250
34263
  this.agentContext = agentContext;
@@ -34267,18 +34280,20 @@ class Agent {
34267
34280
  },
34268
34281
  ];
34269
34282
  agentContext.messages = messages;
34270
- const rlm = new RetryLanguageModel(context.config.llms, this.llms);
34271
- rlm.setContext(agentContext);
34283
+ const rlm = new RetryLanguageModel(context.config.llms, this.llms, context.config.globalConfig?.streamFirstTimeout, context.config.globalConfig?.streamTokenTimeout, agentContext);
34284
+ const resolvedMcpClients = Array.isArray(mcpClients)
34285
+ ? mcpClients
34286
+ : (mcpClients ? [mcpClients] : []);
34272
34287
  let agentTools = tools;
34273
34288
  while (loopNum < maxReactNum) {
34274
34289
  await context.checkAborted();
34275
- if (mcpClient) {
34290
+ if (resolvedMcpClients.length > 0) {
34276
34291
  const controlMcp = await this.controlMcpTools(agentContext, messages, loopNum);
34277
34292
  if (controlMcp.mcpTools) {
34278
- const mcpTools = await this.listTools(context, mcpClient, agentNode, controlMcp.mcpParams);
34293
+ const mcpTools = await this.listMcpTools(context, resolvedMcpClients, agentNode, controlMcp.mcpParams);
34279
34294
  const usedTools = extractUsedTool(messages, agentTools);
34280
- const _agentTools = mergeTools(tools, usedTools);
34281
- agentTools = mergeTools(_agentTools, mcpTools);
34295
+ const mergedTools = mergeTools(tools, usedTools);
34296
+ agentTools = mergeTools(mergedTools, mcpTools);
34282
34297
  }
34283
34298
  }
34284
34299
  await this.handleMessages(agentContext, messages, tools);
@@ -34435,33 +34450,32 @@ class Agent {
34435
34450
  async extSysPrompt(agentContext, tools) {
34436
34451
  return "";
34437
34452
  }
34438
- async listTools(context, mcpClient, agentNode, mcpParams) {
34439
- try {
34440
- if (!mcpClient.isConnected()) {
34441
- await mcpClient.connect(context.controller.signal);
34453
+ async listMcpTools(context, clients, agentNode, mcpParams) {
34454
+ const allTools = [];
34455
+ for (const client of clients) {
34456
+ try {
34457
+ if (!client.isConnected()) {
34458
+ await client.connect(context.controller.signal);
34459
+ }
34460
+ const list = await client.listTools({
34461
+ taskId: context.taskId,
34462
+ nodeId: agentNode?.id,
34463
+ environment: config$1.platform,
34464
+ agent_name: agentNode?.name || this.name,
34465
+ params: {},
34466
+ prompt: agentNode?.task || context.chain.taskPrompt,
34467
+ ...(mcpParams || {}),
34468
+ }, context.controller.signal);
34469
+ for (const toolSchema of list) {
34470
+ const execute = this.toolExecuter(client, toolSchema.name);
34471
+ allTools.push(new McpTool(new ToolWrapper(toolSchema, execute)));
34472
+ }
34473
+ }
34474
+ catch (e) {
34475
+ Log.error("Mcp listTools error", e);
34442
34476
  }
34443
- let list = await mcpClient.listTools({
34444
- taskId: context.taskId,
34445
- nodeId: agentNode?.id,
34446
- environment: config$1.platform,
34447
- agent_name: agentNode?.name || this.name,
34448
- params: {},
34449
- prompt: agentNode?.task || context.chain.taskPrompt,
34450
- ...(mcpParams || {}),
34451
- }, context.controller.signal);
34452
- let mcpTools = [];
34453
- for (let i = 0; i < list.length; i++) {
34454
- let toolSchema = list[i];
34455
- let execute = this.toolExecuter(mcpClient, toolSchema.name);
34456
- let toolWrapper = new ToolWrapper(toolSchema, execute);
34457
- mcpTools.push(new McpTool(toolWrapper));
34458
- }
34459
- return mcpTools;
34460
- }
34461
- catch (e) {
34462
- Log.error("Mcp listTools error", e);
34463
- return [];
34464
34477
  }
34478
+ return allTools;
34465
34479
  }
34466
34480
  async controlMcpTools(agentContext, messages, loopNum) {
34467
34481
  return {
@@ -34504,9 +34518,9 @@ class Agent {
34504
34518
  };
34505
34519
  }
34506
34520
  async loadTools(context) {
34507
- if (this.mcpClient) {
34508
- let mcpTools = await this.listTools(context, this.mcpClient);
34509
- if (mcpTools && mcpTools.length > 0) {
34521
+ if (this.mcpClients.length > 0) {
34522
+ const mcpTools = await this.listMcpTools(context, this.mcpClients);
34523
+ if (mcpTools.length > 0) {
34510
34524
  return mergeTools(this.tools, mcpTools);
34511
34525
  }
34512
34526
  }
@@ -34538,8 +34552,11 @@ class Agent {
34538
34552
  get PlanDescription() {
34539
34553
  return this.planDescription;
34540
34554
  }
34555
+ get McpClients() {
34556
+ return this.mcpClients;
34557
+ }
34541
34558
  get McpClient() {
34542
- return this.mcpClient;
34559
+ return this.mcpClients[0];
34543
34560
  }
34544
34561
  get AgentContext() {
34545
34562
  return this.agentContext;
@@ -34548,8 +34565,8 @@ class Agent {
34548
34565
 
34549
34566
  const AGENT_NAME$3 = "File";
34550
34567
  class BaseFileAgent extends Agent {
34551
- constructor(work_path, llms, ext_tools, mcpClient, planDescription) {
34552
- const _tools_ = [];
34568
+ constructor(work_path, llms, ext_tools, mcpClients, planDescription) {
34569
+ const initTools = [];
34553
34570
  const prompt = work_path
34554
34571
  ? `Your working directory is: ${work_path}
34555
34572
  - When viewing file lists and outputting file paths, always include the working directory
@@ -34562,9 +34579,9 @@ class BaseFileAgent extends Agent {
34562
34579
  super({
34563
34580
  name: AGENT_NAME$3,
34564
34581
  description: `You are a file agent, handling file-related tasks such as creating, finding, reading, modifying files, etc.${prompt}`,
34565
- tools: _tools_,
34582
+ tools: initTools,
34566
34583
  llms: llms,
34567
- mcpClient: mcpClient,
34584
+ mcpClients: Array.isArray(mcpClients) ? mcpClients : (mcpClients ? [mcpClients] : []),
34568
34585
  planDescription: planDescription ||
34569
34586
  `File operation agent, handles file-related tasks such as creating, finding, reading, modifying files, etc. Only supports text file output
34570
34587
  - Output file names must be in English
@@ -34572,11 +34589,11 @@ class BaseFileAgent extends Agent {
34572
34589
  - For data-related content, combine with visualization tools for display
34573
34590
  - For visualizations, generate charts first before page generation to minimize repetitive work`,
34574
34591
  });
34575
- let init_tools = this.buildInitTools();
34592
+ let builtTools = this.buildInitTools();
34576
34593
  if (ext_tools && ext_tools.length > 0) {
34577
- init_tools = mergeTools(init_tools, ext_tools);
34594
+ builtTools = mergeTools(builtTools, ext_tools);
34578
34595
  }
34579
- init_tools.forEach((tool) => _tools_.push(tool));
34596
+ builtTools.forEach((tool) => initTools.push(tool));
34580
34597
  }
34581
34598
  async do_file_read(agentContext, path, write_variable) {
34582
34599
  let file_context = await this.file_read(agentContext, path);
@@ -34731,24 +34748,24 @@ class BaseFileAgent extends Agent {
34731
34748
 
34732
34749
  const AGENT_NAME$2 = "Shell";
34733
34750
  class BaseShellAgent extends Agent {
34734
- constructor(llms, ext_tools, mcpClient, planDescription) {
34735
- const _tools_ = [];
34751
+ constructor(llms, ext_tools, mcpClients, planDescription) {
34752
+ const initTools = [];
34736
34753
  super({
34737
34754
  name: AGENT_NAME$2,
34738
34755
  description: `Run commands in a bash shell,
34739
34756
  * You must first call create_session to create a new session when using it for the first time.
34740
34757
  * Please execute delete commands with caution, and never perform dangerous operations like \`rm -rf /\`.
34741
34758
  * Please avoid commands that may produce a very large amount of output.`,
34742
- tools: _tools_,
34759
+ tools: initTools,
34743
34760
  llms: llms,
34744
- mcpClient: mcpClient,
34761
+ mcpClients: Array.isArray(mcpClients) ? mcpClients : (mcpClients ? [mcpClients] : []),
34745
34762
  planDescription: planDescription || "Shell command agent, use to execute shell commands.",
34746
34763
  });
34747
- let init_tools = this.buildInitTools();
34764
+ let builtTools = this.buildInitTools();
34748
34765
  if (ext_tools && ext_tools.length > 0) {
34749
- init_tools = mergeTools(init_tools, ext_tools);
34766
+ builtTools = mergeTools(builtTools, ext_tools);
34750
34767
  }
34751
- init_tools.forEach((tool) => _tools_.push(tool));
34768
+ builtTools.forEach((tool) => initTools.push(tool));
34752
34769
  }
34753
34770
  buildInitTools() {
34754
34771
  return [
@@ -34813,8 +34830,8 @@ class BaseShellAgent extends Agent {
34813
34830
 
34814
34831
  const AGENT_NAME$1 = "Computer";
34815
34832
  class BaseComputerAgent extends Agent {
34816
- constructor(llms, ext_tools, mcpClient, keyboardKeys) {
34817
- const _tools_ = [];
34833
+ constructor(llms, ext_tools, mcpClients, keyboardKeys) {
34834
+ const initTools = [];
34818
34835
  super({
34819
34836
  name: AGENT_NAME$1,
34820
34837
  description: `You are a computer operation agent, who interacts with the computer using mouse and keyboard, completing specified tasks step by step based on the given tasks and screenshots. After each of your operations, you will receive the latest computer screenshot to evaluate the task execution status.
@@ -34822,9 +34839,9 @@ This is a computer GUI interface, observe the execution through screenshots, and
34822
34839
  * COMPUTER OPERATIONS:
34823
34840
  - You can operate the application using shortcuts.
34824
34841
  - If stuck, try alternative approaches`,
34825
- tools: _tools_,
34842
+ tools: initTools,
34826
34843
  llms: llms,
34827
- mcpClient: mcpClient,
34844
+ mcpClients: Array.isArray(mcpClients) ? mcpClients : (mcpClients ? [mcpClients] : []),
34828
34845
  planDescription: "Computer operation agent, interact with the computer using the mouse and keyboard."
34829
34846
  });
34830
34847
  if (!keyboardKeys) {
@@ -34855,11 +34872,11 @@ This is a computer GUI interface, observe the execution through screenshots, and
34855
34872
  ];
34856
34873
  }
34857
34874
  }
34858
- let init_tools = this.buildInitTools(keyboardKeys);
34875
+ let builtTools = this.buildInitTools(keyboardKeys);
34859
34876
  if (ext_tools && ext_tools.length > 0) {
34860
- init_tools = mergeTools(init_tools, ext_tools);
34877
+ builtTools = mergeTools(builtTools, ext_tools);
34861
34878
  }
34862
- init_tools.forEach((tool) => _tools_.push(tool));
34879
+ builtTools.forEach((tool) => initTools.push(tool));
34863
34880
  }
34864
34881
  buildInitTools(keyboardKeys) {
34865
34882
  return [
@@ -35986,7 +36003,7 @@ function run_build_dom_tree() {
35986
36003
  }
35987
36004
 
35988
36005
  class BaseBrowserLabelsAgent extends BaseBrowserAgent {
35989
- constructor(llms, ext_tools, mcpClient) {
36006
+ constructor(llms, ext_tools, mcpClients) {
35990
36007
  let description = `You are a browser operation agent, use structured commands to interact with the browser.
35991
36008
  * This is a browser GUI interface where you need to analyze webpages by taking screenshot and page element structures, and specify action sequences to complete designated tasks.
35992
36009
  * For your first visit, please start by calling either the \`navigate_to\` or \`current_page\` tool. After each action you perform, I will provide you with updated information about the current state, including page screenshots and structured element data that has been specially processed for easier analysis.
@@ -36021,20 +36038,20 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
36021
36038
  - When filling out a form, fields that are not dependent on each other should be filled simultaneously
36022
36039
  - Avoid parallel processing for dependent operations, such as those that need to wait for page loading, DOM changes, redirects, subsequent operations that depend on the results of previous operations, or operations that may interfere with each other and affect the same page elements. In these cases, please do not use parallelization.`;
36023
36040
  }
36024
- const _tools_ = [];
36041
+ const initTools = [];
36025
36042
  super({
36026
36043
  name: AGENT_NAME,
36027
36044
  description: description,
36028
- tools: _tools_,
36045
+ tools: initTools,
36029
36046
  llms: llms,
36030
- mcpClient: mcpClient,
36047
+ mcpClients: Array.isArray(mcpClients) ? mcpClients : (mcpClients ? [mcpClients] : []),
36031
36048
  planDescription: "Browser operation agent, interact with the browser using the mouse and keyboard.",
36032
36049
  });
36033
- let init_tools = this.buildInitTools();
36050
+ let builtTools = this.buildInitTools();
36034
36051
  if (ext_tools && ext_tools.length > 0) {
36035
- init_tools = mergeTools(init_tools, ext_tools);
36052
+ builtTools = mergeTools(builtTools, ext_tools);
36036
36053
  }
36037
- init_tools.forEach((tool) => _tools_.push(tool));
36054
+ builtTools.forEach((tool) => initTools.push(tool));
36038
36055
  }
36039
36056
  async input_text(agentContext, index, text, enter) {
36040
36057
  await this.execute_script(agentContext, typing, [{ index, text, enter }]);
@@ -36739,7 +36756,7 @@ function scroll_by(params) {
36739
36756
  }
36740
36757
 
36741
36758
  class BaseBrowserScreenAgent extends BaseBrowserAgent {
36742
- constructor(llms, ext_tools, mcpClient) {
36759
+ constructor(llms, ext_tools, mcpClients) {
36743
36760
  const description = `You are a browser operation agent, use a mouse and keyboard to interact with a browser.
36744
36761
  * This is a browser GUI interface, observe the webpage execution through screenshots, and specify action sequences to complete designated tasks.
36745
36762
  * For the first visit, please call the \`navigate_to\` or \`current_page\` tool first. After that, each of your actions will return a screenshot of the page.
@@ -36751,20 +36768,20 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
36751
36768
  - Wait for elements to load
36752
36769
  - Scroll pages and handle infinite scroll
36753
36770
  - YOU CAN DO ANYTHING ON THE BROWSER - including clicking on elements, filling forms, submitting data, etc.`;
36754
- const _tools_ = [];
36771
+ const initTools = [];
36755
36772
  super({
36756
36773
  name: AGENT_NAME,
36757
36774
  description: description,
36758
- tools: _tools_,
36775
+ tools: initTools,
36759
36776
  llms: llms,
36760
- mcpClient: mcpClient,
36777
+ mcpClients: Array.isArray(mcpClients) ? mcpClients : (mcpClients ? [mcpClients] : []),
36761
36778
  planDescription: "Browser operation agent, interact with the browser using the mouse and keyboard.",
36762
36779
  });
36763
- let init_tools = this.buildInitTools();
36780
+ let builtTools = this.buildInitTools();
36764
36781
  if (ext_tools && ext_tools.length > 0) {
36765
- init_tools = mergeTools(init_tools, ext_tools);
36782
+ builtTools = mergeTools(builtTools, ext_tools);
36766
36783
  }
36767
- init_tools.forEach((tool) => _tools_.push(tool));
36784
+ builtTools.forEach((tool) => initTools.push(tool));
36768
36785
  }
36769
36786
  buildInitTools() {
36770
36787
  return [