@jarvis-agent/core 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/base.d.ts +5 -3
- package/dist/agent/base.d.ts.map +1 -1
- package/dist/agent/browser/browser_labels.d.ts +1 -1
- package/dist/agent/browser/browser_labels.d.ts.map +1 -1
- package/dist/agent/browser/browser_screen.d.ts +1 -1
- package/dist/agent/browser/browser_screen.d.ts.map +1 -1
- package/dist/agent/computer.d.ts +1 -1
- package/dist/agent/computer.d.ts.map +1 -1
- package/dist/agent/file.d.ts +1 -1
- package/dist/agent/file.d.ts.map +1 -1
- package/dist/agent/shell.d.ts +1 -1
- package/dist/agent/shell.d.ts.map +1 -1
- package/dist/config/index.d.ts +5 -1
- package/dist/config/index.d.ts.map +1 -1
- package/dist/core/context.d.ts.map +1 -1
- package/dist/core/dialogue.d.ts.map +1 -1
- package/dist/core/plan.d.ts.map +1 -1
- package/dist/core/replan.d.ts.map +1 -1
- package/dist/index.cjs.js +141 -125
- package/dist/index.cjs.js.map +1 -1
- package/dist/index.esm.js +107 -90
- package/dist/index.esm.js.map +1 -1
- package/dist/memory/index.d.ts.map +1 -1
- package/dist/tools/human_interact.d.ts.map +1 -1
- package/dist/tools/watch_trigger.d.ts.map +1 -1
- package/dist/types/core.types.d.ts +19 -0
- package/dist/types/core.types.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/index.esm.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const config$1 = {
|
|
1
|
+
const defaultConfig = {
|
|
2
2
|
name: "Eko",
|
|
3
3
|
platform: "mac",
|
|
4
4
|
maxReactNum: 500,
|
|
@@ -14,7 +14,18 @@ const config$1 = {
|
|
|
14
14
|
parallelToolCalls: true,
|
|
15
15
|
expertMode: false,
|
|
16
16
|
expertModeTodoLoopNum: 10,
|
|
17
|
+
streamFirstTimeout: 30000,
|
|
18
|
+
streamTokenTimeout: 180000,
|
|
17
19
|
};
|
|
20
|
+
let config$1 = { ...defaultConfig };
|
|
21
|
+
function mergeGlobalConfig(userConfig) {
|
|
22
|
+
if (userConfig) {
|
|
23
|
+
config$1 = { ...defaultConfig, ...userConfig };
|
|
24
|
+
}
|
|
25
|
+
else {
|
|
26
|
+
config$1 = { ...defaultConfig };
|
|
27
|
+
}
|
|
28
|
+
}
|
|
18
29
|
|
|
19
30
|
var LogLevel;
|
|
20
31
|
(function (LogLevel) {
|
|
@@ -23926,6 +23937,7 @@ class Context {
|
|
|
23926
23937
|
this.chain = chain;
|
|
23927
23938
|
this.variables = new Map();
|
|
23928
23939
|
this.controller = new AbortController();
|
|
23940
|
+
mergeGlobalConfig(config.globalConfig);
|
|
23929
23941
|
}
|
|
23930
23942
|
async checkAborted(noCheckPause) {
|
|
23931
23943
|
if (this.controller.signal.aborted) {
|
|
@@ -29806,8 +29818,7 @@ async function compressAgentMessages(agentContext, messages, tools) {
|
|
|
29806
29818
|
}
|
|
29807
29819
|
async function doCompressAgentMessages(agentContext, messages, tools) {
|
|
29808
29820
|
const ekoConfig = agentContext.context.config;
|
|
29809
|
-
const rlm = new RetryLanguageModel(ekoConfig.llms, ekoConfig.compressLlms);
|
|
29810
|
-
rlm.setContext(agentContext);
|
|
29821
|
+
const rlm = new RetryLanguageModel(ekoConfig.llms, ekoConfig.compressLlms, ekoConfig.globalConfig?.streamFirstTimeout, ekoConfig.globalConfig?.streamTokenTimeout, agentContext);
|
|
29811
29822
|
// extract used tool
|
|
29812
29823
|
const usedTools = extractUsedTool(messages, tools);
|
|
29813
29824
|
const snapshotTool = new TaskSnapshotTool();
|
|
@@ -31225,8 +31236,7 @@ class Planner {
|
|
|
31225
31236
|
}
|
|
31226
31237
|
async doPlan(taskPrompt, messages, saveHistory, retryNum = 0) {
|
|
31227
31238
|
const config = this.context.config;
|
|
31228
|
-
const rlm = new RetryLanguageModel(config.llms, config.planLlms);
|
|
31229
|
-
rlm.setContext(this.context);
|
|
31239
|
+
const rlm = new RetryLanguageModel(config.llms, config.planLlms, config.globalConfig?.streamFirstTimeout, config.globalConfig?.streamTokenTimeout, this.context);
|
|
31230
31240
|
const request = {
|
|
31231
31241
|
maxTokens: 8192,
|
|
31232
31242
|
temperature: 0.7,
|
|
@@ -31683,8 +31693,7 @@ async function checkTaskReplan(agentContext) {
|
|
|
31683
31693
|
if (!chain.planRequest || !chain.planResult) {
|
|
31684
31694
|
return false;
|
|
31685
31695
|
}
|
|
31686
|
-
const rlm = new RetryLanguageModel(context.config.llms, context.config.planLlms);
|
|
31687
|
-
rlm.setContext(agentContext);
|
|
31696
|
+
const rlm = new RetryLanguageModel(context.config.llms, context.config.planLlms, context.config.globalConfig?.streamFirstTimeout, context.config.globalConfig?.streamTokenTimeout, agentContext);
|
|
31688
31697
|
const agentExecution = getAgentExecutionPrompt(agentContext);
|
|
31689
31698
|
const prompt = `# Task Execution Status
|
|
31690
31699
|
${agentExecution}
|
|
@@ -32719,7 +32728,7 @@ class EkoDialogue {
|
|
|
32719
32728
|
params.messageId = params.messageId ?? this.memory.genMessageId();
|
|
32720
32729
|
await this.addUserMessage(params.user, params.messageId);
|
|
32721
32730
|
}
|
|
32722
|
-
const rlm = new RetryLanguageModel(this.config.llms, this.config.chatLlms);
|
|
32731
|
+
const rlm = new RetryLanguageModel(this.config.llms, this.config.chatLlms, this.config.globalConfig?.streamFirstTimeout, this.config.globalConfig?.streamTokenTimeout);
|
|
32723
32732
|
for (let i = 0; i < 15; i++) {
|
|
32724
32733
|
const messages = this.memory.buildMessages();
|
|
32725
32734
|
const chatTools = [...this.buildInnerTools(params), ...this.tools];
|
|
@@ -33485,8 +33494,7 @@ request_help: Request assistance from the user; for instance, when an operation
|
|
|
33485
33494
|
}
|
|
33486
33495
|
try {
|
|
33487
33496
|
let imageResult = (await screenshot.call(agentContext.agent, agentContext));
|
|
33488
|
-
let rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms);
|
|
33489
|
-
rlm.setContext(agentContext);
|
|
33497
|
+
let rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms, agentContext.context.config.globalConfig?.streamFirstTimeout, agentContext.context.config.globalConfig?.streamTokenTimeout, agentContext);
|
|
33490
33498
|
let image = toImage(imageResult.imageBase64);
|
|
33491
33499
|
let request = {
|
|
33492
33500
|
messages: [
|
|
@@ -33734,8 +33742,7 @@ class WatchTriggerTool {
|
|
|
33734
33742
|
const start = new Date().getTime();
|
|
33735
33743
|
const timeout = (args.timeout || 5) * 60000;
|
|
33736
33744
|
const frequency = Math.max(500, (args.frequency || 1) * 1000);
|
|
33737
|
-
const rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms);
|
|
33738
|
-
rlm.setContext(agentContext);
|
|
33745
|
+
const rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms, agentContext.context.config.globalConfig?.streamFirstTimeout, agentContext.context.config.globalConfig?.streamTokenTimeout, agentContext);
|
|
33739
33746
|
while (new Date().getTime() - start < timeout) {
|
|
33740
33747
|
await agentContext.context.checkAborted();
|
|
33741
33748
|
await new Promise((resolve) => setTimeout(resolve, frequency));
|
|
@@ -34226,25 +34233,31 @@ class Agent {
|
|
|
34226
34233
|
this.description = params.description;
|
|
34227
34234
|
this.tools = params.tools;
|
|
34228
34235
|
this.llms = params.llms;
|
|
34229
|
-
this.
|
|
34236
|
+
this.mcpClients = params.mcpClients || (params.mcpClient ? [params.mcpClient] : []);
|
|
34230
34237
|
this.planDescription = params.planDescription;
|
|
34231
34238
|
this.requestHandler = params.requestHandler;
|
|
34232
34239
|
}
|
|
34233
34240
|
async run(context, agentChain) {
|
|
34234
|
-
const
|
|
34241
|
+
const mcpClients = this.mcpClients.length > 0
|
|
34242
|
+
? this.mcpClients
|
|
34243
|
+
: (context.config.defaultMcpClient ? [context.config.defaultMcpClient] : []);
|
|
34235
34244
|
const agentContext = new AgentContext(context, this, agentChain);
|
|
34236
34245
|
try {
|
|
34237
34246
|
this.agentContext = agentContext;
|
|
34238
|
-
|
|
34239
|
-
!
|
|
34240
|
-
|
|
34241
|
-
|
|
34247
|
+
for (const client of mcpClients) {
|
|
34248
|
+
if (!client.isConnected()) {
|
|
34249
|
+
await client.connect(context.controller.signal);
|
|
34250
|
+
}
|
|
34251
|
+
}
|
|
34252
|
+
return await this.runWithContext(agentContext, mcpClients, config$1.maxReactNum);
|
|
34242
34253
|
}
|
|
34243
34254
|
finally {
|
|
34244
|
-
|
|
34255
|
+
for (const client of mcpClients) {
|
|
34256
|
+
await client.close();
|
|
34257
|
+
}
|
|
34245
34258
|
}
|
|
34246
34259
|
}
|
|
34247
|
-
async runWithContext(agentContext,
|
|
34260
|
+
async runWithContext(agentContext, mcpClients, maxReactNum = 100, historyMessages = []) {
|
|
34248
34261
|
let loopNum = 0;
|
|
34249
34262
|
let checkNum = 0;
|
|
34250
34263
|
this.agentContext = agentContext;
|
|
@@ -34267,18 +34280,20 @@ class Agent {
|
|
|
34267
34280
|
},
|
|
34268
34281
|
];
|
|
34269
34282
|
agentContext.messages = messages;
|
|
34270
|
-
const rlm = new RetryLanguageModel(context.config.llms, this.llms);
|
|
34271
|
-
|
|
34283
|
+
const rlm = new RetryLanguageModel(context.config.llms, this.llms, context.config.globalConfig?.streamFirstTimeout, context.config.globalConfig?.streamTokenTimeout, agentContext);
|
|
34284
|
+
const resolvedMcpClients = Array.isArray(mcpClients)
|
|
34285
|
+
? mcpClients
|
|
34286
|
+
: (mcpClients ? [mcpClients] : []);
|
|
34272
34287
|
let agentTools = tools;
|
|
34273
34288
|
while (loopNum < maxReactNum) {
|
|
34274
34289
|
await context.checkAborted();
|
|
34275
|
-
if (
|
|
34290
|
+
if (resolvedMcpClients.length > 0) {
|
|
34276
34291
|
const controlMcp = await this.controlMcpTools(agentContext, messages, loopNum);
|
|
34277
34292
|
if (controlMcp.mcpTools) {
|
|
34278
|
-
const mcpTools = await this.
|
|
34293
|
+
const mcpTools = await this.listMcpTools(context, resolvedMcpClients, agentNode, controlMcp.mcpParams);
|
|
34279
34294
|
const usedTools = extractUsedTool(messages, agentTools);
|
|
34280
|
-
const
|
|
34281
|
-
agentTools = mergeTools(
|
|
34295
|
+
const mergedTools = mergeTools(tools, usedTools);
|
|
34296
|
+
agentTools = mergeTools(mergedTools, mcpTools);
|
|
34282
34297
|
}
|
|
34283
34298
|
}
|
|
34284
34299
|
await this.handleMessages(agentContext, messages, tools);
|
|
@@ -34435,33 +34450,32 @@ class Agent {
|
|
|
34435
34450
|
async extSysPrompt(agentContext, tools) {
|
|
34436
34451
|
return "";
|
|
34437
34452
|
}
|
|
34438
|
-
async
|
|
34439
|
-
|
|
34440
|
-
|
|
34441
|
-
|
|
34453
|
+
async listMcpTools(context, clients, agentNode, mcpParams) {
|
|
34454
|
+
const allTools = [];
|
|
34455
|
+
for (const client of clients) {
|
|
34456
|
+
try {
|
|
34457
|
+
if (!client.isConnected()) {
|
|
34458
|
+
await client.connect(context.controller.signal);
|
|
34459
|
+
}
|
|
34460
|
+
const list = await client.listTools({
|
|
34461
|
+
taskId: context.taskId,
|
|
34462
|
+
nodeId: agentNode?.id,
|
|
34463
|
+
environment: config$1.platform,
|
|
34464
|
+
agent_name: agentNode?.name || this.name,
|
|
34465
|
+
params: {},
|
|
34466
|
+
prompt: agentNode?.task || context.chain.taskPrompt,
|
|
34467
|
+
...(mcpParams || {}),
|
|
34468
|
+
}, context.controller.signal);
|
|
34469
|
+
for (const toolSchema of list) {
|
|
34470
|
+
const execute = this.toolExecuter(client, toolSchema.name);
|
|
34471
|
+
allTools.push(new McpTool(new ToolWrapper(toolSchema, execute)));
|
|
34472
|
+
}
|
|
34473
|
+
}
|
|
34474
|
+
catch (e) {
|
|
34475
|
+
Log.error("Mcp listTools error", e);
|
|
34442
34476
|
}
|
|
34443
|
-
let list = await mcpClient.listTools({
|
|
34444
|
-
taskId: context.taskId,
|
|
34445
|
-
nodeId: agentNode?.id,
|
|
34446
|
-
environment: config$1.platform,
|
|
34447
|
-
agent_name: agentNode?.name || this.name,
|
|
34448
|
-
params: {},
|
|
34449
|
-
prompt: agentNode?.task || context.chain.taskPrompt,
|
|
34450
|
-
...(mcpParams || {}),
|
|
34451
|
-
}, context.controller.signal);
|
|
34452
|
-
let mcpTools = [];
|
|
34453
|
-
for (let i = 0; i < list.length; i++) {
|
|
34454
|
-
let toolSchema = list[i];
|
|
34455
|
-
let execute = this.toolExecuter(mcpClient, toolSchema.name);
|
|
34456
|
-
let toolWrapper = new ToolWrapper(toolSchema, execute);
|
|
34457
|
-
mcpTools.push(new McpTool(toolWrapper));
|
|
34458
|
-
}
|
|
34459
|
-
return mcpTools;
|
|
34460
|
-
}
|
|
34461
|
-
catch (e) {
|
|
34462
|
-
Log.error("Mcp listTools error", e);
|
|
34463
|
-
return [];
|
|
34464
34477
|
}
|
|
34478
|
+
return allTools;
|
|
34465
34479
|
}
|
|
34466
34480
|
async controlMcpTools(agentContext, messages, loopNum) {
|
|
34467
34481
|
return {
|
|
@@ -34504,9 +34518,9 @@ class Agent {
|
|
|
34504
34518
|
};
|
|
34505
34519
|
}
|
|
34506
34520
|
async loadTools(context) {
|
|
34507
|
-
if (this.
|
|
34508
|
-
|
|
34509
|
-
if (mcpTools
|
|
34521
|
+
if (this.mcpClients.length > 0) {
|
|
34522
|
+
const mcpTools = await this.listMcpTools(context, this.mcpClients);
|
|
34523
|
+
if (mcpTools.length > 0) {
|
|
34510
34524
|
return mergeTools(this.tools, mcpTools);
|
|
34511
34525
|
}
|
|
34512
34526
|
}
|
|
@@ -34538,8 +34552,11 @@ class Agent {
|
|
|
34538
34552
|
get PlanDescription() {
|
|
34539
34553
|
return this.planDescription;
|
|
34540
34554
|
}
|
|
34555
|
+
get McpClients() {
|
|
34556
|
+
return this.mcpClients;
|
|
34557
|
+
}
|
|
34541
34558
|
get McpClient() {
|
|
34542
|
-
return this.
|
|
34559
|
+
return this.mcpClients[0];
|
|
34543
34560
|
}
|
|
34544
34561
|
get AgentContext() {
|
|
34545
34562
|
return this.agentContext;
|
|
@@ -34548,8 +34565,8 @@ class Agent {
|
|
|
34548
34565
|
|
|
34549
34566
|
const AGENT_NAME$3 = "File";
|
|
34550
34567
|
class BaseFileAgent extends Agent {
|
|
34551
|
-
constructor(work_path, llms, ext_tools,
|
|
34552
|
-
const
|
|
34568
|
+
constructor(work_path, llms, ext_tools, mcpClients, planDescription) {
|
|
34569
|
+
const initTools = [];
|
|
34553
34570
|
const prompt = work_path
|
|
34554
34571
|
? `Your working directory is: ${work_path}
|
|
34555
34572
|
- When viewing file lists and outputting file paths, always include the working directory
|
|
@@ -34562,9 +34579,9 @@ class BaseFileAgent extends Agent {
|
|
|
34562
34579
|
super({
|
|
34563
34580
|
name: AGENT_NAME$3,
|
|
34564
34581
|
description: `You are a file agent, handling file-related tasks such as creating, finding, reading, modifying files, etc.${prompt}`,
|
|
34565
|
-
tools:
|
|
34582
|
+
tools: initTools,
|
|
34566
34583
|
llms: llms,
|
|
34567
|
-
|
|
34584
|
+
mcpClients: Array.isArray(mcpClients) ? mcpClients : (mcpClients ? [mcpClients] : []),
|
|
34568
34585
|
planDescription: planDescription ||
|
|
34569
34586
|
`File operation agent, handles file-related tasks such as creating, finding, reading, modifying files, etc. Only supports text file output
|
|
34570
34587
|
- Output file names must be in English
|
|
@@ -34572,11 +34589,11 @@ class BaseFileAgent extends Agent {
|
|
|
34572
34589
|
- For data-related content, combine with visualization tools for display
|
|
34573
34590
|
- For visualizations, generate charts first before page generation to minimize repetitive work`,
|
|
34574
34591
|
});
|
|
34575
|
-
let
|
|
34592
|
+
let builtTools = this.buildInitTools();
|
|
34576
34593
|
if (ext_tools && ext_tools.length > 0) {
|
|
34577
|
-
|
|
34594
|
+
builtTools = mergeTools(builtTools, ext_tools);
|
|
34578
34595
|
}
|
|
34579
|
-
|
|
34596
|
+
builtTools.forEach((tool) => initTools.push(tool));
|
|
34580
34597
|
}
|
|
34581
34598
|
async do_file_read(agentContext, path, write_variable) {
|
|
34582
34599
|
let file_context = await this.file_read(agentContext, path);
|
|
@@ -34731,24 +34748,24 @@ class BaseFileAgent extends Agent {
|
|
|
34731
34748
|
|
|
34732
34749
|
const AGENT_NAME$2 = "Shell";
|
|
34733
34750
|
class BaseShellAgent extends Agent {
|
|
34734
|
-
constructor(llms, ext_tools,
|
|
34735
|
-
const
|
|
34751
|
+
constructor(llms, ext_tools, mcpClients, planDescription) {
|
|
34752
|
+
const initTools = [];
|
|
34736
34753
|
super({
|
|
34737
34754
|
name: AGENT_NAME$2,
|
|
34738
34755
|
description: `Run commands in a bash shell,
|
|
34739
34756
|
* You must first call create_session to create a new session when using it for the first time.
|
|
34740
34757
|
* Please execute delete commands with caution, and never perform dangerous operations like \`rm -rf /\`.
|
|
34741
34758
|
* Please avoid commands that may produce a very large amount of output.`,
|
|
34742
|
-
tools:
|
|
34759
|
+
tools: initTools,
|
|
34743
34760
|
llms: llms,
|
|
34744
|
-
|
|
34761
|
+
mcpClients: Array.isArray(mcpClients) ? mcpClients : (mcpClients ? [mcpClients] : []),
|
|
34745
34762
|
planDescription: planDescription || "Shell command agent, use to execute shell commands.",
|
|
34746
34763
|
});
|
|
34747
|
-
let
|
|
34764
|
+
let builtTools = this.buildInitTools();
|
|
34748
34765
|
if (ext_tools && ext_tools.length > 0) {
|
|
34749
|
-
|
|
34766
|
+
builtTools = mergeTools(builtTools, ext_tools);
|
|
34750
34767
|
}
|
|
34751
|
-
|
|
34768
|
+
builtTools.forEach((tool) => initTools.push(tool));
|
|
34752
34769
|
}
|
|
34753
34770
|
buildInitTools() {
|
|
34754
34771
|
return [
|
|
@@ -34813,8 +34830,8 @@ class BaseShellAgent extends Agent {
|
|
|
34813
34830
|
|
|
34814
34831
|
const AGENT_NAME$1 = "Computer";
|
|
34815
34832
|
class BaseComputerAgent extends Agent {
|
|
34816
|
-
constructor(llms, ext_tools,
|
|
34817
|
-
const
|
|
34833
|
+
constructor(llms, ext_tools, mcpClients, keyboardKeys) {
|
|
34834
|
+
const initTools = [];
|
|
34818
34835
|
super({
|
|
34819
34836
|
name: AGENT_NAME$1,
|
|
34820
34837
|
description: `You are a computer operation agent, who interacts with the computer using mouse and keyboard, completing specified tasks step by step based on the given tasks and screenshots. After each of your operations, you will receive the latest computer screenshot to evaluate the task execution status.
|
|
@@ -34822,9 +34839,9 @@ This is a computer GUI interface, observe the execution through screenshots, and
|
|
|
34822
34839
|
* COMPUTER OPERATIONS:
|
|
34823
34840
|
- You can operate the application using shortcuts.
|
|
34824
34841
|
- If stuck, try alternative approaches`,
|
|
34825
|
-
tools:
|
|
34842
|
+
tools: initTools,
|
|
34826
34843
|
llms: llms,
|
|
34827
|
-
|
|
34844
|
+
mcpClients: Array.isArray(mcpClients) ? mcpClients : (mcpClients ? [mcpClients] : []),
|
|
34828
34845
|
planDescription: "Computer operation agent, interact with the computer using the mouse and keyboard."
|
|
34829
34846
|
});
|
|
34830
34847
|
if (!keyboardKeys) {
|
|
@@ -34855,11 +34872,11 @@ This is a computer GUI interface, observe the execution through screenshots, and
|
|
|
34855
34872
|
];
|
|
34856
34873
|
}
|
|
34857
34874
|
}
|
|
34858
|
-
let
|
|
34875
|
+
let builtTools = this.buildInitTools(keyboardKeys);
|
|
34859
34876
|
if (ext_tools && ext_tools.length > 0) {
|
|
34860
|
-
|
|
34877
|
+
builtTools = mergeTools(builtTools, ext_tools);
|
|
34861
34878
|
}
|
|
34862
|
-
|
|
34879
|
+
builtTools.forEach((tool) => initTools.push(tool));
|
|
34863
34880
|
}
|
|
34864
34881
|
buildInitTools(keyboardKeys) {
|
|
34865
34882
|
return [
|
|
@@ -35986,7 +36003,7 @@ function run_build_dom_tree() {
|
|
|
35986
36003
|
}
|
|
35987
36004
|
|
|
35988
36005
|
class BaseBrowserLabelsAgent extends BaseBrowserAgent {
|
|
35989
|
-
constructor(llms, ext_tools,
|
|
36006
|
+
constructor(llms, ext_tools, mcpClients) {
|
|
35990
36007
|
let description = `You are a browser operation agent, use structured commands to interact with the browser.
|
|
35991
36008
|
* This is a browser GUI interface where you need to analyze webpages by taking screenshot and page element structures, and specify action sequences to complete designated tasks.
|
|
35992
36009
|
* For your first visit, please start by calling either the \`navigate_to\` or \`current_page\` tool. After each action you perform, I will provide you with updated information about the current state, including page screenshots and structured element data that has been specially processed for easier analysis.
|
|
@@ -36021,20 +36038,20 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
|
|
|
36021
36038
|
- When filling out a form, fields that are not dependent on each other should be filled simultaneously
|
|
36022
36039
|
- Avoid parallel processing for dependent operations, such as those that need to wait for page loading, DOM changes, redirects, subsequent operations that depend on the results of previous operations, or operations that may interfere with each other and affect the same page elements. In these cases, please do not use parallelization.`;
|
|
36023
36040
|
}
|
|
36024
|
-
const
|
|
36041
|
+
const initTools = [];
|
|
36025
36042
|
super({
|
|
36026
36043
|
name: AGENT_NAME,
|
|
36027
36044
|
description: description,
|
|
36028
|
-
tools:
|
|
36045
|
+
tools: initTools,
|
|
36029
36046
|
llms: llms,
|
|
36030
|
-
|
|
36047
|
+
mcpClients: Array.isArray(mcpClients) ? mcpClients : (mcpClients ? [mcpClients] : []),
|
|
36031
36048
|
planDescription: "Browser operation agent, interact with the browser using the mouse and keyboard.",
|
|
36032
36049
|
});
|
|
36033
|
-
let
|
|
36050
|
+
let builtTools = this.buildInitTools();
|
|
36034
36051
|
if (ext_tools && ext_tools.length > 0) {
|
|
36035
|
-
|
|
36052
|
+
builtTools = mergeTools(builtTools, ext_tools);
|
|
36036
36053
|
}
|
|
36037
|
-
|
|
36054
|
+
builtTools.forEach((tool) => initTools.push(tool));
|
|
36038
36055
|
}
|
|
36039
36056
|
async input_text(agentContext, index, text, enter) {
|
|
36040
36057
|
await this.execute_script(agentContext, typing, [{ index, text, enter }]);
|
|
@@ -36739,7 +36756,7 @@ function scroll_by(params) {
|
|
|
36739
36756
|
}
|
|
36740
36757
|
|
|
36741
36758
|
class BaseBrowserScreenAgent extends BaseBrowserAgent {
|
|
36742
|
-
constructor(llms, ext_tools,
|
|
36759
|
+
constructor(llms, ext_tools, mcpClients) {
|
|
36743
36760
|
const description = `You are a browser operation agent, use a mouse and keyboard to interact with a browser.
|
|
36744
36761
|
* This is a browser GUI interface, observe the webpage execution through screenshots, and specify action sequences to complete designated tasks.
|
|
36745
36762
|
* For the first visit, please call the \`navigate_to\` or \`current_page\` tool first. After that, each of your actions will return a screenshot of the page.
|
|
@@ -36751,20 +36768,20 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
|
|
|
36751
36768
|
- Wait for elements to load
|
|
36752
36769
|
- Scroll pages and handle infinite scroll
|
|
36753
36770
|
- YOU CAN DO ANYTHING ON THE BROWSER - including clicking on elements, filling forms, submitting data, etc.`;
|
|
36754
|
-
const
|
|
36771
|
+
const initTools = [];
|
|
36755
36772
|
super({
|
|
36756
36773
|
name: AGENT_NAME,
|
|
36757
36774
|
description: description,
|
|
36758
|
-
tools:
|
|
36775
|
+
tools: initTools,
|
|
36759
36776
|
llms: llms,
|
|
36760
|
-
|
|
36777
|
+
mcpClients: Array.isArray(mcpClients) ? mcpClients : (mcpClients ? [mcpClients] : []),
|
|
36761
36778
|
planDescription: "Browser operation agent, interact with the browser using the mouse and keyboard.",
|
|
36762
36779
|
});
|
|
36763
|
-
let
|
|
36780
|
+
let builtTools = this.buildInitTools();
|
|
36764
36781
|
if (ext_tools && ext_tools.length > 0) {
|
|
36765
|
-
|
|
36782
|
+
builtTools = mergeTools(builtTools, ext_tools);
|
|
36766
36783
|
}
|
|
36767
|
-
|
|
36784
|
+
builtTools.forEach((tool) => initTools.push(tool));
|
|
36768
36785
|
}
|
|
36769
36786
|
buildInitTools() {
|
|
36770
36787
|
return [
|