@eko-ai/eko 2.0.9 → 2.1.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs.js CHANGED
@@ -9,7 +9,7 @@ const config = {
9
9
  platform: "mac",
10
10
  maxReactNum: 200,
11
11
  maxTokens: 16000,
12
- compressThreshold: 60,
12
+ compressThreshold: 80,
13
13
  largeTextLength: 5000,
14
14
  shortTextLength: 800,
15
15
  };
@@ -67,7 +67,7 @@ class Logger {
67
67
  };
68
68
  let formattedMessage = '';
69
69
  if (this.dateFormat) {
70
- formattedMessage += `[${new Date().toISOString()}] `;
70
+ formattedMessage += `[${new Date().toLocaleString()}] `;
71
71
  }
72
72
  formattedMessage += `[${levelNames[level] || 'UNKNOWN'}] `;
73
73
  if (this.prefix) {
@@ -139,8 +139,238 @@ class Logger {
139
139
  }
140
140
  const Log = new Logger();
141
141
 
142
/**
 * Returns a promise that settles (with `undefined`) once `time` milliseconds
 * have elapsed.
 *
 * @param {number} time - Delay in milliseconds, passed straight to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(time) {
  return new Promise((wake) => {
    setTimeout(wake, time);
  });
}
145
/**
 * Generates a random RFC 4122 version-4 UUID string.
 *
 * Uses the platform's `crypto.randomUUID()` when it is available, and falls
 * back to the `Math.random()` based template generator otherwise (for example
 * in runtimes or insecure browser contexts that do not expose it).
 *
 * @returns {string} A UUID such as `"3b241101-e2bb-4255-8caf-4136c566a962"`.
 */
function uuidv4() {
  const cryptoApi = globalThis.crypto;
  if (typeof cryptoApi?.randomUUID === "function") {
    return cryptoApi.randomUUID();
  }
  // Fallback: "x" becomes any hex digit, "y" becomes one of 8, 9, a, b
  // (the RFC 4122 variant bits).
  return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => {
    const r = (Math.random() * 16) | 0;
    const v = c === "x" ? r : (r & 0x3) | 0x8;
    return v.toString(16);
  });
}
152
/**
 * Runs `fun` and rejects if it has not finished within `timeout` milliseconds.
 *
 * The returned promise settles exactly once, and `error_callback` is invoked
 * at most once: either with `"Timeout"` or with the stringified error thrown
 * or rejected by `fun`. A failure of `fun` that arrives after the timeout has
 * already fired is ignored instead of triggering a second callback.
 *
 * @param {Function} fun - Function to run; may return a value or a promise.
 * @param {number} timeout - Time limit in milliseconds.
 * @param {Function} [error_callback] - Called with an error message string on failure.
 * @returns {Promise<*>} Resolves with the result of `fun`; rejects with
 *   `Error("Timeout")` or with whatever `fun` threw.
 */
function call_timeout(fun, timeout, error_callback) {
  return new Promise((resolve, reject) => {
    let settled = false;
    const fail = (error, message) => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      reject(error);
      error_callback && error_callback(message);
    };
    const timer = setTimeout(() => fail(new Error("Timeout"), "Timeout"), timeout);
    // `fun` is invoked synchronously; the wrapper turns a synchronous throw
    // into a rejection so both failure modes share one path.
    const pending = new Promise((done) => done(fun()));
    pending.then(
      (result) => {
        if (settled) {
          return;
        }
        settled = true;
        clearTimeout(timer);
        resolve(result);
      },
      (e) => fail(e, e + ""),
    );
  });
}
170
/**
 * Normalizes a tool definition into the flat
 * `{ type: "function", name, description, parameters }` shape.
 *
 * Accepted input layouts, checked in this order:
 *  - `{ function: { name, description, parameters } }`
 *  - `{ name, description, input_schema }`
 *  - `{ name, description, inputSchema }`
 *  - `{ name, description, parameters }`
 *
 * @param {object} tool - Tool definition in one of the layouts above.
 * @returns {{type: string, name: *, description: *, parameters: *}}
 */
function convertToolSchema(tool) {
  if ("function" in tool) {
    const { name, description, parameters } = tool.function;
    return { type: "function", name, description, parameters };
  }
  // First schema key present on the tool wins; "parameters" is the fallback.
  const schemaKey =
    ["input_schema", "inputSchema"].find((key) => key in tool) ?? "parameters";
  return {
    type: "function",
    name: tool.name,
    description: tool.description,
    parameters: tool[schemaKey],
  };
}
204
/**
 * Converts an image reference into either a `URL` or raw bytes.
 *
 * - Strings starting with `http://` or `https://` become a `URL` object.
 * - Anything else is treated as base64; a leading `data:image/...,` header is
 *   stripped first. Decoding uses `Buffer` when that global exists, otherwise
 *   `atob`.
 *
 * @param {string} imageData - Image URL, data URI, or bare base64 string.
 * @returns {URL|Uint8Array}
 */
function toImage(imageData) {
  const isRemote =
    imageData.startsWith("http://") || imageData.startsWith("https://");
  if (isRemote) {
    return new URL(imageData);
  }
  const base64 = imageData.startsWith("data:image/")
    ? imageData.substring(imageData.indexOf(",") + 1)
    : imageData;
  // @ts-ignore
  if (typeof Buffer != "undefined") {
    // @ts-ignore
    return new Uint8Array(Buffer.from(base64, "base64"));
  }
  const binaryString = atob(base64);
  return Uint8Array.from(binaryString, (ch) => ch.charCodeAt(0));
}
229
/**
 * Merges two tool lists by `name`, letting `tools2` override `tools1`.
 *
 * Result order: every entry of `tools1` in its original position (replaced by
 * the same-named entry of `tools2` when one exists), followed by the entries of
 * `tools2` whose names did not appear in `tools1`.
 *
 * A `Map` is used for the lookup so that names such as "constructor" or
 * "toString" cannot collide with inherited `Object.prototype` members.
 *
 * @param {Array<{name: string}>} tools1 - Base tools.
 * @param {Array<{name: string}>} tools2 - Overriding / additional tools.
 * @returns {Array<{name: string}>} A new merged array; inputs are not modified.
 */
function mergeTools(tools1, tools2) {
  const overrides = new Map();
  for (const tool of tools2) {
    overrides.set(tool.name, tool);
  }
  const merged = [];
  for (const tool1 of tools1) {
    if (overrides.has(tool1.name)) {
      merged.push(overrides.get(tool1.name));
      // Consumed: must not be appended again by the second pass.
      overrides.delete(tool1.name);
    } else {
      merged.push(tool1);
    }
  }
  for (const tool2 of tools2) {
    if (overrides.has(tool2.name)) {
      merged.push(tool2);
    }
  }
  return merged;
}
254
/**
 * Merges two agent lists by their `Name` property, letting `agents2` override
 * `agents1`.
 *
 * Result order: every entry of `agents1` in its original position (replaced by
 * the same-named entry of `agents2` when one exists), followed by the entries
 * of `agents2` whose names did not appear in `agents1`.
 *
 * A `Map` is used for the lookup so that names such as "constructor" or
 * "toString" cannot collide with inherited `Object.prototype` members.
 *
 * @param {Array<{Name: string}>} agents1 - Base agents.
 * @param {Array<{Name: string}>} agents2 - Overriding / additional agents.
 * @returns {Array<{Name: string}>} A new merged array; inputs are not modified.
 */
function mergeAgents(agents1, agents2) {
  const overrides = new Map();
  for (const agent of agents2) {
    overrides.set(agent.Name, agent);
  }
  const merged = [];
  for (const agent1 of agents1) {
    if (overrides.has(agent1.Name)) {
      merged.push(overrides.get(agent1.Name));
      // Consumed: must not be appended again by the second pass.
      overrides.delete(agent1.Name);
    } else {
      merged.push(agent1);
    }
  }
  for (const agent2 of agents2) {
    if (overrides.has(agent2.Name)) {
      merged.push(agent2);
    }
  }
  return merged;
}
279
/**
 * Truncates `str` to at most `maxLength` characters.
 *
 * @param {string} str - Text to shorten; any falsy value yields `""`.
 * @param {number} maxLength - Maximum number of characters kept.
 * @param {boolean} [appendPoint=true] - Append `"..."` when text was cut.
 * @returns {string} The original string when it fits, otherwise the prefix
 *   (plus the ellipsis when requested).
 */
function sub(str, maxLength, appendPoint = true) {
  if (!str) {
    return "";
  }
  const ellipsis = appendPoint ? "..." : "";
  return str.length > maxLength
    ? str.substring(0, maxLength) + ellipsis
    : str;
}
288
/**
 * Best-effort completion of a truncated XML fragment (for example a partially
 * streamed document) so that it can be parsed.
 *
 * Steps:
 *  1. If the text ends in `=` an empty value `""` is appended; if it ends in
 *     one of the bare attribute names `name`, `input`, `output`, `items`,
 *     `event`, `loop` inside an unfinished tag, `=""` is appended.
 *  2. Unbalanced `"` and `<` are closed (see `fixDoubleChar`).
 *  3. Closing tags are appended for every element that is still open.
 *
 * Markup that has no element name after `<` (such as `<?xml ...?>`,
 * `<!-- ... -->` or `<!DOCTYPE ...>`) is never treated as an open element;
 * previously such input caused a TypeError while building the closing tags.
 *
 * @param {string} code - Possibly incomplete XML text.
 * @returns {string} The text with the missing pieces appended.
 */
function fixXmlTag(code) {
  // Appends the `"` and `>` characters needed to balance quotes and angle
  // brackets, in the reverse order in which they were opened.
  function fixDoubleChar(code) {
    const stack = [];
    for (let i = 0; i < code.length; i++) {
      let s = code[i];
      if (s === "<") {
        stack.push(">");
      }
      else if (s === ">") {
        stack.pop();
      }
      else if (s === '"') {
        if (stack[stack.length - 1] === '"') {
          stack.pop();
        }
        else {
          stack.push('"');
        }
      }
    }
    const missingParts = [];
    while (stack.length > 0) {
      missingParts.push(stack.pop());
    }
    return code + missingParts.join("");
  }
  let eIdx = code.lastIndexOf(" ");
  let endStr = eIdx > -1 ? code.substring(eIdx + 1) : "";
  if (code.endsWith("=")) {
    code += '""';
  }
  else if (endStr == "name" ||
    endStr == "input" ||
    endStr == "output" ||
    endStr == "items" ||
    endStr == "event" ||
    endStr == "loop") {
    let idx1 = code.lastIndexOf(">");
    let idx2 = code.lastIndexOf("<");
    // Only when the trailing word sits inside a tag that is still open.
    if (idx1 < idx2 && code.lastIndexOf(" ") > idx2) {
      code += '=""';
    }
  }
  code = fixDoubleChar(code);
  // Names of elements that have been opened but not yet closed.
  const openTagNames = [];
  for (let i = 0; i < code.length; i++) {
    if (code[i] !== "<") {
      continue;
    }
    const isEndTag = code[i + 1] === "/";
    const endIndex = code.indexOf(">", i);
    const tagContent = code.slice(i, endIndex + 1);
    if (tagContent.endsWith("/>")) {
      // Self-closing element: nothing to track.
    }
    else if (isEndTag) {
      openTagNames.pop();
    }
    else {
      const nameMatch = tagContent.match(/<(\w+)/);
      // Declarations, comments and processing instructions have no element
      // name and need no closing tag.
      if (nameMatch) {
        openTagNames.push(nameMatch[1]);
      }
    }
    if (endIndex == -1) {
      break;
    }
    i = endIndex;
  }
  const missingParts = [];
  while (openTagNames.length > 0) {
    missingParts.push(`</${openTagNames.pop()}>`);
  }
  return code + missingParts.join("");
}
370
+
142
371
  class Context {
143
372
  constructor(taskId, config, agents, chain) {
373
+ this.paused = false;
144
374
  this.taskId = taskId;
145
375
  this.config = config;
146
376
  this.agents = agents;
@@ -148,13 +378,21 @@ class Context {
148
378
  this.variables = new Map();
149
379
  this.controller = new AbortController();
150
380
  }
151
- checkAborted() {
381
+ async checkAborted() {
152
382
  // this.controller.signal.throwIfAborted();
153
383
  if (this.controller.signal.aborted) {
154
384
  const error = new Error("Operation was interrupted");
155
385
  error.name = "AbortError";
156
386
  throw error;
157
387
  }
388
+ while (this.paused) {
389
+ await sleep(500);
390
+ if (this.controller.signal.aborted) {
391
+ const error = new Error("Operation was interrupted");
392
+ error.name = "AbortError";
393
+ throw error;
394
+ }
395
+ }
158
396
  }
159
397
  }
160
398
  class AgentContext {
@@ -8443,235 +8681,6 @@ createOpenAI({
8443
8681
  // strict for OpenAI API
8444
8682
  });
8445
8683
 
8446
- function sleep(time) {
8447
- return new Promise((resolve) => setTimeout(() => resolve(), time));
8448
- }
8449
- function uuidv4() {
8450
- return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, function (c) {
8451
- const r = (Math.random() * 16) | 0;
8452
- const v = c === "x" ? r : (r & 0x3) | 0x8;
8453
- return v.toString(16);
8454
- });
8455
- }
8456
- function call_timeout(fun, timeout, error_callback) {
8457
- return new Promise(async (resolve, reject) => {
8458
- let timer = setTimeout(() => {
8459
- reject(new Error("Timeout"));
8460
- error_callback && error_callback("Timeout");
8461
- }, timeout);
8462
- try {
8463
- const result = await fun();
8464
- clearTimeout(timer);
8465
- resolve(result);
8466
- }
8467
- catch (e) {
8468
- clearTimeout(timer);
8469
- reject(e);
8470
- error_callback && error_callback(e + "");
8471
- }
8472
- });
8473
- }
8474
- function convertToolSchema(tool) {
8475
- if ("function" in tool) {
8476
- return {
8477
- type: "function",
8478
- name: tool.function.name,
8479
- description: tool.function.description,
8480
- parameters: tool.function.parameters,
8481
- };
8482
- }
8483
- else if ("input_schema" in tool) {
8484
- return {
8485
- type: "function",
8486
- name: tool.name,
8487
- description: tool.description,
8488
- parameters: tool.input_schema,
8489
- };
8490
- }
8491
- else if ("inputSchema" in tool) {
8492
- return {
8493
- type: "function",
8494
- name: tool.name,
8495
- description: tool.description,
8496
- parameters: tool.inputSchema,
8497
- };
8498
- }
8499
- else {
8500
- return {
8501
- type: "function",
8502
- name: tool.name,
8503
- description: tool.description,
8504
- parameters: tool.parameters,
8505
- };
8506
- }
8507
- }
8508
- function toImage(imageData) {
8509
- let image = null;
8510
- if (imageData.startsWith("http://") || imageData.startsWith("https://")) {
8511
- image = new URL(imageData);
8512
- }
8513
- else {
8514
- if (imageData.startsWith("data:image/")) {
8515
- imageData = imageData.substring(imageData.indexOf(",") + 1);
8516
- }
8517
- // @ts-ignore
8518
- if (typeof Buffer != "undefined") {
8519
- // @ts-ignore
8520
- const buffer = Buffer.from(imageData, "base64");
8521
- image = new Uint8Array(buffer);
8522
- }
8523
- else {
8524
- const binaryString = atob(imageData);
8525
- image = new Uint8Array(binaryString.length);
8526
- for (let i = 0; i < binaryString.length; i++) {
8527
- image[i] = binaryString.charCodeAt(i);
8528
- }
8529
- }
8530
- }
8531
- return image;
8532
- }
8533
- function mergeTools(tools1, tools2) {
8534
- let tools = [];
8535
- let toolMap2 = tools2.reduce((map, tool) => {
8536
- map[tool.name] = tool;
8537
- return map;
8538
- }, {});
8539
- for (let i = 0; i < tools1.length; i++) {
8540
- let tool1 = tools1[i];
8541
- let tool2 = toolMap2[tool1.name];
8542
- if (tool2) {
8543
- tools.push(tool2);
8544
- delete toolMap2[tool1.name];
8545
- }
8546
- else {
8547
- tools.push(tool1);
8548
- }
8549
- }
8550
- for (let i = 0; i < tools2.length; i++) {
8551
- let tool2 = tools2[i];
8552
- if (toolMap2[tool2.name]) {
8553
- tools.push(tool2);
8554
- }
8555
- }
8556
- return tools;
8557
- }
8558
- function mergeAgents(agents1, agents2) {
8559
- let tools = [];
8560
- let toolMap2 = agents2.reduce((map, tool) => {
8561
- map[tool.Name] = tool;
8562
- return map;
8563
- }, {});
8564
- for (let i = 0; i < agents1.length; i++) {
8565
- let tool1 = agents1[i];
8566
- let tool2 = toolMap2[tool1.Name];
8567
- if (tool2) {
8568
- tools.push(tool2);
8569
- delete toolMap2[tool1.Name];
8570
- }
8571
- else {
8572
- tools.push(tool1);
8573
- }
8574
- }
8575
- for (let i = 0; i < agents2.length; i++) {
8576
- let tool2 = agents2[i];
8577
- if (toolMap2[tool2.Name]) {
8578
- tools.push(tool2);
8579
- }
8580
- }
8581
- return tools;
8582
- }
8583
- function sub(str, maxLength, appendPoint = true) {
8584
- if (!str) {
8585
- return "";
8586
- }
8587
- if (str.length > maxLength) {
8588
- return str.substring(0, maxLength) + (appendPoint ? "..." : "");
8589
- }
8590
- return str;
8591
- }
8592
- function fixXmlTag(code) {
8593
- function fixDoubleChar(code) {
8594
- const stack = [];
8595
- for (let i = 0; i < code.length; i++) {
8596
- let s = code[i];
8597
- if (s === "<") {
8598
- stack.push(">");
8599
- }
8600
- else if (s === ">") {
8601
- stack.pop();
8602
- }
8603
- else if (s === '"') {
8604
- if (stack[stack.length - 1] === '"') {
8605
- stack.pop();
8606
- }
8607
- else {
8608
- stack.push('"');
8609
- }
8610
- }
8611
- }
8612
- const missingParts = [];
8613
- while (stack.length > 0) {
8614
- missingParts.push(stack.pop());
8615
- }
8616
- return code + missingParts.join("");
8617
- }
8618
- let eIdx = code.lastIndexOf(" ");
8619
- let endStr = eIdx > -1 ? code.substring(eIdx + 1) : "";
8620
- if (code.endsWith("=")) {
8621
- code += '""';
8622
- }
8623
- else if (endStr == "name" ||
8624
- endStr == "input" ||
8625
- endStr == "output" ||
8626
- endStr == "items" ||
8627
- endStr == "event" ||
8628
- endStr == "loop") {
8629
- let idx1 = code.lastIndexOf(">");
8630
- let idx2 = code.lastIndexOf("<");
8631
- if (idx1 < idx2 && code.lastIndexOf(" ") > idx2) {
8632
- code += '=""';
8633
- }
8634
- }
8635
- code = fixDoubleChar(code);
8636
- const stack = [];
8637
- function isSelfClosing(tag) {
8638
- return tag.endsWith("/>");
8639
- }
8640
- for (let i = 0; i < code.length; i++) {
8641
- let s = code[i];
8642
- if (s === "<") {
8643
- const isEndTag = code[i + 1] === "/";
8644
- let endIndex = code.indexOf(">", i);
8645
- let tagContent = code.slice(i, endIndex + 1);
8646
- if (isSelfClosing(tagContent)) ;
8647
- else if (isEndTag) {
8648
- stack.pop();
8649
- }
8650
- else {
8651
- stack.push(tagContent);
8652
- }
8653
- if (endIndex == -1) {
8654
- break;
8655
- }
8656
- i = endIndex;
8657
- }
8658
- }
8659
- const missingParts = [];
8660
- while (stack.length > 0) {
8661
- const top = stack.pop();
8662
- if (top.startsWith("<")) {
8663
- let arr = top.match(/<(\w+)/);
8664
- const tagName = arr[1];
8665
- missingParts.push(`</${tagName}>`);
8666
- }
8667
- else {
8668
- missingParts.push(top);
8669
- }
8670
- }
8671
- let completedCode = code + missingParts.join("");
8672
- return completedCode;
8673
- }
8674
-
8675
8684
  // src/anthropic-provider.ts
8676
8685
  var anthropicErrorDataSchema = z.object({
8677
8686
  type: z.literal("error"),
@@ -14736,7 +14745,7 @@ class RetryLanguageModel {
14736
14745
  toolChoice: request.toolChoice,
14737
14746
  },
14738
14747
  prompt: request.messages,
14739
- maxTokens: request.maxTokens || config.maxTokens,
14748
+ maxTokens: request.maxTokens,
14740
14749
  temperature: request.temperature,
14741
14750
  topP: request.topP,
14742
14751
  topK: request.topK,
@@ -14745,6 +14754,7 @@ class RetryLanguageModel {
14745
14754
  });
14746
14755
  }
14747
14756
  async doGenerate(options) {
14757
+ const maxTokens = options.maxTokens;
14748
14758
  const names = [...this.names, ...this.names];
14749
14759
  for (let i = 0; i < names.length; i++) {
14750
14760
  const name = names[i];
@@ -14752,6 +14762,10 @@ class RetryLanguageModel {
14752
14762
  if (!llm) {
14753
14763
  continue;
14754
14764
  }
14765
+ if (!maxTokens) {
14766
+ options.maxTokens =
14767
+ this.llms[name].config?.maxTokens || config.maxTokens;
14768
+ }
14755
14769
  try {
14756
14770
  let result = await llm.doGenerate(options);
14757
14771
  if (Log.isEnableDebug()) {
@@ -14783,7 +14797,7 @@ class RetryLanguageModel {
14783
14797
  toolChoice: request.toolChoice,
14784
14798
  },
14785
14799
  prompt: request.messages,
14786
- maxTokens: request.maxTokens || config.maxTokens,
14800
+ maxTokens: request.maxTokens,
14787
14801
  temperature: request.temperature,
14788
14802
  topP: request.topP,
14789
14803
  topK: request.topK,
@@ -14792,6 +14806,7 @@ class RetryLanguageModel {
14792
14806
  });
14793
14807
  }
14794
14808
  async doStream(options) {
14809
+ const maxTokens = options.maxTokens;
14795
14810
  const names = [...this.names, ...this.names];
14796
14811
  for (let i = 0; i < names.length; i++) {
14797
14812
  const name = names[i];
@@ -14799,6 +14814,10 @@ class RetryLanguageModel {
14799
14814
  if (!llm) {
14800
14815
  continue;
14801
14816
  }
14817
+ if (!maxTokens) {
14818
+ options.maxTokens =
14819
+ this.llms[name].config?.maxTokens || config.maxTokens;
14820
+ }
14802
14821
  try {
14803
14822
  const controller = new AbortController();
14804
14823
  const signal = options.abortSignal
@@ -17876,7 +17895,7 @@ class HumanInteractTool {
17876
17895
  this.name = TOOL_NAME$3;
17877
17896
  this.noPlan = true;
17878
17897
  this.description = `AI interacts with humans:
17879
- confirm: Ask the user to confirm whether to execute an operation, especially when performing dangerous actions such as deleting system files.
17898
+ confirm: Ask the user to confirm whether to execute an operation, especially when performing dangerous actions such as deleting system files, users will choose Yes or No.
17880
17899
  input: Prompt the user to enter text; for example, when a task is ambiguous, the AI can choose to ask the user for details, and the user can respond by inputting.
17881
17900
  select: Allow the user to make a choice; in situations that require selection, the AI can ask the user to make a decision.
17882
17901
  request_help: Request assistance from the user; for instance, when an operation is blocked, the AI can ask the user for help, such as needing to log into a website or solve a CAPTCHA.`;
@@ -17972,7 +17991,7 @@ const TOOL_NAME$2 = "task_node_status";
17972
17991
  class TaskNodeStatusTool {
17973
17992
  constructor() {
17974
17993
  this.name = TOOL_NAME$2;
17975
- this.description = `After completing each step of the task, you need to call this tool to update the status of the task node.`;
17994
+ this.description = `After completing each step of the task, you need to call this tool to update the status of the task node, and think about the tasks to be processed and the next action plan.`;
17976
17995
  this.parameters = {
17977
17996
  type: "object",
17978
17997
  properties: {
@@ -17990,8 +18009,12 @@ class TaskNodeStatusTool {
17990
18009
  type: "number",
17991
18010
  },
17992
18011
  },
18012
+ thought: {
18013
+ type: "string",
18014
+ description: "Current thinking content, which can be analysis of the problem, assumptions, insights, reflections, or a summary of the previous, suggest the next action step to be taken, which should be specific, executable, and verifiable."
18015
+ },
17993
18016
  },
17994
- required: ["doneIds", "todoIds"],
18017
+ required: ["doneIds", "todoIds", "thought"],
17995
18018
  };
17996
18019
  }
17997
18020
  async execute(args, agentContext) {
@@ -18131,7 +18154,7 @@ class McpTool {
18131
18154
 
18132
18155
  const AGENT_SYSTEM_TEMPLATE = `
18133
18156
  You are {name}, an autonomous AI agent for {agent} agent.
18134
- UTC datetime: {datetime}
18157
+ Current datetime: {datetime}
18135
18158
 
18136
18159
  # Task Description
18137
18160
  {description}
@@ -18155,10 +18178,10 @@ The output language should follow the language corresponding to the user's task.
18155
18178
  const HUMAN_PROMPT = `
18156
18179
  * HUMAN INTERACT
18157
18180
  During the task execution process, you can use the \`${TOOL_NAME$3}\` tool to interact with humans, please call it in the following situations:
18158
- - When performing dangerous operations such as deleting files, confirmation from humans is required
18159
- - When encountering obstacles while accessing websites, such as requiring user login, you need to request human assistance
18181
+ - When performing dangerous operations such as deleting files, confirmation from humans is required.
18182
+ - When encountering obstacles while visiting a website, such as requiring user login or captcha, you need to request for manual assistance.
18160
18183
  - When requesting login, please only call the function when a login dialog box is clearly displayed.
18161
- - Try not to use the \`${TOOL_NAME$3}\` tool
18184
+ - Try to minimize the use of \`${TOOL_NAME$3}\` tool.
18162
18185
  `;
18163
18186
  const VARIABLE_PROMPT = `
18164
18187
  * VARIABLE STORAGE
@@ -18234,7 +18257,7 @@ function getAgentSystemPrompt(agent, agentNode, context, tools, extSysPrompt) {
18234
18257
  return AGENT_SYSTEM_TEMPLATE.replace("{name}", config.name)
18235
18258
  .replace("{agent}", agent.Name)
18236
18259
  .replace("{description}", agent.Description)
18237
- .replace("{datetime}", new Date().toISOString())
18260
+ .replace("{datetime}", new Date().toLocaleString())
18238
18261
  .replace("{prompt}", prompt)
18239
18262
  .replace("{nodePrompt}", nodePrompt)
18240
18263
  .trim();
@@ -18288,7 +18311,7 @@ class Agent {
18288
18311
  let rlm = new RetryLanguageModel(context.config.llms, this.llms);
18289
18312
  let agentTools = tools;
18290
18313
  while (loopNum < maxReactNum) {
18291
- context.checkAborted();
18314
+ await context.checkAborted();
18292
18315
  if (mcpClient) {
18293
18316
  let controlMcp = await this.controlMcpTools(agentContext, messages, loopNum);
18294
18317
  if (controlMcp.mcpTools) {
@@ -18557,6 +18580,9 @@ class Agent {
18557
18580
  }
18558
18581
  return this.tools;
18559
18582
  }
18583
+ addTool(tool) {
18584
+ this.tools.push(tool);
18585
+ }
18560
18586
  get Name() {
18561
18587
  return this.name;
18562
18588
  }
@@ -18600,7 +18626,7 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18600
18626
  const reader = result.stream.getReader();
18601
18627
  try {
18602
18628
  while (true) {
18603
- context.checkAborted();
18629
+ await context.checkAborted();
18604
18630
  const { done, value } = await reader.read();
18605
18631
  if (done) {
18606
18632
  break;
@@ -18759,7 +18785,7 @@ class BaseChatAgent extends Agent {
18759
18785
 
18760
18786
  const PLAN_SYSTEM_TEMPLATE = `
18761
18787
  You are {name}, an autonomous AI Agent Planner.
18762
- UTC datetime: {datetime}
18788
+ Current datetime: {datetime}
18763
18789
 
18764
18790
  ## Task Description
18765
18791
  Your task is to understand the user's requirements, dynamically plan the user's tasks based on the Agent list, and please follow the steps below:
@@ -18916,12 +18942,12 @@ Output result:
18916
18942
  ];
18917
18943
  const PLAN_USER_TEMPLATE = `
18918
18944
  User Platform: {platform}
18919
- Task Description: {taskPrompt}
18945
+ Task Description: {task_prompt}
18920
18946
  `;
18921
18947
  const PLAN_USER_TASK_WEBSITE_TEMPLATE = `
18922
18948
  User Platform: {platform}
18923
18949
  Task Website: {task_website}
18924
- Task Description: {taskPrompt}
18950
+ Task Description: {task_prompt}
18925
18951
  `;
18926
18952
  async function getPlanSystemPrompt(context) {
18927
18953
  let agents_prompt = "";
@@ -18949,22 +18975,26 @@ async function getPlanSystemPrompt(context) {
18949
18975
  }
18950
18976
  return PLAN_SYSTEM_TEMPLATE.replace("{name}", config.name)
18951
18977
  .replace("{agents}", agents_prompt.trim())
18952
- .replace("{datetime}", new Date().toISOString())
18978
+ .replace("{datetime}", new Date().toLocaleString())
18953
18979
  .replace("{example_prompt}", example_prompt)
18954
18980
  .trim();
18955
18981
  }
18956
/**
 * Builds the user message sent to the planner.
 *
 * Fills `PLAN_USER_TASK_WEBSITE_TEMPLATE` when `task_website` is given and
 * `PLAN_USER_TEMPLATE` otherwise, trims the result, and appends the trimmed
 * `ext_prompt` on a new line when one is provided.
 *
 * Replacement values are supplied through replacer functions so that `$`
 * sequences in user text (`$&`, `$'`, `$$`, ...) are inserted literally rather
 * than being expanded as `String.prototype.replace` patterns.
 *
 * @param {string} task_prompt - The user's task description.
 * @param {string} [task_website] - Website the task relates to, if any.
 * @param {string} [ext_prompt] - Extra text appended after the filled template.
 * @returns {string} The completed prompt.
 */
function getPlanUserPrompt(task_prompt, task_website, ext_prompt) {
  const template = task_website
    ? PLAN_USER_TASK_WEBSITE_TEMPLATE
    : PLAN_USER_TEMPLATE;
  let prompt = template
    .replace("{task_prompt}", () => task_prompt)
    .replace("{platform}", () => config.platform);
  if (task_website) {
    prompt = prompt.replace("{task_website}", () => task_website);
  }
  prompt = prompt.trim();
  if (ext_prompt) {
    prompt += `\n${ext_prompt.trim()}`;
  }
  return prompt;
}
18969
18999
 
18970
19000
  class Planner {
@@ -19007,7 +19037,7 @@ class Planner {
19007
19037
  content: [
19008
19038
  {
19009
19039
  type: "text",
19010
- text: getPlanUserPrompt(taskPrompt, this.context.variables.get("task_website")),
19040
+ text: getPlanUserPrompt(taskPrompt, this.context.variables.get("task_website"), this.context.variables.get("plan_ext_prompt")),
19011
19041
  },
19012
19042
  ],
19013
19043
  },
@@ -19024,7 +19054,7 @@ class Planner {
19024
19054
  let streamText = "";
19025
19055
  try {
19026
19056
  while (true) {
19027
- this.context.checkAborted();
19057
+ await this.context.checkAborted();
19028
19058
  const { done, value } = await reader.read();
19029
19059
  if (done) {
19030
19060
  break;
@@ -19163,7 +19193,7 @@ class Eko {
19163
19193
  }, {});
19164
19194
  let results = [];
19165
19195
  for (let i = 0; i < workflow.agents.length; i++) {
19166
- context.checkAborted();
19196
+ await context.checkAborted();
19167
19197
  let agentNode = workflow.agents[i];
19168
19198
  let agent = agentMap[agentNode.name];
19169
19199
  if (!agent) {
@@ -19200,6 +19230,16 @@ class Eko {
19200
19230
  return false;
19201
19231
  }
19202
19232
  }
19233
+ pauseTask(taskId, paused) {
19234
+ let context = this.taskMap.get(taskId);
19235
+ if (context) {
19236
+ context.paused = paused;
19237
+ return true;
19238
+ }
19239
+ else {
19240
+ return false;
19241
+ }
19242
+ }
19203
19243
  addAgent(agent) {
19204
19244
  this.config.agents = this.config.agents || [];
19205
19245
  this.config.agents.push(agent);
@@ -20064,6 +20104,16 @@ class BaseBrowserAgent extends Agent {
20064
20104
  }
20065
20105
  return null;
20066
20106
  }
20107
+ toolUseNames(messages) {
20108
+ let toolNames = [];
20109
+ for (let i = 0; i < messages.length; i++) {
20110
+ let message = messages[i];
20111
+ if (message.role == "tool") {
20112
+ toolNames.push(message.content[0].toolName);
20113
+ }
20114
+ }
20115
+ return toolNames;
20116
+ }
20067
20117
  async execute_mcp_script(agentContext, script) {
20068
20118
  return;
20069
20119
  }
@@ -20104,8 +20154,8 @@ function run_build_dom_tree() {
20104
20154
  'name',
20105
20155
  'role',
20106
20156
  'class',
20107
- // 'href',
20108
- 'tabindex',
20157
+ 'src',
20158
+ 'href',
20109
20159
  'aria-label',
20110
20160
  'placeholder',
20111
20161
  'value',
@@ -20154,6 +20204,12 @@ function run_build_dom_tree() {
20154
20204
  let classList = value.split(" ").slice(0, 3);
20155
20205
  value = classList.join(" ");
20156
20206
  }
20207
+ else if ((key == "src" || key == "href") && value && value.length > 200) {
20208
+ continue;
20209
+ }
20210
+ else if ((key == "src" || key == "href") && value && value.startsWith("/")) {
20211
+ value = window.location.origin + value;
20212
+ }
20157
20213
  if (key && value) {
20158
20214
  attributes_str += ` ${key}="${value}"`;
20159
20215
  }
@@ -20712,15 +20768,13 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20712
20768
  }, [index]);
20713
20769
  await sleep(200);
20714
20770
  }
20715
- async scroll_mouse_wheel(agentContext, amount) {
20716
- await this.execute_script(agentContext, (amount) => {
20717
- let viewportHeight = window.innerHeight ||
20718
- document.documentElement.clientHeight ||
20719
- document.body.clientHeight;
20720
- let y = Math.max(20, Math.min(viewportHeight / 10, 200));
20721
- window.scrollBy(0, y * amount);
20722
- }, [amount]);
20771
+ async scroll_mouse_wheel(agentContext, amount, extract_page_content) {
20772
+ await this.execute_script(agentContext, scroll_by, [{ amount }]);
20723
20773
  await sleep(200);
20774
+ if (extract_page_content) {
20775
+ let page_content = await this.extract_page_content(agentContext);
20776
+ return "This is the latest page content:\n" + page_content;
20777
+ }
20724
20778
  }
20725
20779
  async hover_to_element(agentContext, index) {
20726
20780
  await this.execute_script(agentContext, hover_to, [{ index }]);
@@ -20741,6 +20795,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20741
20795
  for (let i = 0; i < 5; i++) {
20742
20796
  await sleep(200);
20743
20797
  await this.execute_script(agentContext, run_build_dom_tree, []);
20798
+ await sleep(50);
20744
20799
  element_result = (await this.execute_script(agentContext, () => {
20745
20800
  return window.get_clickable_elements(true);
20746
20801
  }, []));
@@ -20748,7 +20803,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20748
20803
  break;
20749
20804
  }
20750
20805
  }
20751
- await sleep(50);
20806
+ await sleep(100);
20752
20807
  let screenshot = await this.screenshot(agentContext);
20753
20808
  // agentContext.variables.set("selector_map", element_result.selector_map);
20754
20809
  let pseudoHtml = element_result.element_str;
@@ -20882,7 +20937,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20882
20937
  },
20883
20938
  {
20884
20939
  name: "scroll_mouse_wheel",
20885
- description: "Scroll the mouse wheel at current position, prioritize using extract_page_content, only scroll when you need to load more content",
20940
+ description: "Scroll the mouse wheel at current position, only scroll when you need to load more content",
20886
20941
  parameters: {
20887
20942
  type: "object",
20888
20943
  properties: {
@@ -20896,13 +20951,17 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20896
20951
  type: "string",
20897
20952
  enum: ["up", "down"],
20898
20953
  },
20954
+ extract_page_content: {
20955
+ type: "boolean",
20956
+ description: "After scrolling is completed, whether to extract the current latest page content",
20957
+ },
20899
20958
  },
20900
20959
  required: ["amount", "direction"],
20901
20960
  },
20902
20961
  execute: async (args, agentContext) => {
20903
20962
  return await this.callInnerTool(async () => {
20904
20963
  let amount = args.amount;
20905
- await this.scroll_mouse_wheel(agentContext, args.direction == "up" ? -amount : amount);
20964
+ await this.scroll_mouse_wheel(agentContext, args.direction == "up" ? -amount : amount, args.extract_page_content == true);
20906
20965
  });
20907
20966
  },
20908
20967
  },
@@ -21024,6 +21083,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21024
21083
  ];
21025
21084
  }
21026
21085
  async handleMessages(agentContext, messages, tools) {
21086
+ const pseudoHtmlDescription = "This is the latest screenshot and page element information.\nindex and element:\n";
21027
21087
  let lastTool = this.lastToolResult(messages);
21028
21088
  if (lastTool &&
21029
21089
  lastTool.toolName !== "extract_page_content" &&
@@ -21042,14 +21102,55 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21042
21102
  },
21043
21103
  {
21044
21104
  type: "text",
21045
- text: "This is the latest screenshot and page element information.\nindex and element:\n" +
21046
- result.pseudoHtml,
21105
+ text: pseudoHtmlDescription + result.pseudoHtml,
21047
21106
  },
21048
21107
  ],
21049
21108
  });
21050
21109
  }
21110
+ if (messages.length > 10) {
21111
+ // compressed pseudoHtml
21112
+ for (let i = 2; i < messages.length - 3; i++) {
21113
+ let message = messages[i];
21114
+ if (message.role == "user" && message.content.length == 2) {
21115
+ let content = message.content;
21116
+ for (let j = 0; j < content.length; j++) {
21117
+ let _content = content[j];
21118
+ if (_content.type == "text" &&
21119
+ _content.text.startsWith(pseudoHtmlDescription)) {
21120
+ _content.text = this.removePseudoHtmlAttr(_content.text, [
21121
+ "class",
21122
+ "src",
21123
+ "href",
21124
+ ]);
21125
+ }
21126
+ }
21127
+ }
21128
+ }
21129
+ }
21051
21130
  super.handleMessages(agentContext, messages, tools);
21052
21131
  }
21132
+ removePseudoHtmlAttr(pseudoHtml, remove_attrs) {
21133
+ return pseudoHtml
21134
+ .split("\n")
21135
+ .map((line) => {
21136
+ if (!line.startsWith("[") || line.indexOf("]:<") == -1) {
21137
+ return line;
21138
+ }
21139
+ for (let i = 0; i < remove_attrs.length; i++) {
21140
+ let sIdx = line.indexOf(remove_attrs[i] + '="');
21141
+ if (sIdx == -1) {
21142
+ continue;
21143
+ }
21144
+ let eIdx = line.indexOf('"', sIdx + remove_attrs[i].length + 3);
21145
+ if (eIdx == -1) {
21146
+ continue;
21147
+ }
21148
+ line = line.substring(0, sIdx) + line.substring(eIdx + 1).trim().replace('" >', '">');
21149
+ }
21150
+ return line;
21151
+ })
21152
+ .join("\n");
21153
+ }
21053
21154
  }
21054
21155
  function typing(params) {
21055
21156
  let { index, text, enter } = params;
@@ -21188,6 +21289,50 @@ function select_option(params) {
21188
21289
  selectedText: option.text.trim(),
21189
21290
  };
21190
21291
  }
21292
/**
 * Scrolls the page, or the most visible scrollable element, vertically.
 *
 * One "step" is a tenth of the relevant viewport height, clamped to the range
 * 20-200 pixels; the distance scrolled is `step * params.amount`.
 *
 * - If the document is noticeably taller than the window (more than 1.2x),
 *   the window itself is scrolled.
 * - Otherwise the element with `overflow-y: auto|scroll`, overflowing content
 *   and the largest visible area is scrolled.
 * - If no such element exists the window is scrolled.
 *
 * The function is kept self-contained (all helpers nested) because it is
 * handed to `execute_script` as a value.
 * NOTE(review): presumably it is serialized and run in the page — confirm.
 *
 * @param {{amount: number}} params - `amount` is the number of steps; negative scrolls up.
 * @returns {boolean} `true` when an inner element was scrolled, `false` when
 *   the window was scrolled.
 */
function scroll_by(params) {
  const amount = params.amount;
  const documentElement = document.documentElement || document.body;

  // Pixel distance for `amount` steps given the height of the scrolled viewport.
  function scrollDistance(viewportHeight) {
    return Math.max(20, Math.min(viewportHeight / 10, 200)) * amount;
  }

  // Scrolls the window and reports that no inner element was used.
  function scrollWindow() {
    window.scrollBy(0, scrollDistance(window.innerHeight || documentElement.clientHeight));
    return false;
  }

  // Area (in px^2) of the part of `element` that lies inside the viewport.
  function getVisibleArea(element) {
    const rect = element.getBoundingClientRect();
    const viewportHeight = window.innerHeight || documentElement.clientHeight;
    const viewportWidth = window.innerWidth || documentElement.clientWidth;
    const visibleLeft = Math.max(0, Math.min(rect.left, viewportWidth));
    const visibleRight = Math.max(0, Math.min(rect.right, viewportWidth));
    const visibleTop = Math.max(0, Math.min(rect.top, viewportHeight));
    const visibleBottom = Math.max(0, Math.min(rect.bottom, viewportHeight));
    return (visibleRight - visibleLeft) * (visibleBottom - visibleTop);
  }

  if (documentElement.scrollHeight > window.innerHeight * 1.2) {
    return scrollWindow();
  }

  // Single pass: each candidate is measured once, and the first element with
  // the largest visible area wins.
  let largestElement = null;
  let largestArea = 0;
  for (const el of Array.from(document.querySelectorAll("*"))) {
    const overflowY = window.getComputedStyle(el).getPropertyValue("overflow-y");
    const isScrollable =
      (overflowY === "auto" || overflowY === "scroll") &&
      el.scrollHeight > el.clientHeight;
    if (!isScrollable) {
      continue;
    }
    const area = getVisibleArea(el);
    if (largestElement === null || area > largestArea) {
      largestElement = el;
      largestArea = area;
    }
  }
  if (largestElement === null) {
    return scrollWindow();
  }
  largestElement.scrollBy(0, scrollDistance(largestElement.clientHeight));
  return true;
}
21191
21336
 
21192
21337
  class BaseBrowserScreenAgent extends BaseBrowserAgent {
21193
21338
  constructor(llms, ext_tools, mcpClient) {