@eko-ai/eko 2.0.9 → 2.1.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.esm.js CHANGED
@@ -3,7 +3,7 @@ const config = {
3
3
  platform: "mac",
4
4
  maxReactNum: 200,
5
5
  maxTokens: 16000,
6
- compressThreshold: 60,
6
+ compressThreshold: 80,
7
7
  largeTextLength: 5000,
8
8
  shortTextLength: 800,
9
9
  };
@@ -61,7 +61,7 @@ class Logger {
61
61
  };
62
62
  let formattedMessage = '';
63
63
  if (this.dateFormat) {
64
- formattedMessage += `[${new Date().toISOString()}] `;
64
+ formattedMessage += `[${new Date().toLocaleString()}] `;
65
65
  }
66
66
  formattedMessage += `[${levelNames[level] || 'UNKNOWN'}] `;
67
67
  if (this.prefix) {
@@ -133,8 +133,238 @@ class Logger {
133
133
  }
134
134
  const Log = new Logger();
135
135
 
136
/**
 * Wait for a fixed delay.
 *
 * @param {number} time - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>} Promise that fulfills with no value once the timer fires.
 */
function sleep(time) {
  return new Promise((done) => {
    const wake = () => done();
    setTimeout(wake, time);
  });
}
139
/**
 * Generate a version-4 UUID string.
 *
 * Uses the platform's `crypto.randomUUID()` when it is available and falls
 * back to the `Math.random()`-based template generator otherwise, so the
 * function keeps working in runtimes that lack the Web Crypto helper.
 *
 * @returns {string} UUID in the form `xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx`.
 */
function uuidv4() {
  if (typeof crypto !== "undefined" && typeof crypto.randomUUID === "function") {
    return crypto.randomUUID();
  }
  // Fallback: fill the template one hex digit at a time.
  return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => {
    const r = (Math.random() * 16) | 0;
    // "y" positions carry the RFC 4122 variant bits (8, 9, a or b).
    const v = c === "x" ? r : (r & 0x3) | 0x8;
    return v.toString(16);
  });
}
146
/**
 * Run `fun` and reject if it does not settle within `timeout` milliseconds.
 *
 * The returned promise settles exactly once, and `error_callback` is invoked
 * at most once: a rejection from `fun` that arrives after the timeout has
 * already fired is ignored instead of being reported a second time.
 *
 * @param {Function} fun - Callback to run; may return a value or a promise.
 * @param {number} timeout - Time limit in milliseconds.
 * @param {Function} [error_callback] - Called with "Timeout" or the stringified error.
 * @returns {Promise<*>} Fulfills with the result of `fun`; rejects with
 *   `Error("Timeout")` or with whatever `fun` threw or rejected with.
 */
function call_timeout(fun, timeout, error_callback) {
  return new Promise((resolve, reject) => {
    let settled = false;
    const timer = setTimeout(() => fail(new Error("Timeout"), "Timeout"), timeout);

    // Single failure path shared by the timer and by errors from `fun`.
    function fail(error, message) {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      reject(error);
      error_callback && error_callback(message);
    }

    let pending;
    try {
      // Start `fun` right away; a synchronous throw is treated as a rejection.
      pending = Promise.resolve(fun());
    } catch (e) {
      pending = Promise.reject(e);
    }
    pending.then(
      (result) => {
        if (settled) {
          return;
        }
        settled = true;
        clearTimeout(timer);
        resolve(result);
      },
      (e) => fail(e, e + "")
    );
  });
}
164
/**
 * Normalize a tool definition into `{ type, name, description, parameters }`.
 *
 * Recognized shapes, checked in this order:
 *  - `{ function: { name, description, parameters } }`
 *  - `{ name, description, input_schema }`
 *  - `{ name, description, inputSchema }`
 *  - `{ name, description, parameters }` (anything else)
 *
 * @param {object} tool - Tool definition in one of the shapes above.
 * @returns {{type: string, name: *, description: *, parameters: *}} Normalized schema.
 */
function convertToolSchema(tool) {
  let holder = tool;
  let schemaKey = "parameters";
  if ("function" in tool) {
    holder = tool.function;
  } else if ("input_schema" in tool) {
    schemaKey = "input_schema";
  } else if ("inputSchema" in tool) {
    schemaKey = "inputSchema";
  }
  return {
    type: "function",
    name: holder.name,
    description: holder.description,
    parameters: holder[schemaKey],
  };
}
198
/**
 * Convert an image reference into a value usable as image content.
 *
 * @param {string} imageData - An http(s) URL, a `data:image/...` URL, or raw base64.
 * @returns {URL|Uint8Array} A `URL` for http(s) input, otherwise the decoded bytes.
 */
function toImage(imageData) {
  const isRemote =
    imageData.startsWith("http://") || imageData.startsWith("https://");
  if (isRemote) {
    return new URL(imageData);
  }
  // For data URLs keep only the payload after the first comma.
  const base64 = imageData.startsWith("data:image/")
    ? imageData.substring(imageData.indexOf(",") + 1)
    : imageData;
  // @ts-ignore
  if (typeof Buffer != "undefined") {
    // @ts-ignore
    return new Uint8Array(Buffer.from(base64, "base64"));
  }
  // No Buffer available: decode with atob and copy the char codes.
  const decoded = atob(base64);
  const bytes = new Uint8Array(decoded.length);
  for (let pos = 0; pos < decoded.length; pos++) {
    bytes[pos] = decoded.charCodeAt(pos);
  }
  return bytes;
}
223
/**
 * Merge two tool lists by `name`, letting `tools2` override `tools1`.
 *
 * Every entry of `tools1` keeps its position; when `tools2` has a tool with
 * the same name, that tool takes the slot instead. Tools that exist only in
 * `tools2` are appended afterwards in their original order.
 *
 * A `Map` is used for the lookup so that names such as "toString" or
 * "constructor" are never confused with inherited object properties.
 *
 * @param {Array<{name: string}>} tools1 - Base tools.
 * @param {Array<{name: string}>} tools2 - Overriding / additional tools.
 * @returns {Array<{name: string}>} New merged array; the inputs are not modified.
 */
function mergeTools(tools1, tools2) {
  const overrides = new Map();
  for (const tool of tools2) {
    overrides.set(tool.name, tool);
  }
  const merged = [];
  for (const tool1 of tools1) {
    const replacement = overrides.get(tool1.name);
    if (replacement) {
      merged.push(replacement);
      // An override is consumed once so it is not appended again below.
      overrides.delete(tool1.name);
    } else {
      merged.push(tool1);
    }
  }
  for (const tool2 of tools2) {
    if (overrides.has(tool2.name)) {
      merged.push(tool2);
    }
  }
  return merged;
}
248
/**
 * Merge two agent lists by `Name`, letting `agents2` override `agents1`.
 *
 * Every entry of `agents1` keeps its position; when `agents2` has an agent
 * with the same `Name`, that agent takes the slot instead. Agents that exist
 * only in `agents2` are appended afterwards in their original order.
 *
 * A `Map` is used for the lookup so that names matching inherited object
 * properties (for example "constructor") are handled like any other name.
 *
 * @param {Array<{Name: string}>} agents1 - Base agents.
 * @param {Array<{Name: string}>} agents2 - Overriding / additional agents.
 * @returns {Array<{Name: string}>} New merged array; the inputs are not modified.
 */
function mergeAgents(agents1, agents2) {
  const overrides = new Map();
  for (const agent of agents2) {
    overrides.set(agent.Name, agent);
  }
  const merged = [];
  for (const agent1 of agents1) {
    const replacement = overrides.get(agent1.Name);
    if (replacement) {
      merged.push(replacement);
      // An override is consumed once so it is not appended again below.
      overrides.delete(agent1.Name);
    } else {
      merged.push(agent1);
    }
  }
  for (const agent2 of agents2) {
    if (overrides.has(agent2.Name)) {
      merged.push(agent2);
    }
  }
  return merged;
}
273
/**
 * Truncate a string to at most `maxLength` characters.
 *
 * @param {string} str - Text to shorten; any falsy value yields "".
 * @param {number} maxLength - Maximum number of characters to keep.
 * @param {boolean} [appendPoint=true] - Append "..." when the text was cut.
 * @returns {string} The original text, or its prefix (optionally followed by "...").
 */
function sub(str, maxLength, appendPoint = true) {
  if (!str) {
    return "";
  }
  const tooLong = str.length > maxLength;
  if (!tooLong) {
    return str;
  }
  const suffix = appendPoint ? "..." : "";
  return str.substring(0, maxLength) + suffix;
}
282
/**
 * Best-effort repair of a truncated XML fragment (for example, partially
 * streamed output) so that it can be handed to an XML parser.
 *
 * Steps, in order:
 *  1. Complete a dangling attribute: a trailing `=` gets `""`, and a trailing
 *     bare `name` / `input` / `output` / `items` / `event` / `loop` inside an
 *     unfinished tag gets `=""`.
 *  2. Close unbalanced `"` and `<` characters.
 *  3. Append closing tags for every element that is still open.
 *
 * Declarations, comments and processing instructions (`<!...>`, `<?...>`) are
 * never treated as open elements, and an open tag without a recognizable name
 * (such as a lone `<`) is skipped instead of raising a TypeError.
 *
 * @param {string} code - Possibly incomplete XML text.
 * @returns {string} The text with the missing quotes, brackets and end tags appended.
 */
function fixXmlTag(code) {
  // Appends the `"` and `>` characters needed to balance quotes and brackets.
  function fixDoubleChar(code) {
    const stack = [];
    for (let i = 0; i < code.length; i++) {
      let s = code[i];
      if (s === "<") {
        stack.push(">");
      }
      else if (s === ">") {
        stack.pop();
      }
      else if (s === '"') {
        if (stack[stack.length - 1] === '"') {
          stack.pop();
        }
        else {
          stack.push('"');
        }
      }
    }
    const missingParts = [];
    while (stack.length > 0) {
      missingParts.push(stack.pop());
    }
    return code + missingParts.join("");
  }

  let eIdx = code.lastIndexOf(" ");
  let endStr = eIdx > -1 ? code.substring(eIdx + 1) : "";
  if (code.endsWith("=")) {
    code += '""';
  }
  else if (endStr == "name" ||
    endStr == "input" ||
    endStr == "output" ||
    endStr == "items" ||
    endStr == "event" ||
    endStr == "loop") {
    let idx1 = code.lastIndexOf(">");
    let idx2 = code.lastIndexOf("<");
    // Only complete the attribute when the text ends inside an unfinished tag.
    if (idx1 < idx2 && code.lastIndexOf(" ") > idx2) {
      code += '=""';
    }
  }
  code = fixDoubleChar(code);

  // Track elements that are opened but not yet closed.
  const stack = [];
  for (let i = 0; i < code.length; i++) {
    if (code[i] !== "<") {
      continue;
    }
    const next = code[i + 1];
    const isEndTag = next === "/";
    // `<?xml ...?>`, `<!DOCTYPE ...>` and `<!-- ... -->` never need a closing tag.
    const isDeclaration = next === "?" || next === "!";
    const endIndex = code.indexOf(">", i);
    const tagContent = code.slice(i, endIndex + 1);
    if (isDeclaration || tagContent.endsWith("/>")) {
      // Nothing to track.
    }
    else if (isEndTag) {
      stack.pop();
    }
    else {
      stack.push(tagContent);
    }
    if (endIndex == -1) {
      break;
    }
    i = endIndex;
  }

  const missingParts = [];
  while (stack.length > 0) {
    const top = stack.pop();
    if (top.startsWith("<")) {
      // Accept `-`, `.` and `:` so names like `my-tag` or `ns:tag` close correctly.
      const nameMatch = top.match(/<([\w:.-]+)/);
      if (nameMatch) {
        missingParts.push(`</${nameMatch[1]}>`);
      }
    }
    else {
      missingParts.push(top);
    }
  }
  return code + missingParts.join("");
}
364
+
136
365
  class Context {
137
366
  constructor(taskId, config, agents, chain) {
367
+ this.paused = false;
138
368
  this.taskId = taskId;
139
369
  this.config = config;
140
370
  this.agents = agents;
@@ -142,13 +372,21 @@ class Context {
142
372
  this.variables = new Map();
143
373
  this.controller = new AbortController();
144
374
  }
145
- checkAborted() {
375
+ async checkAborted() {
146
376
  // this.controller.signal.throwIfAborted();
147
377
  if (this.controller.signal.aborted) {
148
378
  const error = new Error("Operation was interrupted");
149
379
  error.name = "AbortError";
150
380
  throw error;
151
381
  }
382
+ while (this.paused) {
383
+ await sleep(500);
384
+ if (this.controller.signal.aborted) {
385
+ const error = new Error("Operation was interrupted");
386
+ error.name = "AbortError";
387
+ throw error;
388
+ }
389
+ }
152
390
  }
153
391
  }
154
392
  class AgentContext {
@@ -8437,235 +8675,6 @@ createOpenAI({
8437
8675
  // strict for OpenAI API
8438
8676
  });
8439
8677
 
8440
- function sleep(time) {
8441
- return new Promise((resolve) => setTimeout(() => resolve(), time));
8442
- }
8443
- function uuidv4() {
8444
- return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, function (c) {
8445
- const r = (Math.random() * 16) | 0;
8446
- const v = c === "x" ? r : (r & 0x3) | 0x8;
8447
- return v.toString(16);
8448
- });
8449
- }
8450
- function call_timeout(fun, timeout, error_callback) {
8451
- return new Promise(async (resolve, reject) => {
8452
- let timer = setTimeout(() => {
8453
- reject(new Error("Timeout"));
8454
- error_callback && error_callback("Timeout");
8455
- }, timeout);
8456
- try {
8457
- const result = await fun();
8458
- clearTimeout(timer);
8459
- resolve(result);
8460
- }
8461
- catch (e) {
8462
- clearTimeout(timer);
8463
- reject(e);
8464
- error_callback && error_callback(e + "");
8465
- }
8466
- });
8467
- }
8468
- function convertToolSchema(tool) {
8469
- if ("function" in tool) {
8470
- return {
8471
- type: "function",
8472
- name: tool.function.name,
8473
- description: tool.function.description,
8474
- parameters: tool.function.parameters,
8475
- };
8476
- }
8477
- else if ("input_schema" in tool) {
8478
- return {
8479
- type: "function",
8480
- name: tool.name,
8481
- description: tool.description,
8482
- parameters: tool.input_schema,
8483
- };
8484
- }
8485
- else if ("inputSchema" in tool) {
8486
- return {
8487
- type: "function",
8488
- name: tool.name,
8489
- description: tool.description,
8490
- parameters: tool.inputSchema,
8491
- };
8492
- }
8493
- else {
8494
- return {
8495
- type: "function",
8496
- name: tool.name,
8497
- description: tool.description,
8498
- parameters: tool.parameters,
8499
- };
8500
- }
8501
- }
8502
- function toImage(imageData) {
8503
- let image = null;
8504
- if (imageData.startsWith("http://") || imageData.startsWith("https://")) {
8505
- image = new URL(imageData);
8506
- }
8507
- else {
8508
- if (imageData.startsWith("data:image/")) {
8509
- imageData = imageData.substring(imageData.indexOf(",") + 1);
8510
- }
8511
- // @ts-ignore
8512
- if (typeof Buffer != "undefined") {
8513
- // @ts-ignore
8514
- const buffer = Buffer.from(imageData, "base64");
8515
- image = new Uint8Array(buffer);
8516
- }
8517
- else {
8518
- const binaryString = atob(imageData);
8519
- image = new Uint8Array(binaryString.length);
8520
- for (let i = 0; i < binaryString.length; i++) {
8521
- image[i] = binaryString.charCodeAt(i);
8522
- }
8523
- }
8524
- }
8525
- return image;
8526
- }
8527
- function mergeTools(tools1, tools2) {
8528
- let tools = [];
8529
- let toolMap2 = tools2.reduce((map, tool) => {
8530
- map[tool.name] = tool;
8531
- return map;
8532
- }, {});
8533
- for (let i = 0; i < tools1.length; i++) {
8534
- let tool1 = tools1[i];
8535
- let tool2 = toolMap2[tool1.name];
8536
- if (tool2) {
8537
- tools.push(tool2);
8538
- delete toolMap2[tool1.name];
8539
- }
8540
- else {
8541
- tools.push(tool1);
8542
- }
8543
- }
8544
- for (let i = 0; i < tools2.length; i++) {
8545
- let tool2 = tools2[i];
8546
- if (toolMap2[tool2.name]) {
8547
- tools.push(tool2);
8548
- }
8549
- }
8550
- return tools;
8551
- }
8552
- function mergeAgents(agents1, agents2) {
8553
- let tools = [];
8554
- let toolMap2 = agents2.reduce((map, tool) => {
8555
- map[tool.Name] = tool;
8556
- return map;
8557
- }, {});
8558
- for (let i = 0; i < agents1.length; i++) {
8559
- let tool1 = agents1[i];
8560
- let tool2 = toolMap2[tool1.Name];
8561
- if (tool2) {
8562
- tools.push(tool2);
8563
- delete toolMap2[tool1.Name];
8564
- }
8565
- else {
8566
- tools.push(tool1);
8567
- }
8568
- }
8569
- for (let i = 0; i < agents2.length; i++) {
8570
- let tool2 = agents2[i];
8571
- if (toolMap2[tool2.Name]) {
8572
- tools.push(tool2);
8573
- }
8574
- }
8575
- return tools;
8576
- }
8577
- function sub(str, maxLength, appendPoint = true) {
8578
- if (!str) {
8579
- return "";
8580
- }
8581
- if (str.length > maxLength) {
8582
- return str.substring(0, maxLength) + (appendPoint ? "..." : "");
8583
- }
8584
- return str;
8585
- }
8586
- function fixXmlTag(code) {
8587
- function fixDoubleChar(code) {
8588
- const stack = [];
8589
- for (let i = 0; i < code.length; i++) {
8590
- let s = code[i];
8591
- if (s === "<") {
8592
- stack.push(">");
8593
- }
8594
- else if (s === ">") {
8595
- stack.pop();
8596
- }
8597
- else if (s === '"') {
8598
- if (stack[stack.length - 1] === '"') {
8599
- stack.pop();
8600
- }
8601
- else {
8602
- stack.push('"');
8603
- }
8604
- }
8605
- }
8606
- const missingParts = [];
8607
- while (stack.length > 0) {
8608
- missingParts.push(stack.pop());
8609
- }
8610
- return code + missingParts.join("");
8611
- }
8612
- let eIdx = code.lastIndexOf(" ");
8613
- let endStr = eIdx > -1 ? code.substring(eIdx + 1) : "";
8614
- if (code.endsWith("=")) {
8615
- code += '""';
8616
- }
8617
- else if (endStr == "name" ||
8618
- endStr == "input" ||
8619
- endStr == "output" ||
8620
- endStr == "items" ||
8621
- endStr == "event" ||
8622
- endStr == "loop") {
8623
- let idx1 = code.lastIndexOf(">");
8624
- let idx2 = code.lastIndexOf("<");
8625
- if (idx1 < idx2 && code.lastIndexOf(" ") > idx2) {
8626
- code += '=""';
8627
- }
8628
- }
8629
- code = fixDoubleChar(code);
8630
- const stack = [];
8631
- function isSelfClosing(tag) {
8632
- return tag.endsWith("/>");
8633
- }
8634
- for (let i = 0; i < code.length; i++) {
8635
- let s = code[i];
8636
- if (s === "<") {
8637
- const isEndTag = code[i + 1] === "/";
8638
- let endIndex = code.indexOf(">", i);
8639
- let tagContent = code.slice(i, endIndex + 1);
8640
- if (isSelfClosing(tagContent)) ;
8641
- else if (isEndTag) {
8642
- stack.pop();
8643
- }
8644
- else {
8645
- stack.push(tagContent);
8646
- }
8647
- if (endIndex == -1) {
8648
- break;
8649
- }
8650
- i = endIndex;
8651
- }
8652
- }
8653
- const missingParts = [];
8654
- while (stack.length > 0) {
8655
- const top = stack.pop();
8656
- if (top.startsWith("<")) {
8657
- let arr = top.match(/<(\w+)/);
8658
- const tagName = arr[1];
8659
- missingParts.push(`</${tagName}>`);
8660
- }
8661
- else {
8662
- missingParts.push(top);
8663
- }
8664
- }
8665
- let completedCode = code + missingParts.join("");
8666
- return completedCode;
8667
- }
8668
-
8669
8678
  // src/anthropic-provider.ts
8670
8679
  var anthropicErrorDataSchema = z.object({
8671
8680
  type: z.literal("error"),
@@ -14701,7 +14710,7 @@ class RetryLanguageModel {
14701
14710
  toolChoice: request.toolChoice,
14702
14711
  },
14703
14712
  prompt: request.messages,
14704
- maxTokens: request.maxTokens || config.maxTokens,
14713
+ maxTokens: request.maxTokens,
14705
14714
  temperature: request.temperature,
14706
14715
  topP: request.topP,
14707
14716
  topK: request.topK,
@@ -14710,6 +14719,7 @@ class RetryLanguageModel {
14710
14719
  });
14711
14720
  }
14712
14721
  async doGenerate(options) {
14722
+ const maxTokens = options.maxTokens;
14713
14723
  const names = [...this.names, ...this.names];
14714
14724
  for (let i = 0; i < names.length; i++) {
14715
14725
  const name = names[i];
@@ -14717,6 +14727,10 @@ class RetryLanguageModel {
14717
14727
  if (!llm) {
14718
14728
  continue;
14719
14729
  }
14730
+ if (!maxTokens) {
14731
+ options.maxTokens =
14732
+ this.llms[name].config?.maxTokens || config.maxTokens;
14733
+ }
14720
14734
  try {
14721
14735
  let result = await llm.doGenerate(options);
14722
14736
  if (Log.isEnableDebug()) {
@@ -14748,7 +14762,7 @@ class RetryLanguageModel {
14748
14762
  toolChoice: request.toolChoice,
14749
14763
  },
14750
14764
  prompt: request.messages,
14751
- maxTokens: request.maxTokens || config.maxTokens,
14765
+ maxTokens: request.maxTokens,
14752
14766
  temperature: request.temperature,
14753
14767
  topP: request.topP,
14754
14768
  topK: request.topK,
@@ -14757,6 +14771,7 @@ class RetryLanguageModel {
14757
14771
  });
14758
14772
  }
14759
14773
  async doStream(options) {
14774
+ const maxTokens = options.maxTokens;
14760
14775
  const names = [...this.names, ...this.names];
14761
14776
  for (let i = 0; i < names.length; i++) {
14762
14777
  const name = names[i];
@@ -14764,6 +14779,10 @@ class RetryLanguageModel {
14764
14779
  if (!llm) {
14765
14780
  continue;
14766
14781
  }
14782
+ if (!maxTokens) {
14783
+ options.maxTokens =
14784
+ this.llms[name].config?.maxTokens || config.maxTokens;
14785
+ }
14767
14786
  try {
14768
14787
  const controller = new AbortController();
14769
14788
  const signal = options.abortSignal
@@ -17841,7 +17860,7 @@ class HumanInteractTool {
17841
17860
  this.name = TOOL_NAME$3;
17842
17861
  this.noPlan = true;
17843
17862
  this.description = `AI interacts with humans:
17844
- confirm: Ask the user to confirm whether to execute an operation, especially when performing dangerous actions such as deleting system files.
17863
+ confirm: Ask the user to confirm whether to execute an operation, especially when performing dangerous actions such as deleting system files, users will choose Yes or No.
17845
17864
  input: Prompt the user to enter text; for example, when a task is ambiguous, the AI can choose to ask the user for details, and the user can respond by inputting.
17846
17865
  select: Allow the user to make a choice; in situations that require selection, the AI can ask the user to make a decision.
17847
17866
  request_help: Request assistance from the user; for instance, when an operation is blocked, the AI can ask the user for help, such as needing to log into a website or solve a CAPTCHA.`;
@@ -17937,7 +17956,7 @@ const TOOL_NAME$2 = "task_node_status";
17937
17956
  class TaskNodeStatusTool {
17938
17957
  constructor() {
17939
17958
  this.name = TOOL_NAME$2;
17940
- this.description = `After completing each step of the task, you need to call this tool to update the status of the task node.`;
17959
+ this.description = `After completing each step of the task, you need to call this tool to update the status of the task node, and think about the tasks to be processed and the next action plan.`;
17941
17960
  this.parameters = {
17942
17961
  type: "object",
17943
17962
  properties: {
@@ -17955,8 +17974,12 @@ class TaskNodeStatusTool {
17955
17974
  type: "number",
17956
17975
  },
17957
17976
  },
17977
+ thought: {
17978
+ type: "string",
17979
+ description: "Current thinking content, which can be analysis of the problem, assumptions, insights, reflections, or a summary of the previous, suggest the next action step to be taken, which should be specific, executable, and verifiable."
17980
+ },
17958
17981
  },
17959
- required: ["doneIds", "todoIds"],
17982
+ required: ["doneIds", "todoIds", "thought"],
17960
17983
  };
17961
17984
  }
17962
17985
  async execute(args, agentContext) {
@@ -18096,7 +18119,7 @@ class McpTool {
18096
18119
 
18097
18120
  const AGENT_SYSTEM_TEMPLATE = `
18098
18121
  You are {name}, an autonomous AI agent for {agent} agent.
18099
- UTC datetime: {datetime}
18122
+ Current datetime: {datetime}
18100
18123
 
18101
18124
  # Task Description
18102
18125
  {description}
@@ -18120,10 +18143,10 @@ The output language should follow the language corresponding to the user's task.
18120
18143
  const HUMAN_PROMPT = `
18121
18144
  * HUMAN INTERACT
18122
18145
  During the task execution process, you can use the \`${TOOL_NAME$3}\` tool to interact with humans, please call it in the following situations:
18123
- - When performing dangerous operations such as deleting files, confirmation from humans is required
18124
- - When encountering obstacles while accessing websites, such as requiring user login, you need to request human assistance
18146
+ - When performing dangerous operations such as deleting files, confirmation from humans is required.
18147
+ - When encountering obstacles while visiting a website, such as requiring user login or captcha, you need to request for manual assistance.
18125
18148
  - When requesting login, please only call the function when a login dialog box is clearly displayed.
18126
- - Try not to use the \`${TOOL_NAME$3}\` tool
18149
+ - Try to minimize the use of \`${TOOL_NAME$3}\` tool.
18127
18150
  `;
18128
18151
  const VARIABLE_PROMPT = `
18129
18152
  * VARIABLE STORAGE
@@ -18199,7 +18222,7 @@ function getAgentSystemPrompt(agent, agentNode, context, tools, extSysPrompt) {
18199
18222
  return AGENT_SYSTEM_TEMPLATE.replace("{name}", config.name)
18200
18223
  .replace("{agent}", agent.Name)
18201
18224
  .replace("{description}", agent.Description)
18202
- .replace("{datetime}", new Date().toISOString())
18225
+ .replace("{datetime}", new Date().toLocaleString())
18203
18226
  .replace("{prompt}", prompt)
18204
18227
  .replace("{nodePrompt}", nodePrompt)
18205
18228
  .trim();
@@ -18253,7 +18276,7 @@ class Agent {
18253
18276
  let rlm = new RetryLanguageModel(context.config.llms, this.llms);
18254
18277
  let agentTools = tools;
18255
18278
  while (loopNum < maxReactNum) {
18256
- context.checkAborted();
18279
+ await context.checkAborted();
18257
18280
  if (mcpClient) {
18258
18281
  let controlMcp = await this.controlMcpTools(agentContext, messages, loopNum);
18259
18282
  if (controlMcp.mcpTools) {
@@ -18522,6 +18545,9 @@ class Agent {
18522
18545
  }
18523
18546
  return this.tools;
18524
18547
  }
18548
+ addTool(tool) {
18549
+ this.tools.push(tool);
18550
+ }
18525
18551
  get Name() {
18526
18552
  return this.name;
18527
18553
  }
@@ -18565,7 +18591,7 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18565
18591
  const reader = result.stream.getReader();
18566
18592
  try {
18567
18593
  while (true) {
18568
- context.checkAborted();
18594
+ await context.checkAborted();
18569
18595
  const { done, value } = await reader.read();
18570
18596
  if (done) {
18571
18597
  break;
@@ -18724,7 +18750,7 @@ class BaseChatAgent extends Agent {
18724
18750
 
18725
18751
  const PLAN_SYSTEM_TEMPLATE = `
18726
18752
  You are {name}, an autonomous AI Agent Planner.
18727
- UTC datetime: {datetime}
18753
+ Current datetime: {datetime}
18728
18754
 
18729
18755
  ## Task Description
18730
18756
  Your task is to understand the user's requirements, dynamically plan the user's tasks based on the Agent list, and please follow the steps below:
@@ -18881,12 +18907,12 @@ Output result:
18881
18907
  ];
18882
18908
  const PLAN_USER_TEMPLATE = `
18883
18909
  User Platform: {platform}
18884
- Task Description: {taskPrompt}
18910
+ Task Description: {task_prompt}
18885
18911
  `;
18886
18912
  const PLAN_USER_TASK_WEBSITE_TEMPLATE = `
18887
18913
  User Platform: {platform}
18888
18914
  Task Website: {task_website}
18889
- Task Description: {taskPrompt}
18915
+ Task Description: {task_prompt}
18890
18916
  `;
18891
18917
  async function getPlanSystemPrompt(context) {
18892
18918
  let agents_prompt = "";
@@ -18914,22 +18940,26 @@ async function getPlanSystemPrompt(context) {
18914
18940
  }
18915
18941
  return PLAN_SYSTEM_TEMPLATE.replace("{name}", config.name)
18916
18942
  .replace("{agents}", agents_prompt.trim())
18917
- .replace("{datetime}", new Date().toISOString())
18943
+ .replace("{datetime}", new Date().toLocaleString())
18918
18944
  .replace("{example_prompt}", example_prompt)
18919
18945
  .trim();
18920
18946
  }
18921
/**
 * Build the user message for the planner.
 *
 * Placeholders are filled through replacer functions so that user-supplied
 * text containing `$&`, `$$`, `$1` and similar sequences is inserted verbatim
 * rather than being interpreted as a replacement pattern.
 *
 * @param {string} task_prompt - The user's task description.
 * @param {string} [task_website] - When truthy, the website template is used.
 * @param {string} [ext_prompt] - Extra text appended on a new line after trimming.
 * @returns {string} The trimmed prompt, followed by the extra text if one was given.
 */
function getPlanUserPrompt(task_prompt, task_website, ext_prompt) {
  const template = task_website
    ? PLAN_USER_TASK_WEBSITE_TEMPLATE
    : PLAN_USER_TEMPLATE;
  let prompt = template
    .replace("{task_prompt}", () => task_prompt)
    .replace("{platform}", () => config.platform);
  if (task_website) {
    prompt = prompt.replace("{task_website}", () => task_website);
  }
  prompt = prompt.trim();
  if (ext_prompt) {
    prompt += `\n${ext_prompt.trim()}`;
  }
  return prompt;
}
18934
18964
 
18935
18965
  class Planner {
@@ -18972,7 +19002,7 @@ class Planner {
18972
19002
  content: [
18973
19003
  {
18974
19004
  type: "text",
18975
- text: getPlanUserPrompt(taskPrompt, this.context.variables.get("task_website")),
19005
+ text: getPlanUserPrompt(taskPrompt, this.context.variables.get("task_website"), this.context.variables.get("plan_ext_prompt")),
18976
19006
  },
18977
19007
  ],
18978
19008
  },
@@ -18989,7 +19019,7 @@ class Planner {
18989
19019
  let streamText = "";
18990
19020
  try {
18991
19021
  while (true) {
18992
- this.context.checkAborted();
19022
+ await this.context.checkAborted();
18993
19023
  const { done, value } = await reader.read();
18994
19024
  if (done) {
18995
19025
  break;
@@ -19128,7 +19158,7 @@ class Eko {
19128
19158
  }, {});
19129
19159
  let results = [];
19130
19160
  for (let i = 0; i < workflow.agents.length; i++) {
19131
- context.checkAborted();
19161
+ await context.checkAborted();
19132
19162
  let agentNode = workflow.agents[i];
19133
19163
  let agent = agentMap[agentNode.name];
19134
19164
  if (!agent) {
@@ -19165,6 +19195,16 @@ class Eko {
19165
19195
  return false;
19166
19196
  }
19167
19197
  }
19198
+ pauseTask(taskId, paused) {
19199
+ let context = this.taskMap.get(taskId);
19200
+ if (context) {
19201
+ context.paused = paused;
19202
+ return true;
19203
+ }
19204
+ else {
19205
+ return false;
19206
+ }
19207
+ }
19168
19208
  addAgent(agent) {
19169
19209
  this.config.agents = this.config.agents || [];
19170
19210
  this.config.agents.push(agent);
@@ -20029,6 +20069,16 @@ class BaseBrowserAgent extends Agent {
20029
20069
  }
20030
20070
  return null;
20031
20071
  }
20072
+ toolUseNames(messages) {
20073
+ let toolNames = [];
20074
+ for (let i = 0; i < messages.length; i++) {
20075
+ let message = messages[i];
20076
+ if (message.role == "tool") {
20077
+ toolNames.push(message.content[0].toolName);
20078
+ }
20079
+ }
20080
+ return toolNames;
20081
+ }
20032
20082
  async execute_mcp_script(agentContext, script) {
20033
20083
  return;
20034
20084
  }
@@ -20069,8 +20119,8 @@ function run_build_dom_tree() {
20069
20119
  'name',
20070
20120
  'role',
20071
20121
  'class',
20072
- // 'href',
20073
- 'tabindex',
20122
+ 'src',
20123
+ 'href',
20074
20124
  'aria-label',
20075
20125
  'placeholder',
20076
20126
  'value',
@@ -20119,6 +20169,12 @@ function run_build_dom_tree() {
20119
20169
  let classList = value.split(" ").slice(0, 3);
20120
20170
  value = classList.join(" ");
20121
20171
  }
20172
+ else if ((key == "src" || key == "href") && value && value.length > 200) {
20173
+ continue;
20174
+ }
20175
+ else if ((key == "src" || key == "href") && value && value.startsWith("/")) {
20176
+ value = window.location.origin + value;
20177
+ }
20122
20178
  if (key && value) {
20123
20179
  attributes_str += ` ${key}="${value}"`;
20124
20180
  }
@@ -20677,15 +20733,13 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20677
20733
  }, [index]);
20678
20734
  await sleep(200);
20679
20735
  }
20680
- async scroll_mouse_wheel(agentContext, amount) {
20681
- await this.execute_script(agentContext, (amount) => {
20682
- let viewportHeight = window.innerHeight ||
20683
- document.documentElement.clientHeight ||
20684
- document.body.clientHeight;
20685
- let y = Math.max(20, Math.min(viewportHeight / 10, 200));
20686
- window.scrollBy(0, y * amount);
20687
- }, [amount]);
20736
+ async scroll_mouse_wheel(agentContext, amount, extract_page_content) {
20737
+ await this.execute_script(agentContext, scroll_by, [{ amount }]);
20688
20738
  await sleep(200);
20739
+ if (extract_page_content) {
20740
+ let page_content = await this.extract_page_content(agentContext);
20741
+ return "This is the latest page content:\n" + page_content;
20742
+ }
20689
20743
  }
20690
20744
  async hover_to_element(agentContext, index) {
20691
20745
  await this.execute_script(agentContext, hover_to, [{ index }]);
@@ -20706,6 +20760,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20706
20760
  for (let i = 0; i < 5; i++) {
20707
20761
  await sleep(200);
20708
20762
  await this.execute_script(agentContext, run_build_dom_tree, []);
20763
+ await sleep(50);
20709
20764
  element_result = (await this.execute_script(agentContext, () => {
20710
20765
  return window.get_clickable_elements(true);
20711
20766
  }, []));
@@ -20713,7 +20768,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20713
20768
  break;
20714
20769
  }
20715
20770
  }
20716
- await sleep(50);
20771
+ await sleep(100);
20717
20772
  let screenshot = await this.screenshot(agentContext);
20718
20773
  // agentContext.variables.set("selector_map", element_result.selector_map);
20719
20774
  let pseudoHtml = element_result.element_str;
@@ -20847,7 +20902,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20847
20902
  },
20848
20903
  {
20849
20904
  name: "scroll_mouse_wheel",
20850
- description: "Scroll the mouse wheel at current position, prioritize using extract_page_content, only scroll when you need to load more content",
20905
+ description: "Scroll the mouse wheel at current position, only scroll when you need to load more content",
20851
20906
  parameters: {
20852
20907
  type: "object",
20853
20908
  properties: {
@@ -20861,13 +20916,17 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20861
20916
  type: "string",
20862
20917
  enum: ["up", "down"],
20863
20918
  },
20919
+ extract_page_content: {
20920
+ type: "boolean",
20921
+ description: "After scrolling is completed, whether to extract the current latest page content",
20922
+ },
20864
20923
  },
20865
20924
  required: ["amount", "direction"],
20866
20925
  },
20867
20926
  execute: async (args, agentContext) => {
20868
20927
  return await this.callInnerTool(async () => {
20869
20928
  let amount = args.amount;
20870
- await this.scroll_mouse_wheel(agentContext, args.direction == "up" ? -amount : amount);
20929
+ await this.scroll_mouse_wheel(agentContext, args.direction == "up" ? -amount : amount, args.extract_page_content == true);
20871
20930
  });
20872
20931
  },
20873
20932
  },
@@ -20989,6 +21048,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20989
21048
  ];
20990
21049
  }
20991
21050
  async handleMessages(agentContext, messages, tools) {
21051
+ const pseudoHtmlDescription = "This is the latest screenshot and page element information.\nindex and element:\n";
20992
21052
  let lastTool = this.lastToolResult(messages);
20993
21053
  if (lastTool &&
20994
21054
  lastTool.toolName !== "extract_page_content" &&
@@ -21007,14 +21067,55 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21007
21067
  },
21008
21068
  {
21009
21069
  type: "text",
21010
- text: "This is the latest screenshot and page element information.\nindex and element:\n" +
21011
- result.pseudoHtml,
21070
+ text: pseudoHtmlDescription + result.pseudoHtml,
21012
21071
  },
21013
21072
  ],
21014
21073
  });
21015
21074
  }
21075
+ if (messages.length > 10) {
21076
+ // compressed pseudoHtml
21077
+ for (let i = 2; i < messages.length - 3; i++) {
21078
+ let message = messages[i];
21079
+ if (message.role == "user" && message.content.length == 2) {
21080
+ let content = message.content;
21081
+ for (let j = 0; j < content.length; j++) {
21082
+ let _content = content[j];
21083
+ if (_content.type == "text" &&
21084
+ _content.text.startsWith(pseudoHtmlDescription)) {
21085
+ _content.text = this.removePseudoHtmlAttr(_content.text, [
21086
+ "class",
21087
+ "src",
21088
+ "href",
21089
+ ]);
21090
+ }
21091
+ }
21092
+ }
21093
+ }
21094
+ }
21016
21095
  super.handleMessages(agentContext, messages, tools);
21017
21096
  }
21097
+ removePseudoHtmlAttr(pseudoHtml, remove_attrs) {
21098
+ return pseudoHtml
21099
+ .split("\n")
21100
+ .map((line) => {
21101
+ if (!line.startsWith("[") || line.indexOf("]:<") == -1) {
21102
+ return line;
21103
+ }
21104
+ for (let i = 0; i < remove_attrs.length; i++) {
21105
+ let sIdx = line.indexOf(remove_attrs[i] + '="');
21106
+ if (sIdx == -1) {
21107
+ continue;
21108
+ }
21109
+ let eIdx = line.indexOf('"', sIdx + remove_attrs[i].length + 3);
21110
+ if (eIdx == -1) {
21111
+ continue;
21112
+ }
21113
+ line = line.substring(0, sIdx) + line.substring(eIdx + 1).trim().replace('" >', '">');
21114
+ }
21115
+ return line;
21116
+ })
21117
+ .join("\n");
21118
+ }
21018
21119
  }
21019
21120
  function typing(params) {
21020
21121
  let { index, text, enter } = params;
@@ -21153,6 +21254,50 @@ function select_option(params) {
21153
21254
  selectedText: option.text.trim(),
21154
21255
  };
21155
21256
  }
21257
/**
 * Scroll the page, or the most visible scrollable element, by a number of steps.
 *
 * One step is a tenth of the scrolled area's height, clamped to 20..200 px.
 * The window is scrolled when the document is more than 1.2x taller than the
 * viewport or when no element with `overflow-y: auto|scroll` has overflowing
 * content; otherwise the scrollable element with the largest visible area is
 * scrolled.
 *
 * The function only uses `window` and `document`, so it stays self-contained.
 *
 * @param {{amount: number}} params - `amount` is the signed number of steps
 *   (negative scrolls up).
 * @returns {boolean} `true` when an inner element was scrolled, `false` when
 *   the window was scrolled.
 */
function scroll_by(params) {
  const amount = params.amount;
  const documentElement = document.documentElement || document.body;
  const stepFor = (height) => Math.max(20, Math.min(height / 10, 200));
  const scrollWindow = () => {
    const y = stepFor(window.innerHeight || documentElement.clientHeight);
    window.scrollBy(0, y * amount);
    return false;
  };
  if (documentElement.scrollHeight > window.innerHeight * 1.2) {
    return scrollWindow();
  }
  // Area of the element's bounding box that lies inside the viewport.
  const getVisibleArea = (element) => {
    const rect = element.getBoundingClientRect();
    const viewportHeight = window.innerHeight || documentElement.clientHeight;
    const viewportWidth = window.innerWidth || documentElement.clientWidth;
    const visibleLeft = Math.max(0, Math.min(rect.left, viewportWidth));
    const visibleRight = Math.max(0, Math.min(rect.right, viewportWidth));
    const visibleTop = Math.max(0, Math.min(rect.top, viewportHeight));
    const visibleBottom = Math.max(0, Math.min(rect.bottom, viewportHeight));
    return (visibleRight - visibleLeft) * (visibleBottom - visibleTop);
  };
  // Single pass: measure each candidate once and keep the first largest one.
  let largestElement = null;
  let largestArea = -Infinity;
  for (const el of Array.from(document.querySelectorAll("*"))) {
    const overflowY = window.getComputedStyle(el).getPropertyValue("overflow-y");
    const scrollable =
      (overflowY === "auto" || overflowY === "scroll") &&
      el.scrollHeight > el.clientHeight;
    if (!scrollable) {
      continue;
    }
    const area = getVisibleArea(el);
    if (area > largestArea) {
      largestElement = el;
      largestArea = area;
    }
  }
  if (!largestElement) {
    return scrollWindow();
  }
  largestElement.scrollBy(0, stepFor(largestElement.clientHeight) * amount);
  return true;
}
21156
21301
 
21157
21302
  class BaseBrowserScreenAgent extends BaseBrowserAgent {
21158
21303
  constructor(llms, ext_tools, mcpClient) {