@eko-ai/eko 2.1.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +8 -11
  2. package/dist/agent/base.d.ts +5 -2
  3. package/dist/agent/base.d.ts.map +1 -1
  4. package/dist/agent/browser/browser_base.d.ts +1 -1
  5. package/dist/agent/browser/browser_base.d.ts.map +1 -1
  6. package/dist/agent/browser/browser_labels.d.ts +4 -2
  7. package/dist/agent/browser/browser_labels.d.ts.map +1 -1
  8. package/dist/agent/browser/browser_screen.d.ts +1 -1
  9. package/dist/agent/browser/browser_screen.d.ts.map +1 -1
  10. package/dist/agent/browser/build_dom_tree.d.ts.map +1 -1
  11. package/dist/agent/file.d.ts +5 -0
  12. package/dist/agent/file.d.ts.map +1 -1
  13. package/dist/agent/timer.d.ts +5 -0
  14. package/dist/agent/timer.d.ts.map +1 -1
  15. package/dist/common/utils.d.ts.map +1 -1
  16. package/dist/common/xml.d.ts.map +1 -1
  17. package/dist/config/index.d.ts +2 -1
  18. package/dist/config/index.d.ts.map +1 -1
  19. package/dist/core/context.d.ts +2 -1
  20. package/dist/core/context.d.ts.map +1 -1
  21. package/dist/core/index.d.ts +1 -0
  22. package/dist/core/index.d.ts.map +1 -1
  23. package/dist/core/plan.d.ts.map +1 -1
  24. package/dist/index.cjs.js +819 -345
  25. package/dist/index.cjs.js.map +1 -1
  26. package/dist/index.esm.js +819 -345
  27. package/dist/index.esm.js.map +1 -1
  28. package/dist/llm/index.d.ts.map +1 -1
  29. package/dist/memory/index.d.ts.map +1 -1
  30. package/dist/prompt/plan.d.ts +1 -1
  31. package/dist/prompt/plan.d.ts.map +1 -1
  32. package/dist/tools/foreach_task.d.ts.map +1 -1
  33. package/dist/tools/human_interact.d.ts +1 -0
  34. package/dist/tools/human_interact.d.ts.map +1 -1
  35. package/dist/tools/task_node_status.d.ts.map +1 -1
  36. package/dist/tools/watch_trigger.d.ts +1 -0
  37. package/dist/tools/watch_trigger.d.ts.map +1 -1
  38. package/dist/types/core.types.d.ts +1 -1
  39. package/dist/types/core.types.d.ts.map +1 -1
  40. package/dist/types/llm.types.d.ts +1 -0
  41. package/dist/types/llm.types.d.ts.map +1 -1
  42. package/package.json +1 -1
package/dist/index.cjs.js CHANGED
@@ -7,11 +7,12 @@ var buffer = require('buffer');
7
7
  const config = {
8
8
  name: "Eko",
9
9
  platform: "mac",
10
- maxReactNum: 200,
10
+ maxReactNum: 500,
11
11
  maxTokens: 16000,
12
12
  compressThreshold: 80,
13
13
  largeTextLength: 5000,
14
- shortTextLength: 800,
14
+ fileTextMaxLength: 20000,
15
+ maxDialogueImgFileNum: 2,
15
16
  };
16
17
 
17
18
  var LogLevel;
@@ -67,7 +68,7 @@ class Logger {
67
68
  };
68
69
  let formattedMessage = '';
69
70
  if (this.dateFormat) {
70
- formattedMessage += `[${new Date().toISOString()}] `;
71
+ formattedMessage += `[${new Date().toLocaleString()}] `;
71
72
  }
72
73
  formattedMessage += `[${levelNames[level] || 'UNKNOWN'}] `;
73
74
  if (this.prefix) {
@@ -139,8 +140,240 @@ class Logger {
139
140
  }
140
141
  const Log = new Logger();
141
142
 
143
+ function sleep(time) {
144
+ return new Promise((resolve) => setTimeout(() => resolve(), time));
145
+ }
146
+ function uuidv4() {
147
+ return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, function (c) {
148
+ const r = (Math.random() * 16) | 0;
149
+ const v = c === "x" ? r : (r & 0x3) | 0x8;
150
+ return v.toString(16);
151
+ });
152
+ }
153
+ function call_timeout(fun, timeout, error_callback) {
154
+ return new Promise(async (resolve, reject) => {
155
+ let timer = setTimeout(() => {
156
+ reject(new Error("Timeout"));
157
+ error_callback && error_callback("Timeout");
158
+ }, timeout);
159
+ try {
160
+ const result = await fun();
161
+ clearTimeout(timer);
162
+ resolve(result);
163
+ }
164
+ catch (e) {
165
+ clearTimeout(timer);
166
+ reject(e);
167
+ error_callback && error_callback(e + "");
168
+ }
169
+ });
170
+ }
171
+ function convertToolSchema(tool) {
172
+ if ("function" in tool) {
173
+ return {
174
+ type: "function",
175
+ name: tool.function.name,
176
+ description: tool.function.description,
177
+ parameters: tool.function.parameters,
178
+ };
179
+ }
180
+ else if ("input_schema" in tool) {
181
+ return {
182
+ type: "function",
183
+ name: tool.name,
184
+ description: tool.description,
185
+ parameters: tool.input_schema,
186
+ };
187
+ }
188
+ else if ("inputSchema" in tool) {
189
+ return {
190
+ type: "function",
191
+ name: tool.name,
192
+ description: tool.description,
193
+ parameters: tool.inputSchema,
194
+ };
195
+ }
196
+ else {
197
+ return {
198
+ type: "function",
199
+ name: tool.name,
200
+ description: tool.description,
201
+ parameters: tool.parameters,
202
+ };
203
+ }
204
+ }
205
+ function toImage(imageData) {
206
+ let image = null;
207
+ if (imageData.startsWith("http://") || imageData.startsWith("https://")) {
208
+ image = new URL(imageData);
209
+ }
210
+ else {
211
+ if (imageData.startsWith("data:image/")) {
212
+ imageData = imageData.substring(imageData.indexOf(",") + 1);
213
+ }
214
+ // @ts-ignore
215
+ if (typeof Buffer != "undefined") {
216
+ // @ts-ignore
217
+ const buffer = Buffer.from(imageData, "base64");
218
+ image = new Uint8Array(buffer);
219
+ }
220
+ else {
221
+ const binaryString = atob(imageData);
222
+ image = new Uint8Array(binaryString.length);
223
+ for (let i = 0; i < binaryString.length; i++) {
224
+ image[i] = binaryString.charCodeAt(i);
225
+ }
226
+ }
227
+ }
228
+ return image;
229
+ }
230
+ function mergeTools(tools1, tools2) {
231
+ let tools = [];
232
+ let toolMap2 = tools2.reduce((map, tool) => {
233
+ map[tool.name] = tool;
234
+ return map;
235
+ }, {});
236
+ let names = [];
237
+ for (let i = 0; i < tools1.length; i++) {
238
+ let tool1 = tools1[i];
239
+ let tool2 = toolMap2[tool1.name];
240
+ if (tool2) {
241
+ tools.push(tool2);
242
+ delete toolMap2[tool1.name];
243
+ }
244
+ else {
245
+ tools.push(tool1);
246
+ }
247
+ }
248
+ for (let i = 0; i < tools2.length; i++) {
249
+ let tool2 = tools2[i];
250
+ if (toolMap2[tool2.name] && names.indexOf(tool2.name) === -1) {
251
+ tools.push(tool2);
252
+ names.push(tool2.name);
253
+ }
254
+ }
255
+ return tools;
256
+ }
257
+ function mergeAgents(agents1, agents2) {
258
+ let tools = [];
259
+ let toolMap2 = agents2.reduce((map, tool) => {
260
+ map[tool.Name] = tool;
261
+ return map;
262
+ }, {});
263
+ for (let i = 0; i < agents1.length; i++) {
264
+ let tool1 = agents1[i];
265
+ let tool2 = toolMap2[tool1.Name];
266
+ if (tool2) {
267
+ tools.push(tool2);
268
+ delete toolMap2[tool1.Name];
269
+ }
270
+ else {
271
+ tools.push(tool1);
272
+ }
273
+ }
274
+ for (let i = 0; i < agents2.length; i++) {
275
+ let tool2 = agents2[i];
276
+ if (toolMap2[tool2.Name]) {
277
+ tools.push(tool2);
278
+ }
279
+ }
280
+ return tools;
281
+ }
282
+ function sub(str, maxLength, appendPoint = true) {
283
+ if (!str) {
284
+ return "";
285
+ }
286
+ if (str.length > maxLength) {
287
+ return str.substring(0, maxLength) + (appendPoint ? "..." : "");
288
+ }
289
+ return str;
290
+ }
291
+ function fixXmlTag(code) {
292
+ function fixDoubleChar(code) {
293
+ const stack = [];
294
+ for (let i = 0; i < code.length; i++) {
295
+ let s = code[i];
296
+ if (s === "<") {
297
+ stack.push(">");
298
+ }
299
+ else if (s === ">") {
300
+ stack.pop();
301
+ }
302
+ else if (s === '"') {
303
+ if (stack[stack.length - 1] === '"') {
304
+ stack.pop();
305
+ }
306
+ else {
307
+ stack.push('"');
308
+ }
309
+ }
310
+ }
311
+ const missingParts = [];
312
+ while (stack.length > 0) {
313
+ missingParts.push(stack.pop());
314
+ }
315
+ return code + missingParts.join("");
316
+ }
317
+ let eIdx = code.lastIndexOf(" ");
318
+ let endStr = eIdx > -1 ? code.substring(eIdx + 1) : "";
319
+ if (code.endsWith("=")) {
320
+ code += '""';
321
+ }
322
+ else if (endStr == "name" ||
323
+ endStr == "input" ||
324
+ endStr == "output" ||
325
+ endStr == "items" ||
326
+ endStr == "event" ||
327
+ endStr == "loop") {
328
+ let idx1 = code.lastIndexOf(">");
329
+ let idx2 = code.lastIndexOf("<");
330
+ if (idx1 < idx2 && code.lastIndexOf(" ") > idx2) {
331
+ code += '=""';
332
+ }
333
+ }
334
+ code = fixDoubleChar(code);
335
+ const stack = [];
336
+ function isSelfClosing(tag) {
337
+ return tag.endsWith("/>");
338
+ }
339
+ for (let i = 0; i < code.length; i++) {
340
+ let s = code[i];
341
+ if (s === "<") {
342
+ const isEndTag = code[i + 1] === "/";
343
+ let endIndex = code.indexOf(">", i);
344
+ let tagContent = code.slice(i, endIndex + 1);
345
+ if (isSelfClosing(tagContent)) ;
346
+ else if (isEndTag) {
347
+ stack.pop();
348
+ }
349
+ else {
350
+ stack.push(tagContent);
351
+ }
352
+ if (endIndex == -1) {
353
+ break;
354
+ }
355
+ i = endIndex;
356
+ }
357
+ }
358
+ const missingParts = [];
359
+ while (stack.length > 0) {
360
+ const top = stack.pop();
361
+ if (top.startsWith("<")) {
362
+ let arr = top.match(/<(\w+)/);
363
+ const tagName = arr[1];
364
+ missingParts.push(`</${tagName}>`);
365
+ }
366
+ else {
367
+ missingParts.push(top);
368
+ }
369
+ }
370
+ let completedCode = code + missingParts.join("");
371
+ return completedCode;
372
+ }
373
+
142
374
  class Context {
143
375
  constructor(taskId, config, agents, chain) {
376
+ this.paused = false;
144
377
  this.taskId = taskId;
145
378
  this.config = config;
146
379
  this.agents = agents;
@@ -148,13 +381,21 @@ class Context {
148
381
  this.variables = new Map();
149
382
  this.controller = new AbortController();
150
383
  }
151
- checkAborted() {
384
+ async checkAborted() {
152
385
  // this.controller.signal.throwIfAborted();
153
386
  if (this.controller.signal.aborted) {
154
387
  const error = new Error("Operation was interrupted");
155
388
  error.name = "AbortError";
156
389
  throw error;
157
390
  }
391
+ while (this.paused) {
392
+ await sleep(500);
393
+ if (this.controller.signal.aborted) {
394
+ const error = new Error("Operation was interrupted");
395
+ error.name = "AbortError";
396
+ throw error;
397
+ }
398
+ }
158
399
  }
159
400
  }
160
401
  class AgentContext {
@@ -8443,235 +8684,6 @@ createOpenAI({
8443
8684
  // strict for OpenAI API
8444
8685
  });
8445
8686
 
8446
- function sleep(time) {
8447
- return new Promise((resolve) => setTimeout(() => resolve(), time));
8448
- }
8449
- function uuidv4() {
8450
- return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, function (c) {
8451
- const r = (Math.random() * 16) | 0;
8452
- const v = c === "x" ? r : (r & 0x3) | 0x8;
8453
- return v.toString(16);
8454
- });
8455
- }
8456
- function call_timeout(fun, timeout, error_callback) {
8457
- return new Promise(async (resolve, reject) => {
8458
- let timer = setTimeout(() => {
8459
- reject(new Error("Timeout"));
8460
- error_callback && error_callback("Timeout");
8461
- }, timeout);
8462
- try {
8463
- const result = await fun();
8464
- clearTimeout(timer);
8465
- resolve(result);
8466
- }
8467
- catch (e) {
8468
- clearTimeout(timer);
8469
- reject(e);
8470
- error_callback && error_callback(e + "");
8471
- }
8472
- });
8473
- }
8474
- function convertToolSchema(tool) {
8475
- if ("function" in tool) {
8476
- return {
8477
- type: "function",
8478
- name: tool.function.name,
8479
- description: tool.function.description,
8480
- parameters: tool.function.parameters,
8481
- };
8482
- }
8483
- else if ("input_schema" in tool) {
8484
- return {
8485
- type: "function",
8486
- name: tool.name,
8487
- description: tool.description,
8488
- parameters: tool.input_schema,
8489
- };
8490
- }
8491
- else if ("inputSchema" in tool) {
8492
- return {
8493
- type: "function",
8494
- name: tool.name,
8495
- description: tool.description,
8496
- parameters: tool.inputSchema,
8497
- };
8498
- }
8499
- else {
8500
- return {
8501
- type: "function",
8502
- name: tool.name,
8503
- description: tool.description,
8504
- parameters: tool.parameters,
8505
- };
8506
- }
8507
- }
8508
- function toImage(imageData) {
8509
- let image = null;
8510
- if (imageData.startsWith("http://") || imageData.startsWith("https://")) {
8511
- image = new URL(imageData);
8512
- }
8513
- else {
8514
- if (imageData.startsWith("data:image/")) {
8515
- imageData = imageData.substring(imageData.indexOf(",") + 1);
8516
- }
8517
- // @ts-ignore
8518
- if (typeof Buffer != "undefined") {
8519
- // @ts-ignore
8520
- const buffer = Buffer.from(imageData, "base64");
8521
- image = new Uint8Array(buffer);
8522
- }
8523
- else {
8524
- const binaryString = atob(imageData);
8525
- image = new Uint8Array(binaryString.length);
8526
- for (let i = 0; i < binaryString.length; i++) {
8527
- image[i] = binaryString.charCodeAt(i);
8528
- }
8529
- }
8530
- }
8531
- return image;
8532
- }
8533
- function mergeTools(tools1, tools2) {
8534
- let tools = [];
8535
- let toolMap2 = tools2.reduce((map, tool) => {
8536
- map[tool.name] = tool;
8537
- return map;
8538
- }, {});
8539
- for (let i = 0; i < tools1.length; i++) {
8540
- let tool1 = tools1[i];
8541
- let tool2 = toolMap2[tool1.name];
8542
- if (tool2) {
8543
- tools.push(tool2);
8544
- delete toolMap2[tool1.name];
8545
- }
8546
- else {
8547
- tools.push(tool1);
8548
- }
8549
- }
8550
- for (let i = 0; i < tools2.length; i++) {
8551
- let tool2 = tools2[i];
8552
- if (toolMap2[tool2.name]) {
8553
- tools.push(tool2);
8554
- }
8555
- }
8556
- return tools;
8557
- }
8558
- function mergeAgents(agents1, agents2) {
8559
- let tools = [];
8560
- let toolMap2 = agents2.reduce((map, tool) => {
8561
- map[tool.Name] = tool;
8562
- return map;
8563
- }, {});
8564
- for (let i = 0; i < agents1.length; i++) {
8565
- let tool1 = agents1[i];
8566
- let tool2 = toolMap2[tool1.Name];
8567
- if (tool2) {
8568
- tools.push(tool2);
8569
- delete toolMap2[tool1.Name];
8570
- }
8571
- else {
8572
- tools.push(tool1);
8573
- }
8574
- }
8575
- for (let i = 0; i < agents2.length; i++) {
8576
- let tool2 = agents2[i];
8577
- if (toolMap2[tool2.Name]) {
8578
- tools.push(tool2);
8579
- }
8580
- }
8581
- return tools;
8582
- }
8583
- function sub(str, maxLength, appendPoint = true) {
8584
- if (!str) {
8585
- return "";
8586
- }
8587
- if (str.length > maxLength) {
8588
- return str.substring(0, maxLength) + (appendPoint ? "..." : "");
8589
- }
8590
- return str;
8591
- }
8592
- function fixXmlTag(code) {
8593
- function fixDoubleChar(code) {
8594
- const stack = [];
8595
- for (let i = 0; i < code.length; i++) {
8596
- let s = code[i];
8597
- if (s === "<") {
8598
- stack.push(">");
8599
- }
8600
- else if (s === ">") {
8601
- stack.pop();
8602
- }
8603
- else if (s === '"') {
8604
- if (stack[stack.length - 1] === '"') {
8605
- stack.pop();
8606
- }
8607
- else {
8608
- stack.push('"');
8609
- }
8610
- }
8611
- }
8612
- const missingParts = [];
8613
- while (stack.length > 0) {
8614
- missingParts.push(stack.pop());
8615
- }
8616
- return code + missingParts.join("");
8617
- }
8618
- let eIdx = code.lastIndexOf(" ");
8619
- let endStr = eIdx > -1 ? code.substring(eIdx + 1) : "";
8620
- if (code.endsWith("=")) {
8621
- code += '""';
8622
- }
8623
- else if (endStr == "name" ||
8624
- endStr == "input" ||
8625
- endStr == "output" ||
8626
- endStr == "items" ||
8627
- endStr == "event" ||
8628
- endStr == "loop") {
8629
- let idx1 = code.lastIndexOf(">");
8630
- let idx2 = code.lastIndexOf("<");
8631
- if (idx1 < idx2 && code.lastIndexOf(" ") > idx2) {
8632
- code += '=""';
8633
- }
8634
- }
8635
- code = fixDoubleChar(code);
8636
- const stack = [];
8637
- function isSelfClosing(tag) {
8638
- return tag.endsWith("/>");
8639
- }
8640
- for (let i = 0; i < code.length; i++) {
8641
- let s = code[i];
8642
- if (s === "<") {
8643
- const isEndTag = code[i + 1] === "/";
8644
- let endIndex = code.indexOf(">", i);
8645
- let tagContent = code.slice(i, endIndex + 1);
8646
- if (isSelfClosing(tagContent)) ;
8647
- else if (isEndTag) {
8648
- stack.pop();
8649
- }
8650
- else {
8651
- stack.push(tagContent);
8652
- }
8653
- if (endIndex == -1) {
8654
- break;
8655
- }
8656
- i = endIndex;
8657
- }
8658
- }
8659
- const missingParts = [];
8660
- while (stack.length > 0) {
8661
- const top = stack.pop();
8662
- if (top.startsWith("<")) {
8663
- let arr = top.match(/<(\w+)/);
8664
- const tagName = arr[1];
8665
- missingParts.push(`</${tagName}>`);
8666
- }
8667
- else {
8668
- missingParts.push(top);
8669
- }
8670
- }
8671
- let completedCode = code + missingParts.join("");
8672
- return completedCode;
8673
- }
8674
-
8675
8687
  // src/anthropic-provider.ts
8676
8688
  var anthropicErrorDataSchema = z.object({
8677
8689
  type: z.literal("error"),
@@ -14736,7 +14748,7 @@ class RetryLanguageModel {
14736
14748
  toolChoice: request.toolChoice,
14737
14749
  },
14738
14750
  prompt: request.messages,
14739
- maxTokens: request.maxTokens || config.maxTokens,
14751
+ maxTokens: request.maxTokens,
14740
14752
  temperature: request.temperature,
14741
14753
  topP: request.topP,
14742
14754
  topK: request.topK,
@@ -14745,6 +14757,7 @@ class RetryLanguageModel {
14745
14757
  });
14746
14758
  }
14747
14759
  async doGenerate(options) {
14760
+ const maxTokens = options.maxTokens;
14748
14761
  const names = [...this.names, ...this.names];
14749
14762
  for (let i = 0; i < names.length; i++) {
14750
14763
  const name = names[i];
@@ -14752,6 +14765,10 @@ class RetryLanguageModel {
14752
14765
  if (!llm) {
14753
14766
  continue;
14754
14767
  }
14768
+ if (!maxTokens) {
14769
+ options.maxTokens =
14770
+ this.llms[name].config?.maxTokens || config.maxTokens;
14771
+ }
14755
14772
  try {
14756
14773
  let result = await llm.doGenerate(options);
14757
14774
  if (Log.isEnableDebug()) {
@@ -14783,7 +14800,7 @@ class RetryLanguageModel {
14783
14800
  toolChoice: request.toolChoice,
14784
14801
  },
14785
14802
  prompt: request.messages,
14786
- maxTokens: request.maxTokens || config.maxTokens,
14803
+ maxTokens: request.maxTokens,
14787
14804
  temperature: request.temperature,
14788
14805
  topP: request.topP,
14789
14806
  topK: request.topK,
@@ -14792,6 +14809,7 @@ class RetryLanguageModel {
14792
14809
  });
14793
14810
  }
14794
14811
  async doStream(options) {
14812
+ const maxTokens = options.maxTokens;
14795
14813
  const names = [...this.names, ...this.names];
14796
14814
  for (let i = 0; i < names.length; i++) {
14797
14815
  const name = names[i];
@@ -14799,6 +14817,10 @@ class RetryLanguageModel {
14799
14817
  if (!llm) {
14800
14818
  continue;
14801
14819
  }
14820
+ if (!maxTokens) {
14821
+ options.maxTokens =
14822
+ this.llms[name].config?.maxTokens || config.maxTokens;
14823
+ }
14802
14824
  try {
14803
14825
  const controller = new AbortController();
14804
14826
  const signal = options.abortSignal
@@ -14855,10 +14877,7 @@ class RetryLanguageModel {
14855
14877
  return createOpenAI({
14856
14878
  apiKey: llm.apiKey,
14857
14879
  baseURL: llm.config?.baseURL,
14858
- }).languageModel(llm.model, {
14859
- // disable_parallel_tool_use
14860
- parallelToolCalls: llm.config?.parallelToolCalls || false,
14861
- });
14880
+ }).languageModel(llm.model);
14862
14881
  }
14863
14882
  else if (llm.provider == "anthropic") {
14864
14883
  return createAnthropic({
@@ -17503,10 +17522,17 @@ function extractAgentXmlNode(agentXml, nodeId) {
17503
17522
  let nodesNode = doc.getElementsByTagName("nodes");
17504
17523
  if (nodesNode.length > 0) {
17505
17524
  let nodes = nodesNode[0].childNodes;
17525
+ let _nodeId = 0;
17506
17526
  for (let i = 0; i < nodes.length; i++) {
17507
17527
  let node = nodes[i];
17508
- if (node.nodeType == 1 && node.getAttribute("id") == nodeId + "") {
17509
- return node;
17528
+ if (node.nodeType == 1) {
17529
+ if (node.getAttribute("id") == null || node.getAttribute("id") == "") {
17530
+ node.setAttribute("id", _nodeId + "");
17531
+ }
17532
+ _nodeId++;
17533
+ if (node.getAttribute("id") == nodeId + "") {
17534
+ return node;
17535
+ }
17510
17536
  }
17511
17537
  }
17512
17538
  }
@@ -17660,14 +17686,14 @@ async function compressAgentMessages(agentContext, rlm, messages, tools) {
17660
17686
  if (callback) {
17661
17687
  await callback.onMessage({
17662
17688
  taskId: agentContext.context.taskId,
17663
- agentName: toolCall.toolName,
17689
+ agentName: agentContext.agent.Name,
17664
17690
  nodeId: agentContext.agentChain.agent.id,
17665
17691
  type: "tool_result",
17666
17692
  toolId: toolCall.toolCallId,
17667
17693
  toolName: toolCall.toolName,
17668
17694
  params: args,
17669
17695
  toolResult: toolResult,
17670
- });
17696
+ }, agentContext);
17671
17697
  }
17672
17698
  // handle original messages
17673
17699
  let firstToolIndex = 3;
@@ -17686,6 +17712,7 @@ async function compressAgentMessages(agentContext, rlm, messages, tools) {
17686
17712
  function handleLargeContextMessages(messages) {
17687
17713
  let imageNum = 0;
17688
17714
  let fileNum = 0;
17715
+ let maxNum = config.maxDialogueImgFileNum;
17689
17716
  let longTextTools = {};
17690
17717
  for (let i = messages.length - 1; i >= 0; i--) {
17691
17718
  let message = messages[i];
@@ -17693,7 +17720,7 @@ function handleLargeContextMessages(messages) {
17693
17720
  for (let j = 0; j < message.content.length; j++) {
17694
17721
  let content = message.content[j];
17695
17722
  if (content.type == "image") {
17696
- if (++imageNum == 1) {
17723
+ if (++imageNum <= maxNum) {
17697
17724
  break;
17698
17725
  }
17699
17726
  content = {
@@ -17703,7 +17730,7 @@ function handleLargeContextMessages(messages) {
17703
17730
  message.content[j] = content;
17704
17731
  }
17705
17732
  else if (content.type == "file") {
17706
- if (++fileNum == 1) {
17733
+ if (++fileNum <= maxNum) {
17707
17734
  break;
17708
17735
  }
17709
17736
  content = {
@@ -17724,7 +17751,7 @@ function handleLargeContextMessages(messages) {
17724
17751
  for (let r = 0; r < toolContent.length; r++) {
17725
17752
  let _content = toolContent[r];
17726
17753
  if (_content.type == "image") {
17727
- if (++imageNum == 1) {
17754
+ if (++imageNum <= maxNum) {
17728
17755
  break;
17729
17756
  }
17730
17757
  _content = {
@@ -17746,7 +17773,7 @@ function handleLargeContextMessages(messages) {
17746
17773
  }
17747
17774
  _content = {
17748
17775
  type: "text",
17749
- text: _content.text.substring(0, config.shortTextLength) + "...",
17776
+ text: _content.text.substring(0, config.largeTextLength) + "...",
17750
17777
  };
17751
17778
  toolContent[r] = _content;
17752
17779
  }
@@ -17839,7 +17866,7 @@ const TOOL_NAME$4 = "foreach_task";
17839
17866
  class ForeachTaskTool {
17840
17867
  constructor() {
17841
17868
  this.name = TOOL_NAME$4;
17842
- this.description = `When executing the \`forEach\` node, please use it to complete the tasks corresponding to that forEach node, which will complete all tasks under the entire forEach node.`;
17869
+ this.description = `When executing the \`forEach\` node, please use the current tool for counting to ensure tasks are executed sequentially, the tool needs to be called with each loop iteration.`;
17843
17870
  this.parameters = {
17844
17871
  type: "object",
17845
17872
  properties: {
@@ -17847,12 +17874,19 @@ class ForeachTaskTool {
17847
17874
  type: "number",
17848
17875
  description: "forEach node ID.",
17849
17876
  },
17877
+ progress: {
17878
+ type: "string",
17879
+ description: "Current execution progress.",
17880
+ },
17881
+ next_step: {
17882
+ type: "string",
17883
+ description: "Next task description.",
17884
+ },
17850
17885
  },
17851
- required: ["nodeId"],
17886
+ required: ["nodeId", "progress", "next_step"],
17852
17887
  };
17853
17888
  }
17854
17889
  async execute(args, agentContext) {
17855
- // 调用 forEach Agent 单独逻辑, 根据上下文判断并循环执行
17856
17890
  let nodeId = args.nodeId;
17857
17891
  let agentXml = agentContext.agentChain.agent.xml;
17858
17892
  let node = extractAgentXmlNode(agentXml, nodeId);
@@ -17863,10 +17897,27 @@ class ForeachTaskTool {
17863
17897
  throw new Error("Node ID is not a forEach node: " + nodeId);
17864
17898
  }
17865
17899
  let items = node.getAttribute("items");
17900
+ let varValue = null;
17901
+ let resultText = "Recorded";
17866
17902
  if (items && items != "list") {
17867
- agentContext.context.variables.get(items.trim());
17903
+ varValue = agentContext.context.variables.get(items.trim());
17904
+ if (varValue) {
17905
+ let key = "foreach_" + nodeId;
17906
+ let loop_count = agentContext.variables.get(key) || 0;
17907
+ if (loop_count % 5 == 0) {
17908
+ resultText = `Variable information associated with the current loop task.\nvariable_name: ${items.trim()}\nvariable_value: ${varValue}`;
17909
+ }
17910
+ agentContext.variables.set(key, ++loop_count);
17911
+ }
17868
17912
  }
17869
- return null;
17913
+ return {
17914
+ content: [
17915
+ {
17916
+ type: "text",
17917
+ text: resultText,
17918
+ },
17919
+ ],
17920
+ };
17870
17921
  }
17871
17922
  }
17872
17923
 
@@ -17938,6 +17989,11 @@ request_help: Request assistance from the user; for instance, when an operation
17938
17989
  break;
17939
17990
  case "request_help":
17940
17991
  if (callback.onHumanHelp) {
17992
+ if (args.helpType == "request_login" &&
17993
+ (await this.checkIsLogined(agentContext))) {
17994
+ resultText = "Already logged in";
17995
+ break;
17996
+ }
17941
17997
  let result = await callback.onHumanHelp(agentContext, (args.helpType || "request_assistance"), args.prompt);
17942
17998
  resultText = `request_help result: ${result ? "Solved" : "Unresolved"}`;
17943
17999
  }
@@ -17966,16 +18022,56 @@ request_help: Request assistance from the user; for instance, when an operation
17966
18022
  };
17967
18023
  }
17968
18024
  }
18025
+ async checkIsLogined(agentContext) {
18026
+ let screenshot = agentContext.agent["screenshot"];
18027
+ if (!screenshot) {
18028
+ return false;
18029
+ }
18030
+ try {
18031
+ let imageResult = (await screenshot.call(agentContext.agent, agentContext));
18032
+ let rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms);
18033
+ let image = toImage(imageResult.imageBase64);
18034
+ let request = {
18035
+ messages: [
18036
+ {
18037
+ role: "user",
18038
+ content: [
18039
+ {
18040
+ type: "image",
18041
+ image: image,
18042
+ mimeType: imageResult.imageType,
18043
+ },
18044
+ {
18045
+ type: "text",
18046
+ text: "Check if the current website is logged in. If not logged in, output `NOT_LOGIN`. If logged in, output `LOGGED_IN`. Output directly without explanation.",
18047
+ },
18048
+ ],
18049
+ },
18050
+ ],
18051
+ abortSignal: agentContext.context.controller.signal,
18052
+ };
18053
+ let result = await rlm.call(request);
18054
+ return result.text && result.text.indexOf("LOGGED_IN") > -1;
18055
+ }
18056
+ catch (error) {
18057
+ console.error("Error auto checking login status:", error);
18058
+ return false;
18059
+ }
18060
+ }
17969
18061
  }
17970
18062
 
17971
18063
  const TOOL_NAME$2 = "task_node_status";
17972
18064
  class TaskNodeStatusTool {
17973
18065
  constructor() {
17974
18066
  this.name = TOOL_NAME$2;
17975
- this.description = `After completing each step of the task, you need to call this tool to update the status of the task node.`;
18067
+ this.description = `After completing each step of the task, you need to call this tool to update the status of the task node, and think about the tasks to be processed and the next action plan.`;
17976
18068
  this.parameters = {
17977
18069
  type: "object",
17978
18070
  properties: {
18071
+ thought: {
18072
+ type: "string",
18073
+ description: "Current thinking content, which can be analysis of the problem, assumptions, insights, reflections, or a summary of the previous, suggest the next action step to be taken, which should be specific, executable, and verifiable."
18074
+ },
17979
18075
  doneIds: {
17980
18076
  type: "array",
17981
18077
  description: "List of completed node IDs.",
@@ -17991,7 +18087,7 @@ class TaskNodeStatusTool {
17991
18087
  },
17992
18088
  },
17993
18089
  },
17994
- required: ["doneIds", "todoIds"],
18090
+ required: ["thought", "doneIds", "todoIds"],
17995
18091
  };
17996
18092
  }
17997
18093
  async execute(args, agentContext) {
@@ -18099,21 +18195,156 @@ const TOOL_NAME = "watch_trigger";
18099
18195
  class WatchTriggerTool {
18100
18196
  constructor() {
18101
18197
  this.name = TOOL_NAME;
18102
- this.description = `When executing the \`watch\` node, please use it to complete the tasks corresponding to that watch node. It will complete all tasks under the entire watch node.`;
18198
+ this.description = `When executing the \`watch\` node, please use it to monitor DOM element changes, it will block the listener until the element changes or times out.`;
18103
18199
  this.parameters = {
18104
18200
  type: "object",
18105
18201
  properties: {
18106
18202
  nodeId: {
18107
18203
  type: "number",
18108
- description: "forEach node ID."
18204
+ description: "watch node ID.",
18205
+ },
18206
+ watch_area: {
18207
+ type: "array",
18208
+ description: "Element changes in monitoring area, eg: [x, y, width, height].",
18209
+ items: {
18210
+ type: "number",
18211
+ },
18212
+ },
18213
+ watch_index: {
18214
+ type: "array",
18215
+ description: "The index of elements to be monitoring multiple elements simultaneously.",
18216
+ items: {
18217
+ type: "number",
18218
+ },
18219
+ },
18220
+ frequency: {
18221
+ type: "number",
18222
+ description: "Check frequency, how many seconds between each check, default 1 seconds.",
18223
+ default: 1,
18224
+ minimum: 0.5,
18225
+ maximum: 30,
18226
+ },
18227
+ timeout: {
18228
+ type: "number",
18229
+ description: "Timeout in minute, default 5 minutes.",
18230
+ default: 5,
18231
+ minimum: 1,
18232
+ maximum: 30,
18109
18233
  },
18110
18234
  },
18111
18235
  required: ["nodeId"],
18112
18236
  };
18113
18237
  }
18114
18238
  async execute(args, agentContext) {
18115
- // TODO Listen for changes to the DOM or file, and execute nodes
18116
- return null;
18239
+ let nodeId = args.nodeId;
18240
+ let agentXml = agentContext.agentChain.agent.xml;
18241
+ let node = extractAgentXmlNode(agentXml, nodeId);
18242
+ if (node == null) {
18243
+ throw new Error("Node ID does not exist: " + nodeId);
18244
+ }
18245
+ if (node.tagName !== "watch") {
18246
+ throw new Error("Node ID is not a watch node: " + nodeId);
18247
+ }
18248
+ let task_description = node.getElementsByTagName("description")[0]?.textContent || "";
18249
+ if (!task_description) {
18250
+ return {
18251
+ content: [
18252
+ {
18253
+ type: "text",
18254
+ text: "The watch node does not have a description, skip.",
18255
+ },
18256
+ ],
18257
+ };
18258
+ }
18259
+ const screenshot = agentContext.agent["screenshot"];
18260
+ const image1Result = (await screenshot.call(agentContext.agent, agentContext));
18261
+ const image1 = toImage(image1Result.imageBase64);
18262
+ const start = new Date().getTime();
18263
+ const timeout = (args.timeout || 5) * 60000;
18264
+ const frequency = Math.max(500, (args.frequency = args.frequency || 1) * 1000);
18265
+ let rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms);
18266
+ while (new Date().getTime() - start < timeout) {
18267
+ await agentContext.context.checkAborted();
18268
+ await new Promise((resolve) => setTimeout(resolve, frequency));
18269
+ const image2Result = (await screenshot.call(agentContext.agent, agentContext));
18270
+ const image2 = toImage(image2Result.imageBase64);
18271
+ const changeResult = await this.is_dom_change(agentContext, rlm, image1, image1Result.imageType, image2, image2Result.imageType, task_description);
18272
+ if (changeResult.changed) {
18273
+ return {
18274
+ content: [
18275
+ {
18276
+ type: "text",
18277
+ text: changeResult.changeInfo || "DOM change detected.",
18278
+ },
18279
+ ],
18280
+ };
18281
+ }
18282
+ }
18283
+ return {
18284
+ content: [
18285
+ {
18286
+ type: "text",
18287
+ text: "Timeout reached, no DOM changes detected.",
18288
+ },
18289
+ ],
18290
+ };
18291
+ }
18292
+ async is_dom_change(agentContext, rlm, image1, image1Type, image2, image2Type, task_description) {
18293
+ try {
18294
+ let request = {
18295
+ messages: [
18296
+ {
18297
+ role: "system",
18298
+ content: `You are a tool for detecting element changes. Given a task description, compare two images to determine whether the changes described in the task have occurred.
18299
+ If the changes have occurred, return an json with \`changed\` set to true and \`changeInfo\` containing a description of the changes. If no changes have occurred, return an object with \`changed\` set to false.
18300
+
18301
+ ## Example
18302
+ User: Monitor new messages in group chat
18303
+ ### No changes detected
18304
+ Output:
18305
+ {
18306
+ "changed": false
18307
+ }
18308
+ ### Change detected
18309
+ Output:
18310
+ {
18311
+ "changed": true,
18312
+ "changeInfo": "New message received in the group chat. The message content is: 'Hello, how are you?'"
18313
+ }`,
18314
+ },
18315
+ {
18316
+ role: "user",
18317
+ content: [
18318
+ {
18319
+ type: "image",
18320
+ image: image1,
18321
+ mimeType: image1Type,
18322
+ },
18323
+ {
18324
+ type: "image",
18325
+ image: image2,
18326
+ mimeType: image2Type,
18327
+ },
18328
+ {
18329
+ type: "text",
18330
+ text: task_description,
18331
+ },
18332
+ ],
18333
+ },
18334
+ ],
18335
+ abortSignal: agentContext.context.controller.signal,
18336
+ };
18337
+ const result = await rlm.call(request);
18338
+ let resultText = result.text || "{}";
18339
+ resultText = resultText.substring(resultText.indexOf("{"), resultText.lastIndexOf("}") + 1);
18340
+ return JSON.parse(resultText);
18341
+ }
18342
+ catch (error) {
18343
+ Log.error("Error in is_dom_change:", error);
18344
+ }
18345
+ return {
18346
+ changed: false,
18347
+ };
18117
18348
  }
18118
18349
  }
18119
18350
 
@@ -18131,7 +18362,7 @@ class McpTool {
18131
18362
 
18132
18363
  const AGENT_SYSTEM_TEMPLATE = `
18133
18364
  You are {name}, an autonomous AI agent for {agent} agent.
18134
- UTC datetime: {datetime}
18365
+ Current datetime: {datetime}
18135
18366
 
18136
18367
  # Task Description
18137
18368
  {description}
@@ -18175,7 +18406,7 @@ repetitive tasks, when executing to the forEach node, require the use of the \`$
18175
18406
  `;
18176
18407
  const WATCH_NODE = `
18177
18408
  <!-- monitor task node, the loop attribute specifies whether to listen in a loop or listen once -->
18178
- <watch event="dom or file" loop="true">
18409
+ <watch event="dom" loop="true">
18179
18410
  <description>Monitor task description</description>
18180
18411
  <trigger>
18181
18412
  <node>Trigger step node</node>
@@ -18184,7 +18415,7 @@ const WATCH_NODE = `
18184
18415
  </watch>`;
18185
18416
  const WATCH_PROMPT = `
18186
18417
  * watch node
18187
- monitor changes in webpage DOM or file content, when executing to the watch node, require the use of the \`${TOOL_NAME}\` tool.
18418
+ monitor changes in webpage DOM elements, when executing to the watch node, require the use of the \`${TOOL_NAME}\` tool.
18188
18419
  `;
18189
18420
  function getAgentSystemPrompt(agent, agentNode, context, tools, extSysPrompt) {
18190
18421
  let prompt = "";
@@ -18220,7 +18451,7 @@ function getAgentSystemPrompt(agent, agentNode, context, tools, extSysPrompt) {
18220
18451
  }
18221
18452
  if (context.chain.agents.length > 1) {
18222
18453
  prompt += "\n Main task: " + context.chain.taskPrompt;
18223
- prompt += "\n# Pre-task execution results";
18454
+ prompt += "\n\n# Pre-task execution results";
18224
18455
  for (let i = 0; i < context.chain.agents.length; i++) {
18225
18456
  let agentChain = context.chain.agents[i];
18226
18457
  if (agentChain.agentResult) {
@@ -18234,7 +18465,7 @@ function getAgentSystemPrompt(agent, agentNode, context, tools, extSysPrompt) {
18234
18465
  return AGENT_SYSTEM_TEMPLATE.replace("{name}", config.name)
18235
18466
  .replace("{agent}", agent.Name)
18236
18467
  .replace("{description}", agent.Description)
18237
- .replace("{datetime}", new Date().toISOString())
18468
+ .replace("{datetime}", new Date().toLocaleString())
18238
18469
  .replace("{prompt}", prompt)
18239
18470
  .replace("{nodePrompt}", nodePrompt)
18240
18471
  .trim();
@@ -18288,7 +18519,7 @@ class Agent {
18288
18519
  let rlm = new RetryLanguageModel(context.config.llms, this.llms);
18289
18520
  let agentTools = tools;
18290
18521
  while (loopNum < maxReactNum) {
18291
- context.checkAborted();
18522
+ await context.checkAborted();
18292
18523
  if (mcpClient) {
18293
18524
  let controlMcp = await this.controlMcpTools(agentContext, messages, loopNum);
18294
18525
  if (controlMcp.mcpTools) {
@@ -18299,7 +18530,7 @@ class Agent {
18299
18530
  }
18300
18531
  }
18301
18532
  await this.handleMessages(agentContext, messages, tools);
18302
- let results = await callLLM(agentContext, rlm, messages, this.convertTools(agentTools));
18533
+ let results = await callLLM(agentContext, rlm, messages, this.convertTools(agentTools), false, undefined, false, this.callback);
18303
18534
  let finalResult = await this.handleCallResult(agentContext, messages, agentTools, results);
18304
18535
  if (finalResult) {
18305
18536
  return finalResult;
@@ -18355,17 +18586,18 @@ class Agent {
18355
18586
  throw e;
18356
18587
  }
18357
18588
  }
18358
- if (context.config.callback) {
18359
- await context.config.callback.onMessage({
18589
+ const callback = this.callback || context.config.callback;
18590
+ if (callback) {
18591
+ await callback.onMessage({
18360
18592
  taskId: context.taskId,
18361
- agentName: result.toolName,
18593
+ agentName: agentContext.agent.Name,
18362
18594
  nodeId: agentContext.agentChain.agent.id,
18363
18595
  type: "tool_result",
18364
18596
  toolId: result.toolCallId,
18365
18597
  toolName: result.toolName,
18366
18598
  params: result.args || {},
18367
18599
  toolResult: toolResult,
18368
- });
18600
+ }, agentContext);
18369
18601
  }
18370
18602
  let llmToolResult = this.convertToolResult(result, toolResult, user_messages);
18371
18603
  toolResults.push(llmToolResult);
@@ -18394,6 +18626,14 @@ class Agent {
18394
18626
  if (hasVariable) {
18395
18627
  tools.push(new VariableStorageTool());
18396
18628
  }
18629
+ let hasForeach = agentNodeXml.indexOf("</forEach>") > -1;
18630
+ if (hasForeach) {
18631
+ tools.push(new ForeachTaskTool());
18632
+ }
18633
+ let hasWatch = agentNodeXml.indexOf("</watch>") > -1;
18634
+ if (hasWatch) {
18635
+ tools.push(new WatchTriggerTool());
18636
+ }
18397
18637
  let toolNames = this.tools.map((tool) => tool.name);
18398
18638
  return tools.filter((tool) => toolNames.indexOf(tool.name) == -1);
18399
18639
  }
@@ -18557,6 +18797,12 @@ class Agent {
18557
18797
  }
18558
18798
  return this.tools;
18559
18799
  }
18800
+ addTool(tool) {
18801
+ this.tools.push(tool);
18802
+ }
18803
+ get Llms() {
18804
+ return this.llms;
18805
+ }
18560
18806
  get Name() {
18561
18807
  return this.name;
18562
18808
  }
@@ -18573,14 +18819,15 @@ class Agent {
18573
18819
  return this.mcpClient;
18574
18820
  }
18575
18821
  }
18576
- async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoice, retry) {
18822
+ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoice, retry, callback) {
18577
18823
  if (messages.length >= config.compressThreshold && !noCompress) {
18578
18824
  await compressAgentMessages(agentContext, rlm, messages, tools);
18579
18825
  }
18580
18826
  let context = agentContext.context;
18581
18827
  let agentChain = agentContext.agentChain;
18582
18828
  let agentNode = agentChain.agent;
18583
- let streamCallback = context.config.callback || {
18829
+ let streamCallback = callback ||
18830
+ context.config.callback || {
18584
18831
  onMessage: async () => { },
18585
18832
  };
18586
18833
  let request = {
@@ -18600,7 +18847,7 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18600
18847
  const reader = result.stream.getReader();
18601
18848
  try {
18602
18849
  while (true) {
18603
- context.checkAborted();
18850
+ await context.checkAborted();
18604
18851
  const { done, value } = await reader.read();
18605
18852
  if (done) {
18606
18853
  break;
@@ -18617,7 +18864,7 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18617
18864
  streamId,
18618
18865
  streamDone: false,
18619
18866
  text: streamText,
18620
- });
18867
+ }, agentContext);
18621
18868
  break;
18622
18869
  }
18623
18870
  case "reasoning": {
@@ -18630,7 +18877,7 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18630
18877
  streamId,
18631
18878
  streamDone: false,
18632
18879
  text: thinkText,
18633
- });
18880
+ }, agentContext);
18634
18881
  break;
18635
18882
  }
18636
18883
  case "tool-call-delta": {
@@ -18644,7 +18891,7 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18644
18891
  streamId,
18645
18892
  streamDone: true,
18646
18893
  text: streamText,
18647
- });
18894
+ }, agentContext);
18648
18895
  }
18649
18896
  toolArgsText += chunk.argsTextDelta || "";
18650
18897
  await streamCallback.onMessage({
@@ -18655,7 +18902,7 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18655
18902
  toolId: chunk.toolCallId,
18656
18903
  toolName: chunk.toolName,
18657
18904
  paramsText: toolArgsText,
18658
- });
18905
+ }, agentContext);
18659
18906
  break;
18660
18907
  }
18661
18908
  case "tool-call": {
@@ -18670,7 +18917,7 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18670
18917
  toolName: chunk.toolName,
18671
18918
  params: args,
18672
18919
  };
18673
- await streamCallback.onMessage(message);
18920
+ await streamCallback.onMessage(message, agentContext);
18674
18921
  toolParts.push({
18675
18922
  type: "tool-call",
18676
18923
  toolCallId: chunk.toolCallId,
@@ -18687,7 +18934,7 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18687
18934
  type: "file",
18688
18935
  mimeType: chunk.mimeType,
18689
18936
  data: chunk.data,
18690
- });
18937
+ }, agentContext);
18691
18938
  break;
18692
18939
  }
18693
18940
  case "error": {
@@ -18698,7 +18945,7 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18698
18945
  nodeId: agentNode.id,
18699
18946
  type: "error",
18700
18947
  error: chunk.error,
18701
- });
18948
+ }, agentContext);
18702
18949
  throw new Error("Plan Error");
18703
18950
  }
18704
18951
  case "finish": {
@@ -18712,7 +18959,7 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18712
18959
  streamId,
18713
18960
  streamDone: true,
18714
18961
  text: streamText,
18715
- });
18962
+ }, agentContext);
18716
18963
  }
18717
18964
  await streamCallback.onMessage({
18718
18965
  taskId: context.taskId,
@@ -18721,10 +18968,13 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18721
18968
  type: "finish",
18722
18969
  finishReason: chunk.finishReason,
18723
18970
  usage: chunk.usage,
18724
- });
18725
- if (chunk.finishReason === "length" && messages.length >= 10 && !noCompress && !retry) {
18971
+ }, agentContext);
18972
+ if (chunk.finishReason === "length" &&
18973
+ messages.length >= 10 &&
18974
+ !noCompress &&
18975
+ !retry) {
18726
18976
  await compressAgentMessages(agentContext, rlm, messages, tools);
18727
- return callLLM(agentContext, rlm, messages, tools, noCompress, toolChoice, true);
18977
+ return callLLM(agentContext, rlm, messages, tools, noCompress, toolChoice, true, streamCallback);
18728
18978
  }
18729
18979
  break;
18730
18980
  }
@@ -18743,11 +18993,11 @@ async function callLLM(agentContext, rlm, messages, tools, noCompress, toolChoic
18743
18993
  : toolParts;
18744
18994
  }
18745
18995
 
18746
- const AGENT_NAME$4 = "Chat";
18996
+ const AGENT_NAME$5 = "Chat";
18747
18997
  class BaseChatAgent extends Agent {
18748
18998
  constructor(llms, ext_tools, mcpClient) {
18749
18999
  super({
18750
- name: AGENT_NAME$4,
19000
+ name: AGENT_NAME$5,
18751
19001
  description: "You are a helpful assistant.",
18752
19002
  tools: ext_tools || [],
18753
19003
  llms: llms,
@@ -18759,7 +19009,7 @@ class BaseChatAgent extends Agent {
18759
19009
 
18760
19010
  const PLAN_SYSTEM_TEMPLATE = `
18761
19011
  You are {name}, an autonomous AI Agent Planner.
18762
- UTC datetime: {datetime}
19012
+ Current datetime: {datetime}
18763
19013
 
18764
19014
  ## Task Description
18765
19015
  Your task is to understand the user's requirements, dynamically plan the user's tasks based on the Agent list, and please follow the steps below:
@@ -18793,8 +19043,8 @@ Your task is to understand the user's requirements, dynamically plan the user's
18793
19043
  <forEach items="list or variable name">
18794
19044
  <node>forEach step node</node>
18795
19045
  </forEach>
18796
- <!-- When you need to monitor changes in webpage DOM or file content, you can use \`Watch\`, the loop attribute specifies whether to listen in a loop or listen once. -->
18797
- <watch event="dom or file" loop="true">
19046
+ <!-- When you need to monitor changes in webpage DOM elements, you can use \`Watch\`, the loop attribute specifies whether to listen in a loop or listen once. -->
19047
+ <watch event="dom" loop="true">
18798
19048
  <description>Monitor task description</description>
18799
19049
  <trigger>
18800
19050
  <node>Trigger step node</node>
@@ -18916,12 +19166,12 @@ Output result:
18916
19166
  ];
18917
19167
  const PLAN_USER_TEMPLATE = `
18918
19168
  User Platform: {platform}
18919
- Task Description: {taskPrompt}
19169
+ Task Description: {task_prompt}
18920
19170
  `;
18921
19171
  const PLAN_USER_TASK_WEBSITE_TEMPLATE = `
18922
19172
  User Platform: {platform}
18923
19173
  Task Website: {task_website}
18924
- Task Description: {taskPrompt}
19174
+ Task Description: {task_prompt}
18925
19175
  `;
18926
19176
  async function getPlanSystemPrompt(context) {
18927
19177
  let agents_prompt = "";
@@ -18940,7 +19190,7 @@ async function getPlanSystemPrompt(context) {
18940
19190
  "\n</agent>\n\n";
18941
19191
  }
18942
19192
  let example_prompt = "";
18943
- let hasChatAgent = context.agents.filter((a) => a.Name == AGENT_NAME$4).length > 0;
19193
+ let hasChatAgent = context.agents.filter((a) => a.Name == AGENT_NAME$5).length > 0;
18944
19194
  const example_list = hasChatAgent
18945
19195
  ? [PLAN_CHAT_EXAMPLE, ...PLAN_EXAMPLE_LIST]
18946
19196
  : [...PLAN_EXAMPLE_LIST];
@@ -18949,22 +19199,26 @@ async function getPlanSystemPrompt(context) {
18949
19199
  }
18950
19200
  return PLAN_SYSTEM_TEMPLATE.replace("{name}", config.name)
18951
19201
  .replace("{agents}", agents_prompt.trim())
18952
- .replace("{datetime}", new Date().toISOString())
19202
+ .replace("{datetime}", new Date().toLocaleString())
18953
19203
  .replace("{example_prompt}", example_prompt)
18954
19204
  .trim();
18955
19205
  }
18956
/**
 * Builds the user message sent to the planner.
 *
 * Picks the website-aware template when `task_website` is truthy, fills in
 * the placeholders, trims the result and optionally appends an extra prompt
 * on its own line.
 *
 * @param task_prompt Task description supplied by the user.
 * @param task_website Optional website the task relates to.
 * @param ext_prompt Optional extra text appended (trimmed) after a newline.
 * @returns The rendered prompt text.
 */
function getPlanUserPrompt(task_prompt, task_website, ext_prompt) {
    const template = task_website
        ? PLAN_USER_TASK_WEBSITE_TEMPLATE
        : PLAN_USER_TEMPLATE;
    const values = {
        platform: config.platform,
        task_website: task_website,
        task_prompt: task_prompt,
    };
    // A replacer function is used on purpose: with a string replacement,
    // sequences such as "$&" or "$'" inside user text are interpreted as
    // replacement patterns. Doing every substitution in one pass also means
    // placeholder-looking text inside an inserted value is never re-expanded.
    let prompt = template
        .replace(/\{(platform|task_website|task_prompt)\}/g, (_match, key) => String(values[key]))
        .trim();
    if (ext_prompt) {
        prompt += `\n${ext_prompt.trim()}`;
    }
    return prompt;
}
18969
19223
 
18970
19224
  class Planner {
@@ -19007,7 +19261,7 @@ class Planner {
19007
19261
  content: [
19008
19262
  {
19009
19263
  type: "text",
19010
- text: getPlanUserPrompt(taskPrompt, this.context.variables.get("task_website")),
19264
+ text: getPlanUserPrompt(taskPrompt, this.context.variables.get("task_website"), this.context.variables.get("plan_ext_prompt")),
19011
19265
  },
19012
19266
  ],
19013
19267
  },
@@ -19024,7 +19278,7 @@ class Planner {
19024
19278
  let streamText = "";
19025
19279
  try {
19026
19280
  while (true) {
19027
- this.context.checkAborted();
19281
+ await this.context.checkAborted();
19028
19282
  const { done, value } = await reader.read();
19029
19283
  if (done) {
19030
19284
  break;
@@ -19163,7 +19417,7 @@ class Eko {
19163
19417
  }, {});
19164
19418
  let results = [];
19165
19419
  for (let i = 0; i < workflow.agents.length; i++) {
19166
- context.checkAborted();
19420
+ await context.checkAborted();
19167
19421
  let agentNode = workflow.agents[i];
19168
19422
  let agent = agentMap[agentNode.name];
19169
19423
  if (!agent) {
@@ -19173,6 +19427,9 @@ class Eko {
19173
19427
  context.chain.push(agentChain);
19174
19428
  agent.result = await agent.run(context, agentChain);
19175
19429
  results.push(agent.result);
19430
+ if (agentNode.name === "Timer") {
19431
+ break;
19432
+ }
19176
19433
  }
19177
19434
  return {
19178
19435
  success: true,
@@ -19200,6 +19457,16 @@ class Eko {
19200
19457
  return false;
19201
19458
  }
19202
19459
  }
19460
+ pauseTask(taskId, paused) {
19461
+ let context = this.taskMap.get(taskId);
19462
+ if (context) {
19463
+ context.paused = paused;
19464
+ return true;
19465
+ }
19466
+ else {
19467
+ return false;
19468
+ }
19469
+ }
19203
19470
  addAgent(agent) {
19204
19471
  this.config.agents = this.config.agents || [];
19205
19472
  this.config.agents.push(agent);
@@ -19428,7 +19695,7 @@ function parseChunk(chunk) {
19428
19695
  return chunk_obj;
19429
19696
  }
19430
19697
 
19431
- const AGENT_NAME$3 = "File";
19698
+ const AGENT_NAME$4 = "File";
19432
19699
  class BaseFileAgent extends Agent {
19433
19700
  constructor(work_path, llms, ext_tools, mcpClient, planDescription) {
19434
19701
  const _tools_ = [];
@@ -19436,7 +19703,7 @@ class BaseFileAgent extends Agent {
19436
19703
  ? `Your default working path is: ${work_path}`
19437
19704
  : "";
19438
19705
  super({
19439
- name: AGENT_NAME$3,
19706
+ name: AGENT_NAME$4,
19440
19707
  description: `You are a file agent, handling file-related tasks such as creating, finding, reading, modifying files, etc.${prompt}`,
19441
19708
  tools: _tools_,
19442
19709
  llms: llms,
@@ -19450,6 +19717,37 @@ class BaseFileAgent extends Agent {
19450
19717
  }
19451
19718
  init_tools.forEach((tool) => _tools_.push(tool));
19452
19719
  }
19720
+ async do_file_read(agentContext, path, write_variable) {
19721
+ let file_context = await this.file_read(agentContext, path);
19722
+ if (file_context && file_context.length > config.fileTextMaxLength) {
19723
+ file_context = file_context.substring(0, config.fileTextMaxLength) + "...";
19724
+ }
19725
+ if (write_variable) {
19726
+ agentContext.context.variables.set(write_variable, file_context);
19727
+ }
19728
+ return {
19729
+ file_context: file_context,
19730
+ write_variable: write_variable,
19731
+ };
19732
+ }
19733
+ async do_file_write(agentContext, path, append, content, from_variable) {
19734
+ if (content == null && from_variable == null) {
19735
+ throw new Error(`content and from_variable cannot be both empty, cannot write to file ${path}`);
19736
+ }
19737
+ if (from_variable) {
19738
+ let variable_value = agentContext.context.variables.get(from_variable) || "";
19739
+ if (variable_value) {
19740
+ content = variable_value;
19741
+ }
19742
+ if (!content) {
19743
+ throw new Error(`Variable ${from_variable} is empty, cannot write to file ${path}`);
19744
+ }
19745
+ }
19746
+ if (!content) {
19747
+ throw new Error(`content is empty, cannot write to file ${path}`);
19748
+ }
19749
+ return await this.file_write(agentContext, path, content || "", append);
19750
+ }
19453
19751
  buildInitTools() {
19454
19752
  return [
19455
19753
  {
@@ -19479,16 +19777,20 @@ class BaseFileAgent extends Agent {
19479
19777
  type: "string",
19480
19778
  description: "File path",
19481
19779
  },
19780
+ write_variable: {
19781
+ type: "string",
19782
+ description: "Variable name, the content after reading is simultaneously written to the variable, facilitating direct loading from the variable in subsequent operations.",
19783
+ },
19482
19784
  },
19483
19785
  required: ["path"],
19484
19786
  },
19485
19787
  execute: async (args, agentContext) => {
19486
- return await this.callInnerTool(() => this.file_read(agentContext, args.path));
19788
+ return await this.callInnerTool(() => this.do_file_read(agentContext, args.path, args.write_variable));
19487
19789
  },
19488
19790
  },
19489
19791
  {
19490
19792
  name: "file_write",
19491
- description: "Overwrite or append content to a file. Use for creating new files, appending content, or modifying existing files, only supports txt/md/csv or other text formats.",
19793
+ description: "Overwrite or append content to a file. Use for creating new files, appending content, or modifying existing files, only supports txt/md/json/csv or other text formats.",
19492
19794
  parameters: {
19493
19795
  type: "object",
19494
19796
  properties: {
@@ -19496,20 +19798,24 @@ class BaseFileAgent extends Agent {
19496
19798
  type: "string",
19497
19799
  description: "File path",
19498
19800
  },
19499
- content: {
19500
- type: "string",
19501
- description: "Text content",
19502
- },
19503
19801
  append: {
19504
19802
  type: "boolean",
19505
19803
  description: "(Optional) Whether to use append mode",
19506
19804
  default: false,
19507
19805
  },
19806
+ content: {
19807
+ type: "string",
19808
+ description: "Text content, write content directly to the file.",
19809
+ },
19810
+ from_variable: {
19811
+ type: "string",
19812
+ description: "Variable name, read content from the variable and write it.",
19813
+ },
19508
19814
  },
19509
- required: ["path", "content"],
19815
+ required: ["path"],
19510
19816
  },
19511
19817
  execute: async (args, agentContext) => {
19512
- return await this.callInnerTool(() => this.file_write(agentContext, args.path, args.content, (args.append || false)));
19818
+ return await this.callInnerTool(() => this.do_file_write(agentContext, args.path, (args.append || false), args.content, args.from_variable));
19513
19819
  },
19514
19820
  },
19515
19821
  {
@@ -19562,12 +19868,12 @@ class BaseFileAgent extends Agent {
19562
19868
  }
19563
19869
  }
19564
19870
 
19565
- const AGENT_NAME$2 = "Shell";
19871
+ const AGENT_NAME$3 = "Shell";
19566
19872
  class BaseShellAgent extends Agent {
19567
19873
  constructor(llms, ext_tools, mcpClient, planDescription) {
19568
19874
  const _tools_ = [];
19569
19875
  super({
19570
- name: AGENT_NAME$2,
19876
+ name: AGENT_NAME$3,
19571
19877
  description: `Run commands in a bash shell,
19572
19878
  * You must first call create_session to create a new session when using it for the first time.
19573
19879
  * Please execute delete commands with caution, and never perform dangerous operations like \`rm -rf /\`.
@@ -19644,7 +19950,45 @@ class BaseShellAgent extends Agent {
19644
19950
  }
19645
19951
  }
19646
19952
 
19953
+ const AGENT_NAME$2 = "Timer";
19647
19954
  class BaseTimerAgent extends Agent {
19955
+ constructor(llms, ext_tools, mcpClient) {
19956
+ super({
19957
+ name: AGENT_NAME$2,
19958
+ description: "You are a scheduled task scheduling agent.",
19959
+ tools: ext_tools || [],
19960
+ llms: llms,
19961
+ mcpClient: mcpClient,
19962
+ });
19963
+ this.addTool(this.schedule_tool());
19964
+ }
19965
+ schedule_tool() {
19966
+ return {
19967
+ name: "task_schedule",
19968
+ description: "Task scheduled trigger, the task is triggered at a scheduled time and will automatically create a scheduled task for execution.",
19969
+ parameters: {
19970
+ type: "object",
19971
+ properties: {
19972
+ trigger_description: {
19973
+ type: "string",
19974
+ description: "Trigger time description.",
19975
+ },
19976
+ task_description: {
19977
+ type: "string",
19978
+ description: "Main task description, excluding trigger time.",
19979
+ },
19980
+ cron: {
19981
+ type: "string",
19982
+ description: "The cron expression of the trigger, for example, '0 9 * * *' indicates that it triggers at 9 a.m. every day.",
19983
+ },
19984
+ },
19985
+ required: ["cron"],
19986
+ },
19987
+ execute: async (args, agentContext) => {
19988
+ return await this.callInnerTool(() => this.task_schedule(agentContext, args.trigger_description, args.task_description, args.cron));
19989
+ },
19990
+ };
19991
+ }
19648
19992
  }
19649
19993
 
19650
19994
  const AGENT_NAME$1 = "Computer";
@@ -19873,7 +20217,7 @@ This is a computer GUI interface, observe the execution through screenshots, and
19873
20217
  description: "Duration in millisecond",
19874
20218
  default: 500,
19875
20219
  minimum: 200,
19876
- maximum: 2000,
20220
+ maximum: 10000,
19877
20221
  },
19878
20222
  },
19879
20223
  required: ["duration"],
@@ -20066,6 +20410,9 @@ class BaseBrowserAgent extends Agent {
20066
20410
  }
20067
20411
  toolUseNames(messages) {
20068
20412
  let toolNames = [];
20413
+ if (!messages) {
20414
+ return toolNames;
20415
+ }
20069
20416
  for (let i = 0; i < messages.length; i++) {
20070
20417
  let message = messages[i];
20071
20418
  if (message.role == "tool") {
@@ -20114,8 +20461,8 @@ function run_build_dom_tree() {
20114
20461
  'name',
20115
20462
  'role',
20116
20463
  'class',
20117
- // 'href',
20118
- 'tabindex',
20464
+ 'src',
20465
+ 'href',
20119
20466
  'aria-label',
20120
20467
  'placeholder',
20121
20468
  'value',
@@ -20164,6 +20511,12 @@ function run_build_dom_tree() {
20164
20511
  let classList = value.split(" ").slice(0, 3);
20165
20512
  value = classList.join(" ");
20166
20513
  }
20514
+ else if ((key == "src" || key == "href") && value && value.length > 200) {
20515
+ continue;
20516
+ }
20517
+ else if ((key == "src" || key == "href") && value && value.startsWith("/")) {
20518
+ value = window.location.origin + value;
20519
+ }
20167
20520
  if (key && value) {
20168
20521
  attributes_str += ` ${key}="${value}"`;
20169
20522
  }
@@ -20725,9 +21078,21 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20725
21078
  async scroll_mouse_wheel(agentContext, amount, extract_page_content) {
20726
21079
  await this.execute_script(agentContext, scroll_by, [{ amount }]);
20727
21080
  await sleep(200);
21081
+ if (!extract_page_content) {
21082
+ const tools = this.toolUseNames(agentContext.agentChain.agentRequest?.messages);
21083
+ let scroll_count = 0;
21084
+ for (let i = tools.length - 1; i >= Math.max(tools.length - 8, 0); i--) {
21085
+ if (tools[i] == "scroll_mouse_wheel") {
21086
+ scroll_count++;
21087
+ }
21088
+ }
21089
+ if (scroll_count >= 3) {
21090
+ extract_page_content = true;
21091
+ }
21092
+ }
20728
21093
  if (extract_page_content) {
20729
21094
  let page_content = await this.extract_page_content(agentContext);
20730
- return "This is the latest page content:\n" + page_content;
21095
+ return "The current page content has been extracted, latest page content:\n" + page_content;
20731
21096
  }
20732
21097
  }
20733
21098
  async hover_to_element(agentContext, index) {
@@ -20836,7 +21201,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20836
21201
  },
20837
21202
  enter: {
20838
21203
  type: "boolean",
20839
- description: "press the Enter key",
21204
+ description: "When text input is completed, press Enter (applicable to search boxes)",
20840
21205
  default: false,
20841
21206
  },
20842
21207
  },
@@ -20872,23 +21237,30 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20872
21237
  return await this.callInnerTool(() => this.click_element(agentContext, args.index, (args.num_clicks || 1), (args.button || "left")));
20873
21238
  },
20874
21239
  },
21240
+ /*
20875
21241
  {
20876
- name: "scroll_to_element",
20877
- description: "Scroll to the element",
20878
- parameters: {
20879
- type: "object",
20880
- properties: {
20881
- index: {
20882
- type: "number",
20883
- description: "The index of the element to input text into",
20884
- },
20885
- },
20886
- required: ["index"],
20887
- },
20888
- execute: async (args, agentContext) => {
20889
- return await this.callInnerTool(() => this.scroll_to_element(agentContext, args.index));
21242
+ name: "scroll_to_element",
21243
+ description: "Scroll to the element",
21244
+ parameters: {
21245
+ type: "object",
21246
+ properties: {
21247
+ index: {
21248
+ type: "number",
21249
+ description: "The index of the element to input text into",
21250
+ },
20890
21251
  },
21252
+ required: ["index"],
21253
+ },
21254
+ execute: async (
21255
+ args: Record<string, unknown>,
21256
+ agentContext: AgentContext
21257
+ ): Promise<ToolResult> => {
21258
+ return await this.callInnerTool(() =>
21259
+ this.scroll_to_element(agentContext, args.index as number)
21260
+ );
21261
+ },
20891
21262
  },
21263
+ */
20892
21264
  {
20893
21265
  name: "scroll_mouse_wheel",
20894
21266
  description: "Scroll the mouse wheel at current position, only scroll when you need to load more content",
@@ -20907,10 +21279,11 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
20907
21279
  },
20908
21280
  extract_page_content: {
20909
21281
  type: "boolean",
21282
+ default: false,
20910
21283
  description: "After scrolling is completed, whether to extract the current latest page content",
20911
21284
  },
20912
21285
  },
20913
- required: ["amount", "direction"],
21286
+ required: ["amount", "direction", "extract_page_content"],
20914
21287
  },
20915
21288
  execute: async (args, agentContext) => {
20916
21289
  return await this.callInnerTool(async () => {
@@ -21025,7 +21398,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21025
21398
  description: "Duration in millisecond",
21026
21399
  default: 500,
21027
21400
  minimum: 200,
21028
- maximum: 2000,
21401
+ maximum: 10000,
21029
21402
  },
21030
21403
  },
21031
21404
  required: ["duration"],
@@ -21036,33 +21409,94 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
21036
21409
  },
21037
21410
  ];
21038
21411
  }
21412
+ async double_screenshots(agentContext, messages, tools) {
21413
+ return true;
21414
+ }
21039
21415
  async handleMessages(agentContext, messages, tools) {
21416
+ const pseudoHtmlDescription = "This is the latest screenshot and page element information.\nindex and element:\n";
21040
21417
  let lastTool = this.lastToolResult(messages);
21041
21418
  if (lastTool &&
21042
21419
  lastTool.toolName !== "extract_page_content" &&
21043
21420
  lastTool.toolName !== "get_all_tabs" &&
21044
21421
  lastTool.toolName !== "variable_storage") {
21045
21422
  await sleep(300);
21423
+ let image_contents = [];
21424
+ if (await this.double_screenshots(agentContext, messages, tools)) {
21425
+ let imageResult = await this.screenshot(agentContext);
21426
+ let image = toImage(imageResult.imageBase64);
21427
+ image_contents.push({
21428
+ type: "image",
21429
+ image: image,
21430
+ mimeType: imageResult.imageType,
21431
+ });
21432
+ }
21046
21433
  let result = await this.screenshot_and_html(agentContext);
21047
21434
  let image = toImage(result.imageBase64);
21435
+ image_contents.push({
21436
+ type: "image",
21437
+ image: image,
21438
+ mimeType: result.imageType,
21439
+ });
21048
21440
  messages.push({
21049
21441
  role: "user",
21050
21442
  content: [
21051
- {
21052
- type: "image",
21053
- image: image,
21054
- mimeType: result.imageType,
21055
- },
21443
+ ...image_contents,
21056
21444
  {
21057
21445
  type: "text",
21058
- text: "This is the latest screenshot and page element information.\nindex and element:\n" +
21059
- result.pseudoHtml,
21446
+ text: pseudoHtmlDescription + result.pseudoHtml,
21060
21447
  },
21061
21448
  ],
21062
21449
  });
21063
21450
  }
21451
+ if (messages.length > 10) {
21452
+ // compressed pseudoHtml
21453
+ for (let i = 2; i < messages.length - 3; i++) {
21454
+ let message = messages[i];
21455
+ if (message.role == "user" && message.content.length == 2) {
21456
+ let content = message.content;
21457
+ for (let j = 0; j < content.length; j++) {
21458
+ let _content = content[j];
21459
+ if (_content.type == "text" &&
21460
+ _content.text.startsWith(pseudoHtmlDescription)) {
21461
+ _content.text = this.removePseudoHtmlAttr(_content.text, [
21462
+ "class",
21463
+ "src",
21464
+ "href",
21465
+ ]);
21466
+ }
21467
+ }
21468
+ }
21469
+ }
21470
+ }
21064
21471
  super.handleMessages(agentContext, messages, tools);
21065
21472
  }
21473
+ removePseudoHtmlAttr(pseudoHtml, remove_attrs) {
21474
+ return pseudoHtml
21475
+ .split("\n")
21476
+ .map((line) => {
21477
+ if (!line.startsWith("[") || line.indexOf("]:<") == -1) {
21478
+ return line;
21479
+ }
21480
+ for (let i = 0; i < remove_attrs.length; i++) {
21481
+ let sIdx = line.indexOf(remove_attrs[i] + '="');
21482
+ if (sIdx == -1) {
21483
+ continue;
21484
+ }
21485
+ let eIdx = line.indexOf('"', sIdx + remove_attrs[i].length + 3);
21486
+ if (eIdx == -1) {
21487
+ continue;
21488
+ }
21489
+ line =
21490
+ line.substring(0, sIdx) +
21491
+ line
21492
+ .substring(eIdx + 1)
21493
+ .trim()
21494
+ .replace('" >', '">');
21495
+ }
21496
+ return line;
21497
+ })
21498
+ .join("\n");
21499
+ }
21066
21500
  }
21067
21501
  function typing(params) {
21068
21502
  let { index, text, enter } = params;
@@ -21094,6 +21528,19 @@ function typing(params) {
21094
21528
  }
21095
21529
  }
21096
21530
  input.focus && input.focus();
21531
+ if (!text && enter) {
21532
+ ["keydown", "keypress", "keyup"].forEach((eventType) => {
21533
+ const event = new KeyboardEvent(eventType, {
21534
+ key: "Enter",
21535
+ code: "Enter",
21536
+ keyCode: 13,
21537
+ bubbles: true,
21538
+ cancelable: true,
21539
+ });
21540
+ input.dispatchEvent(event);
21541
+ });
21542
+ return true;
21543
+ }
21097
21544
  if (input.value == undefined) {
21098
21545
  input.textContent = text;
21099
21546
  }
@@ -21209,14 +21656,35 @@ function scroll_by(params) {
21209
21656
  window.scrollBy(0, y * amount);
21210
21657
  return;
21211
21658
  }
21659
+ function findNodes(element = document, nodes = []) {
21660
+ for (const node of Array.from(element.querySelectorAll("*"))) {
21661
+ if (node.tagName === "IFRAME" && node.contentDocument) {
21662
+ findNodes(node.contentDocument, nodes);
21663
+ }
21664
+ else {
21665
+ nodes.push(node);
21666
+ }
21667
+ }
21668
+ return nodes;
21669
+ }
21212
21670
  function findScrollableElements() {
21213
- const allElements = Array.from(document.querySelectorAll("*"));
21214
- return allElements.filter((el) => {
21671
+ const allElements = findNodes();
21672
+ let elements = allElements.filter((el) => {
21215
21673
  const style = window.getComputedStyle(el);
21216
21674
  const overflowY = style.getPropertyValue("overflow-y");
21217
21675
  return ((overflowY === "auto" || overflowY === "scroll") &&
21218
21676
  el.scrollHeight > el.clientHeight);
21219
21677
  });
21678
+ if (elements.length == 0) {
21679
+ elements = allElements.filter((el) => {
21680
+ const style = window.getComputedStyle(el);
21681
+ const overflowY = style.getPropertyValue("overflow-y");
21682
+ return (overflowY === "auto" ||
21683
+ overflowY === "scroll" ||
21684
+ el.scrollHeight > el.clientHeight);
21685
+ });
21686
+ }
21687
+ return elements;
21220
21688
  }
21221
21689
  function getVisibleArea(element) {
21222
21690
  const rect = element.getBoundingClientRect();
@@ -21243,6 +21711,12 @@ function scroll_by(params) {
21243
21711
  const viewportHeight = largestElement.clientHeight;
21244
21712
  const y = Math.max(20, Math.min(viewportHeight / 10, 200));
21245
21713
  largestElement.scrollBy(0, y * amount);
21714
+ const maxHeightElement = sortedElements.sort((a, b) => b.getBoundingClientRect().height - a.getBoundingClientRect().height)[0];
21715
+ if (maxHeightElement != largestElement) {
21716
+ const viewportHeight = maxHeightElement.clientHeight;
21717
+ const y = Math.max(20, Math.min(viewportHeight / 10, 200));
21718
+ maxHeightElement.scrollBy(0, y * amount);
21719
+ }
21246
21720
  return true;
21247
21721
  }
21248
21722
 
@@ -21510,7 +21984,7 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
21510
21984
  description: "Duration in millisecond",
21511
21985
  default: 500,
21512
21986
  minimum: 200,
21513
- maximum: 2000,
21987
+ maximum: 10000,
21514
21988
  },
21515
21989
  },
21516
21990
  required: ["duration"],