@eko-ai/eko 2.1.1 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/base.d.ts +1 -0
- package/dist/agent/base.d.ts.map +1 -1
- package/dist/agent/browser/browser_labels.d.ts +3 -2
- package/dist/agent/browser/browser_labels.d.ts.map +1 -1
- package/dist/agent/browser/browser_screen.d.ts +1 -1
- package/dist/agent/browser/browser_screen.d.ts.map +1 -1
- package/dist/agent/file.d.ts.map +1 -1
- package/dist/common/utils.d.ts.map +1 -1
- package/dist/common/xml.d.ts.map +1 -1
- package/dist/config/index.d.ts +1 -0
- package/dist/config/index.d.ts.map +1 -1
- package/dist/index.cjs.js +303 -49
- package/dist/index.cjs.js.map +1 -1
- package/dist/index.esm.js +303 -49
- package/dist/index.esm.js.map +1 -1
- package/dist/tools/foreach_task.d.ts.map +1 -1
- package/dist/tools/human_interact.d.ts +1 -0
- package/dist/tools/human_interact.d.ts.map +1 -1
- package/dist/tools/watch_trigger.d.ts +1 -0
- package/dist/tools/watch_trigger.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs.js
CHANGED
|
@@ -7,10 +7,11 @@ var buffer = require('buffer');
|
|
|
7
7
|
const config = {
|
|
8
8
|
name: "Eko",
|
|
9
9
|
platform: "mac",
|
|
10
|
-
maxReactNum:
|
|
10
|
+
maxReactNum: 500,
|
|
11
11
|
maxTokens: 16000,
|
|
12
12
|
compressThreshold: 80,
|
|
13
13
|
largeTextLength: 5000,
|
|
14
|
+
fileTextMaxLength: 20000,
|
|
14
15
|
maxDialogueImgFileNum: 2,
|
|
15
16
|
};
|
|
16
17
|
|
|
@@ -232,6 +233,7 @@ function mergeTools(tools1, tools2) {
|
|
|
232
233
|
map[tool.name] = tool;
|
|
233
234
|
return map;
|
|
234
235
|
}, {});
|
|
236
|
+
let names = [];
|
|
235
237
|
for (let i = 0; i < tools1.length; i++) {
|
|
236
238
|
let tool1 = tools1[i];
|
|
237
239
|
let tool2 = toolMap2[tool1.name];
|
|
@@ -245,8 +247,9 @@ function mergeTools(tools1, tools2) {
|
|
|
245
247
|
}
|
|
246
248
|
for (let i = 0; i < tools2.length; i++) {
|
|
247
249
|
let tool2 = tools2[i];
|
|
248
|
-
if (toolMap2[tool2.name]) {
|
|
250
|
+
if (toolMap2[tool2.name] && names.indexOf(tool2.name) === -1) {
|
|
249
251
|
tools.push(tool2);
|
|
252
|
+
names.push(tool2.name);
|
|
250
253
|
}
|
|
251
254
|
}
|
|
252
255
|
return tools;
|
|
@@ -17519,10 +17522,17 @@ function extractAgentXmlNode(agentXml, nodeId) {
|
|
|
17519
17522
|
let nodesNode = doc.getElementsByTagName("nodes");
|
|
17520
17523
|
if (nodesNode.length > 0) {
|
|
17521
17524
|
let nodes = nodesNode[0].childNodes;
|
|
17525
|
+
let _nodeId = 0;
|
|
17522
17526
|
for (let i = 0; i < nodes.length; i++) {
|
|
17523
17527
|
let node = nodes[i];
|
|
17524
|
-
if (node.nodeType == 1
|
|
17525
|
-
|
|
17528
|
+
if (node.nodeType == 1) {
|
|
17529
|
+
if (node.getAttribute("id") == null || node.getAttribute("id") == "") {
|
|
17530
|
+
node.setAttribute("id", _nodeId + "");
|
|
17531
|
+
}
|
|
17532
|
+
_nodeId++;
|
|
17533
|
+
if (node.getAttribute("id") == nodeId + "") {
|
|
17534
|
+
return node;
|
|
17535
|
+
}
|
|
17526
17536
|
}
|
|
17527
17537
|
}
|
|
17528
17538
|
}
|
|
@@ -17856,7 +17866,7 @@ const TOOL_NAME$4 = "foreach_task";
|
|
|
17856
17866
|
class ForeachTaskTool {
|
|
17857
17867
|
constructor() {
|
|
17858
17868
|
this.name = TOOL_NAME$4;
|
|
17859
|
-
this.description = `When executing the \`forEach\` node, please use
|
|
17869
|
+
this.description = `When executing the \`forEach\` node, please use the current tool for counting to ensure tasks are executed sequentially, the tool needs to be called with each loop iteration.`;
|
|
17860
17870
|
this.parameters = {
|
|
17861
17871
|
type: "object",
|
|
17862
17872
|
properties: {
|
|
@@ -17864,12 +17874,19 @@ class ForeachTaskTool {
|
|
|
17864
17874
|
type: "number",
|
|
17865
17875
|
description: "forEach node ID.",
|
|
17866
17876
|
},
|
|
17877
|
+
progress: {
|
|
17878
|
+
type: "string",
|
|
17879
|
+
description: "Current execution progress.",
|
|
17880
|
+
},
|
|
17881
|
+
next_step: {
|
|
17882
|
+
type: "string",
|
|
17883
|
+
description: "Next task description.",
|
|
17884
|
+
},
|
|
17867
17885
|
},
|
|
17868
|
-
required: ["nodeId"],
|
|
17886
|
+
required: ["nodeId", "progress", "next_step"],
|
|
17869
17887
|
};
|
|
17870
17888
|
}
|
|
17871
17889
|
async execute(args, agentContext) {
|
|
17872
|
-
// 调用 forEach Agent 单独逻辑, 根据上下文判断并循环执行
|
|
17873
17890
|
let nodeId = args.nodeId;
|
|
17874
17891
|
let agentXml = agentContext.agentChain.agent.xml;
|
|
17875
17892
|
let node = extractAgentXmlNode(agentXml, nodeId);
|
|
@@ -17880,10 +17897,27 @@ class ForeachTaskTool {
|
|
|
17880
17897
|
throw new Error("Node ID is not a forEach node: " + nodeId);
|
|
17881
17898
|
}
|
|
17882
17899
|
let items = node.getAttribute("items");
|
|
17900
|
+
let varValue = null;
|
|
17901
|
+
let resultText = "Recorded";
|
|
17883
17902
|
if (items && items != "list") {
|
|
17884
|
-
agentContext.context.variables.get(items.trim());
|
|
17903
|
+
varValue = agentContext.context.variables.get(items.trim());
|
|
17904
|
+
if (varValue) {
|
|
17905
|
+
let key = "foreach_" + nodeId;
|
|
17906
|
+
let loop_count = agentContext.variables.get(key) || 0;
|
|
17907
|
+
if (loop_count % 5 == 0) {
|
|
17908
|
+
resultText = `Variable information associated with the current loop task.\nvariable_name: ${items.trim()}\nvariable_value: ${varValue}`;
|
|
17909
|
+
}
|
|
17910
|
+
agentContext.variables.set(key, ++loop_count);
|
|
17911
|
+
}
|
|
17885
17912
|
}
|
|
17886
|
-
return
|
|
17913
|
+
return {
|
|
17914
|
+
content: [
|
|
17915
|
+
{
|
|
17916
|
+
type: "text",
|
|
17917
|
+
text: resultText,
|
|
17918
|
+
},
|
|
17919
|
+
],
|
|
17920
|
+
};
|
|
17887
17921
|
}
|
|
17888
17922
|
}
|
|
17889
17923
|
|
|
@@ -17955,6 +17989,11 @@ request_help: Request assistance from the user; for instance, when an operation
|
|
|
17955
17989
|
break;
|
|
17956
17990
|
case "request_help":
|
|
17957
17991
|
if (callback.onHumanHelp) {
|
|
17992
|
+
if (args.helpType == "request_login" &&
|
|
17993
|
+
(await this.checkIsLogined(agentContext))) {
|
|
17994
|
+
resultText = "Already logged in";
|
|
17995
|
+
break;
|
|
17996
|
+
}
|
|
17958
17997
|
let result = await callback.onHumanHelp(agentContext, (args.helpType || "request_assistance"), args.prompt);
|
|
17959
17998
|
resultText = `request_help result: ${result ? "Solved" : "Unresolved"}`;
|
|
17960
17999
|
}
|
|
@@ -17983,6 +18022,42 @@ request_help: Request assistance from the user; for instance, when an operation
|
|
|
17983
18022
|
};
|
|
17984
18023
|
}
|
|
17985
18024
|
}
|
|
18025
|
+
async checkIsLogined(agentContext) {
|
|
18026
|
+
let screenshot = agentContext.agent["screenshot"];
|
|
18027
|
+
if (!screenshot) {
|
|
18028
|
+
return false;
|
|
18029
|
+
}
|
|
18030
|
+
try {
|
|
18031
|
+
let imageResult = (await screenshot.call(agentContext.agent, agentContext));
|
|
18032
|
+
let rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms);
|
|
18033
|
+
let image = toImage(imageResult.imageBase64);
|
|
18034
|
+
let request = {
|
|
18035
|
+
messages: [
|
|
18036
|
+
{
|
|
18037
|
+
role: "user",
|
|
18038
|
+
content: [
|
|
18039
|
+
{
|
|
18040
|
+
type: "image",
|
|
18041
|
+
image: image,
|
|
18042
|
+
mimeType: imageResult.imageType,
|
|
18043
|
+
},
|
|
18044
|
+
{
|
|
18045
|
+
type: "text",
|
|
18046
|
+
text: "Check if the current website is logged in. If not logged in, output `NOT_LOGIN`. If logged in, output `LOGGED_IN`. Output directly without explanation.",
|
|
18047
|
+
},
|
|
18048
|
+
],
|
|
18049
|
+
},
|
|
18050
|
+
],
|
|
18051
|
+
abortSignal: agentContext.context.controller.signal,
|
|
18052
|
+
};
|
|
18053
|
+
let result = await rlm.call(request);
|
|
18054
|
+
return result.text && result.text.indexOf("LOGGED_IN") > -1;
|
|
18055
|
+
}
|
|
18056
|
+
catch (error) {
|
|
18057
|
+
console.error("Error auto checking login status:", error);
|
|
18058
|
+
return false;
|
|
18059
|
+
}
|
|
18060
|
+
}
|
|
17986
18061
|
}
|
|
17987
18062
|
|
|
17988
18063
|
const TOOL_NAME$2 = "task_node_status";
|
|
@@ -18120,21 +18195,156 @@ const TOOL_NAME = "watch_trigger";
|
|
|
18120
18195
|
class WatchTriggerTool {
|
|
18121
18196
|
constructor() {
|
|
18122
18197
|
this.name = TOOL_NAME;
|
|
18123
|
-
this.description = `When executing the \`watch\` node, please use it to
|
|
18198
|
+
this.description = `When executing the \`watch\` node, please use it to monitor DOM element changes, it will block the listener until the element changes or times out.`;
|
|
18124
18199
|
this.parameters = {
|
|
18125
18200
|
type: "object",
|
|
18126
18201
|
properties: {
|
|
18127
18202
|
nodeId: {
|
|
18128
18203
|
type: "number",
|
|
18129
|
-
description: "
|
|
18204
|
+
description: "watch node ID.",
|
|
18205
|
+
},
|
|
18206
|
+
watch_area: {
|
|
18207
|
+
type: "array",
|
|
18208
|
+
description: "Element changes in monitoring area, eg: [x, y, width, height].",
|
|
18209
|
+
items: {
|
|
18210
|
+
type: "number",
|
|
18211
|
+
},
|
|
18212
|
+
},
|
|
18213
|
+
watch_index: {
|
|
18214
|
+
type: "array",
|
|
18215
|
+
description: "The index of elements to be monitoring multiple elements simultaneously.",
|
|
18216
|
+
items: {
|
|
18217
|
+
type: "number",
|
|
18218
|
+
},
|
|
18219
|
+
},
|
|
18220
|
+
frequency: {
|
|
18221
|
+
type: "number",
|
|
18222
|
+
description: "Check frequency, how many seconds between each check, default 1 seconds.",
|
|
18223
|
+
default: 1,
|
|
18224
|
+
minimum: 0.5,
|
|
18225
|
+
maximum: 30,
|
|
18226
|
+
},
|
|
18227
|
+
timeout: {
|
|
18228
|
+
type: "number",
|
|
18229
|
+
description: "Timeout in minute, default 5 minutes.",
|
|
18230
|
+
default: 5,
|
|
18231
|
+
minimum: 1,
|
|
18232
|
+
maximum: 30,
|
|
18130
18233
|
},
|
|
18131
18234
|
},
|
|
18132
18235
|
required: ["nodeId"],
|
|
18133
18236
|
};
|
|
18134
18237
|
}
|
|
18135
18238
|
async execute(args, agentContext) {
|
|
18136
|
-
|
|
18137
|
-
|
|
18239
|
+
let nodeId = args.nodeId;
|
|
18240
|
+
let agentXml = agentContext.agentChain.agent.xml;
|
|
18241
|
+
let node = extractAgentXmlNode(agentXml, nodeId);
|
|
18242
|
+
if (node == null) {
|
|
18243
|
+
throw new Error("Node ID does not exist: " + nodeId);
|
|
18244
|
+
}
|
|
18245
|
+
if (node.tagName !== "watch") {
|
|
18246
|
+
throw new Error("Node ID is not a watch node: " + nodeId);
|
|
18247
|
+
}
|
|
18248
|
+
let task_description = node.getElementsByTagName("description")[0]?.textContent || "";
|
|
18249
|
+
if (!task_description) {
|
|
18250
|
+
return {
|
|
18251
|
+
content: [
|
|
18252
|
+
{
|
|
18253
|
+
type: "text",
|
|
18254
|
+
text: "The watch node does not have a description, skip.",
|
|
18255
|
+
},
|
|
18256
|
+
],
|
|
18257
|
+
};
|
|
18258
|
+
}
|
|
18259
|
+
const screenshot = agentContext.agent["screenshot"];
|
|
18260
|
+
const image1Result = (await screenshot.call(agentContext.agent, agentContext));
|
|
18261
|
+
const image1 = toImage(image1Result.imageBase64);
|
|
18262
|
+
const start = new Date().getTime();
|
|
18263
|
+
const timeout = (args.timeout || 5) * 60000;
|
|
18264
|
+
const frequency = Math.max(500, (args.frequency = args.frequency || 1) * 1000);
|
|
18265
|
+
let rlm = new RetryLanguageModel(agentContext.context.config.llms, agentContext.agent.Llms);
|
|
18266
|
+
while (new Date().getTime() - start < timeout) {
|
|
18267
|
+
await agentContext.context.checkAborted();
|
|
18268
|
+
await new Promise((resolve) => setTimeout(resolve, frequency));
|
|
18269
|
+
const image2Result = (await screenshot.call(agentContext.agent, agentContext));
|
|
18270
|
+
const image2 = toImage(image2Result.imageBase64);
|
|
18271
|
+
const changeResult = await this.is_dom_change(agentContext, rlm, image1, image1Result.imageType, image2, image2Result.imageType, task_description);
|
|
18272
|
+
if (changeResult.changed) {
|
|
18273
|
+
return {
|
|
18274
|
+
content: [
|
|
18275
|
+
{
|
|
18276
|
+
type: "text",
|
|
18277
|
+
text: changeResult.changeInfo || "DOM change detected.",
|
|
18278
|
+
},
|
|
18279
|
+
],
|
|
18280
|
+
};
|
|
18281
|
+
}
|
|
18282
|
+
}
|
|
18283
|
+
return {
|
|
18284
|
+
content: [
|
|
18285
|
+
{
|
|
18286
|
+
type: "text",
|
|
18287
|
+
text: "Timeout reached, no DOM changes detected.",
|
|
18288
|
+
},
|
|
18289
|
+
],
|
|
18290
|
+
};
|
|
18291
|
+
}
|
|
18292
|
+
async is_dom_change(agentContext, rlm, image1, image1Type, image2, image2Type, task_description) {
|
|
18293
|
+
try {
|
|
18294
|
+
let request = {
|
|
18295
|
+
messages: [
|
|
18296
|
+
{
|
|
18297
|
+
role: "system",
|
|
18298
|
+
content: `You are a tool for detecting element changes. Given a task description, compare two images to determine whether the changes described in the task have occurred.
|
|
18299
|
+
If the changes have occurred, return an json with \`changed\` set to true and \`changeInfo\` containing a description of the changes. If no changes have occurred, return an object with \`changed\` set to false.
|
|
18300
|
+
|
|
18301
|
+
## Example
|
|
18302
|
+
User: Monitor new messages in group chat
|
|
18303
|
+
### No changes detected
|
|
18304
|
+
Output:
|
|
18305
|
+
{
|
|
18306
|
+
"changed": false
|
|
18307
|
+
}
|
|
18308
|
+
### Change detected
|
|
18309
|
+
Output:
|
|
18310
|
+
{
|
|
18311
|
+
"changed": true,
|
|
18312
|
+
"changeInfo": "New message received in the group chat. The message content is: 'Hello, how are you?'"
|
|
18313
|
+
}`,
|
|
18314
|
+
},
|
|
18315
|
+
{
|
|
18316
|
+
role: "user",
|
|
18317
|
+
content: [
|
|
18318
|
+
{
|
|
18319
|
+
type: "image",
|
|
18320
|
+
image: image1,
|
|
18321
|
+
mimeType: image1Type,
|
|
18322
|
+
},
|
|
18323
|
+
{
|
|
18324
|
+
type: "image",
|
|
18325
|
+
image: image2,
|
|
18326
|
+
mimeType: image2Type,
|
|
18327
|
+
},
|
|
18328
|
+
{
|
|
18329
|
+
type: "text",
|
|
18330
|
+
text: task_description,
|
|
18331
|
+
},
|
|
18332
|
+
],
|
|
18333
|
+
},
|
|
18334
|
+
],
|
|
18335
|
+
abortSignal: agentContext.context.controller.signal,
|
|
18336
|
+
};
|
|
18337
|
+
const result = await rlm.call(request);
|
|
18338
|
+
let resultText = result.text || "{}";
|
|
18339
|
+
resultText = resultText.substring(resultText.indexOf("{"), resultText.lastIndexOf("}") + 1);
|
|
18340
|
+
return JSON.parse(resultText);
|
|
18341
|
+
}
|
|
18342
|
+
catch (error) {
|
|
18343
|
+
Log.error("Error in is_dom_change:", error);
|
|
18344
|
+
}
|
|
18345
|
+
return {
|
|
18346
|
+
changed: false,
|
|
18347
|
+
};
|
|
18138
18348
|
}
|
|
18139
18349
|
}
|
|
18140
18350
|
|
|
@@ -18196,7 +18406,7 @@ repetitive tasks, when executing to the forEach node, require the use of the \`$
|
|
|
18196
18406
|
`;
|
|
18197
18407
|
const WATCH_NODE = `
|
|
18198
18408
|
<!-- monitor task node, the loop attribute specifies whether to listen in a loop or listen once -->
|
|
18199
|
-
<watch event="dom
|
|
18409
|
+
<watch event="dom" loop="true">
|
|
18200
18410
|
<description>Monitor task description</description>
|
|
18201
18411
|
<trigger>
|
|
18202
18412
|
<node>Trigger step node</node>
|
|
@@ -18205,7 +18415,7 @@ const WATCH_NODE = `
|
|
|
18205
18415
|
</watch>`;
|
|
18206
18416
|
const WATCH_PROMPT = `
|
|
18207
18417
|
* watch node
|
|
18208
|
-
monitor changes in webpage DOM
|
|
18418
|
+
monitor changes in webpage DOM elements, when executing to the watch node, require the use of the \`${TOOL_NAME}\` tool.
|
|
18209
18419
|
`;
|
|
18210
18420
|
function getAgentSystemPrompt(agent, agentNode, context, tools, extSysPrompt) {
|
|
18211
18421
|
let prompt = "";
|
|
@@ -18416,6 +18626,14 @@ class Agent {
|
|
|
18416
18626
|
if (hasVariable) {
|
|
18417
18627
|
tools.push(new VariableStorageTool());
|
|
18418
18628
|
}
|
|
18629
|
+
let hasForeach = agentNodeXml.indexOf("</forEach>") > -1;
|
|
18630
|
+
if (hasForeach) {
|
|
18631
|
+
tools.push(new ForeachTaskTool());
|
|
18632
|
+
}
|
|
18633
|
+
let hasWatch = agentNodeXml.indexOf("</watch>") > -1;
|
|
18634
|
+
if (hasWatch) {
|
|
18635
|
+
tools.push(new WatchTriggerTool());
|
|
18636
|
+
}
|
|
18419
18637
|
let toolNames = this.tools.map((tool) => tool.name);
|
|
18420
18638
|
return tools.filter((tool) => toolNames.indexOf(tool.name) == -1);
|
|
18421
18639
|
}
|
|
@@ -18582,6 +18800,9 @@ class Agent {
|
|
|
18582
18800
|
addTool(tool) {
|
|
18583
18801
|
this.tools.push(tool);
|
|
18584
18802
|
}
|
|
18803
|
+
get Llms() {
|
|
18804
|
+
return this.llms;
|
|
18805
|
+
}
|
|
18585
18806
|
get Name() {
|
|
18586
18807
|
return this.name;
|
|
18587
18808
|
}
|
|
@@ -18822,8 +19043,8 @@ Your task is to understand the user's requirements, dynamically plan the user's
|
|
|
18822
19043
|
<forEach items="list or variable name">
|
|
18823
19044
|
<node>forEach step node</node>
|
|
18824
19045
|
</forEach>
|
|
18825
|
-
<!-- When you need to monitor changes in webpage DOM
|
|
18826
|
-
<watch event="dom
|
|
19046
|
+
<!-- When you need to monitor changes in webpage DOM elements, you can use \`Watch\`, the loop attribute specifies whether to listen in a loop or listen once. -->
|
|
19047
|
+
<watch event="dom" loop="true">
|
|
18827
19048
|
<description>Monitor task description</description>
|
|
18828
19049
|
<trigger>
|
|
18829
19050
|
<node>Trigger step node</node>
|
|
@@ -19498,6 +19719,9 @@ class BaseFileAgent extends Agent {
|
|
|
19498
19719
|
}
|
|
19499
19720
|
async do_file_read(agentContext, path, write_variable) {
|
|
19500
19721
|
let file_context = await this.file_read(agentContext, path);
|
|
19722
|
+
if (file_context && file_context.length > config.fileTextMaxLength) {
|
|
19723
|
+
file_context = file_context.substring(0, config.fileTextMaxLength) + "...";
|
|
19724
|
+
}
|
|
19501
19725
|
if (write_variable) {
|
|
19502
19726
|
agentContext.context.variables.set(write_variable, file_context);
|
|
19503
19727
|
}
|
|
@@ -19993,7 +20217,7 @@ This is a computer GUI interface, observe the execution through screenshots, and
|
|
|
19993
20217
|
description: "Duration in millisecond",
|
|
19994
20218
|
default: 500,
|
|
19995
20219
|
minimum: 200,
|
|
19996
|
-
maximum:
|
|
20220
|
+
maximum: 10000,
|
|
19997
20221
|
},
|
|
19998
20222
|
},
|
|
19999
20223
|
required: ["duration"],
|
|
@@ -20856,17 +21080,19 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
|
|
|
20856
21080
|
await sleep(200);
|
|
20857
21081
|
if (!extract_page_content) {
|
|
20858
21082
|
const tools = this.toolUseNames(agentContext.agentChain.agentRequest?.messages);
|
|
20859
|
-
|
|
20860
|
-
|
|
20861
|
-
tools[
|
|
20862
|
-
|
|
20863
|
-
|
|
20864
|
-
|
|
21083
|
+
let scroll_count = 0;
|
|
21084
|
+
for (let i = tools.length - 1; i >= Math.max(tools.length - 8, 0); i--) {
|
|
21085
|
+
if (tools[i] == "scroll_mouse_wheel") {
|
|
21086
|
+
scroll_count++;
|
|
21087
|
+
}
|
|
21088
|
+
}
|
|
21089
|
+
if (scroll_count >= 3) {
|
|
21090
|
+
extract_page_content = true;
|
|
20865
21091
|
}
|
|
20866
21092
|
}
|
|
20867
21093
|
if (extract_page_content) {
|
|
20868
21094
|
let page_content = await this.extract_page_content(agentContext);
|
|
20869
|
-
return "
|
|
21095
|
+
return "The current page content has been extracted, latest page content:\n" + page_content;
|
|
20870
21096
|
}
|
|
20871
21097
|
}
|
|
20872
21098
|
async hover_to_element(agentContext, index) {
|
|
@@ -21011,23 +21237,30 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
|
|
|
21011
21237
|
return await this.callInnerTool(() => this.click_element(agentContext, args.index, (args.num_clicks || 1), (args.button || "left")));
|
|
21012
21238
|
},
|
|
21013
21239
|
},
|
|
21240
|
+
/*
|
|
21014
21241
|
{
|
|
21015
|
-
|
|
21016
|
-
|
|
21017
|
-
|
|
21018
|
-
|
|
21019
|
-
|
|
21020
|
-
|
|
21021
|
-
|
|
21022
|
-
|
|
21023
|
-
|
|
21024
|
-
},
|
|
21025
|
-
required: ["index"],
|
|
21026
|
-
},
|
|
21027
|
-
execute: async (args, agentContext) => {
|
|
21028
|
-
return await this.callInnerTool(() => this.scroll_to_element(agentContext, args.index));
|
|
21242
|
+
name: "scroll_to_element",
|
|
21243
|
+
description: "Scroll to the element",
|
|
21244
|
+
parameters: {
|
|
21245
|
+
type: "object",
|
|
21246
|
+
properties: {
|
|
21247
|
+
index: {
|
|
21248
|
+
type: "number",
|
|
21249
|
+
description: "The index of the element to input text into",
|
|
21250
|
+
},
|
|
21029
21251
|
},
|
|
21252
|
+
required: ["index"],
|
|
21253
|
+
},
|
|
21254
|
+
execute: async (
|
|
21255
|
+
args: Record<string, unknown>,
|
|
21256
|
+
agentContext: AgentContext
|
|
21257
|
+
): Promise<ToolResult> => {
|
|
21258
|
+
return await this.callInnerTool(() =>
|
|
21259
|
+
this.scroll_to_element(agentContext, args.index as number)
|
|
21260
|
+
);
|
|
21261
|
+
},
|
|
21030
21262
|
},
|
|
21263
|
+
*/
|
|
21031
21264
|
{
|
|
21032
21265
|
name: "scroll_mouse_wheel",
|
|
21033
21266
|
description: "Scroll the mouse wheel at current position, only scroll when you need to load more content",
|
|
@@ -21165,7 +21398,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
|
|
|
21165
21398
|
description: "Duration in millisecond",
|
|
21166
21399
|
default: 500,
|
|
21167
21400
|
minimum: 200,
|
|
21168
|
-
maximum:
|
|
21401
|
+
maximum: 10000,
|
|
21169
21402
|
},
|
|
21170
21403
|
},
|
|
21171
21404
|
required: ["duration"],
|
|
@@ -21176,6 +21409,9 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
|
|
|
21176
21409
|
},
|
|
21177
21410
|
];
|
|
21178
21411
|
}
|
|
21412
|
+
async double_screenshots(agentContext, messages, tools) {
|
|
21413
|
+
return true;
|
|
21414
|
+
}
|
|
21179
21415
|
async handleMessages(agentContext, messages, tools) {
|
|
21180
21416
|
const pseudoHtmlDescription = "This is the latest screenshot and page element information.\nindex and element:\n";
|
|
21181
21417
|
let lastTool = this.lastToolResult(messages);
|
|
@@ -21184,16 +21420,27 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
|
|
|
21184
21420
|
lastTool.toolName !== "get_all_tabs" &&
|
|
21185
21421
|
lastTool.toolName !== "variable_storage") {
|
|
21186
21422
|
await sleep(300);
|
|
21423
|
+
let image_contents = [];
|
|
21424
|
+
if (await this.double_screenshots(agentContext, messages, tools)) {
|
|
21425
|
+
let imageResult = await this.screenshot(agentContext);
|
|
21426
|
+
let image = toImage(imageResult.imageBase64);
|
|
21427
|
+
image_contents.push({
|
|
21428
|
+
type: "image",
|
|
21429
|
+
image: image,
|
|
21430
|
+
mimeType: imageResult.imageType,
|
|
21431
|
+
});
|
|
21432
|
+
}
|
|
21187
21433
|
let result = await this.screenshot_and_html(agentContext);
|
|
21188
21434
|
let image = toImage(result.imageBase64);
|
|
21435
|
+
image_contents.push({
|
|
21436
|
+
type: "image",
|
|
21437
|
+
image: image,
|
|
21438
|
+
mimeType: result.imageType,
|
|
21439
|
+
});
|
|
21189
21440
|
messages.push({
|
|
21190
21441
|
role: "user",
|
|
21191
21442
|
content: [
|
|
21192
|
-
|
|
21193
|
-
type: "image",
|
|
21194
|
-
image: image,
|
|
21195
|
-
mimeType: result.imageType,
|
|
21196
|
-
},
|
|
21443
|
+
...image_contents,
|
|
21197
21444
|
{
|
|
21198
21445
|
type: "text",
|
|
21199
21446
|
text: pseudoHtmlDescription + result.pseudoHtml,
|
|
@@ -21239,7 +21486,12 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
|
|
|
21239
21486
|
if (eIdx == -1) {
|
|
21240
21487
|
continue;
|
|
21241
21488
|
}
|
|
21242
|
-
line =
|
|
21489
|
+
line =
|
|
21490
|
+
line.substring(0, sIdx) +
|
|
21491
|
+
line
|
|
21492
|
+
.substring(eIdx + 1)
|
|
21493
|
+
.trim()
|
|
21494
|
+
.replace('" >', '">');
|
|
21243
21495
|
}
|
|
21244
21496
|
return line;
|
|
21245
21497
|
})
|
|
@@ -21406,7 +21658,7 @@ function scroll_by(params) {
|
|
|
21406
21658
|
}
|
|
21407
21659
|
function findNodes(element = document, nodes = []) {
|
|
21408
21660
|
for (const node of Array.from(element.querySelectorAll("*"))) {
|
|
21409
|
-
if (node.tagName ===
|
|
21661
|
+
if (node.tagName === "IFRAME" && node.contentDocument) {
|
|
21410
21662
|
findNodes(node.contentDocument, nodes);
|
|
21411
21663
|
}
|
|
21412
21664
|
else {
|
|
@@ -21427,7 +21679,9 @@ function scroll_by(params) {
|
|
|
21427
21679
|
elements = allElements.filter((el) => {
|
|
21428
21680
|
const style = window.getComputedStyle(el);
|
|
21429
21681
|
const overflowY = style.getPropertyValue("overflow-y");
|
|
21430
|
-
return (overflowY === "auto" ||
|
|
21682
|
+
return (overflowY === "auto" ||
|
|
21683
|
+
overflowY === "scroll" ||
|
|
21684
|
+
el.scrollHeight > el.clientHeight);
|
|
21431
21685
|
});
|
|
21432
21686
|
}
|
|
21433
21687
|
return elements;
|
|
@@ -21730,7 +21984,7 @@ class BaseBrowserScreenAgent extends BaseBrowserAgent {
|
|
|
21730
21984
|
description: "Duration in millisecond",
|
|
21731
21985
|
default: 500,
|
|
21732
21986
|
minimum: 200,
|
|
21733
|
-
maximum:
|
|
21987
|
+
maximum: 10000,
|
|
21734
21988
|
},
|
|
21735
21989
|
},
|
|
21736
21990
|
required: ["duration"],
|