@empiricalrun/test-gen 0.37.3 → 0.37.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/dist/actions/index.d.ts +1 -1
- package/dist/actions/index.d.ts.map +1 -1
- package/dist/actions/index.js +2 -4
- package/dist/agent/browsing/index.d.ts +1 -1
- package/dist/agent/browsing/index.d.ts.map +1 -1
- package/dist/agent/browsing/index.js +14 -2
- package/dist/agent/browsing/o1-completion.d.ts.map +1 -1
- package/dist/agent/browsing/o1-completion.js +1 -0
- package/dist/agent/codegen/skills-retriever.d.ts +2 -0
- package/dist/agent/codegen/skills-retriever.d.ts.map +1 -1
- package/dist/agent/codegen/utils.d.ts +1 -0
- package/dist/agent/codegen/utils.d.ts.map +1 -1
- package/dist/agent/codegen/utils.js +3 -2
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +15 -4
- package/dist/evals/fetch-pom-skills-agent.evals.js +1 -1
- package/package.json +1 -1
- package/dist/actions/reload-page.d.ts +0 -4
- package/dist/actions/reload-page.d.ts.map +0 -1
- package/dist/actions/reload-page.js +0 -41
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.37.4
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 46e6f61: test: added test for fetch pom skills
|
|
8
|
+
- 5315b91: fix: remove support for page.reload action
|
|
9
|
+
- 9b4894a: fix: identification of agent stuck in loop
|
|
10
|
+
- 8c6bab2: fix: regex for extracting test steps suggestion
|
|
11
|
+
- 0128934: fix: added code in trace for master agent
|
|
12
|
+
|
|
3
13
|
## 0.37.3
|
|
4
14
|
|
|
5
15
|
### Patch Changes
|
package/dist/actions/index.d.ts
CHANGED
|
@@ -7,7 +7,7 @@ export declare class PlaywrightActions {
|
|
|
7
7
|
private actionGenerators;
|
|
8
8
|
private recordedActions;
|
|
9
9
|
constructor(page: Page, stateVariables?: Record<string, any>);
|
|
10
|
-
executeAction(name: string | undefined, args: Record<string, any>, trace?: TraceClient): Promise<
|
|
10
|
+
executeAction(name: string | undefined, args: Record<string, any>, trace?: TraceClient): Promise<string | undefined>;
|
|
11
11
|
getBrowsingActionSchemas(): ActionSchema[];
|
|
12
12
|
getMasterActionSchemas(): ActionSchema[];
|
|
13
13
|
generateCode(): {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,YAAY,EAA6B,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,YAAY,EAA6B,MAAM,UAAU,CAAC;AAWnE,qBAAa,iBAAiB;IAQ1B,OAAO,CAAC,IAAI;IACZ,OAAO,CAAC,cAAc;IARxB,OAAO,CAAC,gBAAgB,CAA8B;IACtD,OAAO,CAAC,eAAe,CAInB;gBAEM,IAAI,EAAE,IAAI,EACV,cAAc,GAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAM;IAgB5C,aAAa,CACjB,IAAI,oBAAa,EACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACzB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAgD9B,wBAAwB,IAAI,YAAY,EAAE;IAkB1C,sBAAsB,IAAI,YAAY,EAAE;IAUxC,YAAY,IAAI;QACd,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,EAAE,CAAC;KACvB;IAUD,gBAAgB,CAAC,KAAK,EAAE,MAAM;IAK9B;;;;;;OAMG;IACH,aAAa,IAAI,OAAO;IAQxB,UAAU;CAUX"}
|
package/dist/actions/index.js
CHANGED
|
@@ -10,7 +10,6 @@ const fill_1 = require("./fill");
|
|
|
10
10
|
const goto_1 = require("./goto");
|
|
11
11
|
const hover_1 = require("./hover");
|
|
12
12
|
const press_1 = require("./press");
|
|
13
|
-
const reload_page_1 = require("./reload-page");
|
|
14
13
|
const skill_1 = require("./skill");
|
|
15
14
|
const text_content_1 = require("./text-content");
|
|
16
15
|
class PlaywrightActions {
|
|
@@ -29,7 +28,6 @@ class PlaywrightActions {
|
|
|
29
28
|
press_1.pressActionGenerator,
|
|
30
29
|
done_1.doneActionGenerator,
|
|
31
30
|
assert_1.assertTextVisibilityActionGenerator,
|
|
32
|
-
reload_page_1.reloadActionGenerator,
|
|
33
31
|
text_content_1.textContentActionGenerator,
|
|
34
32
|
skill_1.skillActionGenerator,
|
|
35
33
|
];
|
|
@@ -71,6 +69,7 @@ class PlaywrightActions {
|
|
|
71
69
|
logger.log(`action: ${name} \ncode: ${code} \nreason: ${args.reason}`);
|
|
72
70
|
void testgenUpdatesReporter.sendMessage("```ts\n" + code + "\n```");
|
|
73
71
|
}
|
|
72
|
+
return code;
|
|
74
73
|
}
|
|
75
74
|
catch (e) {
|
|
76
75
|
actionSpan?.end({
|
|
@@ -90,7 +89,6 @@ class PlaywrightActions {
|
|
|
90
89
|
press_1.pressActionGenerator,
|
|
91
90
|
done_1.doneActionGenerator,
|
|
92
91
|
assert_1.assertTextVisibilityActionGenerator,
|
|
93
|
-
reload_page_1.reloadActionGenerator,
|
|
94
92
|
text_content_1.textContentActionGenerator,
|
|
95
93
|
]
|
|
96
94
|
.map((a) => a(this.page, {
|
|
@@ -115,7 +113,7 @@ class PlaywrightActions {
|
|
|
115
113
|
};
|
|
116
114
|
}
|
|
117
115
|
getLastCodeLines(count) {
|
|
118
|
-
const actions = this.recordedActions
|
|
116
|
+
const actions = this.recordedActions;
|
|
119
117
|
return actions.slice(-count).map((a) => a.code);
|
|
120
118
|
}
|
|
121
119
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAIhD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,MAAM,MAAM,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IACjE,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,KAAK,EACL,MAAM,EACN,MAAM,EACN,IAAI,EACJ,OAAO,EACP,GAAG,EACH,OAAO,GACR,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,IAAI,CAAC;IACX,OAAO,EAAE,oBAAoB,CAAC;IAC9B,GAAG,EAAE,GAAG,CAAC;IACT,OAAO,EAAE,iBAAiB,CAAC;CAC5B,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAIhD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,MAAM,MAAM,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IACjE,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,KAAK,EACL,MAAM,EACN,MAAM,EACN,IAAI,EACJ,OAAO,EACP,GAAG,EACH,OAAO,GACR,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,IAAI,CAAC;IACX,OAAO,EAAE,oBAAoB,CAAC;IAC9B,GAAG,EAAE,GAAG,CAAC;IACT,OAAO,EAAE,iBAAiB,CAAC;CAC5B,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,SAAS,CAAC,CAiJhC"}
|
|
@@ -12,11 +12,15 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
|
|
|
12
12
|
let isTaskDone = false;
|
|
13
13
|
const executedActions = [];
|
|
14
14
|
let lastActionExecTrace = "";
|
|
15
|
+
let generatedCodeSteps = [];
|
|
15
16
|
const tools = actions.getBrowsingActionSchemas();
|
|
16
17
|
const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
|
|
17
18
|
while (!isTaskDone) {
|
|
18
19
|
const browsingAgentSpan = trace?.span({
|
|
19
20
|
name: `browsing-agent`,
|
|
21
|
+
input: {
|
|
22
|
+
action,
|
|
23
|
+
},
|
|
20
24
|
});
|
|
21
25
|
if (await (0, session_1.shouldStopSession)()) {
|
|
22
26
|
break;
|
|
@@ -48,7 +52,11 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
|
|
|
48
52
|
if (isTaskDone) {
|
|
49
53
|
browsingAgentSpan?.event({ name: "task-done" });
|
|
50
54
|
browsingAgentSpan?.end({
|
|
51
|
-
output: {
|
|
55
|
+
output: {
|
|
56
|
+
taskDone: true,
|
|
57
|
+
reason: verificationAgentResp.reason,
|
|
58
|
+
code: generatedCodeSteps,
|
|
59
|
+
},
|
|
52
60
|
});
|
|
53
61
|
break;
|
|
54
62
|
}
|
|
@@ -98,7 +106,10 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
|
|
|
98
106
|
break;
|
|
99
107
|
}
|
|
100
108
|
try {
|
|
101
|
-
await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments), toolCallsSpan);
|
|
109
|
+
const code = await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments), toolCallsSpan);
|
|
110
|
+
if (code) {
|
|
111
|
+
generatedCodeSteps.push(code);
|
|
112
|
+
}
|
|
102
113
|
executedActions.push({
|
|
103
114
|
isError: false,
|
|
104
115
|
action: JSON.parse(toolCall.function.arguments)?.reason,
|
|
@@ -132,5 +143,6 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
|
|
|
132
143
|
}
|
|
133
144
|
}
|
|
134
145
|
}
|
|
146
|
+
return generatedCodeSteps;
|
|
135
147
|
}
|
|
136
148
|
exports.executeTaskUsingBrowsingAgent = executeTaskUsingBrowsingAgent;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"o1-completion.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/o1-completion.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EACL,qBAAqB,EACrB,0BAA0B,EAC1B,kBAAkB,EACnB,MAAM,4BAA4B,CAAC;AAKpC,wBAAsB,eAAe,CAAC,EACpC,QAAQ,EACR,KAAK,EACL,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,0BAA0B,EAAE,CAAC;IACvC,KAAK,EAAE,kBAAkB,EAAE,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,qBAAqB,GAAG,SAAS,CAAC,
|
|
1
|
+
{"version":3,"file":"o1-completion.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/o1-completion.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EACL,qBAAqB,EACrB,0BAA0B,EAC1B,kBAAkB,EACnB,MAAM,4BAA4B,CAAC;AAKpC,wBAAsB,eAAe,CAAC,EACpC,QAAQ,EACR,KAAK,EACL,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,0BAA0B,EAAE,CAAC;IACvC,KAAK,EAAE,kBAAkB,EAAE,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,qBAAqB,GAAG,SAAS,CAAC,CA8D7C"}
|
|
@@ -33,6 +33,7 @@ async function getO1Completion({ messages, tools, trace, }) {
|
|
|
33
33
|
- Do not respond with any backticks.
|
|
34
34
|
- The reason for action should also include what was been executed in the action.
|
|
35
35
|
`;
|
|
36
|
+
o1Span?.update({ input: [userInstruction] });
|
|
36
37
|
const response = (await llm.createChatCompletion({
|
|
37
38
|
messages: [userInstruction],
|
|
38
39
|
modelParameters: {
|
|
@@ -10,6 +10,7 @@ export declare const fetchPomSkills: ({ testCase, pomFiles, options, trace, }: {
|
|
|
10
10
|
filePath: string;
|
|
11
11
|
usageExample: string;
|
|
12
12
|
reason: string;
|
|
13
|
+
methodName: string;
|
|
13
14
|
}[]>;
|
|
14
15
|
export declare function getAppropriateSkills({ testCase, options, trace, }: {
|
|
15
16
|
testCase: TestCase;
|
|
@@ -20,5 +21,6 @@ export declare function getAppropriateSkills({ testCase, options, trace, }: {
|
|
|
20
21
|
filePath: string;
|
|
21
22
|
usageExample: string;
|
|
22
23
|
reason: string;
|
|
24
|
+
methodName: string;
|
|
23
25
|
}[]>;
|
|
24
26
|
//# sourceMappingURL=skills-retriever.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"skills-retriever.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/skills-retriever.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAYhE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG7D,eAAO,MAAM,cAAc;cAMf,QAAQ
|
|
1
|
+
{"version":3,"file":"skills-retriever.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/skills-retriever.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAYhE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG7D,eAAO,MAAM,cAAc;cAMf,QAAQ;;;;;;;;;;IA8CnB,CAAC;AAEF,wBAAsB,oBAAoB,CAAC,EACzC,QAAQ,EACR,OAAO,EACP,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB;;;;;;KA6BA"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/utils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG;IACjD,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CAiBF;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,GAAG;IACvD,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,cAAc,EAAE,MAAM,GAAG,SAAS,CAAC;IACnC,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CA8BF;AAED,wBAAgB,2BAA2B,CAAC,KAAK,EAAE,MAAM,GAAG;IAC1D,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/utils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG;IACjD,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CAiBF;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,GAAG;IACvD,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,cAAc,EAAE,MAAM,GAAG,SAAS,CAAC;IACnC,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CA8BF;AAED,wBAAgB,2BAA2B,CAAC,KAAK,EAAE,MAAM,GAAG;IAC1D,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACpB,EAAE,CAgBF"}
|
|
@@ -66,15 +66,16 @@ function extractAppendTestUpdates(input) {
|
|
|
66
66
|
exports.extractAppendTestUpdates = extractAppendTestUpdates;
|
|
67
67
|
function extractTestStepsSuggestions(input) {
|
|
68
68
|
const result = [];
|
|
69
|
-
const regex = /<subtask>(.*?)<\/subtask>[\s\S]*?<file_import_path>([\s\S]*?)<\/file_import_path>[\s\S]*?<usage_example>([\s\S]*?)<\/usage_example>[\s\S]*?<
|
|
69
|
+
const regex = /<subtask>(.*?)<\/subtask>[\s\S]*?<reason>([\s\S]*?)<\/reason>[\s\S]*?<file_import_path>([\s\S]*?)<\/file_import_path>[\s\S]*?<usage_example>([\s\S]*?)<\/usage_example>[\s\S]*?<method_name>([\s\S]*?)<\/method_name>/g;
|
|
70
70
|
let match;
|
|
71
71
|
while ((match = regex.exec(input)) !== null) {
|
|
72
|
-
const [, testStep, filePath, usageExample, reason] = match;
|
|
72
|
+
const [, testStep, filePath, usageExample, reason, methodName] = match;
|
|
73
73
|
result.push({
|
|
74
74
|
testStep: testStep?.trim() || "",
|
|
75
75
|
filePath: filePath?.trim() || "",
|
|
76
76
|
usageExample: usageExample?.trim() || "",
|
|
77
77
|
reason: reason?.trim() || "",
|
|
78
|
+
methodName: methodName?.trim() || "",
|
|
78
79
|
});
|
|
79
80
|
}
|
|
80
81
|
return result.filter((r) => !!r.filePath && !!r.usageExample);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAQrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;CACnB,2FAwFA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAQrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;CACnB,2FAwFA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAwTA"}
|
package/dist/agent/master/run.js
CHANGED
|
@@ -178,6 +178,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
178
178
|
const testGenSnapshotUpdatePromise = testGenReporter.sendCurrentView(buffer);
|
|
179
179
|
const pageScreenshot = buffer.toString("base64");
|
|
180
180
|
let output;
|
|
181
|
+
let generatedCodeSteps = [];
|
|
181
182
|
let annotations;
|
|
182
183
|
let testGenAnnotatedSnapshotUpdatePromise;
|
|
183
184
|
let annotatedPageScreenshot;
|
|
@@ -269,23 +270,29 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
269
270
|
void testGenReporter.sendMessage(output.reason);
|
|
270
271
|
logger.log(`Next Action: ${output.action}`);
|
|
271
272
|
if (toolCall.function.name === skill_1.SKILL_USAGE) {
|
|
272
|
-
await actions.executeAction(toolCall.function.name, args, masterAgentActionSpan);
|
|
273
|
+
const code = await actions.executeAction(toolCall.function.name, args, masterAgentActionSpan);
|
|
274
|
+
if (code) {
|
|
275
|
+
generatedCodeSteps.push(code);
|
|
276
|
+
}
|
|
273
277
|
}
|
|
274
278
|
else if (shouldTriggerHintsFlow && hintsExecutionCompletion) {
|
|
275
279
|
const toolCalls = hintsExecutionCompletion?.tool_calls || [];
|
|
276
280
|
for (const i in toolCalls) {
|
|
277
281
|
const currentToolCall = toolCalls[i];
|
|
278
|
-
await actions.executeAction(currentToolCall.function.name, {
|
|
282
|
+
const code = await actions.executeAction(currentToolCall.function.name, {
|
|
279
283
|
...JSON.parse(currentToolCall.function.arguments),
|
|
280
284
|
...args,
|
|
281
285
|
}, masterAgentActionSpan);
|
|
286
|
+
if (code) {
|
|
287
|
+
generatedCodeSteps.push(code);
|
|
288
|
+
}
|
|
282
289
|
}
|
|
283
290
|
if (actions.isStuckInLoop()) {
|
|
284
291
|
throw new Error("Agent is not able to figure out next action when using hints");
|
|
285
292
|
}
|
|
286
293
|
}
|
|
287
294
|
else {
|
|
288
|
-
await (0, browsing_1.executeTaskUsingBrowsingAgent)({
|
|
295
|
+
generatedCodeSteps = await (0, browsing_1.executeTaskUsingBrowsingAgent)({
|
|
289
296
|
trace: masterAgentActionSpan,
|
|
290
297
|
action: output.action,
|
|
291
298
|
logger,
|
|
@@ -343,7 +350,11 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
343
350
|
await testGenAnnotatedSnapshotUpdatePromise;
|
|
344
351
|
}
|
|
345
352
|
masterAgentSpan?.end({
|
|
346
|
-
output: {
|
|
353
|
+
output: {
|
|
354
|
+
action: output.action,
|
|
355
|
+
reason: output.reason,
|
|
356
|
+
code: generatedCodeSteps,
|
|
357
|
+
},
|
|
347
358
|
});
|
|
348
359
|
}
|
|
349
360
|
const { code, importPaths } = actions.generateCode();
|
|
@@ -24,7 +24,7 @@ const fetchSkillsAgentEvaluator = async ({ item, trace }) => {
|
|
|
24
24
|
{
|
|
25
25
|
name: "equality",
|
|
26
26
|
value: output.some((o) => {
|
|
27
|
-
return item.expectedOutput.some((e) => e.
|
|
27
|
+
return item.expectedOutput.some((e) => e.methodName === o.methodName);
|
|
28
28
|
})
|
|
29
29
|
? 1
|
|
30
30
|
: 0,
|
package/package.json
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"reload-page.d.ts","sourceRoot":"","sources":["../../src/actions/reload-page.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAIrD,eAAO,MAAM,6BAA6B,gBAAgB,CAAC;AAE3D,eAAO,MAAM,qBAAqB,EAAE,yBAiCnC,CAAC"}
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.reloadActionGenerator = exports.PLAYWRIGHT_RELOAD_ACTION_NAME = void 0;
|
|
4
|
-
const utils_1 = require("../agent/browsing/utils");
|
|
5
|
-
const constants_1 = require("./constants");
|
|
6
|
-
const utils_2 = require("./utils");
|
|
7
|
-
exports.PLAYWRIGHT_RELOAD_ACTION_NAME = "page_reload";
|
|
8
|
-
const reloadActionGenerator = (page) => {
|
|
9
|
-
return {
|
|
10
|
-
execute: async () => {
|
|
11
|
-
await page.reload();
|
|
12
|
-
await page.waitForTimeout(3000);
|
|
13
|
-
await (0, utils_1.injectPwLocatorGenerator)(page);
|
|
14
|
-
},
|
|
15
|
-
template: () => {
|
|
16
|
-
const code = `await ${(0, utils_2.getPageVarName)()}.reload();`;
|
|
17
|
-
return {
|
|
18
|
-
code,
|
|
19
|
-
};
|
|
20
|
-
},
|
|
21
|
-
name: exports.PLAYWRIGHT_RELOAD_ACTION_NAME,
|
|
22
|
-
schema: {
|
|
23
|
-
type: "function",
|
|
24
|
-
function: {
|
|
25
|
-
name: exports.PLAYWRIGHT_RELOAD_ACTION_NAME,
|
|
26
|
-
description: "reload the page by calling this method. Call this method only when a page reload is requested in the task.",
|
|
27
|
-
parameters: {
|
|
28
|
-
type: "object",
|
|
29
|
-
properties: {
|
|
30
|
-
reason: {
|
|
31
|
-
type: "string",
|
|
32
|
-
description: constants_1.DEFAULT_ACTION_REASON_PROMPT,
|
|
33
|
-
},
|
|
34
|
-
},
|
|
35
|
-
required: ["reason"],
|
|
36
|
-
},
|
|
37
|
-
},
|
|
38
|
-
},
|
|
39
|
-
};
|
|
40
|
-
};
|
|
41
|
-
exports.reloadActionGenerator = reloadActionGenerator;
|