@empiricalrun/test-gen 0.38.28 → 0.38.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/actions/skill.d.ts +1 -1
- package/dist/actions/skill.d.ts.map +1 -1
- package/dist/agent/master/element-annotation.d.ts +11 -0
- package/dist/agent/master/element-annotation.d.ts.map +1 -0
- package/dist/agent/master/element-annotation.js +134 -0
- package/dist/agent/master/next-action.d.ts +19 -0
- package/dist/agent/master/next-action.d.ts.map +1 -0
- package/dist/agent/master/next-action.js +161 -0
- package/dist/agent/master/planner.d.ts +15 -0
- package/dist/agent/master/planner.d.ts.map +1 -0
- package/dist/agent/master/planner.js +143 -0
- package/dist/agent/master/run.d.ts +1 -18
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +124 -175
- package/dist/agent/master/with-hints.d.ts.map +1 -1
- package/dist/agent/master/with-hints.js +3 -1
- package/dist/evals/master-agent.evals.js +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +13 -29
- package/dist/types/index.d.ts +0 -1
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +9 -1
- package/dist/agent/master/run-v2.d.ts +0 -88
- package/dist/agent/master/run-v2.d.ts.map +0 -1
- package/dist/agent/master/run-v2.js +0 -708
package/dist/agent/master/run.js
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.createTestUsingMasterAgent =
|
|
3
|
+
exports.createTestUsingMasterAgent = void 0;
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
|
-
const vision_1 = require("@empiricalrun/llm/vision");
|
|
6
5
|
const actions_1 = require("../../actions");
|
|
7
|
-
const next_task_1 = require("../../actions/next-task");
|
|
8
6
|
const skill_1 = require("../../actions/skill");
|
|
9
7
|
const utils_1 = require("../../actions/utils");
|
|
10
8
|
const logger_1 = require("../../bin/logger");
|
|
@@ -16,6 +14,8 @@ const browsing_1 = require("../browsing");
|
|
|
16
14
|
const utils_2 = require("../browsing/utils");
|
|
17
15
|
const skills_retriever_1 = require("../codegen/skills-retriever");
|
|
18
16
|
const run_time_planner_1 = require("../planner/run-time-planner");
|
|
17
|
+
const element_annotation_1 = require("./element-annotation");
|
|
18
|
+
const next_action_1 = require("./next-action");
|
|
19
19
|
const with_hints_1 = require("./with-hints");
|
|
20
20
|
const MAX_ERROR_COUNT = 2;
|
|
21
21
|
function getPageVariables(stateVariables) {
|
|
@@ -27,90 +27,6 @@ function getPageVariables(stateVariables) {
|
|
|
27
27
|
}, {});
|
|
28
28
|
return pages;
|
|
29
29
|
}
|
|
30
|
-
async function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, annotatedPageScreenshot, actions, disableSkills, useHints = false, annotations, }) {
|
|
31
|
-
const nextActionSpan = trace?.span({
|
|
32
|
-
name: "master-agent-next-action",
|
|
33
|
-
input: {
|
|
34
|
-
task,
|
|
35
|
-
executedActions,
|
|
36
|
-
failedActions,
|
|
37
|
-
pageUrl,
|
|
38
|
-
options,
|
|
39
|
-
pageScreenshot,
|
|
40
|
-
annotatedPageScreenshot,
|
|
41
|
-
disableSkills,
|
|
42
|
-
useHints,
|
|
43
|
-
skills: skill_1.testCaseSkills.getAvailableSkills(),
|
|
44
|
-
annotations,
|
|
45
|
-
},
|
|
46
|
-
});
|
|
47
|
-
const promptSpan = nextActionSpan?.span({
|
|
48
|
-
name: "master-agent-prompt",
|
|
49
|
-
});
|
|
50
|
-
const promptMessages = await (0, llm_1.getPrompt)("test-gen", {
|
|
51
|
-
task,
|
|
52
|
-
failedActions: failedActions.map((a) => a).join("\n"),
|
|
53
|
-
executedActions: executedActions.map((a) => a).join("\n"),
|
|
54
|
-
pageUrl,
|
|
55
|
-
annotations,
|
|
56
|
-
}, 24);
|
|
57
|
-
// assuming there is only one user message in the prompt. if there is a change in langfuse prompt format, this will need to be updated
|
|
58
|
-
const userMessage = promptMessages.filter((m) => m.role === "user")[0];
|
|
59
|
-
const systemMessage = promptMessages.filter((m) => m.role === "system")[0];
|
|
60
|
-
userMessage.content =
|
|
61
|
-
useHints && annotatedPageScreenshot
|
|
62
|
-
? (0, with_hints_1.getUserMessageWithForHints)({
|
|
63
|
-
userMessage: userMessage,
|
|
64
|
-
options,
|
|
65
|
-
pageScreenshot,
|
|
66
|
-
annotatedPageScreenshot,
|
|
67
|
-
})
|
|
68
|
-
: [
|
|
69
|
-
{
|
|
70
|
-
type: "text",
|
|
71
|
-
text: userMessage.content,
|
|
72
|
-
},
|
|
73
|
-
{
|
|
74
|
-
type: "image_url",
|
|
75
|
-
image_url: {
|
|
76
|
-
url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, pageScreenshot),
|
|
77
|
-
},
|
|
78
|
-
},
|
|
79
|
-
];
|
|
80
|
-
const messages = [
|
|
81
|
-
systemMessage,
|
|
82
|
-
userMessage,
|
|
83
|
-
];
|
|
84
|
-
const actionSchemas = disableSkills || skill_1.testCaseSkills.getAvailableSkills().length === 0
|
|
85
|
-
? []
|
|
86
|
-
: actions.getMasterActionSchemas();
|
|
87
|
-
const tools = [next_task_1.NextTaskAction.schema, ...actionSchemas];
|
|
88
|
-
promptSpan?.end({ output: { messages } });
|
|
89
|
-
llm =
|
|
90
|
-
llm ||
|
|
91
|
-
new llm_1.LLM({
|
|
92
|
-
provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
93
|
-
defaultModel: options?.model || constants_1.DEFAULT_MODEL,
|
|
94
|
-
});
|
|
95
|
-
const completion = await llm.createChatCompletion({
|
|
96
|
-
messages,
|
|
97
|
-
modelParameters: {
|
|
98
|
-
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
99
|
-
...options?.modelParameters,
|
|
100
|
-
tool_choice: "required",
|
|
101
|
-
temperature: 1,
|
|
102
|
-
},
|
|
103
|
-
trace: nextActionSpan,
|
|
104
|
-
traceName: "master-agent-llm",
|
|
105
|
-
// @ts-ignore
|
|
106
|
-
tools,
|
|
107
|
-
});
|
|
108
|
-
const toolCall = completion?.tool_calls?.[0];
|
|
109
|
-
nextActionSpan?.end({ output: toolCall });
|
|
110
|
-
return toolCall;
|
|
111
|
-
}
|
|
112
|
-
exports.getNextAction = getNextAction;
|
|
113
|
-
// let screenshotIndex = 0; // keeping this for local debugging
|
|
114
30
|
async function createTestUsingMasterAgent({ task, page, testCase, options, scopeVars, }) {
|
|
115
31
|
const useHints = options?.useHints || false;
|
|
116
32
|
const logger = new logger_1.CustomLogger({ useReporter: false });
|
|
@@ -146,12 +62,16 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
146
62
|
// we will be using google model for larger context window, in such cases 1 million tokens is not enough
|
|
147
63
|
maxTokens: options.modelProvider === "google" ? 3000000 : 1000000,
|
|
148
64
|
});
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
65
|
+
let skills = [];
|
|
66
|
+
if (testCase) {
|
|
67
|
+
//Fetching available skills
|
|
68
|
+
skills = await (0, skills_retriever_1.getAppropriateSkills)({
|
|
69
|
+
testCase,
|
|
70
|
+
trace,
|
|
71
|
+
//@ts-ignore
|
|
72
|
+
options,
|
|
73
|
+
});
|
|
74
|
+
}
|
|
155
75
|
skill_1.testCaseSkills.updateSkills(skills);
|
|
156
76
|
const actions = new actions_1.PlaywrightActions(testGenPage, scopeVars);
|
|
157
77
|
await (0, utils_2.injectPwLocatorGenerator)(page);
|
|
@@ -160,6 +80,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
160
80
|
const masterAgentActions = [];
|
|
161
81
|
let failedActions = [];
|
|
162
82
|
let disableSkills = false;
|
|
83
|
+
//Run the loop until task is done or we have reached max retry limit
|
|
163
84
|
while (!isGivenTaskDone) {
|
|
164
85
|
if (await (0, session_1.shouldStopSession)()) {
|
|
165
86
|
break;
|
|
@@ -172,6 +93,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
172
93
|
failedActions,
|
|
173
94
|
},
|
|
174
95
|
});
|
|
96
|
+
//Provides a plan to master agent to execute, also verifies whether task is done or not
|
|
175
97
|
const plannerResp = await (0, run_time_planner_1.runtimePlanner)({
|
|
176
98
|
trace: masterAgentSpan,
|
|
177
99
|
task,
|
|
@@ -202,41 +124,11 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
202
124
|
const pageScreenshot = buffer.toString("base64");
|
|
203
125
|
let output;
|
|
204
126
|
let generatedCodeSteps = [];
|
|
205
|
-
let annotatedPageScreenshot;
|
|
206
|
-
let annotationKeys = [];
|
|
207
|
-
if (useHints) {
|
|
208
|
-
await page.waitForTimeout(2000);
|
|
209
|
-
annotationKeys = await page.evaluate(() => {
|
|
210
|
-
// @ts-ignore
|
|
211
|
-
// eslint-disable-next-line no-undef
|
|
212
|
-
window.annotationInstance = annotateClickableElements();
|
|
213
|
-
// @ts-ignore
|
|
214
|
-
return Object.keys(window.annotationInstance.annotations);
|
|
215
|
-
});
|
|
216
|
-
await page.waitForTimeout(2000);
|
|
217
|
-
const annonationBuffer = await page.screenshot({
|
|
218
|
-
//This is done to improve element annotation accuracy, anyways it doesn't annotate elements which are out of viewport
|
|
219
|
-
// fullPage: true,
|
|
220
|
-
// path: `screenshots/screenshot-${screenshotIndex++}.png`,
|
|
221
|
-
});
|
|
222
|
-
await page.evaluate(() => {
|
|
223
|
-
console.log({
|
|
224
|
-
// @ts-ignore
|
|
225
|
-
disable: window?.annotationInstance?.disable,
|
|
226
|
-
});
|
|
227
|
-
// @ts-ignore
|
|
228
|
-
if (window?.annotationInstance && window?.annotationInstance?.destroy) {
|
|
229
|
-
// @ts-ignore
|
|
230
|
-
window?.annotationInstance?.destroy();
|
|
231
|
-
}
|
|
232
|
-
});
|
|
233
|
-
await testGenReporter.sendCurrentView(annonationBuffer);
|
|
234
|
-
annotatedPageScreenshot = annonationBuffer.toString("base64");
|
|
235
|
-
}
|
|
236
127
|
if (await (0, session_1.shouldStopSession)()) {
|
|
237
128
|
break;
|
|
238
129
|
}
|
|
239
|
-
|
|
130
|
+
//Provides next action that needs to be taken
|
|
131
|
+
const toolCall = await (0, next_action_1.getNextAction)({
|
|
240
132
|
task,
|
|
241
133
|
executedActions: masterAgentActions,
|
|
242
134
|
failedActions,
|
|
@@ -245,11 +137,9 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
245
137
|
llm,
|
|
246
138
|
options,
|
|
247
139
|
pageScreenshot,
|
|
248
|
-
annotatedPageScreenshot,
|
|
249
140
|
actions,
|
|
250
141
|
disableSkills,
|
|
251
142
|
useHints,
|
|
252
|
-
annotations: annotationKeys,
|
|
253
143
|
});
|
|
254
144
|
if (await (0, session_1.shouldStopSession)()) {
|
|
255
145
|
break;
|
|
@@ -264,32 +154,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
264
154
|
action: JSON.parse(toolCall.function.arguments).action ||
|
|
265
155
|
JSON.parse(toolCall.function.arguments).skill,
|
|
266
156
|
reason: JSON.parse(toolCall.function.arguments).reason,
|
|
267
|
-
elementAnnotation: JSON.parse(toolCall.function.arguments)
|
|
268
|
-
?.elementAnnotation,
|
|
269
157
|
};
|
|
270
|
-
let shouldTriggerHintsFlow;
|
|
271
|
-
let hintsExecutionCompletion;
|
|
272
|
-
if (useHints) {
|
|
273
|
-
const triggerHintsFlowSpan = masterAgentActionSpan?.span({
|
|
274
|
-
name: "trigger-hints-flow",
|
|
275
|
-
input: {
|
|
276
|
-
outputFromGetNextAction: output,
|
|
277
|
-
generatedAnnotations: annotationKeys,
|
|
278
|
-
},
|
|
279
|
-
});
|
|
280
|
-
const result = await (0, with_hints_1.triggerHintsFlow)({
|
|
281
|
-
outputFromGetNextAction: output,
|
|
282
|
-
generatedAnnotations: annotationKeys,
|
|
283
|
-
page: testGenPage,
|
|
284
|
-
llm,
|
|
285
|
-
trace: triggerHintsFlowSpan,
|
|
286
|
-
});
|
|
287
|
-
shouldTriggerHintsFlow = result.shouldTriggerHintsFlow;
|
|
288
|
-
hintsExecutionCompletion = result.hintsExecutionCompletion;
|
|
289
|
-
triggerHintsFlowSpan?.end({
|
|
290
|
-
output: result,
|
|
291
|
-
});
|
|
292
|
-
}
|
|
293
158
|
void testGenReporter.sendMessage(output.reason);
|
|
294
159
|
logger.log(`Next Action: ${output.action}`);
|
|
295
160
|
if (toolCall.function.name === skill_1.SKILL_USAGE) {
|
|
@@ -298,33 +163,117 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
298
163
|
generatedCodeSteps.push(code);
|
|
299
164
|
}
|
|
300
165
|
}
|
|
301
|
-
else
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
166
|
+
else {
|
|
167
|
+
let shouldTriggerHintsFlow;
|
|
168
|
+
let hintsExecutionCompletion;
|
|
169
|
+
let annotationKeys = [];
|
|
170
|
+
let elementAnnotation;
|
|
171
|
+
if (useHints) {
|
|
172
|
+
await page.waitForTimeout(2000);
|
|
173
|
+
annotationKeys = await page.evaluate(() => {
|
|
174
|
+
// @ts-ignore
|
|
175
|
+
// eslint-disable-next-line no-undef
|
|
176
|
+
window.annotationInstance = annotateClickableElements();
|
|
177
|
+
const annotations = Object.entries(
|
|
178
|
+
// @ts-ignore
|
|
179
|
+
window.annotationInstance.annotations).map(([key, value]) => ({
|
|
180
|
+
elementID: key,
|
|
181
|
+
text:
|
|
182
|
+
//@ts-ignore
|
|
183
|
+
value.node.innerText?.trim() ||
|
|
184
|
+
//@ts-ignore
|
|
185
|
+
value.node.placeholder?.trim() ||
|
|
186
|
+
"NA",
|
|
187
|
+
}));
|
|
188
|
+
return annotations;
|
|
189
|
+
});
|
|
190
|
+
await page.waitForTimeout(2000);
|
|
191
|
+
let annotatedPageScreenshot;
|
|
192
|
+
const annonationBuffer = await page.screenshot({
|
|
193
|
+
//This is done to improve element annotation accuracy, anyways it doesn't annotate elements which are out of viewport
|
|
194
|
+
// fullPage: true,
|
|
195
|
+
// path: `screenshots/screenshot-${screenshotIndex++}.png`,
|
|
196
|
+
});
|
|
197
|
+
annotatedPageScreenshot = annonationBuffer.toString("base64");
|
|
198
|
+
await page.evaluate(() => {
|
|
199
|
+
console.log({
|
|
200
|
+
// @ts-ignore
|
|
201
|
+
disable: window?.annotationInstance?.disable,
|
|
202
|
+
});
|
|
203
|
+
if (
|
|
204
|
+
// @ts-ignore
|
|
205
|
+
window?.annotationInstance &&
|
|
206
|
+
// @ts-ignore
|
|
207
|
+
window?.annotationInstance?.destroy) {
|
|
208
|
+
// @ts-ignore
|
|
209
|
+
window?.annotationInstance?.destroy();
|
|
210
|
+
}
|
|
211
|
+
});
|
|
212
|
+
const annotationMapString = annotationKeys
|
|
213
|
+
?.map((a) => `${a.elementID}:${a.text}`)
|
|
214
|
+
.join("\n");
|
|
215
|
+
//Provides the annotations for all the element present on screen
|
|
216
|
+
//Also provides the annotation of element on which the action needs to be taken
|
|
217
|
+
elementAnnotation = await (0, element_annotation_1.getElementAnnotation)({
|
|
218
|
+
elementDescription: JSON.parse(toolCall.function.arguments)
|
|
219
|
+
.element_description,
|
|
220
|
+
annotations: annotationMapString,
|
|
221
|
+
annotatedScreenshot: annotatedPageScreenshot,
|
|
222
|
+
trace: masterAgentActionSpan,
|
|
223
|
+
llm,
|
|
224
|
+
});
|
|
225
|
+
output.elementAnnotation = elementAnnotation;
|
|
226
|
+
console.log("Output: ", output);
|
|
227
|
+
await testGenReporter.sendCurrentView(annonationBuffer);
|
|
228
|
+
const triggerHintsFlowSpan = masterAgentActionSpan?.span({
|
|
229
|
+
name: "trigger-hints-flow",
|
|
230
|
+
input: {
|
|
231
|
+
outputFromGetNextAction: output,
|
|
232
|
+
generatedAnnotations: annotationKeys,
|
|
233
|
+
},
|
|
234
|
+
});
|
|
235
|
+
//Provides the action whether its a click, fill etc.
|
|
236
|
+
const result = await (0, with_hints_1.triggerHintsFlow)({
|
|
237
|
+
outputFromGetNextAction: output,
|
|
238
|
+
generatedAnnotations: annotationKeys,
|
|
239
|
+
page: testGenPage,
|
|
240
|
+
llm,
|
|
241
|
+
trace: triggerHintsFlowSpan,
|
|
242
|
+
});
|
|
243
|
+
shouldTriggerHintsFlow = result.shouldTriggerHintsFlow;
|
|
244
|
+
hintsExecutionCompletion = result.hintsExecutionCompletion;
|
|
245
|
+
triggerHintsFlowSpan?.end({
|
|
246
|
+
output: result,
|
|
247
|
+
});
|
|
248
|
+
}
|
|
249
|
+
if (shouldTriggerHintsFlow && hintsExecutionCompletion) {
|
|
250
|
+
const toolCalls = hintsExecutionCompletion?.tool_calls || [];
|
|
251
|
+
for (const i in toolCalls) {
|
|
252
|
+
const currentToolCall = toolCalls[i];
|
|
253
|
+
const code = await actions.executeAction(currentToolCall.function.name, {
|
|
254
|
+
...JSON.parse(currentToolCall.function.arguments),
|
|
255
|
+
elementAnnotation,
|
|
256
|
+
}, masterAgentActionSpan);
|
|
257
|
+
if (code) {
|
|
258
|
+
generatedCodeSteps.push(code);
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
if (actions.isStuckInLoop()) {
|
|
262
|
+
throw new Error("Agent is not able to figure out next action when using hints");
|
|
311
263
|
}
|
|
312
264
|
}
|
|
313
|
-
|
|
314
|
-
|
|
265
|
+
else {
|
|
266
|
+
generatedCodeSteps = await (0, browsing_1.executeTaskUsingBrowsingAgent)({
|
|
267
|
+
trace: masterAgentActionSpan,
|
|
268
|
+
action: output.action,
|
|
269
|
+
logger,
|
|
270
|
+
page,
|
|
271
|
+
options,
|
|
272
|
+
llm,
|
|
273
|
+
actions,
|
|
274
|
+
});
|
|
315
275
|
}
|
|
316
276
|
}
|
|
317
|
-
else {
|
|
318
|
-
generatedCodeSteps = await (0, browsing_1.executeTaskUsingBrowsingAgent)({
|
|
319
|
-
trace: masterAgentActionSpan,
|
|
320
|
-
action: output.action,
|
|
321
|
-
logger,
|
|
322
|
-
page,
|
|
323
|
-
options,
|
|
324
|
-
llm,
|
|
325
|
-
actions,
|
|
326
|
-
});
|
|
327
|
-
}
|
|
328
277
|
// resetting error count as there is a successful action
|
|
329
278
|
failedActions = [];
|
|
330
279
|
masterAgentActions.push(output.action);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAO5B,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,OAAO,MAAM,EAAE,GAAG,CAAC;UACnC,WAAW;SACZ,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,
|
|
1
|
+
{"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAO5B,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,OAAO,MAAM,EAAE,GAAG,CAAC;UACnC,WAAW;SACZ,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CA0GA,CAAC"}
|
|
@@ -40,7 +40,9 @@ const triggerHintsFlow = async ({ outputFromGetNextAction, generatedAnnotations,
|
|
|
40
40
|
try {
|
|
41
41
|
const hasElementAnnotation = outputFromGetNextAction?.elementAnnotation?.length &&
|
|
42
42
|
outputFromGetNextAction?.elementAnnotation?.trim()?.length &&
|
|
43
|
-
generatedAnnotations
|
|
43
|
+
generatedAnnotations.some(
|
|
44
|
+
//@ts-ignore
|
|
45
|
+
(annotation) => annotation.elementID === outputFromGetNextAction?.elementAnnotation);
|
|
44
46
|
trace?.event({
|
|
45
47
|
name: "has-element-annotation",
|
|
46
48
|
output: {
|
|
@@ -3,14 +3,14 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.masterGetNextActionEvaluator = void 0;
|
|
4
4
|
const actions_1 = require("../actions");
|
|
5
5
|
const skill_1 = require("../actions/skill");
|
|
6
|
-
const
|
|
6
|
+
const next_action_1 = require("../agent/master/next-action");
|
|
7
7
|
const page_1 = require("../page");
|
|
8
8
|
const masterGetNextActionEvaluator = async ({ item, trace, }) => {
|
|
9
9
|
const { task, executedActions, failedActions, pageUrl, options, pageScreenshot, annotatedPageScreenshot, disableSkills, useHints, skills = [], annotations, } = item.input;
|
|
10
10
|
const page = {};
|
|
11
11
|
skill_1.testCaseSkills.updateSkills(skills);
|
|
12
12
|
const actions = new actions_1.PlaywrightActions(new page_1.TestGenPage(page, "page"));
|
|
13
|
-
const output = await (0,
|
|
13
|
+
const output = await (0, next_action_1.getNextAction)({
|
|
14
14
|
task,
|
|
15
15
|
executedActions,
|
|
16
16
|
failedActions,
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAOpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAuC3E"}
|
package/dist/index.js
CHANGED
|
@@ -7,7 +7,6 @@ exports.createTest = void 0;
|
|
|
7
7
|
const llm_1 = require("@empiricalrun/llm");
|
|
8
8
|
const test_1 = __importDefault(require("playwright/test"));
|
|
9
9
|
const run_1 = require("./agent/master/run");
|
|
10
|
-
const run_v2_1 = require("./agent/master/run-v2");
|
|
11
10
|
const utils_1 = require("./bin/utils");
|
|
12
11
|
const client_1 = __importDefault(require("./file/client"));
|
|
13
12
|
const reporter_1 = require("./reporter");
|
|
@@ -34,35 +33,20 @@ async function createTest(task, page, scope) {
|
|
|
34
33
|
projectRepoName: testGenConfig.options?.metadata.projectRepoName,
|
|
35
34
|
});
|
|
36
35
|
const fileService = new client_1.default(Number(port));
|
|
37
|
-
const { code, importPaths } =
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
],
|
|
47
|
-
},
|
|
48
|
-
...testGenConfig.options,
|
|
49
|
-
},
|
|
50
|
-
scopeVars: scope,
|
|
51
|
-
})
|
|
52
|
-
: await (0, run_1.createTestUsingMasterAgent)({
|
|
53
|
-
testCase: testGenConfig.testCase,
|
|
54
|
-
page,
|
|
55
|
-
task,
|
|
56
|
-
options: {
|
|
57
|
-
htmlSanitize: {
|
|
58
|
-
disallowedStrings: [
|
|
59
|
-
"v-data-table__td v-data-table-column--align-start",
|
|
60
|
-
],
|
|
61
|
-
},
|
|
62
|
-
...testGenConfig.options,
|
|
36
|
+
const { code, importPaths } = await (0, run_1.createTestUsingMasterAgent)({
|
|
37
|
+
testCase: testGenConfig.testCase,
|
|
38
|
+
page,
|
|
39
|
+
task,
|
|
40
|
+
options: {
|
|
41
|
+
htmlSanitize: {
|
|
42
|
+
disallowedStrings: [
|
|
43
|
+
"v-data-table__td v-data-table-column--align-start",
|
|
44
|
+
],
|
|
63
45
|
},
|
|
64
|
-
|
|
65
|
-
}
|
|
46
|
+
...testGenConfig.options,
|
|
47
|
+
},
|
|
48
|
+
scopeVars: scope,
|
|
49
|
+
});
|
|
66
50
|
await fileService.updateTest({
|
|
67
51
|
task,
|
|
68
52
|
generatedCode: code,
|
package/dist/types/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,EACR,WAAW,EACX,eAAe,EACf,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAC3B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAEtC,MAAM,MAAM,WAAW,GAAG;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,kBAAkB,EAAE,MAAM,EAAE,CAAC;CAC9B,CAAC;AAEF,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAC;AAExD,MAAM,MAAM,oBAAoB,GAAG;IACjC,KAAK,EAAE,KAAK,CAAC;IACb,KAAK,EAAE,QAAQ,CAAC;IAChB,aAAa,EAAE,WAAW,CAAC;IAC3B,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,EACR,WAAW,EACX,eAAe,EACf,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAC3B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAEtC,MAAM,MAAM,WAAW,GAAG;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,kBAAkB,EAAE,MAAM,EAAE,CAAC;CAC9B,CAAC;AAEF,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAC;AAExD,MAAM,MAAM,oBAAoB,GAAG;IACjC,KAAK,EAAE,KAAK,CAAC;IACb,KAAK,EAAE,QAAQ,CAAC;IAChB,aAAa,EAAE,WAAW,CAAC;IAC3B,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,QAAQ,EAAE;QACR,aAAa,EAAE,MAAM,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,MAAM,CAAC;QACrB,eAAe,EAAE,MAAM,CAAC;QACxB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,aAAa,GAAG,YAAY,CAAC;KAC3C,CAAC;CACH,CAAC;AAEF,MAAM,MAAM,KAAK,GAAG;IAClB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,QAAQ,GAAG;IACrB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG,CACtC,IAAI,EAAE,WAAW,EACjB,OAAO,EAAE;IACP,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IACpC,iBAAiB,EAAE,CAAC,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KAAK,IAAI,CAAC;CAClE,KACE,MAAM,CAAC;AAEZ,MAAM,MAAM,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC;AAEtE,MAAM,MAAM,MAAM,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,YAAY,CAAC;IACrB,OAAO,EAAE,CAAC,OAAO,EAAE;QACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC1B,KAAK,CAAC,EAAE,WAAW,CAAC;KACrB,KAAK,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC,CAAC;IAC1C,QAAQ,EAAE,CACR,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACzB,OAAO,EAAE;QAAE,OAAO,EAAE,MAAM,CAAA;KAAE,KACzB;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CAC5C,CAAC;AAEF,MAAM,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.38.
|
|
3
|
+
"version": "0.38.30",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -18,6 +18,14 @@
|
|
|
18
18
|
"types": "./dist/types/index.d.ts",
|
|
19
19
|
"default": "./dist/types/index.js"
|
|
20
20
|
},
|
|
21
|
+
"./agent/master/run": {
|
|
22
|
+
"types": "./dist/agent/master/run.d.ts",
|
|
23
|
+
"default": "./dist/agent/master/run.js"
|
|
24
|
+
},
|
|
25
|
+
"./agent/master/planner": {
|
|
26
|
+
"types": "./dist/agent/master/planner.d.ts",
|
|
27
|
+
"default": "./dist/agent/master/planner.js"
|
|
28
|
+
},
|
|
21
29
|
".": {
|
|
22
30
|
"types": "./dist/index.d.ts",
|
|
23
31
|
"default": "./dist/index.js"
|
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
import { LLM, TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
import OpenAI from "openai";
|
|
3
|
-
import { Page } from "playwright";
|
|
4
|
-
import { PlaywrightActions } from "../../actions";
|
|
5
|
-
import { TestGenPage } from "../../page";
|
|
6
|
-
import { ScopeVars, TestCase } from "../../types";
|
|
7
|
-
import { BrowsingAgentOptions } from "../browsing";
|
|
8
|
-
export declare const AnnotationToolAction: {
|
|
9
|
-
name: string;
|
|
10
|
-
schema: {
|
|
11
|
-
type: string;
|
|
12
|
-
function: {
|
|
13
|
-
name: string;
|
|
14
|
-
description: string;
|
|
15
|
-
parameters: {
|
|
16
|
-
type: string;
|
|
17
|
-
properties: {
|
|
18
|
-
enriched_annotations: {
|
|
19
|
-
type: string;
|
|
20
|
-
description: string;
|
|
21
|
-
items: {
|
|
22
|
-
type: string;
|
|
23
|
-
properties: {
|
|
24
|
-
element_id: {
|
|
25
|
-
type: string;
|
|
26
|
-
description: string;
|
|
27
|
-
};
|
|
28
|
-
description: {
|
|
29
|
-
type: string;
|
|
30
|
-
description: string;
|
|
31
|
-
};
|
|
32
|
-
};
|
|
33
|
-
required: string[];
|
|
34
|
-
};
|
|
35
|
-
};
|
|
36
|
-
reason: {
|
|
37
|
-
type: string;
|
|
38
|
-
description: string;
|
|
39
|
-
};
|
|
40
|
-
element_annotation: {
|
|
41
|
-
type: string;
|
|
42
|
-
description: string;
|
|
43
|
-
};
|
|
44
|
-
};
|
|
45
|
-
required: string[];
|
|
46
|
-
};
|
|
47
|
-
};
|
|
48
|
-
};
|
|
49
|
-
};
|
|
50
|
-
export declare function getNextActionV2({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, actions, disableSkills, useHints, }: {
|
|
51
|
-
task: string;
|
|
52
|
-
executedActions: string[];
|
|
53
|
-
failedActions: any[];
|
|
54
|
-
pageUrl: string;
|
|
55
|
-
trace?: TraceClient;
|
|
56
|
-
llm?: LLM;
|
|
57
|
-
options?: BrowsingAgentOptions;
|
|
58
|
-
pageScreenshot: string;
|
|
59
|
-
annotatedPageScreenshot?: string;
|
|
60
|
-
actions: PlaywrightActions;
|
|
61
|
-
disableSkills: boolean;
|
|
62
|
-
useHints: boolean;
|
|
63
|
-
annotations?: string[];
|
|
64
|
-
}): Promise<import("openai/resources/index.mjs").ChatCompletionMessageToolCall | undefined>;
|
|
65
|
-
export declare const triggerHintsFlowV2: ({ outputFromGetNextAction, generatedAnnotations, page, llm, trace, }: {
|
|
66
|
-
outputFromGetNextAction: {
|
|
67
|
-
action: string;
|
|
68
|
-
elementAnnotation?: string;
|
|
69
|
-
};
|
|
70
|
-
generatedAnnotations: Record<string, any>;
|
|
71
|
-
page: TestGenPage;
|
|
72
|
-
llm: LLM;
|
|
73
|
-
trace?: TraceClient | undefined;
|
|
74
|
-
}) => Promise<{
|
|
75
|
-
shouldTriggerHintsFlow: boolean;
|
|
76
|
-
hintsExecutionCompletion: OpenAI.ChatCompletionMessage | undefined;
|
|
77
|
-
}>;
|
|
78
|
-
export declare function createTestUsingMasterAgentV2({ task, page, testCase, options, scopeVars, }: {
|
|
79
|
-
task: string;
|
|
80
|
-
page: Page;
|
|
81
|
-
testCase: TestCase;
|
|
82
|
-
options: BrowsingAgentOptions;
|
|
83
|
-
scopeVars?: ScopeVars;
|
|
84
|
-
}): Promise<{
|
|
85
|
-
code: string;
|
|
86
|
-
importPaths: string[];
|
|
87
|
-
}>;
|
|
88
|
-
//# sourceMappingURL=run-v2.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"run-v2.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run-v2.ts"],"names":[],"mappings":"AAAA,OAAO,EAAoB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEvE,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAalD,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAGzC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAmBrB,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4ChC,CAAC;AA8GF,wBAAsB,eAAe,CAAC,EACpC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,OAAO,EACP,aAAa,EACb,QAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;CACxB,2FAmKA;AAED,eAAO,MAAM,kBAAkB;6BAOJ;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,OAAO,MAAM,EAAE,GAAG,CAAC;UACnC,WAAW;SACZ,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CA0GA,CAAC;AAGF,wBAAsB,4BAA4B,CAAC,EACjD,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAkVA"}
|