@empiricalrun/test-gen 0.38.28 → 0.38.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/actions/skill.d.ts +1 -1
- package/dist/actions/skill.d.ts.map +1 -1
- package/dist/agent/master/element-annotation.d.ts +11 -0
- package/dist/agent/master/element-annotation.d.ts.map +1 -0
- package/dist/agent/master/element-annotation.js +134 -0
- package/dist/agent/master/next-action.d.ts +19 -0
- package/dist/agent/master/next-action.d.ts.map +1 -0
- package/dist/agent/master/next-action.js +161 -0
- package/dist/agent/master/planner.d.ts +15 -0
- package/dist/agent/master/planner.d.ts.map +1 -0
- package/dist/agent/master/planner.js +143 -0
- package/dist/agent/master/run.d.ts +1 -18
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +124 -175
- package/dist/agent/master/with-hints.d.ts.map +1 -1
- package/dist/agent/master/with-hints.js +3 -1
- package/dist/evals/master-agent.evals.js +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +13 -29
- package/dist/types/index.d.ts +0 -1
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +9 -1
- package/dist/agent/master/run-v2.d.ts +0 -88
- package/dist/agent/master/run-v2.d.ts.map +0 -1
- package/dist/agent/master/run-v2.js +0 -708
|
@@ -1,708 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.createTestUsingMasterAgentV2 = exports.triggerHintsFlowV2 = exports.getNextActionV2 = exports.AnnotationToolAction = void 0;
|
|
4
|
-
const llm_1 = require("@empiricalrun/llm");
|
|
5
|
-
const vision_1 = require("@empiricalrun/llm/vision");
|
|
6
|
-
const actions_1 = require("../../actions");
|
|
7
|
-
const click_1 = require("../../actions/click");
|
|
8
|
-
const fill_1 = require("../../actions/fill");
|
|
9
|
-
const hover_1 = require("../../actions/hover");
|
|
10
|
-
const skill_1 = require("../../actions/skill");
|
|
11
|
-
const utils_1 = require("../../actions/utils");
|
|
12
|
-
const logger_1 = require("../../bin/logger");
|
|
13
|
-
const constants_1 = require("../../constants");
|
|
14
|
-
const page_1 = require("../../page");
|
|
15
|
-
const reporter_1 = require("../../reporter");
|
|
16
|
-
const session_1 = require("../../session");
|
|
17
|
-
const browsing_1 = require("../browsing");
|
|
18
|
-
const utils_2 = require("../browsing/utils");
|
|
19
|
-
const skills_retriever_1 = require("../codegen/skills-retriever");
|
|
20
|
-
const run_time_planner_1 = require("../planner/run-time-planner");
|
|
21
|
-
// Number of failed actions (accumulated since the last successful action) at which
// createTestUsingMasterAgentV2 stops retrying and leaves its loop.
const MAX_ERROR_COUNT = 2;
|
|
22
|
-
/**
 * Selects the page-like entries from a map of state variables.
 *
 * An entry is kept when its key ends with "Page" or "page"; all other
 * entries are dropped. The input object is not modified.
 *
 * @param {Object|null|undefined} stateVariables - Map of variable name to value.
 *   A nullish value is treated as an empty map.
 * @returns {Object} A new plain object holding only the matching entries.
 */
function getPageVariables(stateVariables) {
    const isPageKey = (name) => name.endsWith("Page") || name.endsWith("page");
    // `?? {}` keeps a missing state map from throwing inside Object.entries.
    const pageEntries = Object.entries(stateVariables ?? {}).filter(([name]) => isPageKey(name));
    return Object.fromEntries(pageEntries);
}
|
|
31
|
-
/**
 * Tool definition for the "element_annotation" function call.
 *
 * `schema` is passed as the only entry of `tools` by getElementAnnotation, which then
 * reads `element_annotation` from the arguments of the returned tool call.
 */
exports.AnnotationToolAction = {
    name: "element_annotation",
    schema: {
        type: "function",
        function: {
            name: "element_annotation",
            description: "Handles annotations for elements and enriched annotations.",
            parameters: {
                type: "object",
                properties: {
                    // One { element_id, description } pair per annotated element.
                    enriched_annotations: {
                        type: "array",
                        description: "List of enriched annotations for elements.",
                        items: {
                            type: "object",
                            properties: {
                                element_id: {
                                    type: "string",
                                    description: "Unique element ID for the element.",
                                },
                                description: {
                                    type: "string",
                                    description: "Detailed description for the element. Must not be NA",
                                },
                            },
                            required: ["element_id", "description"],
                        },
                    },
                    // Free-text justification for the chosen element.
                    reason: {
                        type: "string",
                        description: "Explain why this element is selected. The reason should be clear and align with the task or purpose.",
                    },
                    // The ID of the chosen element; this is the value getElementAnnotation returns.
                    element_annotation: {
                        type: "string",
                        description: "Return the unique element ID for the element on which the action needs to be performed.",
                    },
                },
                required: ["enriched_annotations", "reason", "element_annotation"],
            },
        },
    },
};
|
|
73
|
-
/**
 * Asks the LLM which annotated element matches a natural-language element description.
 *
 * Sends the annotation list ("id:text" lines) and the annotated screenshot to the model,
 * forces a call to the `element_annotation` tool, and returns the `element_annotation`
 * argument of the first tool call.
 *
 * @param {Object} params
 * @param {string} params.elementDescription - Description of the element to act on.
 * @param {string} params.annotations - Annotation list interpolated into the prompt.
 * @param {string} params.annotatedScreenshot - Screenshot passed to imageFormatForProvider.
 * @param {Object} [params.trace] - Optional trace/span; a child span is opened on it.
 * @param {Object} [params.llm] - LLM client; a new one is created from `options` when omitted.
 * @param {Object} [params.options] - Optional `modelProvider`, `model`, `modelParameters`.
 * @returns {Promise<*>} The `element_annotation` value from the tool call arguments.
 * @throws {Error} When the completion has no tool call or its arguments are not valid JSON.
 */
async function getElementAnnotation({ elementDescription, annotations, annotatedScreenshot, trace, llm, options, }) {
    const annotationsSpan = trace?.span({
        name: "get-element-annotation",
        input: {
            elementDescription,
            annotations,
            annotatedScreenshot,
        },
    });
    const provider = options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER;
    const systemMessage = {
        role: "system",
        content: `You are an expert in describing the images and it's content. You need to provide the descriptions of annotated elements present in the image.

You will be provided with an annotated screenshot where interact-able / clickable elements are annotated. The annotation is done by drawing a red box around the element and a small yellow box on it which contains unique element id.

You are given a Annotations which contains list of unique element id and description of the element separated by ":".

You are also given the description of the element on which the action needs to be taken. The description includes information about how the element looks, it's position etc.

Your task is to provide the annotation of the element on which the action needs to be performed based on the element description.

Follow steps to fulfil your task:
- Using the list of all element Ids provided to you, map all the element Ids on the annotated screen and describe each element.
- For describing each element Id
-- iterate over each element Id in annotation list
-- check if the description is already present for the element Id in the Annotation provided to you. If present skip describing it and use it as is.
-- if the description is NA, then identify the element in the annotated screenshot and describe it using the image or icon enclosed in the element.
- Respond with the mapped element Ids as "enriched_annotations"
- Based on the description provided to you and the enriched annotations, first identify the element Id whose description matches the task provided

Note:
- Ensure providing the description of all the elements in the list.
- Don't update the description if its already present in the given annotations
- Replace all the "NA" with description of the element. Its position, how does it look like etc.
- There should be no "NA" present in any of the element description
`,
    };
    const userMessage = {
        role: "user",
        content: [
            {
                type: "text",
                text: `
Element description:
${elementDescription}

Annotations:
${annotations}`,
            },
            {
                type: "image_url",
                image_url: {
                    url: (0, vision_1.imageFormatForProvider)(provider, annotatedScreenshot),
                },
            },
        ],
    };
    const client = llm ||
        new llm_1.LLM({
            provider,
            defaultModel: options?.model || constants_1.DEFAULT_MODEL,
        });
    const completion = await client.createChatCompletion({
        messages: [systemMessage, userMessage],
        modelParameters: {
            ...constants_1.DEFAULT_MODEL_PARAMETERS,
            ...options?.modelParameters,
            // Listed last so caller-supplied parameters cannot override them.
            tool_choice: "required",
            temperature: 1,
        },
        trace: annotationsSpan,
        traceName: "get-element-from-action",
        //@ts-ignore
        tools: [exports.AnnotationToolAction.schema],
    });
    const toolCall = completion?.tool_calls?.[0];
    annotationsSpan?.end({ output: toolCall });
    // The lookup above tolerates a missing tool call, so fail with a clear message
    // instead of a TypeError from dereferencing `undefined`.
    if (!toolCall?.function) {
        throw new Error("Element annotation failed: the model did not return a tool call");
    }
    let parsedArguments;
    try {
        parsedArguments = JSON.parse(toolCall.function.arguments);
    }
    catch (e) {
        throw new Error("Element annotation failed: tool call arguments are not valid JSON", { cause: e });
    }
    return parsedArguments.element_annotation;
}
|
|
158
|
-
/**
 * Asks the LLM for the next step towards completing `task`.
 *
 * The prompt carries the task, the executed and failed actions, the current page URL and
 * a page screenshot. The model is forced to answer with a tool call: either `next_task`
 * or, when skills are enabled and available, one of the schemas returned by
 * `actions.getMasterActionSchemas()`.
 *
 * @param {Object} params
 * @param {string} params.task - Task to accomplish.
 * @param {Array} params.executedActions - Actions already executed (joined with newlines).
 * @param {Array} params.failedActions - Actions that failed (joined with newlines).
 * @param {string} params.pageUrl - URL of the current page.
 * @param {Object} [params.trace] - Optional trace/span; child spans are opened on it.
 * @param {Object} [params.llm] - LLM client; a new one is created from `options` when omitted.
 * @param {Object} [params.options] - Optional `modelProvider`, `model`, `modelParameters`.
 * @param {string} params.pageScreenshot - Screenshot passed to imageFormatForProvider.
 * @param {Object} params.actions - Provides `getMasterActionSchemas()`.
 * @param {boolean} [params.disableSkills] - When truthy, only `next_task` is offered.
 * @param {boolean} [params.useHints=false] - Only recorded in the span input.
 * @returns {Promise<Object|undefined>} The first tool call of the completion, if any.
 */
async function getNextActionV2({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, actions, disableSkills, useHints = false, }) {
    const nextActionSpan = trace?.span({
        name: "master-agent-next-action",
        input: {
            task,
            executedActions,
            failedActions,
            pageUrl,
            options,
            pageScreenshot,
            disableSkills,
            useHints,
            skills: skill_1.testCaseSkills.getAvailableSkills(),
        },
    });
    const promptSpan = nextActionSpan?.span({
        name: "master-agent-prompt",
    });
    // assuming there is only one user message in the prompt. if there is a change in langfuse prompt format, this will need to be updated
    const systemPrompt = `You are an web automation tool which is given a task to complete. You need to execute the task provided to you with the help of web page screenshot, a browser automation tool or skills which are learnt while writing previous tests.

Browser automation tool is a tool which uses Playwright and browser to execute action using next_action tool call.
Skill usage is a tool which helps to execute previously known pieces of code to achieve a task.

You will be provided with a screenshot of the webpage which you will use to extract the action that needs to be taken.

You will be provided with previously executed actions by the browser automation tool and based on the current screenshot and previously executed actions, you need to predict the next action to be taken.

You will also be provided with failed next action predicted by you, so that you can avoid suggesting the same action again - which failed.

The next action should be as atomic as possible.
e.g: click on an element, fill an input element, assert, extract text from an element are valid next action as they are atomic in nature.

You will also be provided with skill usage tool which you can use to execute action. These skills are compound functions which helps you to complete your action.

You need to respond with either:
- Next action to be taken by a browser automation tool
- Use previously learnt skills in the form of tool call.

You need to make a decision whether the given skill can be reused if "YES" respond with the skill else respond with the next action.`;
    const taskPrompt = `Task:
${task}

----

Previous executed actions:
${executedActions.map((entry) => entry).join("\n")}

----

Previous failed actions:
${failedActions.map((entry) => entry).join("\n")}

----
You are also provided with a page screenshot for you to decide the next action.

Current page URL: ${pageUrl}


Follow the instructions before responding:
- Divide the task into sub tasks
- Using previously executed actions, identify tasks are complete and which tasks needs to be executed next.
- You will be provided a skill usage action, if the testStep matches the next action then respond with the skill usage.
- If responding with next action, ensure next action to be detailed and explicit about what action needs to be done. Provide all the information which can be extracted from the screenshot as a part of next action.
- Mark task as complete only when executed actions provided to you indicates that the task is done.
- Refer to the text and references available in the screenshot to create the next action.
- Do not take any extra actions which are not required for the execution of the task
- If there are no further actions required based on the task, then respond with task as done.
- Do not recommend actions which are not available in the screenshot
`;
    const screenshotUrl = (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, pageScreenshot);
    const messages = [
        { role: "system", content: systemPrompt },
        {
            role: "user",
            content: [
                { type: "text", text: taskPrompt },
                { type: "text", text: "Screenshot in normal mode 👇" },
                { type: "image_url", image_url: { url: screenshotUrl } },
            ],
        },
    ];
    // Skill schemas are offered only when skills are enabled and at least one is available.
    let skillSchemas = [];
    if (!disableSkills && skill_1.testCaseSkills.getAvailableSkills().length !== 0) {
        skillSchemas = actions.getMasterActionSchemas();
    }
    const nextTaskSchema = {
        type: "function",
        function: {
            name: "next_task",
            description: "take the next action base on the provided task",
            parameters: {
                type: "object",
                properties: {
                    reason: {
                        type: "string",
                        description: "explain how this action will help to complete the task. the reason should align with the task provided",
                    },
                    action: {
                        type: "string",
                        description: `explain the next action in natural language.
The next action should be as atomic as possible, precise and should contain enough details about the action to be performed.
E.g. each click, key press, input, assert should be a separate action.
Each action should take the task to completion, if not the action is invalid.`,
                    },
                    element_description: {
                        type: "string",
                        description: "The description of the element on which action needs to be taken, including its position, appearance, etc.",
                    },
                },
                required: ["reason", "action", "element_description"],
            },
        },
    };
    const tools = [nextTaskSchema, ...skillSchemas];
    promptSpan?.end({ output: { messages } });
    const client = llm ||
        new llm_1.LLM({
            provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
            defaultModel: options?.model || constants_1.DEFAULT_MODEL,
        });
    const completion = await client.createChatCompletion({
        messages,
        modelParameters: {
            ...constants_1.DEFAULT_MODEL_PARAMETERS,
            ...options?.modelParameters,
            // Listed last so caller-supplied parameters cannot override them.
            tool_choice: "required",
            temperature: 1,
        },
        trace: nextActionSpan,
        traceName: "master-agent-llm",
        // @ts-ignore
        tools,
    });
    const firstToolCall = completion?.tool_calls?.[0];
    nextActionSpan?.end({ output: firstToolCall });
    return firstToolCall;
}
|
|
312
|
-
exports.getNextActionV2 = getNextActionV2;
|
|
313
|
-
/**
 * Decides whether the next action can be executed through the hints flow.
 *
 * The hints flow is used only when:
 *  1. the next-action output carries a non-blank `elementAnnotation` that matches the
 *     `elementID` of one of the generated annotations, and
 *  2. the LLM, given the action as its task and the browsing action schemas as tools,
 *     answers with at least one fill, click or hover tool call.
 *
 * Any error is logged, reported on the trace, and treated as "do not use hints".
 *
 * @param {Object} params
 * @param {Object} [params.outputFromGetNextAction] - Carries `action` and `elementAnnotation`.
 * @param {Array} params.generatedAnnotations - Entries with an `elementID` field.
 * @param {Object} params.page - Page handed to PlaywrightActions.
 * @param {Object} params.llm - LLM client exposing `createChatCompletion`.
 * @param {Object} [params.trace] - Optional trace/span for events.
 * @returns {Promise<{shouldTriggerHintsFlow: boolean, hintsExecutionCompletion: (Object|undefined)}>}
 */
const triggerHintsFlowV2 = async ({ outputFromGetNextAction, generatedAnnotations, page, llm, trace, }) => {
    // Fresh "skip the hints flow" result for every negative outcome.
    const skipHints = () => ({
        shouldTriggerHintsFlow: false,
        hintsExecutionCompletion: undefined,
    });
    try {
        const annotationId = outputFromGetNextAction?.elementAnnotation;
        // The && chain short-circuits, so the annotation list is only consulted
        // for a non-empty, non-blank annotation id.
        const hasElementAnnotation = annotationId?.length &&
            annotationId?.trim()?.length &&
            generatedAnnotations.some((entry) => entry.elementID === annotationId);
        trace?.event({
            name: "has-element-annotation",
            output: {
                hasElementAnnotation,
                generatedAnnotations,
            },
        });
        if (!hasElementAnnotation) {
            return skipHints();
        }
        const browsingActions = new actions_1.PlaywrightActions(page);
        const systemPrompt = `You are a browser automation agent who is given a task to generate code for navigation and assertion. This task is your goal and you must achieve it.

You need to pick the next step to achieve the task. Remember that the goal must be achieved.

Based on the goal and available tool calls you need to pick the appropriate tool call.


Instructions:
- Take actions one at a time. Do not try to take multiple actions
- You can respond with multiple assertions in one shot
- Stick to the task provided to you and mark the task done once the task is complete
- Do not execute any action which is not mentioned in the task`;
        const completion = await llm.createChatCompletion({
            messages: [
                { role: "system", content: systemPrompt },
                {
                    role: "user",
                    content: [
                        {
                            type: "text",
                            text: `Task: ${outputFromGetNextAction?.action}`,
                        },
                    ],
                },
            ],
            tools: browsingActions.getBrowsingActionSchemas(),
            modelParameters: {
                temperature: 0.5,
                max_completion_tokens: 4000,
                tool_choice: "required",
            },
            trace,
        });
        // Only fill / click / hover tool calls make the hints flow eligible.
        const canTriggerHintsFlow = completion?.tool_calls?.some((call) => [
            fill_1.PLAYWRIGHT_FILL_ACTION_NAME,
            click_1.PLAYWRIGHT_CLICK_ACTION_NAME,
            hover_1.PLAYWRIGHT_HOVER_ACTION_NAME,
        ].includes(call.function.name));
        trace?.event({
            name: "can-trigger-hints-flow",
            output: {
                canTriggerHintsFlow,
            },
        });
        if (canTriggerHintsFlow) {
            return {
                shouldTriggerHintsFlow: true,
                hintsExecutionCompletion: completion,
            };
        }
        return skipHints();
    }
    catch (e) {
        console.error("Error in triggerHintsFlow", e);
        trace?.event({
            name: "trigger-hints-flow-error",
            output: {
                errorMessage: e?.message,
                errorStack: e?.stack,
            },
        });
        return skipHints();
    }
};
|
|
405
|
-
exports.triggerHintsFlowV2 = triggerHintsFlowV2;
|
|
406
|
-
// let screenshotIndex = 0; // keeping this for local debugging
|
|
407
|
-
// Annotates the page's clickable elements, screenshots the page with the overlay visible, then removes the overlay.
async function captureAnnotatedView(page) {
    await page.waitForTimeout(2000);
    const annotationKeys = await page.evaluate(() => {
        // @ts-ignore
        // eslint-disable-next-line no-undef
        window.annotationInstance = annotateClickableElements();
        const annotations = Object.entries(
        // @ts-ignore
        window.annotationInstance.annotations).map(([key, value]) => ({
            elementID: key, // Assign the key to elementID
            text: 
            //@ts-ignore
            value.node.innerText?.trim() ||
                //@ts-ignore
                value.node.placeholder?.trim() ||
                "NA",
        }));
        return annotations;
    });
    await page.waitForTimeout(2000);
    const annotationBuffer = await page.screenshot({
    //This is done to improve element annotation accuracy, anyways it doesn't annotate elements which are out of viewport
    // fullPage: true,
    // path: `screenshots/screenshot-${screenshotIndex++}.png`,
    });
    await page.evaluate(() => {
        console.log({
            // @ts-ignore
            disable: window?.annotationInstance?.disable,
        });
        if (
        // @ts-ignore
        window?.annotationInstance &&
            // @ts-ignore
            window?.annotationInstance?.destroy) {
            // @ts-ignore
            window?.annotationInstance?.destroy();
        }
    });
    return { annotationKeys, annotationBuffer };
}
/**
 * Drives the master agent (v2) until the runtime planner marks `task` as done, the
 * session is asked to stop, or MAX_ERROR_COUNT failures accumulate without a success.
 *
 * Each iteration asks the planner whether the task is done and which page to use, takes
 * a screenshot, asks getNextActionV2 for the next step, and executes it as a skill,
 * through the hints flow (when `options.useHints` is set and an annotated element
 * matches), or through the browsing agent.
 *
 * @param {Object} params
 * @param {string} params.task - Task to accomplish.
 * @param {Object} params.page - Page to drive; may be swapped for another page variable
 *   named by the planner.
 * @param {Object} params.testCase - Test case used to retrieve skills.
 * @param {Object} params.options - Reads `useHints`, `modelProvider`, `model`, `metadata`.
 * @param {Object} params.scopeVars - Passed through to PlaywrightActions.
 * @returns {Promise<{code: *, importPaths: *}>} Result of `actions.generateCode()`.
 */
async function createTestUsingMasterAgentV2({ task, page, testCase, options, scopeVars, }) {
    const useHints = options?.useHints || false;
    const logger = new logger_1.CustomLogger({ useReporter: false });
    const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
    const session = (0, session_1.getSessionDetails)();
    const testGenPage = new page_1.TestGenPage(page, (0, utils_1.getPageVarName)());
    console.log("Using master agent v2");
    // add timeout for the page to settle in
    await page.waitForTimeout(3000);
    const trace = llm_1.langfuseInstance?.trace({
        name: "test-generator",
        id: crypto.randomUUID(),
        version: session.version,
        metadata: {
            generationId: session.generationId,
            sessionId: session.sessionId,
            testUrl: session.testUrl,
        },
        tags: [
            options.metadata?.projectName,
            options.metadata?.environment,
        ].filter((s) => !!s),
    });
    if (trace) {
        void testgenUpdatesReporter.sendMessage(`Starting master agent. [view trace](${trace?.getTraceUrl()})`);
        logger.log(`Starting master agent: ${trace?.getTraceUrl()}`);
        void testgenUpdatesReporter.sendAgentTraceUrl(trace.getTraceUrl());
    }
    const llm = new llm_1.LLM({
        trace,
        provider: options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
        defaultModel: options.model || constants_1.DEFAULT_MODEL,
        providerApiKey: constants_1.MODEL_API_KEYS[options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
        // we will be using google model for larger context window, in such cases 1 million tokens is not enough
        maxTokens: options.modelProvider === "google" ? 3000000 : 1000000,
    });
    const skills = await (0, skills_retriever_1.getAppropriateSkills)({
        testCase,
        trace,
        //@ts-ignore
        options,
    });
    skill_1.testCaseSkills.updateSkills(skills);
    const actions = new actions_1.PlaywrightActions(testGenPage, scopeVars);
    await (0, utils_2.injectPwLocatorGenerator)(page);
    trace?.update({ input: { task } });
    let isGivenTaskDone = false;
    const masterAgentActions = [];
    let failedActions = [];
    let disableSkills = false;
    // Reports and returns true once the failure budget is used up.
    const hasExhaustedRetries = async () => {
        if (failedActions.length < MAX_ERROR_COUNT) {
            return false;
        }
        const error = "Agent is not able to figure out next action, marking task as done";
        logger.error(error);
        await testgenUpdatesReporter.sendMessage(error);
        return true;
    };
    while (!isGivenTaskDone) {
        if (await (0, session_1.shouldStopSession)()) {
            break;
        }
        const masterAgentSpan = trace?.span({
            name: "master-agent",
            input: {
                task,
                executedActions: masterAgentActions,
                failedActions,
            },
        });
        const plannerResp = await (0, run_time_planner_1.runtimePlanner)({
            trace: masterAgentSpan,
            task,
            conversation: [...masterAgentActions],
            pages: getPageVariables(actions.getStateVariables()),
            currentPage: (0, utils_1.getPageVarName)(),
        });
        isGivenTaskDone = plannerResp.isDone;
        if (isGivenTaskDone) {
            await testgenUpdatesReporter.sendMessage(`${plannerResp.reason} Marking the task as done.`);
            break;
        }
        if (actions.getStateVariables()[plannerResp.pageName]) {
            // update page for the master agent
            page = actions.getStateVariables()[plannerResp.pageName];
            // update page in actions
            testGenPage.updatePage({ page, name: plannerResp.pageName });
        }
        // inject scripts in the page
        await (0, utils_2.injectPwLocatorGenerator)(testGenPage.pwPageInstance);
        const buffer = await page.screenshot({
        //This is done to improve element annotation accuracy, anyways it doesn't annotate elements which are out of viewport
        // fullPage: true,
        // path: `screenshots/screenshot-${screenshotIndex++}.png`, // enable this and screenshotIndex var for local debugging
        });
        const testGenReporter = new reporter_1.TestGenUpdatesReporter();
        await testGenReporter.sendCurrentView(buffer);
        const pageScreenshot = buffer.toString("base64");
        let output;
        let generatedCodeSteps = [];
        if (await (0, session_1.shouldStopSession)()) {
            break;
        }
        const toolCall = await getNextActionV2({
            task,
            executedActions: masterAgentActions,
            failedActions,
            pageUrl: page.url(),
            trace: masterAgentSpan,
            llm,
            options,
            pageScreenshot,
            actions,
            disableSkills,
            useHints,
        });
        if (await (0, session_1.shouldStopSession)()) {
            break;
        }
        if (!toolCall) {
            // Count an empty model response as a failure. Otherwise nothing changes between
            // iterations and the loop could repeat the same request indefinitely.
            const emptyResponse = "No next action was returned by the model";
            logger.error(emptyResponse);
            failedActions.push(emptyResponse);
            masterAgentSpan?.end({
                output: {
                    error: true,
                    errorSummary: emptyResponse,
                },
            });
            if (await hasExhaustedRetries()) {
                break;
            }
            continue;
        }
        const masterAgentActionSpan = masterAgentSpan?.span({
            name: "master-agent-execute-action",
        });
        try {
            // Parsed once, inside the try, so malformed arguments count as a failed action.
            const args = JSON.parse(toolCall.function.arguments);
            output = {
                action: args.action || args.skill,
                reason: args.reason,
            };
            void testGenReporter.sendMessage(output.reason);
            logger.log(`Next Action: ${output.action}`);
            if (toolCall.function.name === skill_1.SKILL_USAGE) {
                const code = await actions.executeAction(toolCall.function.name, args, masterAgentActionSpan);
                if (code) {
                    generatedCodeSteps.push(code);
                }
            }
            else {
                let shouldTriggerHintsFlow;
                let hintsExecutionCompletion;
                let elementAnnotation;
                if (useHints) {
                    const { annotationKeys, annotationBuffer } = await captureAnnotatedView(page);
                    const annotationMapString = annotationKeys
                        ?.map((a) => `${a.elementID}:${a.text}`)
                        .join("\n");
                    elementAnnotation = await getElementAnnotation({
                        elementDescription: args.element_description,
                        annotations: annotationMapString,
                        annotatedScreenshot: annotationBuffer.toString("base64"),
                        trace: masterAgentActionSpan,
                        llm,
                        // Forwarded so the screenshot is encoded for the provider `llm` was
                        // created with, matching the getNextActionV2 call above.
                        options,
                    });
                    output.elementAnnotation = elementAnnotation;
                    console.log("Output: ", output);
                    await testGenReporter.sendCurrentView(annotationBuffer);
                    const triggerHintsFlowSpan = masterAgentActionSpan?.span({
                        name: "trigger-hints-flow",
                        input: {
                            outputFromGetNextAction: output,
                            generatedAnnotations: annotationKeys,
                        },
                    });
                    const result = await (0, exports.triggerHintsFlowV2)({
                        outputFromGetNextAction: output,
                        generatedAnnotations: annotationKeys,
                        page: testGenPage,
                        llm,
                        trace: triggerHintsFlowSpan,
                    });
                    shouldTriggerHintsFlow = result.shouldTriggerHintsFlow;
                    hintsExecutionCompletion = result.hintsExecutionCompletion;
                    triggerHintsFlowSpan?.end({
                        output: result,
                    });
                }
                if (shouldTriggerHintsFlow && hintsExecutionCompletion) {
                    const toolCalls = hintsExecutionCompletion?.tool_calls || [];
                    for (const currentToolCall of toolCalls) {
                        const code = await actions.executeAction(currentToolCall.function.name, {
                            ...JSON.parse(currentToolCall.function.arguments),
                            elementAnnotation,
                        }, masterAgentActionSpan);
                        if (code) {
                            generatedCodeSteps.push(code);
                        }
                    }
                    if (actions.isStuckInLoop()) {
                        throw new Error("Agent is not able to figure out next action when using hints");
                    }
                }
                else {
                    generatedCodeSteps = await (0, browsing_1.executeTaskUsingBrowsingAgent)({
                        trace: masterAgentActionSpan,
                        action: output.action,
                        logger,
                        page,
                        options,
                        llm,
                        actions,
                    });
                }
            }
            // resetting error count as there is a successful action
            failedActions = [];
            masterAgentActions.push(output.action);
            masterAgentActionSpan?.end({
                input: {
                    action: output.action,
                    reason: output.reason,
                    type: toolCall.function.name,
                },
                output: {
                    success: true,
                },
            });
            // enable skills after success
            disableSkills = false;
        }
        catch (e) {
            if (toolCall.function.name === skill_1.SKILL_USAGE) {
                logger.log("Disabling skill usage for next retry");
                disableSkills = true;
            }
            // `output` is still undefined when the tool call arguments could not be parsed.
            const failedAction = output
                ? output.action
                : `${toolCall.function.name} (unparseable arguments)`;
            masterAgentActionSpan?.end({
                input: {
                    action: failedAction,
                    reason: output?.reason,
                    type: toolCall.function.name,
                },
                output: {
                    error: true,
                    errorSummary: e.message,
                },
            });
            logger.error("Failed to run master agent's next task", JSON.stringify(toolCall, null, 2), e);
            failedActions.push(failedAction);
            if (await hasExhaustedRetries()) {
                break;
            }
        }
        trace?.update({ input: { task }, output: { output } });
        masterAgentSpan?.end({
            output: {
                action: output?.action,
                reason: output?.reason,
                code: generatedCodeSteps,
            },
        });
    }
    const { code, importPaths } = actions.generateCode();
    trace?.update({ input: { task }, output: { code } });
    await new reporter_1.TestGenUpdatesReporter().sendCurrentView(await page.screenshot());
    logger.success("Successfully generated code for the given task");
    if (trace) {
        await testgenUpdatesReporter.sendMessage(`Successfully generated code for the given task. \n View [trace](${trace.getTraceUrl()})`);
    }
    return {
        code,
        importPaths,
    };
}
|
|
708
|
-
exports.createTestUsingMasterAgentV2 = createTestUsingMasterAgentV2;
|