@empiricalrun/test-gen 0.81.0 → 0.81.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/package.json +7 -7
- package/tsconfig.tsbuildinfo +1 -1
- package/dist/agent/browsing/index.d.ts +0 -15
- package/dist/agent/browsing/index.d.ts.map +0 -1
- package/dist/agent/browsing/index.js +0 -60
- package/dist/agent/master/action-tool-calls.d.ts +0 -42
- package/dist/agent/master/action-tool-calls.d.ts.map +0 -1
- package/dist/agent/master/action-tool-calls.js +0 -87
- package/dist/agent/master/element-annotation.d.ts +0 -30
- package/dist/agent/master/element-annotation.d.ts.map +0 -1
- package/dist/agent/master/element-annotation.js +0 -175
- package/dist/agent/master/execute-browser-action.d.ts +0 -24
- package/dist/agent/master/execute-browser-action.d.ts.map +0 -1
- package/dist/agent/master/execute-browser-action.js +0 -121
- package/dist/agent/master/next-action.d.ts +0 -22
- package/dist/agent/master/next-action.d.ts.map +0 -1
- package/dist/agent/master/next-action.js +0 -97
- package/dist/agent/master/planner.d.ts +0 -15
- package/dist/agent/master/planner.d.ts.map +0 -1
- package/dist/agent/master/planner.js +0 -142
- package/dist/agent/master/run.d.ts +0 -17
- package/dist/agent/master/run.d.ts.map +0 -1
- package/dist/agent/master/run.js +0 -156
- package/dist/agent/master/scroller.d.ts +0 -15
- package/dist/agent/master/scroller.d.ts.map +0 -1
- package/dist/agent/master/scroller.js +0 -369
- package/dist/agent/master/with-hints.d.ts +0 -17
- package/dist/agent/master/with-hints.d.ts.map +0 -1
- package/dist/agent/master/with-hints.js +0 -103
- package/dist/agent/planner/run-time-planner.d.ts +0 -15
- package/dist/agent/planner/run-time-planner.d.ts.map +0 -1
- package/dist/agent/planner/run-time-planner.js +0 -98
- package/dist/agent/planner/run.d.ts +0 -7
- package/dist/agent/planner/run.d.ts.map +0 -1
- package/dist/agent/planner/run.js +0 -128
- package/dist/browser-injected-scripts/annotate-elements.js +0 -612
- package/dist/browser-injected-scripts/annotate-elements.spec.d.ts +0 -2
- package/dist/browser-injected-scripts/annotate-elements.spec.d.ts.map +0 -1
- package/dist/browser-injected-scripts/annotate-elements.spec.js +0 -202
- package/dist/browser-injected-scripts/annotate-elements.spec.ts +0 -327
- package/dist/generate-summary/frame-sampling.d.ts +0 -12
- package/dist/generate-summary/frame-sampling.d.ts.map +0 -1
- package/dist/generate-summary/frame-sampling.js +0 -72
- package/dist/generate-summary/generate-error-stack-summary.d.ts +0 -11
- package/dist/generate-summary/generate-error-stack-summary.d.ts.map +0 -1
- package/dist/generate-summary/generate-error-stack-summary.js +0 -41
- package/dist/generate-summary/generate-failed-step-screenshot-diff-summary.d.ts +0 -58
- package/dist/generate-summary/generate-failed-step-screenshot-diff-summary.d.ts.map +0 -1
- package/dist/generate-summary/generate-failed-step-screenshot-diff-summary.js +0 -460
- package/dist/generate-summary/generate-grouped-summary.d.ts +0 -18
- package/dist/generate-summary/generate-grouped-summary.d.ts.map +0 -1
- package/dist/generate-summary/generate-grouped-summary.js +0 -88
- package/dist/generate-summary/merge-summary.d.ts +0 -16
- package/dist/generate-summary/merge-summary.d.ts.map +0 -1
- package/dist/generate-summary/merge-summary.js +0 -43
- package/dist/generate-summary/pick-videos-for-comparison.d.ts +0 -9
- package/dist/generate-summary/pick-videos-for-comparison.d.ts.map +0 -1
- package/dist/generate-summary/pick-videos-for-comparison.js +0 -54
- package/dist/utils/env.d.ts +0 -2
- package/dist/utils/env.d.ts.map +0 -1
- package/dist/utils/env.js +0 -7
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.executeBrowserAction = executeBrowserAction;
|
|
4
|
-
const browsing_1 = require("../browsing");
|
|
5
|
-
const action_tool_calls_1 = require("./action-tool-calls");
|
|
6
|
-
const element_annotation_1 = require("./element-annotation");
|
|
7
|
-
const with_hints_1 = require("./with-hints");
|
|
8
|
-
async function executeBrowserAction({ page, nextAction, flags, actions, llm, trace, }) {
|
|
9
|
-
const args = JSON.parse(nextAction.toolCallArgs);
|
|
10
|
-
let generatedCodeSteps = [];
|
|
11
|
-
let output = {
|
|
12
|
-
action: args.action,
|
|
13
|
-
reason: args.reason,
|
|
14
|
-
};
|
|
15
|
-
let shouldTriggerHintsFlow;
|
|
16
|
-
let hintsExecutionCompletion;
|
|
17
|
-
let elementAnnotation;
|
|
18
|
-
const actionType = nextAction.actionType;
|
|
19
|
-
let preference = {
|
|
20
|
-
actionType: "all",
|
|
21
|
-
};
|
|
22
|
-
if (flags.useActionSpecificAnnotations && (0, action_tool_calls_1.isValidActionType)(actionType)) {
|
|
23
|
-
switch (actionType) {
|
|
24
|
-
case action_tool_calls_1.ActionType.FILL:
|
|
25
|
-
preference = {
|
|
26
|
-
actionType: action_tool_calls_1.ActionType.FILL,
|
|
27
|
-
};
|
|
28
|
-
break;
|
|
29
|
-
case action_tool_calls_1.ActionType.ASSERT_TEXT:
|
|
30
|
-
preference = {
|
|
31
|
-
actionType: action_tool_calls_1.ActionType.ASSERT_TEXT,
|
|
32
|
-
assertionText: args.assertion_text,
|
|
33
|
-
};
|
|
34
|
-
break;
|
|
35
|
-
default:
|
|
36
|
-
preference = {
|
|
37
|
-
actionType: "all",
|
|
38
|
-
};
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
let { annotationKeys, annotatedPageScreenshot } = await (0, element_annotation_1.getAnnotationKeys)({
|
|
42
|
-
page,
|
|
43
|
-
preference,
|
|
44
|
-
trace,
|
|
45
|
-
});
|
|
46
|
-
if (annotationKeys.length > 0) {
|
|
47
|
-
// TODO: this string has newline characters that makes it harder to read
|
|
48
|
-
const annotationMapString = annotationKeys
|
|
49
|
-
?.map((a) => `${a.elementID}: ${a.text}`)
|
|
50
|
-
.join("\n");
|
|
51
|
-
// Provides the annotations for all the element present on screen
|
|
52
|
-
// Also provides the annotation of element on which the action needs to be taken
|
|
53
|
-
elementAnnotation = await (0, element_annotation_1.getElementAnnotation)({
|
|
54
|
-
elementDescription: args.element_description,
|
|
55
|
-
annotations: annotationMapString,
|
|
56
|
-
annotatedScreenshot: annotatedPageScreenshot,
|
|
57
|
-
trace,
|
|
58
|
-
llm,
|
|
59
|
-
preference,
|
|
60
|
-
});
|
|
61
|
-
output.elementAnnotation = elementAnnotation;
|
|
62
|
-
console.log("Output: ", output);
|
|
63
|
-
const triggerHintsFlowSpan = trace?.span({
|
|
64
|
-
name: "trigger-hints-flow",
|
|
65
|
-
input: {
|
|
66
|
-
outputFromGetNextAction: output,
|
|
67
|
-
generatedAnnotations: annotationKeys,
|
|
68
|
-
},
|
|
69
|
-
});
|
|
70
|
-
// Provides the action whether its a click, fill etc.
|
|
71
|
-
const result = await (0, with_hints_1.triggerHintsFlow)({
|
|
72
|
-
outputFromGetNextAction: output,
|
|
73
|
-
generatedAnnotations: annotationKeys,
|
|
74
|
-
actions,
|
|
75
|
-
llm,
|
|
76
|
-
trace: triggerHintsFlowSpan,
|
|
77
|
-
});
|
|
78
|
-
shouldTriggerHintsFlow = result.shouldTriggerHintsFlow;
|
|
79
|
-
hintsExecutionCompletion = result.hintsExecutionCompletion;
|
|
80
|
-
triggerHintsFlowSpan?.end({
|
|
81
|
-
output: result,
|
|
82
|
-
});
|
|
83
|
-
}
|
|
84
|
-
if (shouldTriggerHintsFlow && hintsExecutionCompletion) {
|
|
85
|
-
const toolCalls = hintsExecutionCompletion?.tool_calls || [];
|
|
86
|
-
for (const i in toolCalls) {
|
|
87
|
-
const currentToolCall = toolCalls[i];
|
|
88
|
-
if (currentToolCall && "function" in currentToolCall) {
|
|
89
|
-
const code = await actions.executeAction(currentToolCall.function.name, {
|
|
90
|
-
...JSON.parse(currentToolCall.function.arguments),
|
|
91
|
-
elementAnnotation,
|
|
92
|
-
}, trace);
|
|
93
|
-
if (code) {
|
|
94
|
-
generatedCodeSteps.push(code);
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
if (actions.isStuckInLoop()) {
|
|
99
|
-
throw new Error("Agent is not able to figure out next action when using hints");
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
else {
|
|
103
|
-
const browserAction = await (0, browsing_1.executeTaskUsingBrowsingAgent)({
|
|
104
|
-
trace,
|
|
105
|
-
action: output.action,
|
|
106
|
-
page,
|
|
107
|
-
llm,
|
|
108
|
-
actions,
|
|
109
|
-
});
|
|
110
|
-
if (browserAction) {
|
|
111
|
-
output.action = browserAction.action;
|
|
112
|
-
if (browserAction.code) {
|
|
113
|
-
generatedCodeSteps.push(browserAction.code);
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
return {
|
|
118
|
-
generatedCodeSteps,
|
|
119
|
-
output,
|
|
120
|
-
};
|
|
121
|
-
}
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import { LLM, TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
import { TestGenConfigOptions } from "@empiricalrun/shared-types/agent-workflow";
|
|
3
|
-
import { Page } from "playwright";
|
|
4
|
-
import { PlaywrightActions } from "../../actions";
|
|
5
|
-
import { CustomLogger } from "../../bin/logger";
|
|
6
|
-
export declare function getNextAction({ page, pageScreenshot, task, executedActions, failedActions, trace, llm, options, actions, disableSkills, logger, }: {
|
|
7
|
-
page: Page;
|
|
8
|
-
pageScreenshot: string[];
|
|
9
|
-
task: string;
|
|
10
|
-
executedActions: string[];
|
|
11
|
-
failedActions: any[];
|
|
12
|
-
trace?: TraceClient;
|
|
13
|
-
llm?: LLM;
|
|
14
|
-
options?: Partial<TestGenConfigOptions>;
|
|
15
|
-
actions: PlaywrightActions;
|
|
16
|
-
disableSkills: boolean;
|
|
17
|
-
logger?: CustomLogger;
|
|
18
|
-
}): Promise<{
|
|
19
|
-
actionType: string;
|
|
20
|
-
toolCallArgs: string;
|
|
21
|
-
} | undefined>;
|
|
22
|
-
//# sourceMappingURL=next-action.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"next-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/next-action.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAUhD,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,cAAc,EACd,IAAI,EACJ,eAAe,EACf,aAAa,EACb,KAAK,EACL,GAAG,EACH,OAAO,EACP,OAAO,EACP,aAAa,EACb,MAAM,GACP,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACxC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CACP;IACE,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;CACtB,GACD,SAAS,CACZ,CAwFA"}
|
|
@@ -1,97 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.getNextAction = getNextAction;
|
|
4
|
-
const llm_1 = require("@empiricalrun/llm");
|
|
5
|
-
const constants_1 = require("../../constants");
|
|
6
|
-
const promptTemplate_0 = "{{#section \"system\"}}\nYou are a web automation tool which is given a task to complete. You need to execute the\ntask provided to you with the help of web page screenshot, a browser automation tool or skills\nwhich are learnt while writing previous tests. \n \nBrowser automation tool is a tool which uses Playwright and browser to execute action using\nnext_action tool call.\nSkill usage is a tool which helps to execute previously known pieces of code to achieve a task.\n\nYou will be provided with a screenshot of the webpage which you will use to extract the action\nthat needs to be taken.\n\nYou will be provided with previously executed actions by the browser automation tool and based\non the current screenshot and previously executed actions, you need to predict the next action\nto be taken.\n\nYou will also be provided with failed next action predicted by you, so that you can avoid\nsuggesting the same action again - which failed.\n\nThe next action should be as atomic as possible. e.g: scroll, click on an element, fill an input\nelement, assert, extract text from an element are valid next action as they are atomic in nature.\n\nYou also need to provide the action type using the list below, action type which is not present in\nthe list is invalid: {{validActionTypes}}\n\nYou will also be provided with skill usage tool which you can use to execute action. These skills\nare compound functions which helps you to complete your action.\n\nYou need to respond with either:\n- Next action to be taken by a browser automation tool \n- Use previously learnt skills in the form of tool call.\n \nYou need to make a decision whether the given skill can be reused if \"YES\" respond with the\nskill else respond with the next action.\n{{/section}}\n\n{{#section \"user\"}}\nTask:\n{{task}}\n\n-----\n\nPrevious executed actions:\n{{executedActions}}\n\n-----\n\nPrevious failed actions:\n{{failedActions}}\n\n-----\n\nYou are also provided with a page screenshot for you to decide the next action.\n\nCurrent page URL: {{pageUrl}}\n\nFollow the instructions before responding:\n- Divide the task into sub tasks\n- Using previously executed actions, identify tasks are complete and which tasks needs to be executed next.\n- You will be provided a skill usage action, if the testStep matches the next action then respond with the skill usage.\n- If responding with next action, ensure next action to be detailed and explicit about what action needs to be done. Provide all the information which can be extracted from the screenshot as a part of next action.\n- Mark task as complete only when executed actions provided to you indicates that the task is done.\n- Refer to the text and references available in the screenshot to create the next action.\n- Do not take any extra actions which are not required for the execution of the task\n- If there are no further actions required based on the task, then respond with task as done.\n- Do not recommend actions which are not available in the screenshot\n\nScreenshots:\n{{images pageScreenshots}}\n\n{{/section}}\n";
|
|
7
|
-
const action_tool_calls_1 = require("./action-tool-calls");
|
|
8
|
-
const scroller_1 = require("./scroller");
|
|
9
|
-
async function getNextAction({ page, pageScreenshot, task, executedActions, failedActions, trace, llm, options, actions, disableSkills, logger, }) {
|
|
10
|
-
const pageUrl = page.url();
|
|
11
|
-
const nextActionSpan = trace?.span({
|
|
12
|
-
name: "master-agent-next-action",
|
|
13
|
-
input: {
|
|
14
|
-
task,
|
|
15
|
-
executedActions,
|
|
16
|
-
failedActions,
|
|
17
|
-
pageUrl,
|
|
18
|
-
options,
|
|
19
|
-
pageScreenshot,
|
|
20
|
-
disableSkills,
|
|
21
|
-
},
|
|
22
|
-
});
|
|
23
|
-
const messages = (0, llm_1.compilePrompt)(promptTemplate_0, {
|
|
24
|
-
validActionTypes: Object.values(action_tool_calls_1.ActionType).join(", "),
|
|
25
|
-
task,
|
|
26
|
-
executedActions: executedActions.map((a) => a).join("\n"),
|
|
27
|
-
failedActions: failedActions.map((a) => a).join("\n"),
|
|
28
|
-
pageUrl,
|
|
29
|
-
pageScreenshots: pageScreenshot,
|
|
30
|
-
});
|
|
31
|
-
const tools = [...(0, action_tool_calls_1.getActionToolCalls)()];
|
|
32
|
-
llm =
|
|
33
|
-
llm ||
|
|
34
|
-
new llm_1.LLM({
|
|
35
|
-
provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
36
|
-
defaultModel: options?.model || constants_1.DEFAULT_MODEL,
|
|
37
|
-
});
|
|
38
|
-
const completion = await llm.createChatCompletion({
|
|
39
|
-
messages,
|
|
40
|
-
modelParameters: {
|
|
41
|
-
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
42
|
-
...options?.modelParameters,
|
|
43
|
-
tool_choice: "required",
|
|
44
|
-
temperature: 1,
|
|
45
|
-
},
|
|
46
|
-
trace: nextActionSpan,
|
|
47
|
-
traceName: "master-agent-llm",
|
|
48
|
-
// @ts-ignore
|
|
49
|
-
tools,
|
|
50
|
-
});
|
|
51
|
-
const toolCall = completion?.tool_calls?.[0];
|
|
52
|
-
nextActionSpan?.end({ output: toolCall });
|
|
53
|
-
if (toolCall && "function" in toolCall) {
|
|
54
|
-
const toolCallArgs = JSON.parse(toolCall.function.arguments);
|
|
55
|
-
const actionType = toolCall.function.name;
|
|
56
|
-
// If the action type is scroll, we need to scroll the page and get the reference to the frame in which the element is visible
|
|
57
|
-
// else we return the next action
|
|
58
|
-
// For scroll we have at max 2 retries
|
|
59
|
-
// If the element is not visible after 2 retries, we throw an error
|
|
60
|
-
if (actionType === "scroll" && toolCallArgs) {
|
|
61
|
-
let maxScrollRetries = 2;
|
|
62
|
-
while (maxScrollRetries--) {
|
|
63
|
-
const frames = await (0, scroller_1.scroller)({
|
|
64
|
-
elementDescription: toolCallArgs.element,
|
|
65
|
-
page,
|
|
66
|
-
trace: nextActionSpan,
|
|
67
|
-
logger,
|
|
68
|
-
});
|
|
69
|
-
if (frames.length > 0) {
|
|
70
|
-
return getNextAction({
|
|
71
|
-
task,
|
|
72
|
-
executedActions,
|
|
73
|
-
failedActions,
|
|
74
|
-
trace,
|
|
75
|
-
llm,
|
|
76
|
-
options,
|
|
77
|
-
pageScreenshot: frames.map((frame) => frame.frameScreenshot),
|
|
78
|
-
actions,
|
|
79
|
-
disableSkills,
|
|
80
|
-
page,
|
|
81
|
-
logger,
|
|
82
|
-
});
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
if (maxScrollRetries === -1) {
|
|
86
|
-
return {
|
|
87
|
-
actionType: action_tool_calls_1.ActionType.UNKNOWN,
|
|
88
|
-
toolCallArgs: "",
|
|
89
|
-
};
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
return {
|
|
93
|
-
actionType,
|
|
94
|
-
toolCallArgs: toolCall?.function.arguments,
|
|
95
|
-
};
|
|
96
|
-
}
|
|
97
|
-
}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
import { Page } from "playwright/test";
|
|
3
|
-
export declare function runtimePlannerWithScreenshot({ trace, task, conversation, pages, page, currentPage, }: {
|
|
4
|
-
trace?: TraceClient;
|
|
5
|
-
conversation: string[];
|
|
6
|
-
task: string;
|
|
7
|
-
pages?: Record<string, any>;
|
|
8
|
-
page: Page;
|
|
9
|
-
currentPage?: string;
|
|
10
|
-
}): Promise<{
|
|
11
|
-
pageName: string;
|
|
12
|
-
isDone: boolean;
|
|
13
|
-
reason: string;
|
|
14
|
-
}>;
|
|
15
|
-
//# sourceMappingURL=planner.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"planner.d.ts","sourceRoot":"","sources":["../../../src/agent/master/planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD,OAAO,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAC;AAIvC,wBAAsB,4BAA4B,CAAC,EACjD,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,IAAI,EACJ,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,IAAI,EAAE,IAAI,CAAC;IACX,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;cAsHoC,MAAM;YACV,OAAO;YACP,MAAM;GAgBtC"}
|
|
@@ -1,142 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.runtimePlannerWithScreenshot = runtimePlannerWithScreenshot;
|
|
4
|
-
const llm_1 = require("@empiricalrun/llm");
|
|
5
|
-
const vision_1 = require("@empiricalrun/llm/vision");
|
|
6
|
-
const constants_1 = require("../../constants");
|
|
7
|
-
async function runtimePlannerWithScreenshot({ trace, task, conversation, pages, page, currentPage, }) {
|
|
8
|
-
const buffer = await page.screenshot({
|
|
9
|
-
//This is done to improve element annotation accuracy, anyways it doesn't annotate elements which are out of viewport
|
|
10
|
-
// fullPage: true,
|
|
11
|
-
// path: `screenshots/screenshot-${screenshotIndex++}.png`, // enable this and screenshotIndex var for local debugging
|
|
12
|
-
});
|
|
13
|
-
const pageBuffer = buffer.toString("base64");
|
|
14
|
-
const runTimePlannerSpan = trace?.span({
|
|
15
|
-
name: "popup-verifier",
|
|
16
|
-
input: {
|
|
17
|
-
task,
|
|
18
|
-
conversation,
|
|
19
|
-
},
|
|
20
|
-
});
|
|
21
|
-
const llm = new llm_1.LLM({ provider: "openai" });
|
|
22
|
-
const prompt = [
|
|
23
|
-
{
|
|
24
|
-
role: "system",
|
|
25
|
-
content: `
|
|
26
|
-
Given a successfully executed actions that lists only the actions that were successfully executed and a task comprising multiple actions, your goal is to analyse the list and determine if the entire task is completed.
|
|
27
|
-
These actions are executed by AI agents using Playwright on a browser. These agents already have access to browser tabs to execute actions. The successfully executed actions on browser post browser has opened, is provided to you as successfully executed actions.
|
|
28
|
-
|
|
29
|
-
You are also given a screenshot of the current screen which you can also use to determine whether the entire task is completed or not.
|
|
30
|
-
|
|
31
|
-
If the task is not fully completed, identify which specific actions are missing and suggest next steps to complete the task. Assume that the conversation provided is entirely truthful and no additional actions were performed beyond those listed.
|
|
32
|
-
|
|
33
|
-
To fulfil your goal, follow these steps:
|
|
34
|
-
- Divide the task into individual actions.
|
|
35
|
-
- Compare each task action against the actions listed in the successfully executed actions list.
|
|
36
|
-
- Identify which actions have been executed and which have not.
|
|
37
|
-
- If all actions are executed, respond with the task as done.
|
|
38
|
-
- If any actions are missing, respond with the task as not done, listing all actions and specifying which are complete and which are missing.
|
|
39
|
-
- If provided with list of pages, based on the next pending action and previously executed action, identify the page on which next action needs to be taken
|
|
40
|
-
`,
|
|
41
|
-
},
|
|
42
|
-
{
|
|
43
|
-
role: "user",
|
|
44
|
-
content: [
|
|
45
|
-
{
|
|
46
|
-
type: "text",
|
|
47
|
-
text: `
|
|
48
|
-
Task: ${task}
|
|
49
|
-
|
|
50
|
-
----
|
|
51
|
-
|
|
52
|
-
Following are successfully executed actions:
|
|
53
|
-
${conversation.join("\n")}
|
|
54
|
-
|
|
55
|
-
----
|
|
56
|
-
|
|
57
|
-
Current page:
|
|
58
|
-
${currentPage}
|
|
59
|
-
`,
|
|
60
|
-
},
|
|
61
|
-
{
|
|
62
|
-
type: "text",
|
|
63
|
-
text: "Page Screenshot",
|
|
64
|
-
},
|
|
65
|
-
{
|
|
66
|
-
type: "image_url",
|
|
67
|
-
image_url: {
|
|
68
|
-
url: (0, vision_1.imageFormatForProvider)(constants_1.DEFAULT_MODEL_PROVIDER, pageBuffer),
|
|
69
|
-
},
|
|
70
|
-
},
|
|
71
|
-
],
|
|
72
|
-
},
|
|
73
|
-
];
|
|
74
|
-
const response = await llm.createChatCompletion({
|
|
75
|
-
trace: runTimePlannerSpan,
|
|
76
|
-
traceName: "runtime-planner-llm",
|
|
77
|
-
model: "gpt-4o",
|
|
78
|
-
messages: prompt,
|
|
79
|
-
tools: [
|
|
80
|
-
{
|
|
81
|
-
type: "function",
|
|
82
|
-
function: {
|
|
83
|
-
name: "task_done",
|
|
84
|
-
description: "end the task by calling this method",
|
|
85
|
-
parameters: {
|
|
86
|
-
type: "object",
|
|
87
|
-
properties: {
|
|
88
|
-
actions: {
|
|
89
|
-
type: "string",
|
|
90
|
-
description: "actions extracted from task",
|
|
91
|
-
},
|
|
92
|
-
successful_actions: {
|
|
93
|
-
type: "string",
|
|
94
|
-
description: "successful actions mentioned in the conversation",
|
|
95
|
-
},
|
|
96
|
-
reason: {
|
|
97
|
-
type: "string",
|
|
98
|
-
description: "reasoning for identification of task status",
|
|
99
|
-
},
|
|
100
|
-
isDone: {
|
|
101
|
-
type: "boolean",
|
|
102
|
-
description: "whether the task is done",
|
|
103
|
-
},
|
|
104
|
-
pageName: {
|
|
105
|
-
type: "string",
|
|
106
|
-
enum: pages ? Object.keys(pages) : [],
|
|
107
|
-
description: "page name for the next action.",
|
|
108
|
-
},
|
|
109
|
-
},
|
|
110
|
-
required: ["isDone", "reason", "pageName"],
|
|
111
|
-
},
|
|
112
|
-
},
|
|
113
|
-
},
|
|
114
|
-
],
|
|
115
|
-
modelParameters: {
|
|
116
|
-
tool_choice: "required",
|
|
117
|
-
temperature: 0.5,
|
|
118
|
-
},
|
|
119
|
-
});
|
|
120
|
-
const toolCallResp = (response?.tool_calls || [])[0];
|
|
121
|
-
if (toolCallResp && "function" in toolCallResp) {
|
|
122
|
-
const toolCall = JSON.parse(toolCallResp.function.arguments);
|
|
123
|
-
const output = {
|
|
124
|
-
pageName: toolCall.pageName,
|
|
125
|
-
isDone: toolCall.isDone,
|
|
126
|
-
reason: toolCall.reason,
|
|
127
|
-
};
|
|
128
|
-
runTimePlannerSpan?.end({
|
|
129
|
-
output,
|
|
130
|
-
});
|
|
131
|
-
return output;
|
|
132
|
-
}
|
|
133
|
-
const output = {
|
|
134
|
-
pageName: "",
|
|
135
|
-
isDone: false,
|
|
136
|
-
reason: "LLM failed to generate a valid response",
|
|
137
|
-
};
|
|
138
|
-
runTimePlannerSpan?.end({
|
|
139
|
-
output,
|
|
140
|
-
});
|
|
141
|
-
return output;
|
|
142
|
-
}
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
import type { TestGenConfigOptions } from "@empiricalrun/shared-types/agent-workflow";
|
|
2
|
-
import type { TestCaseWithSuitesAsArray } from "@empiricalrun/shared-types/api/test-cases";
|
|
3
|
-
import { Page } from "playwright";
|
|
4
|
-
import { ScopeVars } from "../../types";
|
|
5
|
-
export declare const IS_ALLOWED_TO_USE_SKILLS = false;
|
|
6
|
-
export declare function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }: {
|
|
7
|
-
task: string;
|
|
8
|
-
page: Page;
|
|
9
|
-
testCase?: TestCaseWithSuitesAsArray;
|
|
10
|
-
specPath?: string;
|
|
11
|
-
options: Partial<TestGenConfigOptions>;
|
|
12
|
-
scopeVars?: ScopeVars;
|
|
13
|
-
}): Promise<{
|
|
14
|
-
code: string;
|
|
15
|
-
importPaths: string[];
|
|
16
|
-
}>;
|
|
17
|
-
//# sourceMappingURL=run.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACtF,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,2CAA2C,CAAC;AAC3F,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAWxC,eAAO,MAAM,wBAAwB,QAAQ,CAAC;AAqB9C,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,yBAAyB,CAAC;IACrC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACvC,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAwJA"}
|
package/dist/agent/master/run.js
DELETED
|
@@ -1,156 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.IS_ALLOWED_TO_USE_SKILLS = void 0;
|
|
4
|
-
exports.createTestUsingMasterAgent = createTestUsingMasterAgent;
|
|
5
|
-
const llm_1 = require("@empiricalrun/llm");
|
|
6
|
-
const actions_1 = require("../../actions");
|
|
7
|
-
const utils_1 = require("../../actions/utils");
|
|
8
|
-
const logger_1 = require("../../bin/logger");
|
|
9
|
-
const constants_1 = require("../../constants");
|
|
10
|
-
const errors_1 = require("../../errors");
|
|
11
|
-
const page_1 = require("../../page");
|
|
12
|
-
const utils_2 = require("../browsing/utils");
|
|
13
|
-
const run_1 = require("../planner/run");
|
|
14
|
-
const run_time_planner_1 = require("../planner/run-time-planner");
|
|
15
|
-
const action_tool_calls_1 = require("./action-tool-calls");
|
|
16
|
-
const execute_browser_action_1 = require("./execute-browser-action");
|
|
17
|
-
const next_action_1 = require("./next-action");
|
|
18
|
-
const MAX_ERROR_COUNT = 2;
|
|
19
|
-
// Disabling skills as we're seeing false usage with chat agent
|
|
20
|
-
exports.IS_ALLOWED_TO_USE_SKILLS = false;
|
|
21
|
-
function getPageVariables(stateVariables) {
|
|
22
|
-
const keys = Object.keys(stateVariables);
|
|
23
|
-
// This checks for whether page.url() exists, which is true for all pages
|
|
24
|
-
// created by playwright actions.
|
|
25
|
-
const pageVariables = keys.filter((key) => typeof stateVariables[key] === "object" &&
|
|
26
|
-
typeof stateVariables[key]?.url === "function");
|
|
27
|
-
const pages = pageVariables.reduce((acc, key) => {
|
|
28
|
-
acc[key] = stateVariables[key];
|
|
29
|
-
return acc;
|
|
30
|
-
}, {});
|
|
31
|
-
return pages;
|
|
32
|
-
}
|
|
33
|
-
async function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }) {
|
|
34
|
-
const useActionSpecificAnnotations = options?.useActionSpecificAnnotations || false;
|
|
35
|
-
const usePlannerInMaster = options?.usePlannerInMaster || false;
|
|
36
|
-
const logger = new logger_1.CustomLogger({ useReporter: false });
|
|
37
|
-
const testGenPage = new page_1.TestGenPage(page, (0, utils_1.getPageVarName)());
|
|
38
|
-
const llm = new llm_1.LLM({
|
|
39
|
-
provider: options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
40
|
-
defaultModel: options.model || constants_1.DEFAULT_MODEL,
|
|
41
|
-
providerApiKey: constants_1.MODEL_API_KEYS[options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
|
|
42
|
-
// we will be using google model for larger context window, in such cases 1 million tokens is not enough
|
|
43
|
-
maxTokens: options.modelProvider === "google" ? 3_000_000 : 1_000_000,
|
|
44
|
-
});
|
|
45
|
-
if (usePlannerInMaster && testCase && specPath) {
|
|
46
|
-
logger.log(`Planner is working on task: ${task}`);
|
|
47
|
-
const plan = await (0, run_1.planTask)({
|
|
48
|
-
task,
|
|
49
|
-
specPath,
|
|
50
|
-
});
|
|
51
|
-
logger.log(`Here is the plan:\n${plan}`);
|
|
52
|
-
// Will assume this is the task hereon
|
|
53
|
-
task = plan;
|
|
54
|
-
}
|
|
55
|
-
const actions = new actions_1.PlaywrightActions(testGenPage, scopeVars);
|
|
56
|
-
await (0, utils_2.injectPwLocatorGenerator)(page);
|
|
57
|
-
let isGivenTaskDone = false;
|
|
58
|
-
const masterAgentActions = [];
|
|
59
|
-
let failedActions = [];
|
|
60
|
-
let disableSkills = false;
|
|
61
|
-
// Run the loop until task is done or we have reached max retry limit
|
|
62
|
-
while (!isGivenTaskDone) {
|
|
63
|
-
const plannerResp = await (0, run_time_planner_1.runtimePlanner)({
|
|
64
|
-
task,
|
|
65
|
-
successfulActions: [...masterAgentActions],
|
|
66
|
-
pages: getPageVariables(actions.getStateVariables()),
|
|
67
|
-
currentPage: testGenPage,
|
|
68
|
-
});
|
|
69
|
-
isGivenTaskDone = plannerResp.isDone;
|
|
70
|
-
if (isGivenTaskDone) {
|
|
71
|
-
break;
|
|
72
|
-
}
|
|
73
|
-
if (actions.getStateVariables()[plannerResp.pageName]) {
|
|
74
|
-
// update page for the master agent
|
|
75
|
-
page = actions.getStateVariables()[plannerResp.pageName];
|
|
76
|
-
// update page in actions
|
|
77
|
-
testGenPage.updatePage({ page, name: plannerResp.pageName });
|
|
78
|
-
}
|
|
79
|
-
// inject scripts in the page
|
|
80
|
-
await (0, utils_2.injectPwLocatorGenerator)(testGenPage.pwPageInstance);
|
|
81
|
-
const buffer = await page.screenshot();
|
|
82
|
-
const pageScreenshot = buffer.toString("base64");
|
|
83
|
-
let output;
|
|
84
|
-
let generatedCodeSteps = [];
|
|
85
|
-
// Provides next action that needs to be taken
|
|
86
|
-
const nextAction = await (0, next_action_1.getNextAction)({
|
|
87
|
-
page,
|
|
88
|
-
pageScreenshot: [pageScreenshot],
|
|
89
|
-
task,
|
|
90
|
-
executedActions: masterAgentActions,
|
|
91
|
-
failedActions,
|
|
92
|
-
llm,
|
|
93
|
-
options,
|
|
94
|
-
actions,
|
|
95
|
-
disableSkills,
|
|
96
|
-
logger,
|
|
97
|
-
});
|
|
98
|
-
if (nextAction) {
|
|
99
|
-
if (nextAction.actionType === action_tool_calls_1.ActionType.UNKNOWN) {
|
|
100
|
-
logger.error("Agent is not able to figure out next action since element is not visible on screen.");
|
|
101
|
-
break;
|
|
102
|
-
}
|
|
103
|
-
const args = JSON.parse(nextAction.toolCallArgs);
|
|
104
|
-
output = {
|
|
105
|
-
action: args.action || args.skill,
|
|
106
|
-
reason: args.reason,
|
|
107
|
-
};
|
|
108
|
-
try {
|
|
109
|
-
logger.log(`Next Action: ${output.action}`);
|
|
110
|
-
switch (nextAction.actionType) {
|
|
111
|
-
case action_tool_calls_1.ActionType.OBSERVATION: {
|
|
112
|
-
output.action = args.observation;
|
|
113
|
-
logger.log("Observation: ", output.action);
|
|
114
|
-
break;
|
|
115
|
-
}
|
|
116
|
-
default: {
|
|
117
|
-
const result = await (0, execute_browser_action_1.executeBrowserAction)({
|
|
118
|
-
page,
|
|
119
|
-
nextAction,
|
|
120
|
-
flags: {
|
|
121
|
-
useActionSpecificAnnotations,
|
|
122
|
-
},
|
|
123
|
-
actions,
|
|
124
|
-
llm,
|
|
125
|
-
});
|
|
126
|
-
const { generatedCodeSteps: codeFromExecuteAction, output: outputFromExecuteAction, } = result;
|
|
127
|
-
generatedCodeSteps.push(...codeFromExecuteAction);
|
|
128
|
-
output = outputFromExecuteAction;
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
// resetting error count as there is a successful action
|
|
132
|
-
failedActions = [];
|
|
133
|
-
masterAgentActions.push(output.action);
|
|
134
|
-
// enable skills after success
|
|
135
|
-
disableSkills = false;
|
|
136
|
-
}
|
|
137
|
-
catch (e) {
|
|
138
|
-
if (!(e instanceof errors_1.HumanApprovalDenied)) {
|
|
139
|
-
logger.error("Failed to run master agent's next task", JSON.stringify(nextAction, null, 2), e);
|
|
140
|
-
failedActions.push(output.action);
|
|
141
|
-
if (failedActions.length >= MAX_ERROR_COUNT) {
|
|
142
|
-
const error = "Agent is not able to figure out next action, marking task as done";
|
|
143
|
-
logger.error(error);
|
|
144
|
-
break;
|
|
145
|
-
}
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
}
|
|
150
|
-
const { code, importPaths } = actions.generateCode();
|
|
151
|
-
logger.success("Successfully generated code for the given task");
|
|
152
|
-
return {
|
|
153
|
-
code,
|
|
154
|
-
importPaths,
|
|
155
|
-
};
|
|
156
|
-
}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
import type { Page } from "@playwright/test";
|
|
3
|
-
import { CustomLogger } from "../../bin/logger";
|
|
4
|
-
export type FrameReference = {
|
|
5
|
-
scrollPosition: number;
|
|
6
|
-
frameScreenshot: string;
|
|
7
|
-
};
|
|
8
|
-
export declare function scroller({ elementDescription, page, trace, frameReference, logger, }: {
|
|
9
|
-
elementDescription: string;
|
|
10
|
-
page: Page;
|
|
11
|
-
trace?: TraceClient;
|
|
12
|
-
frameReference?: FrameReference;
|
|
13
|
-
logger?: CustomLogger;
|
|
14
|
-
}): Promise<FrameReference[]>;
|
|
15
|
-
//# sourceMappingURL=scroller.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"scroller.d.ts","sourceRoot":"","sources":["../../../src/agent/master/scroller.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErE,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAG7C,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAYhD,MAAM,MAAM,cAAc,GAAG;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAoZF,wBAAsB,QAAQ,CAAC,EAC7B,kBAAkB,EAClB,IAAI,EACJ,KAAK,EACL,cAAc,EACd,MAAM,GACP,EAAE;IACD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,IAAI,EAAE,IAAI,CAAC;IACX,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CA6D5B"}
|