@empiricalrun/test-gen 0.46.7 → 0.46.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/dist/actions/skill.d.ts.map +1 -1
- package/dist/actions/skill.js +3 -4
- package/dist/agent/browsing/index.d.ts +1 -8
- package/dist/agent/browsing/index.d.ts.map +1 -1
- package/dist/agent/browsing/index.js +2 -4
- package/dist/agent/codegen/skills-retriever.d.ts.map +1 -1
- package/dist/agent/codegen/skills-retriever.js +7 -34
- package/dist/agent/codegen/utils.d.ts.map +1 -1
- package/dist/agent/codegen/utils.js +20 -9
- package/dist/agent/master/action-tool-calls.d.ts +3 -1
- package/dist/agent/master/action-tool-calls.d.ts.map +1 -1
- package/dist/agent/master/action-tool-calls.js +41 -38
- package/dist/agent/master/browser-tests/fixtures.d.ts +9 -0
- package/dist/agent/master/browser-tests/fixtures.d.ts.map +1 -0
- package/dist/agent/master/browser-tests/fixtures.js +33 -0
- package/dist/agent/master/browser-tests/index.spec.js +42 -50
- package/dist/agent/master/browser-tests/skills.spec.d.ts +2 -0
- package/dist/agent/master/browser-tests/skills.spec.d.ts.map +1 -0
- package/dist/agent/master/browser-tests/skills.spec.js +107 -0
- package/dist/agent/master/element-annotation.d.ts +3 -4
- package/dist/agent/master/element-annotation.d.ts.map +1 -1
- package/dist/agent/master/element-annotation.js +2 -2
- package/dist/agent/master/execute-browser-action.d.ts +19 -0
- package/dist/agent/master/execute-browser-action.d.ts.map +1 -0
- package/dist/agent/master/execute-browser-action.js +123 -0
- package/dist/agent/master/execute-skill-action.d.ts +11 -0
- package/dist/agent/master/execute-skill-action.d.ts.map +1 -0
- package/dist/agent/master/execute-skill-action.js +25 -0
- package/dist/agent/master/next-action.d.ts +5 -8
- package/dist/agent/master/next-action.d.ts.map +1 -1
- package/dist/agent/master/next-action.js +11 -91
- package/dist/agent/master/run.d.ts +2 -3
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +34 -131
- package/dist/agent/master/scroller.d.ts +1 -1
- package/dist/agent/master/scroller.d.ts.map +1 -1
- package/dist/agent/master/scroller.js +0 -1
- package/dist/evals/master-agent.evals.d.ts.map +1 -1
- package/dist/evals/master-agent.evals.js +1 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +0 -5
- package/dist/page/index.d.ts +1 -1
- package/dist/page/index.d.ts.map +1 -1
- package/dist/utils/env.d.ts.map +1 -1
- package/dist/utils/env.js +3 -1
- package/package.json +1 -1
- package/playwright.config.ts +1 -1
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const fs_1 = __importDefault(require("fs"));
|
|
7
|
+
const run_1 = require("../run");
|
|
8
|
+
const fixtures_1 = require("./fixtures");
|
|
9
|
+
fixtures_1.test.describe.configure({ mode: "default" });
|
|
10
|
+
/**
 * Per-test setup: creates a throwaway `pages/` directory in the current
 * working directory and writes a page-object file (`pages/blog.ts`) exposing
 * two helper functions that the tests below expect the agent to reuse.
 */
fixtures_1.test.beforeEach(async () => {
    // `recursive: true` makes directory creation idempotent, so a `pages/`
    // folder left behind by an aborted run does not fail setup with EEXIST.
    fs_1.default.mkdirSync("pages", { recursive: true });
    const pomContent = `
import type { Page } from "@playwright/test";

export async function subscribeToBlog({ page, email }: { page: Page; email: string }) {
  await page.getByPlaceholder("Enter your email").fill(email);
  await page.getByRole("button", { name: "Subscribe" }).click();
}

export async function extractTitleForPost({ page, nth }: { page: Page; nth: number }) {
  // nth is zero-indexed
  const title = await page.locator("h2").nth(nth).textContent();
  return title;
}
`;
    fs_1.default.writeFileSync("pages/blog.ts", pomContent);
});
|
|
29
|
+
/**
 * Per-test teardown: removes the `pages/` directory from the current working
 * directory.
 */
fixtures_1.test.afterEach(async () => {
    // `force: true` keeps teardown from throwing ENOENT when the directory was
    // never created (e.g. setup failed), which would hide the original error.
    fs_1.default.rmSync("pages", { recursive: true, force: true });
});
|
|
33
|
+
// Runs the master agent on the blog page and checks both the page outcome
// (subscription confirmation is visible) and the generated code (imports the
// page-object file and calls subscribeToBlog with the page and the email).
(0, fixtures_1.test)("use skills to subscribe to blog", async ({ page, server }) => {
    const taskText = `subscribe as user@example.com`;
    const blogUrl = `${server.baseURL}/blog-page.html`;
    await page.goto(blogUrl);
    const agentInput = {
        task: taskText,
        page,
        testCase: {
            id: 1,
            name: "subscribe to blog",
            steps: [taskText],
            filePath: "blog.spec.ts",
            suites: [],
        },
        options: {},
    };
    const generated = await (0, run_1.createTestUsingMasterAgent)(agentInput);
    // The skill must actually have been executed against the page
    const confirmation = page.getByText("Thanks for subscribing");
    await (0, fixtures_1.expect)(confirmation).toBeVisible();
    // The generated code must reference the skill
    console.log(generated);
    (0, fixtures_1.expect)(generated.importPaths.length).toBeGreaterThan(0);
    (0, fixtures_1.expect)(generated.importPaths[0]).toBe("pages/blog.ts");
    (0, fixtures_1.expect)(generated.code.length).toBeGreaterThan(0);
    (0, fixtures_1.expect)(generated.code).toMatch(/await.*subscribeToBlog.*page.*email.*user@example\.com/);
});
|
|
56
|
+
// Runs the master agent with an extraction task and checks that the generated
// code imports the page-object file and assigns the result of
// extractTitleForPost (called with nth: 1) to a const.
(0, fixtures_1.test)("use skills to extract blog post title", async ({ page, server }) => {
    // TODO: Extend this to click on "read more" and verify generated code
    // uses the state variable created by the skill
    const taskText = `extract title of 2nd blog post`;
    const blogUrl = `${server.baseURL}/blog-page.html`;
    await page.goto(blogUrl);
    const agentInput = {
        task: taskText,
        page,
        testCase: {
            id: 1,
            name: "extract blog post title",
            steps: [taskText],
            filePath: "blog.spec.ts",
            suites: [],
        },
        options: {},
    };
    const generated = await (0, run_1.createTestUsingMasterAgent)(agentInput);
    // The generated code must reference the skill
    console.log(generated);
    (0, fixtures_1.expect)(generated.importPaths.length).toBeGreaterThan(0);
    (0, fixtures_1.expect)(generated.importPaths[0]).toBe("pages/blog.ts");
    (0, fixtures_1.expect)(generated.code.length).toBeGreaterThan(0);
    (0, fixtures_1.expect)(generated.code).toMatch(/^const.*=.*await extractTitleForPost.*page.*nth:.*1/);
});
|
|
79
|
+
// Opens a second tab on the blog page, exposes both pages to the agent through
// scopeVars, and checks that the skill ran against the blog tab and that the
// generated code passes `blogPage` to subscribeToBlog.
(0, fixtures_1.test)("use skills to subscribe with multiple pages", async ({ page, server, }) => {
    const taskText = `subscribe as user@example.com on the blog page`;
    await page.goto(`${server.baseURL}/icons-navbar.html`);
    const blogTab = await page.context().newPage();
    await blogTab.goto(`${server.baseURL}/blog-page.html`);
    const agentInput = {
        task: taskText,
        page,
        testCase: {
            id: 1,
            name: "subscribe to blog",
            steps: [taskText],
            filePath: "blog.spec.ts",
            suites: [],
        },
        scopeVars: {
            page,
            // The key must stay `blogPage`: it is the variable name the
            // generated code is asserted to use below.
            blogPage: blogTab,
        },
        options: {},
    };
    const generated = await (0, run_1.createTestUsingMasterAgent)(agentInput);
    // The skill must have been executed on the second tab
    const confirmation = blogTab.getByText("Thanks for subscribing");
    await (0, fixtures_1.expect)(confirmation).toBeVisible();
    // The generated code must reference the skill
    console.log(generated);
    (0, fixtures_1.expect)(generated.importPaths.length).toBeGreaterThan(0);
    (0, fixtures_1.expect)(generated.importPaths[0]).toBe("pages/blog.ts");
    (0, fixtures_1.expect)(generated.code.length).toBeGreaterThan(0);
    (0, fixtures_1.expect)(generated.code).toMatch(/await.*subscribeToBlog.*blogPage.*email.*user@example\.com/);
});
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { LLM, TraceClient } from "@empiricalrun/llm";
|
|
2
|
+
import { TestGenConfigOptions } from "@empiricalrun/shared-types";
|
|
2
3
|
import { Page } from "playwright";
|
|
3
|
-
import { BrowsingAgentOptions } from "../browsing";
|
|
4
4
|
import { ActionType } from "./action-tool-calls";
|
|
5
5
|
export declare function getElementAnnotation({ elementDescription, annotations, annotatedScreenshot, trace, llm, options, preference, }: {
|
|
6
6
|
elementDescription: string;
|
|
@@ -8,17 +8,16 @@ export declare function getElementAnnotation({ elementDescription, annotations,
|
|
|
8
8
|
annotatedScreenshot: string;
|
|
9
9
|
trace?: TraceClient;
|
|
10
10
|
llm?: LLM;
|
|
11
|
-
options?:
|
|
11
|
+
options?: TestGenConfigOptions;
|
|
12
12
|
preference: AnnotationPreference;
|
|
13
13
|
}): Promise<string | undefined>;
|
|
14
14
|
export type AnnotationPreference = {
|
|
15
15
|
actionType: "all" | ActionType.FILL | ActionType.ASSERT_TEXT | ActionType.SCROLL;
|
|
16
16
|
assertionText?: string | undefined;
|
|
17
17
|
};
|
|
18
|
-
export declare function getAnnotationKeys({ page, preference,
|
|
18
|
+
export declare function getAnnotationKeys({ page, preference, trace, }: {
|
|
19
19
|
page: Page;
|
|
20
20
|
preference: AnnotationPreference;
|
|
21
|
-
options: BrowsingAgentOptions;
|
|
22
21
|
trace?: TraceClient;
|
|
23
22
|
}): Promise<{
|
|
24
23
|
annotationKeys: {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"element-annotation.d.ts","sourceRoot":"","sources":["../../../src/agent/master/element-annotation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"element-annotation.d.ts","sourceRoot":"","sources":["../../../src/agent/master/element-annotation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAClE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AASlC,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AA2DjD,wBAAsB,oBAAoB,CAAC,EACzC,kBAAkB,EAClB,WAAW,EACX,mBAAmB,EACnB,KAAK,EACL,GAAG,EACH,OAAO,EACP,UAAU,GACX,EAAE;IACD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,UAAU,EAAE,oBAAoB,CAAC;CAClC,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CA8C9B;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,UAAU,EACN,KAAK,GACL,UAAU,CAAC,IAAI,GACf,UAAU,CAAC,WAAW,GACtB,UAAU,CAAC,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CACpC,CAAC;AAgBF,wBAAsB,iBAAiB,CAAC,EACtC,IAAI,EACJ,UAAU,EACV,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE,oBAAoB,CAAC;IACjC,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,cAAc,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IACtD,gBAAgB,EAAE,MAAM,CAAC;IACzB,uBAAuB,EAAE,MAAM,CAAC;CACjC,CAAC,CAoFD"}
|
|
@@ -100,7 +100,7 @@ async function getElementAnnotation({ elementDescription, annotations, annotated
|
|
|
100
100
|
return;
|
|
101
101
|
}
|
|
102
102
|
exports.getElementAnnotation = getElementAnnotation;
|
|
103
|
-
async function getAnnotationKeys({ page, preference,
|
|
103
|
+
async function getAnnotationKeys({ page, preference, trace, }) {
|
|
104
104
|
const annotatedElements = await page.evaluate(({ preference, options }) => {
|
|
105
105
|
// @ts-ignore
|
|
106
106
|
// eslint-disable-next-line no-undef
|
|
@@ -139,7 +139,7 @@ async function getAnnotationKeys({ page, preference, options, trace, }) {
|
|
|
139
139
|
});
|
|
140
140
|
const fullPageHTML = document.documentElement.outerHTML;
|
|
141
141
|
return { annotations, fullPageHTML };
|
|
142
|
-
}, { preference, options });
|
|
142
|
+
}, { preference, options: {} });
|
|
143
143
|
const annotationKeys = await enrichAnnotations(annotatedElements.annotations, annotatedElements.fullPageHTML, trace);
|
|
144
144
|
const annotationBuffer = await page.screenshot({
|
|
145
145
|
// path: `screenshots/screenshot-${screenshotIndex++}.png`,
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { LLM, TraceClient } from "@empiricalrun/llm";
|
|
2
|
+
import type { Page } from "@playwright/test";
|
|
3
|
+
import { PlaywrightActions } from "../../actions";
|
|
4
|
+
/**
 * Executes a single browser action chosen by the master agent and returns the
 * code generated while doing so.
 *
 * @param page - Page the action is performed on.
 * @param nextAction - Action type plus its tool-call arguments as a JSON string.
 * @param flags - `useActionSpecificAnnotations` toggles narrowing of element
 *   annotations based on the action type.
 * @param actions - Action executor used to run tool calls.
 * @param trace - Optional trace client; may be `undefined`.
 * @param llm - LLM client forwarded to the annotation / hints / browsing steps.
 * @returns An object whose `generatedCodeSteps` lists the generated code strings.
 */
export declare function executeBrowserAction({ page, nextAction, flags, actions, llm, trace, }: {
|
|
5
|
+
page: Page;
|
|
6
|
+
nextAction: {
|
|
7
|
+
actionType: string;
|
|
8
|
+
toolCallArgs: string;
|
|
9
|
+
};
|
|
10
|
+
flags: {
|
|
11
|
+
useActionSpecificAnnotations: boolean;
|
|
12
|
+
};
|
|
13
|
+
actions: PlaywrightActions;
|
|
14
|
+
trace: TraceClient | undefined;
|
|
15
|
+
llm: LLM;
|
|
16
|
+
}): Promise<{
|
|
17
|
+
generatedCodeSteps: string[];
|
|
18
|
+
}>;
|
|
19
|
+
//# sourceMappingURL=execute-browser-action.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"execute-browser-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/execute-browser-action.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAE7C,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,wBAAsB,oBAAoB,CAAC,EACzC,IAAI,EACJ,UAAU,EACV,KAAK,EACL,OAAO,EACP,GAAG,EACH,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE;QACV,UAAU,EAAE,MAAM,CAAC;QACnB,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,KAAK,EAAE;QACL,4BAA4B,EAAE,OAAO,CAAC;KACvC,CAAC;IACF,OAAO,EAAE,iBAAiB,CAAC;IAC3B,KAAK,EAAE,WAAW,GAAG,SAAS,CAAC;IAC/B,GAAG,EAAE,GAAG,CAAC;CACV;;GAsIA"}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.executeBrowserAction = void 0;
|
|
4
|
+
const reporter_1 = require("../../reporter");
|
|
5
|
+
const browsing_1 = require("../browsing");
|
|
6
|
+
const utils_1 = require("../utils");
|
|
7
|
+
const action_tool_calls_1 = require("./action-tool-calls");
|
|
8
|
+
const element_annotation_1 = require("./element-annotation");
|
|
9
|
+
const with_hints_1 = require("./with-hints");
|
|
10
|
+
/**
 * Executes one browser action proposed by the master agent.
 *
 * Flow:
 *  1. Parse the tool-call arguments and pick an annotation preference
 *     (narrowed for FILL / ASSERT_TEXT when `useActionSpecificAnnotations`
 *     is set and the action type is valid).
 *  2. Annotate the page. When at least one annotation exists, resolve the
 *     target element annotation and run the hints flow.
 *  3. If the hints flow produced a completion, execute each of its tool
 *     calls; otherwise fall back to the browsing agent.
 *
 * @param {object} params
 * @param params.page - Page the action is performed on.
 * @param params.nextAction - `{ actionType, toolCallArgs }`; `toolCallArgs` is a JSON string.
 * @param params.flags - `{ useActionSpecificAnnotations }`.
 * @param params.actions - Executor exposing `executeAction` and `isStuckInLoop`.
 * @param params.llm - LLM client forwarded to the helper calls.
 * @param params.trace - Optional trace client.
 * @returns {Promise<{ generatedCodeSteps: string[] }>} Code produced by the executed steps.
 * @throws {Error} When the executor reports being stuck in a loop after the hints flow.
 */
async function executeBrowserAction({ page, nextAction, flags, actions, llm, trace, }) {
    const args = (0, utils_1.parseJson)(nextAction.toolCallArgs);
    const generatedCodeSteps = [];
    const output = {
        action: args.action,
        reason: args.reason,
    };
    let shouldTriggerHintsFlow;
    let hintsExecutionCompletion;
    let elementAnnotation;
    const actionType = nextAction.actionType;
    // Default: annotate every element. Narrowed below for selected action types.
    let preference = {
        actionType: "all",
    };
    if (flags.useActionSpecificAnnotations && (0, action_tool_calls_1.isValidActionType)(actionType)) {
        switch (actionType) {
            case action_tool_calls_1.ActionType.FILL:
                preference = {
                    actionType: action_tool_calls_1.ActionType.FILL,
                };
                break;
            case action_tool_calls_1.ActionType.ASSERT_TEXT:
                preference = {
                    actionType: action_tool_calls_1.ActionType.ASSERT_TEXT,
                    assertionText: args.assertion_text,
                };
                break;
            default:
                preference = {
                    actionType: "all",
                };
        }
    }
    const { annotationKeys, annotatedPageScreenshot, annotationBuffer } = await (0, element_annotation_1.getAnnotationKeys)({
        page,
        preference,
        trace,
    });
    if (annotationKeys.length > 0) {
        // TODO: this string has newline characters that makes it harder to read
        const annotationMapString = annotationKeys
            .map((a) => `${a.elementID}: ${a.text}`)
            .join("\n");
        // Provides the annotations for all the element present on screen
        // Also provides the annotation of element on which the action needs to be taken
        elementAnnotation = await (0, element_annotation_1.getElementAnnotation)({
            elementDescription: args.element_description,
            annotations: annotationMapString,
            annotatedScreenshot: annotatedPageScreenshot,
            trace,
            llm,
            preference,
        });
        output.elementAnnotation = elementAnnotation;
        console.log("Output: ", output);
        const testGenReporter = new reporter_1.TestGenUpdatesReporter();
        await testGenReporter.sendCurrentView(annotationBuffer);
        const triggerHintsFlowSpan = trace?.span({
            name: "trigger-hints-flow",
            input: {
                outputFromGetNextAction: output,
                generatedAnnotations: annotationKeys,
            },
        });
        // Provides the action whether its a click, fill etc.
        const result = await (0, with_hints_1.triggerHintsFlow)({
            outputFromGetNextAction: output,
            generatedAnnotations: annotationKeys,
            actions,
            llm,
            trace: triggerHintsFlowSpan,
        });
        shouldTriggerHintsFlow = result.shouldTriggerHintsFlow;
        hintsExecutionCompletion = result.hintsExecutionCompletion;
        triggerHintsFlowSpan?.end({
            output: result,
        });
    }
    if (shouldTriggerHintsFlow && hintsExecutionCompletion) {
        const toolCalls = hintsExecutionCompletion.tool_calls || [];
        // Iterate values in order; `for...in` would walk string keys and any
        // enumerable properties added to the array or its prototype.
        for (const currentToolCall of toolCalls) {
            const code = await actions.executeAction(currentToolCall.function.name, {
                ...JSON.parse(currentToolCall.function.arguments),
                elementAnnotation,
            }, trace);
            if (code) {
                generatedCodeSteps.push(code);
            }
        }
        if (actions.isStuckInLoop()) {
            throw new Error("Agent is not able to figure out next action when using hints");
        }
    }
    else {
        const browserAction = await (0, browsing_1.executeTaskUsingBrowsingAgent)({
            trace,
            action: output.action,
            page,
            llm,
            actions,
        });
        if (browserAction) {
            // `output` was handed by reference to the trace span / hints flow
            // above, so the update is kept even though it is not read again here.
            output.action = browserAction.action;
            if (browserAction.code) {
                generatedCodeSteps.push(browserAction.code);
            }
        }
    }
    return {
        generatedCodeSteps,
    };
}
|
|
123
|
+
exports.executeBrowserAction = executeBrowserAction;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
+
import { PlaywrightActions } from "../../actions";
|
|
3
|
+
/**
 * Executes a skill tool call chosen by the master agent.
 *
 * @param actions - Action executor used to run the skill.
 * @param nextAction - Action type plus its tool-call arguments as a JSON string.
 * @param trace - Optional trace client; may be `undefined`.
 * @returns The generated code for the skill call, or `undefined` when the
 *   executor produced none.
 */
export declare function executeSkillAction({ actions, nextAction, trace, }: {
|
|
4
|
+
actions: PlaywrightActions;
|
|
5
|
+
nextAction: {
|
|
6
|
+
actionType: string;
|
|
7
|
+
toolCallArgs: string;
|
|
8
|
+
};
|
|
9
|
+
trace: TraceClient | undefined;
|
|
10
|
+
}): Promise<string | undefined>;
|
|
11
|
+
//# sourceMappingURL=execute-skill-action.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"execute-skill-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/execute-skill-action.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEhD,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAOlD,wBAAsB,kBAAkB,CAAC,EACvC,OAAO,EACP,UAAU,EACV,KAAK,GACN,EAAE;IACD,OAAO,EAAE,iBAAiB,CAAC;IAC3B,UAAU,EAAE;QACV,UAAU,EAAE,MAAM,CAAC;QACnB,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,KAAK,EAAE,WAAW,GAAG,SAAS,CAAC;CAChC,+BAeA"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.executeSkillAction = void 0;
|
|
4
|
+
const skill_1 = require("../../actions/skill");
|
|
5
|
+
const errors_1 = require("../../errors");
|
|
6
|
+
const human_in_the_loop_1 = require("../../human-in-the-loop");
|
|
7
|
+
const env_1 = require("../../utils/env");
|
|
8
|
+
const utils_1 = require("../utils");
|
|
9
|
+
/**
 * Decides whether a human reply to the skill-approval prompt is a denial.
 *
 * A reply that starts with a standalone "y" / "yes" is an approval even when
 * later words merely contain the letters "no" (e.g. "yes, I know"). Any other
 * reply is a denial when it contains "no", as before.
 */
function isSkillUsageDenied(feedback) {
    const answer = feedback.toLowerCase().trim();
    if (/^(y|yes)\b/.test(answer)) {
        return false;
    }
    return answer.includes("no");
}
/**
 * Executes a skill tool call selected by the master agent.
 *
 * When not running on cloud, a human is first asked to approve the skill
 * (shown with its usage example and file path).
 *
 * @param {object} params
 * @param params.actions - Executor exposing `executeAction`.
 * @param params.nextAction - `{ actionType, toolCallArgs }`; `toolCallArgs` is a JSON string.
 * @param params.trace - Optional trace client.
 * @returns {Promise<string | undefined>} The generated code, or `undefined`
 *   when the executor returned a falsy value.
 * @throws {HumanApprovalDenied} When the human reviewer declines the skill.
 */
async function executeSkillAction({ actions, nextAction, trace, }) {
    const args = (0, utils_1.parseJson)(nextAction.toolCallArgs);
    if (!(0, env_1.isRunningOnCloud)()) {
        const { skillDetails } = (0, skill_1.extractSkillFromArgs)(args);
        const feedback = await human_in_the_loop_1.humanLoop.getFeedback({
            message: `Should I use this skill?\n\n${skillDetails?.usageExample}\n\nfrom: ${skillDetails?.filePath}\n\nAnswer with "yes" or "no"`,
        });
        if (isSkillUsageDenied(feedback)) {
            throw new errors_1.HumanApprovalDenied("Skill usage approval not provided");
        }
    }
    const code = await actions.executeAction(nextAction.actionType, args, trace);
    // Falsy results (e.g. an empty string) are reported as `undefined`.
    return code || undefined;
}
|
|
25
|
+
exports.executeSkillAction = executeSkillAction;
|
|
@@ -1,22 +1,19 @@
|
|
|
1
1
|
import { LLM, TraceClient } from "@empiricalrun/llm";
|
|
2
|
+
import { TestGenConfigOptions } from "@empiricalrun/shared-types";
|
|
2
3
|
import { Page } from "playwright";
|
|
3
4
|
import { PlaywrightActions } from "../../actions";
|
|
4
5
|
import { CustomLogger } from "../../bin/logger";
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
export declare function getNextAction({ page, pageScreenshot, task, executedActions, failedActions, trace, llm, options, actions, disableSkills, logger, }: {
|
|
7
|
+
page: Page;
|
|
8
|
+
pageScreenshot: string[];
|
|
8
9
|
task: string;
|
|
9
10
|
executedActions: string[];
|
|
10
11
|
failedActions: any[];
|
|
11
|
-
pageUrl: string;
|
|
12
12
|
trace?: TraceClient;
|
|
13
13
|
llm?: LLM;
|
|
14
|
-
options?:
|
|
15
|
-
pageScreenshot: string[];
|
|
14
|
+
options?: Partial<TestGenConfigOptions>;
|
|
16
15
|
actions: PlaywrightActions;
|
|
17
|
-
actionTypes: typeof ActionType;
|
|
18
16
|
disableSkills: boolean;
|
|
19
|
-
page: Page;
|
|
20
17
|
logger?: CustomLogger;
|
|
21
18
|
}): Promise<{
|
|
22
19
|
actionType: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"next-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/next-action.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"next-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/next-action.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAClE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAElD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAWhD,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,cAAc,EACd,IAAI,EACJ,eAAe,EACf,aAAa,EACb,KAAK,EACL,GAAG,EACH,OAAO,EACP,OAAO,EACP,aAAa,EACb,MAAM,GACP,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACxC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CACP;IACE,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;CACtB,GACD,SAAS,CACZ,CA8FA"}
|
|
@@ -2,13 +2,14 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.getNextAction = void 0;
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
|
-
const vision_1 = require("@empiricalrun/llm/vision");
|
|
6
5
|
const skill_1 = require("../../actions/skill");
|
|
7
6
|
const constants_1 = require("../../constants");
|
|
7
|
+
const promptTemplate_0 = "{{#section \"system\"}}\nYou are a web automation tool which is given a task to complete. You need to execute the\ntask provided to you with the help of web page screenshot, a browser automation tool or skills\nwhich are learnt while writing previous tests. \n \nBrowser automation tool is a tool which uses Playwright and browser to execute action using\nnext_action tool call.\nSkill usage is a tool which helps to execute previously known pieces of code to achieve a task.\n\nYou will be provided with a screenshot of the webpage which you will use to extract the action\nthat needs to be taken.\n\nYou will be provided with previously executed actions by the browser automation tool and based\non the current screenshot and previously executed actions, you need to predict the next action\nto be taken.\n\nYou will also be provided with failed next action predicted by you, so that you can avoid\nsuggesting the same action again - which failed.\n\nThe next action should be as atomic as possible. e.g: scroll, click on an element, fill an input\nelement, assert, extract text from an element are valid next action as they are atomic in nature.\n\nYou also need to provide the action type using the list below, action type which is not present in\nthe list is invalid: {{validActionTypes}}\n\nYou will also be provided with skill usage tool which you can use to execute action. 
These skills\nare compound functions which helps you to complete your action.\n\nYou need to respond with either:\n- Next action to be taken by a browser automation tool \n- Use previously learnt skills in the form of tool call.\n \nYou need to make a decision whether the given skill can be reused if \"YES\" respond with the\nskill else respond with the next action.\n{{/section}}\n\n{{#section \"user\"}}\nTask:\n{{task}}\n\n-----\n\nPrevious executed actions:\n{{executedActions}}\n\n-----\n\nPrevious failed actions:\n{{failedActions}}\n\n-----\n\nYou are also provided with a page screenshot for you to decide the next action.\n\nCurrent page URL: {{pageUrl}}\n\nFollow the instructions before responding:\n- Divide the task into sub tasks\n- Using previously executed actions, identify tasks are complete and which tasks needs to be executed next.\n- You will be provided a skill usage action, if the testStep matches the next action then respond with the skill usage.\n- If responding with next action, ensure next action to be detailed and explicit about what action needs to be done. Provide all the information which can be extracted from the screenshot as a part of next action.\n- Mark task as complete only when executed actions provided to you indicates that the task is done.\n- Refer to the text and references available in the screenshot to create the next action.\n- Do not take any extra actions which are not required for the execution of the task\n- If there are no further actions required based on the task, then respond with task as done.\n- Do not recommend actions which are not available in the screenshot\n\nScreenshots:\n{{images pageScreenshots}}\n\n{{/section}}\n";
|
|
8
8
|
const utils_1 = require("../utils");
|
|
9
9
|
const action_tool_calls_1 = require("./action-tool-calls");
|
|
10
10
|
const scroller_1 = require("./scroller");
|
|
11
|
-
async function getNextAction({ task, executedActions, failedActions,
|
|
11
|
+
async function getNextAction({ page, pageScreenshot, task, executedActions, failedActions, trace, llm, options, actions, disableSkills, logger, }) {
|
|
12
|
+
const pageUrl = page.url();
|
|
12
13
|
const nextActionSpan = trace?.span({
|
|
13
14
|
name: "master-agent-next-action",
|
|
14
15
|
input: {
|
|
@@ -22,96 +23,18 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
|
|
|
22
23
|
skills: skill_1.testCaseSkills.getAvailableSkills(),
|
|
23
24
|
},
|
|
24
25
|
});
|
|
25
|
-
const
|
|
26
|
-
|
|
26
|
+
const messages = (0, llm_1.compilePrompt)(promptTemplate_0, {
|
|
27
|
+
validActionTypes: Object.values(action_tool_calls_1.ActionType).join(", "),
|
|
28
|
+
task,
|
|
29
|
+
executedActions: executedActions.map((a) => a).join("\n"),
|
|
30
|
+
failedActions: failedActions.map((a) => a).join("\n"),
|
|
31
|
+
pageUrl,
|
|
32
|
+
pageScreenshots: pageScreenshot,
|
|
27
33
|
});
|
|
28
|
-
const screenshotsForPrompt = pageScreenshot.map((screenshot) => {
|
|
29
|
-
return {
|
|
30
|
-
type: "image_url",
|
|
31
|
-
image_url: {
|
|
32
|
-
url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, screenshot),
|
|
33
|
-
},
|
|
34
|
-
};
|
|
35
|
-
});
|
|
36
|
-
const systemMessage = {
|
|
37
|
-
role: "system",
|
|
38
|
-
content: `You are a web automation tool which is given a task to complete. You need to execute the task provided to you with the help of web page screenshot, a browser automation tool or skills which are learnt while writing previous tests.
|
|
39
|
-
|
|
40
|
-
Browser automation tool is a tool which uses Playwright and browser to execute action using next_action tool call.
|
|
41
|
-
Skill usage is a tool which helps to execute previously known pieces of code to achieve a task.
|
|
42
|
-
|
|
43
|
-
You will be provided with a screenshot of the webpage which you will use to extract the action that needs to be taken.
|
|
44
|
-
|
|
45
|
-
You will be provided with previously executed actions by the browser automation tool and based on the current screenshot and previously executed actions, you need to predict the next action to be taken.
|
|
46
|
-
|
|
47
|
-
You will also be provided with failed next action predicted by you, so that you can avoid suggesting the same action again - which failed.
|
|
48
|
-
|
|
49
|
-
The next action should be as atomic as possible.
|
|
50
|
-
e.g: scroll, click on an element, fill an input element, assert, extract text from an element are valid next action as they are atomic in nature.
|
|
51
|
-
|
|
52
|
-
You also need to provide the action type using the list below, action type which is not present in the list is invalid.
|
|
53
|
-
${Object.values(actionTypes)}
|
|
54
|
-
|
|
55
|
-
You will also be provided with skill usage tool which you can use to execute action. These skills are compound functions which helps you to complete your action.
|
|
56
|
-
|
|
57
|
-
You need to respond with either:
|
|
58
|
-
- Next action to be taken by a browser automation tool
|
|
59
|
-
- Use previously learnt skills in the form of tool call.
|
|
60
|
-
|
|
61
|
-
You need to make a decision whether the given skill can be reused if "YES" respond with the skill else respond with the next action.`,
|
|
62
|
-
};
|
|
63
|
-
const userMessage = {
|
|
64
|
-
role: "user",
|
|
65
|
-
content: [
|
|
66
|
-
{
|
|
67
|
-
type: "text",
|
|
68
|
-
text: `Task:
|
|
69
|
-
${task}
|
|
70
|
-
|
|
71
|
-
----
|
|
72
|
-
|
|
73
|
-
Previous executed actions:
|
|
74
|
-
${executedActions.map((a) => a).join("\n")}
|
|
75
|
-
|
|
76
|
-
----
|
|
77
|
-
|
|
78
|
-
Previous failed actions:
|
|
79
|
-
${failedActions.map((a) => a).join("\n")}
|
|
80
|
-
|
|
81
|
-
----
|
|
82
|
-
You are also provided with a page screenshot for you to decide the next action.
|
|
83
|
-
|
|
84
|
-
Current page URL: ${pageUrl}
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
Follow the instructions before responding:
|
|
88
|
-
- Divide the task into sub tasks
|
|
89
|
-
- Using previously executed actions, identify tasks are complete and which tasks needs to be executed next.
|
|
90
|
-
- You will be provided a skill usage action, if the testStep matches the next action then respond with the skill usage.
|
|
91
|
-
- If responding with next action, ensure next action to be detailed and explicit about what action needs to be done. Provide all the information which can be extracted from the screenshot as a part of next action.
|
|
92
|
-
- Mark task as complete only when executed actions provided to you indicates that the task is done.
|
|
93
|
-
- Refer to the text and references available in the screenshot to create the next action.
|
|
94
|
-
- Do not take any extra actions which are not required for the execution of the task
|
|
95
|
-
- If there are no further actions required based on the task, then respond with task as done.
|
|
96
|
-
- Do not recommend actions which are not available in the screenshot
|
|
97
|
-
`,
|
|
98
|
-
},
|
|
99
|
-
{
|
|
100
|
-
type: "text",
|
|
101
|
-
text: pageScreenshot.length > 1 ? "Screenshots:" : "Screenshot:",
|
|
102
|
-
},
|
|
103
|
-
...screenshotsForPrompt,
|
|
104
|
-
],
|
|
105
|
-
};
|
|
106
|
-
const messages = [
|
|
107
|
-
systemMessage,
|
|
108
|
-
userMessage,
|
|
109
|
-
];
|
|
110
34
|
const actionSchemas = disableSkills || skill_1.testCaseSkills.getAvailableSkills().length === 0
|
|
111
35
|
? []
|
|
112
36
|
: actions.getMasterActionSchemas();
|
|
113
37
|
const tools = [...(0, action_tool_calls_1.getActionToolCalls)(), ...actionSchemas];
|
|
114
|
-
promptSpan?.end({ output: { messages } });
|
|
115
38
|
llm =
|
|
116
39
|
llm ||
|
|
117
40
|
new llm_1.LLM({
|
|
@@ -154,13 +77,11 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
|
|
|
154
77
|
task,
|
|
155
78
|
executedActions,
|
|
156
79
|
failedActions,
|
|
157
|
-
pageUrl,
|
|
158
80
|
trace,
|
|
159
81
|
llm,
|
|
160
82
|
options,
|
|
161
83
|
pageScreenshot: frames.map((frame) => frame.frameScreenshot),
|
|
162
84
|
actions,
|
|
163
|
-
actionTypes,
|
|
164
85
|
disableSkills,
|
|
165
86
|
page,
|
|
166
87
|
logger,
|
|
@@ -169,7 +90,7 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
|
|
|
169
90
|
}
|
|
170
91
|
if (maxScrollRetries === -1) {
|
|
171
92
|
return {
|
|
172
|
-
actionType:
|
|
93
|
+
actionType: action_tool_calls_1.ActionType.UNKNOWN,
|
|
173
94
|
toolCallArgs: "",
|
|
174
95
|
};
|
|
175
96
|
}
|
|
@@ -179,6 +100,5 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
|
|
|
179
100
|
toolCallArgs: toolCall?.function.arguments,
|
|
180
101
|
};
|
|
181
102
|
}
|
|
182
|
-
return;
|
|
183
103
|
}
|
|
184
104
|
exports.getNextAction = getNextAction;
|
|
@@ -1,13 +1,12 @@
|
|
|
1
|
-
import type { TestCase } from "@empiricalrun/shared-types";
|
|
1
|
+
import type { TestCase, TestGenConfigOptions } from "@empiricalrun/shared-types";
|
|
2
2
|
import { Page } from "playwright";
|
|
3
3
|
import { ScopeVars } from "../../types";
|
|
4
|
-
import { BrowsingAgentOptions } from "../browsing";
|
|
5
4
|
export declare function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }: {
|
|
6
5
|
task: string;
|
|
7
6
|
page: Page;
|
|
8
7
|
testCase?: TestCase;
|
|
9
8
|
specPath?: string;
|
|
10
|
-
options:
|
|
9
|
+
options: Partial<TestGenConfigOptions>;
|
|
11
10
|
scopeVars?: ScopeVars;
|
|
12
11
|
}): Promise<{
|
|
13
12
|
code: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAyBxC,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACvC,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAoRA"}
|