@empiricalrun/test-gen 0.35.4 → 0.35.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/browser-injected-scripts/annotate-elements.spec.ts +258 -0
- package/dist/agent/browsing/index.js +2 -2
- package/dist/agent/codegen/run.d.ts +8 -0
- package/dist/agent/codegen/run.d.ts.map +1 -1
- package/dist/agent/codegen/run.js +38 -33
- package/dist/agent/codegen/update-flow.d.ts +9 -0
- package/dist/agent/codegen/update-flow.d.ts.map +1 -1
- package/dist/agent/codegen/update-flow.js +44 -40
- package/dist/agent/master/run.js +1 -1
- package/dist/agent/verification/index.js +1 -1
- package/dist/browser-injected-scripts/annotate-elements.spec.ts +258 -0
- package/dist/evals/add-scenario-agent.evals.d.ts +4 -0
- package/dist/evals/add-scenario-agent.evals.d.ts.map +1 -0
- package/dist/evals/add-scenario-agent.evals.js +23 -0
- package/dist/evals/update-scenario-agent.evals.d.ts +4 -0
- package/dist/evals/update-scenario-agent.evals.d.ts.map +1 -0
- package/dist/evals/update-scenario-agent.evals.js +49 -0
- package/package.json +5 -2
- package/playwright.config.ts +5 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.35.6
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 948f534: fix: update verification agent prompt
|
|
8
|
+
|
|
9
|
+
## 0.35.5
|
|
10
|
+
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
- 8e18e5b: feat: add scenario code agent evals
|
|
14
|
+
- d6f9de2: fix: add tests for annotation script
|
|
15
|
+
|
|
3
16
|
## 0.35.4
|
|
4
17
|
|
|
5
18
|
### Patch Changes
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
2
|
+
|
|
3
|
+
import { test } from "@playwright/test";
|
|
4
|
+
import path from "path";
|
|
5
|
+
|
|
6
|
+
test("should annotate all links on empirical landing page", async ({
|
|
7
|
+
page,
|
|
8
|
+
}) => {
|
|
9
|
+
await page.goto(
|
|
10
|
+
"https://assets-test.empirical.run/selector-hints-testing/dom-1.html",
|
|
11
|
+
);
|
|
12
|
+
|
|
13
|
+
await page.addScriptTag({
|
|
14
|
+
path: path.resolve(__dirname, "./annotate-elements.js"),
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
const annotations = await page.evaluate(() => {
|
|
18
|
+
const { annotations } = window.annotateClickableElements();
|
|
19
|
+
|
|
20
|
+
return Object.entries(annotations).map(([hint, config]) => ({
|
|
21
|
+
hint,
|
|
22
|
+
innerText: config.node.innerText,
|
|
23
|
+
tagName: config.node.tagName,
|
|
24
|
+
href: config.node.href,
|
|
25
|
+
}));
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
test.expect(annotations).toEqual([
|
|
29
|
+
{
|
|
30
|
+
hint: "A",
|
|
31
|
+
innerText: "Empirical",
|
|
32
|
+
tagName: "A",
|
|
33
|
+
href: "https://assets-test.empirical.run/",
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
hint: "B",
|
|
37
|
+
innerText: "Blog",
|
|
38
|
+
tagName: "A",
|
|
39
|
+
href: "https://assets-test.empirical.run/blog",
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
hint: "C",
|
|
43
|
+
innerText: "Contact us",
|
|
44
|
+
tagName: "A",
|
|
45
|
+
href: "https://assets-test.empirical.run/contact",
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
hint: "D",
|
|
49
|
+
href: "https://dash.empirical.run/",
|
|
50
|
+
innerText: "Login ↗\n(opens in a new tab)",
|
|
51
|
+
tagName: "A",
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
hint: "E",
|
|
55
|
+
innerText: "Get early access",
|
|
56
|
+
tagName: "A",
|
|
57
|
+
href: "https://assets-test.empirical.run/contact",
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
hint: "F",
|
|
61
|
+
innerText: "Playwright\n(opens in a new tab)",
|
|
62
|
+
tagName: "A",
|
|
63
|
+
href: "https://github.com/microsoft/playwright",
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
hint: "G",
|
|
67
|
+
innerText: "Meet with us",
|
|
68
|
+
tagName: "A",
|
|
69
|
+
href: "https://assets-test.empirical.run/contact",
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
hint: "H",
|
|
73
|
+
innerText: "Privacy Policy",
|
|
74
|
+
tagName: "A",
|
|
75
|
+
href: "https://assets-test.empirical.run/privacy.html",
|
|
76
|
+
},
|
|
77
|
+
]);
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
test("should annotate all important items on quizizz page", async ({
|
|
81
|
+
page,
|
|
82
|
+
}) => {
|
|
83
|
+
await page.goto(
|
|
84
|
+
"https://assets-test.empirical.run/selector-hints-testing/dom-2/index.html",
|
|
85
|
+
);
|
|
86
|
+
|
|
87
|
+
await page.addScriptTag({
|
|
88
|
+
path: path.resolve(__dirname, "./annotate-elements.js"),
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
const annotations = await page.evaluate(() => {
|
|
92
|
+
const { annotations } = window.annotateClickableElements();
|
|
93
|
+
|
|
94
|
+
return Object.entries(annotations).map(([hint, config]) => ({
|
|
95
|
+
hint,
|
|
96
|
+
innerText: config.node.innerText.toLowerCase().trim(),
|
|
97
|
+
tagName: config.node.tagName,
|
|
98
|
+
testId: config.node.getAttribute("data-testid"),
|
|
99
|
+
href: config.node.href,
|
|
100
|
+
}));
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
test
|
|
104
|
+
.expect(annotations.find((item) => item.innerText.includes("enter code")))
|
|
105
|
+
.toBeTruthy();
|
|
106
|
+
test
|
|
107
|
+
.expect(annotations.find((item) => item.innerText.includes("get help")))
|
|
108
|
+
.toBeTruthy();
|
|
109
|
+
test
|
|
110
|
+
.expect(
|
|
111
|
+
annotations.find(
|
|
112
|
+
(item) =>
|
|
113
|
+
item.innerText.includes("create") &&
|
|
114
|
+
item.testId === "create-content-button",
|
|
115
|
+
),
|
|
116
|
+
)
|
|
117
|
+
.toBeTruthy();
|
|
118
|
+
test
|
|
119
|
+
.expect(
|
|
120
|
+
annotations.find(
|
|
121
|
+
(item) =>
|
|
122
|
+
item.innerText.includes("explore") &&
|
|
123
|
+
item.href === "https://quizizz.com/admin",
|
|
124
|
+
),
|
|
125
|
+
)
|
|
126
|
+
.toBeTruthy();
|
|
127
|
+
test
|
|
128
|
+
.expect(
|
|
129
|
+
annotations.find(
|
|
130
|
+
(item) =>
|
|
131
|
+
item.innerText.includes("library") &&
|
|
132
|
+
item.href === "https://quizizz.com/admin/my-library/createdByMe",
|
|
133
|
+
),
|
|
134
|
+
)
|
|
135
|
+
.toBeTruthy();
|
|
136
|
+
test
|
|
137
|
+
.expect(
|
|
138
|
+
annotations.find(
|
|
139
|
+
(item) =>
|
|
140
|
+
item.innerText.includes("reports") &&
|
|
141
|
+
item.href === "https://quizizz.com/admin/reports",
|
|
142
|
+
),
|
|
143
|
+
)
|
|
144
|
+
.toBeTruthy();
|
|
145
|
+
test
|
|
146
|
+
.expect(
|
|
147
|
+
annotations.find(
|
|
148
|
+
(item) =>
|
|
149
|
+
item.innerText.includes("classes") &&
|
|
150
|
+
item.href === "https://quizizz.com/admin/classes",
|
|
151
|
+
),
|
|
152
|
+
)
|
|
153
|
+
.toBeTruthy();
|
|
154
|
+
test
|
|
155
|
+
.expect(
|
|
156
|
+
annotations.find(
|
|
157
|
+
(item) =>
|
|
158
|
+
item.innerText.includes("accommodations") &&
|
|
159
|
+
item.href ===
|
|
160
|
+
"https://quizizz.com/admin/differentiation/accommodations",
|
|
161
|
+
),
|
|
162
|
+
)
|
|
163
|
+
.toBeTruthy();
|
|
164
|
+
test
|
|
165
|
+
.expect(
|
|
166
|
+
annotations.find(
|
|
167
|
+
(item) =>
|
|
168
|
+
item.innerText.includes("quizizz ai") &&
|
|
169
|
+
item.href === "https://quizizz.com/admin/quizizz-ai",
|
|
170
|
+
),
|
|
171
|
+
)
|
|
172
|
+
.toBeTruthy();
|
|
173
|
+
test
|
|
174
|
+
.expect(
|
|
175
|
+
annotations.find(
|
|
176
|
+
(item) =>
|
|
177
|
+
item.innerText.includes("start your free trial") &&
|
|
178
|
+
item.href === "https://quizizz.com/super-pricing",
|
|
179
|
+
),
|
|
180
|
+
)
|
|
181
|
+
.toBeTruthy();
|
|
182
|
+
test
|
|
183
|
+
.expect(
|
|
184
|
+
annotations.find(
|
|
185
|
+
(item) =>
|
|
186
|
+
item.innerText.includes("upgrade") &&
|
|
187
|
+
item.href === "https://quizizz.com/super-pricing?backto=/admin",
|
|
188
|
+
),
|
|
189
|
+
)
|
|
190
|
+
.toBeTruthy();
|
|
191
|
+
|
|
192
|
+
test
|
|
193
|
+
.expect(
|
|
194
|
+
annotations.find(
|
|
195
|
+
(item) =>
|
|
196
|
+
item.tagName === "INPUT" &&
|
|
197
|
+
item.testId === "emphasized-search-bar-input",
|
|
198
|
+
),
|
|
199
|
+
)
|
|
200
|
+
.toBeTruthy();
|
|
201
|
+
|
|
202
|
+
test
|
|
203
|
+
.expect(
|
|
204
|
+
annotations.find(
|
|
205
|
+
(item) =>
|
|
206
|
+
item.tagName === "BUTTON" &&
|
|
207
|
+
item.innerText.includes("verify details") &&
|
|
208
|
+
item.testId === "verify-profile-cta",
|
|
209
|
+
),
|
|
210
|
+
)
|
|
211
|
+
.toBeTruthy();
|
|
212
|
+
|
|
213
|
+
test
|
|
214
|
+
.expect(
|
|
215
|
+
annotations.find(
|
|
216
|
+
(item) =>
|
|
217
|
+
item.tagName === "BUTTON" && item.innerText.includes("for you"),
|
|
218
|
+
),
|
|
219
|
+
)
|
|
220
|
+
.toBeTruthy();
|
|
221
|
+
|
|
222
|
+
test
|
|
223
|
+
.expect(
|
|
224
|
+
annotations.find(
|
|
225
|
+
(item) =>
|
|
226
|
+
item.tagName === "BUTTON" && item.innerText.includes("assessments"),
|
|
227
|
+
),
|
|
228
|
+
)
|
|
229
|
+
.toBeTruthy();
|
|
230
|
+
|
|
231
|
+
test
|
|
232
|
+
.expect(
|
|
233
|
+
annotations.find(
|
|
234
|
+
(item) =>
|
|
235
|
+
item.tagName === "BUTTON" && item.innerText.includes("lessons"),
|
|
236
|
+
),
|
|
237
|
+
)
|
|
238
|
+
.toBeTruthy();
|
|
239
|
+
|
|
240
|
+
test
|
|
241
|
+
.expect(
|
|
242
|
+
annotations.find(
|
|
243
|
+
(item) =>
|
|
244
|
+
item.tagName === "BUTTON" &&
|
|
245
|
+
item.innerText.includes("interactive videos"),
|
|
246
|
+
),
|
|
247
|
+
)
|
|
248
|
+
.toBeTruthy();
|
|
249
|
+
|
|
250
|
+
test
|
|
251
|
+
.expect(
|
|
252
|
+
annotations.find(
|
|
253
|
+
(item) =>
|
|
254
|
+
item.tagName === "BUTTON" && item.innerText.includes("passages"),
|
|
255
|
+
),
|
|
256
|
+
)
|
|
257
|
+
.toBeTruthy();
|
|
258
|
+
});
|
|
@@ -101,7 +101,7 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
|
|
|
101
101
|
await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments), toolCallsSpan);
|
|
102
102
|
executedActions.push({
|
|
103
103
|
isError: false,
|
|
104
|
-
action: JSON.
|
|
104
|
+
action: JSON.parse(toolCall.function.arguments)?.reason,
|
|
105
105
|
});
|
|
106
106
|
lastActionExecTrace = "";
|
|
107
107
|
}
|
|
@@ -109,7 +109,7 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
|
|
|
109
109
|
// TODO: implement feedback loop to llm
|
|
110
110
|
executedActions.push({
|
|
111
111
|
isError: true,
|
|
112
|
-
action: JSON.
|
|
112
|
+
action: JSON.parse(toolCall.function.arguments)?.reason,
|
|
113
113
|
});
|
|
114
114
|
lastActionExecTrace = e.message;
|
|
115
115
|
void testgenUpdatesReporter.sendMessage(e.message);
|
|
@@ -1,4 +1,12 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
2
|
import { TestCase, TestGenConfigOptions } from "../../types";
|
|
3
|
+
export declare function getAddScenarioCompletion({ testCase, testFiles, pageFiles, testFilePath, trace, options, }: {
|
|
4
|
+
testCase: TestCase;
|
|
5
|
+
testFiles: string;
|
|
6
|
+
pageFiles: string;
|
|
7
|
+
testFilePath: string;
|
|
8
|
+
trace?: TraceClient;
|
|
9
|
+
options?: TestGenConfigOptions;
|
|
10
|
+
}): Promise<string>;
|
|
3
11
|
export declare function generateTest(testCase: TestCase, file: string, options: TestGenConfigOptions, trace?: TraceClient): Promise<TestCase[]>;
|
|
4
12
|
//# sourceMappingURL=run.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkC,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAkBhF,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,QAAQ,EAAE,CAAC,
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkC,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAkBhF,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,wBAAsB,wBAAwB,CAAC,EAC7C,QAAQ,EACR,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,mBA4BA;AAED,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAgFrB"}
|
|
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.generateTest = void 0;
|
|
6
|
+
exports.generateTest = exports.getAddScenarioCompletion = void 0;
|
|
7
7
|
const llm_1 = require("@empiricalrun/llm");
|
|
8
8
|
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
9
9
|
const logger_1 = require("../../bin/logger");
|
|
@@ -12,6 +12,35 @@ const web_1 = require("../../bin/utils/platform/web");
|
|
|
12
12
|
const constants_1 = require("../../constants");
|
|
13
13
|
const fix_ts_errors_1 = require("./fix-ts-errors");
|
|
14
14
|
const update_flow_1 = require("./update-flow");
|
|
15
|
+
async function getAddScenarioCompletion({ testCase, testFiles, pageFiles, testFilePath, trace, options, }) {
|
|
16
|
+
const promptSpan = trace?.span({
|
|
17
|
+
name: "add-scenario-prompt",
|
|
18
|
+
});
|
|
19
|
+
const instruction = await (0, llm_1.getPrompt)("add-scenario", {
|
|
20
|
+
testFiles: testFiles,
|
|
21
|
+
pageFiles: pageFiles,
|
|
22
|
+
scenarioName: testCase.name,
|
|
23
|
+
scenarioSteps: testCase.steps.join("\n"),
|
|
24
|
+
scenarioFile: testFilePath,
|
|
25
|
+
});
|
|
26
|
+
promptSpan?.end({ output: { instruction } });
|
|
27
|
+
const llm = new llm_1.LLM({
|
|
28
|
+
trace,
|
|
29
|
+
provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
30
|
+
defaultModel: options?.model || constants_1.DEFAULT_MODEL,
|
|
31
|
+
providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
|
|
32
|
+
});
|
|
33
|
+
const firstShotMessage = await llm.createChatCompletion({
|
|
34
|
+
messages: instruction,
|
|
35
|
+
modelParameters: {
|
|
36
|
+
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
37
|
+
...options?.modelParameters,
|
|
38
|
+
},
|
|
39
|
+
});
|
|
40
|
+
let response = firstShotMessage?.content || "";
|
|
41
|
+
return response;
|
|
42
|
+
}
|
|
43
|
+
exports.getAddScenarioCompletion = getAddScenarioCompletion;
|
|
15
44
|
async function generateTest(testCase, file, options, trace) {
|
|
16
45
|
const logger = new logger_1.CustomLogger();
|
|
17
46
|
if (!fs_extra_1.default.existsSync(file)) {
|
|
@@ -35,43 +64,19 @@ async function generateTest(testCase, file, options, trace) {
|
|
|
35
64
|
name: "create-test",
|
|
36
65
|
input: {
|
|
37
66
|
testCase,
|
|
38
|
-
|
|
39
|
-
|
|
67
|
+
testFiles: codePrompt,
|
|
68
|
+
pageFiles: pomPrompt,
|
|
69
|
+
testFilePath: file,
|
|
40
70
|
},
|
|
41
71
|
});
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
output: {
|
|
45
|
-
codePrompt,
|
|
46
|
-
pomPrompt,
|
|
47
|
-
testFileContent,
|
|
48
|
-
},
|
|
49
|
-
});
|
|
50
|
-
const promptSpan = createTestSpan?.span({
|
|
51
|
-
name: "add-scenario-prompt",
|
|
52
|
-
});
|
|
53
|
-
const instruction = await (0, llm_1.getPrompt)("add-scenario", {
|
|
72
|
+
const response = await getAddScenarioCompletion({
|
|
73
|
+
testCase,
|
|
54
74
|
testFiles: codePrompt,
|
|
55
75
|
pageFiles: pomPrompt,
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
});
|
|
60
|
-
promptSpan?.end({ output: { instruction } });
|
|
61
|
-
const llm = new llm_1.LLM({
|
|
62
|
-
trace,
|
|
63
|
-
provider: options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
64
|
-
defaultModel: options.model || constants_1.DEFAULT_MODEL,
|
|
65
|
-
providerApiKey: constants_1.MODEL_API_KEYS[options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
|
|
66
|
-
});
|
|
67
|
-
const firstShotMessage = await llm.createChatCompletion({
|
|
68
|
-
messages: instruction,
|
|
69
|
-
modelParameters: {
|
|
70
|
-
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
71
|
-
...options.modelParameters,
|
|
72
|
-
},
|
|
76
|
+
testFilePath: file,
|
|
77
|
+
trace: createTestSpan,
|
|
78
|
+
options,
|
|
73
79
|
});
|
|
74
|
-
let response = firstShotMessage?.content || "";
|
|
75
80
|
logger.success("Test generated successfully!");
|
|
76
81
|
const readWriteFileSpan = trace?.span({ name: "write-to-file" });
|
|
77
82
|
let contents = fs_extra_1.default.readFileSync(file, "utf-8");
|
|
@@ -3,6 +3,15 @@ import { TestCase, TestGenConfigOptions } from "../../types";
|
|
|
3
3
|
type UpdatedTestCase = TestCase & {
|
|
4
4
|
updatedFiles: string[];
|
|
5
5
|
};
|
|
6
|
+
export declare function getUpdateTestCodeCompletion({ testCase, testFileContent, testFiles, pageFiles, testFilePath, trace, options, }: {
|
|
7
|
+
testCase: TestCase;
|
|
8
|
+
testFiles: string;
|
|
9
|
+
pageFiles: string;
|
|
10
|
+
testFilePath: string;
|
|
11
|
+
testFileContent: string;
|
|
12
|
+
trace?: TraceClient;
|
|
13
|
+
options?: TestGenConfigOptions;
|
|
14
|
+
}): Promise<string>;
|
|
6
15
|
export declare function updateTest(testCase: TestCase, file: string, options: TestGenConfigOptions | undefined, logging?: boolean, validate?: boolean, trace?: TraceClient): Promise<UpdatedTestCase[]>;
|
|
7
16
|
export declare function appendCreateTestBlock({ testCase, file, options, trace, validateTypes, }: {
|
|
8
17
|
testCase: TestCase;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAsB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,
|
|
1
|
+
{"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAsB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CAAC,MAAM,CAAC,CA6ClB;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CA6D5B;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA+E7B"}
|
|
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.appendCreateTestBlock = exports.updateTest = void 0;
|
|
6
|
+
exports.appendCreateTestBlock = exports.updateTest = exports.getUpdateTestCodeCompletion = void 0;
|
|
7
7
|
const llm_1 = require("@empiricalrun/llm");
|
|
8
8
|
const crypto_1 = __importDefault(require("crypto"));
|
|
9
9
|
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
@@ -103,41 +103,8 @@ async function applyFileChanges({ validateTypes = true, trace, testCase, fileCha
|
|
|
103
103
|
logger.success(`${fileChange.filePath} file formatted successfully!`);
|
|
104
104
|
}));
|
|
105
105
|
}
|
|
106
|
-
async function
|
|
107
|
-
const
|
|
108
|
-
const context = await (0, context_1.contextForGeneration)(file);
|
|
109
|
-
const { codePrompt, pomPrompt, testFileContent } = context;
|
|
110
|
-
const generatedTestCases = [];
|
|
111
|
-
logger.logEmptyLine();
|
|
112
|
-
const session = (0, session_1.getSessionDetails)();
|
|
113
|
-
trace =
|
|
114
|
-
trace ||
|
|
115
|
-
llm_1.langfuseInstance?.trace({
|
|
116
|
-
name: "update-test",
|
|
117
|
-
id: crypto_1.default.randomUUID(),
|
|
118
|
-
release: session.version,
|
|
119
|
-
tags: [
|
|
120
|
-
options?.metadata.projectName || "",
|
|
121
|
-
options?.metadata.environment || "",
|
|
122
|
-
].filter((s) => !!s),
|
|
123
|
-
});
|
|
124
|
-
const updateTestSpan = trace?.span({
|
|
125
|
-
name: "update-test",
|
|
126
|
-
input: {
|
|
127
|
-
testCase,
|
|
128
|
-
file,
|
|
129
|
-
options,
|
|
130
|
-
},
|
|
131
|
-
});
|
|
132
|
-
updateTestSpan?.event({
|
|
133
|
-
name: "collate-files-as-text",
|
|
134
|
-
output: {
|
|
135
|
-
codePrompt,
|
|
136
|
-
pomPrompt,
|
|
137
|
-
testFileContent,
|
|
138
|
-
},
|
|
139
|
-
});
|
|
140
|
-
const promptSpan = updateTestSpan?.span({
|
|
106
|
+
async function getUpdateTestCodeCompletion({ testCase, testFileContent, testFiles, pageFiles, testFilePath, trace, options, }) {
|
|
107
|
+
const promptSpan = trace?.span({
|
|
141
108
|
name: "update-scenario-prompt",
|
|
142
109
|
});
|
|
143
110
|
const promptName = "update-scenario";
|
|
@@ -154,16 +121,16 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
|
|
|
154
121
|
suites: testCase?.suites || [],
|
|
155
122
|
});
|
|
156
123
|
const instruction = await (0, llm_1.getPrompt)(promptName, {
|
|
157
|
-
testFiles:
|
|
158
|
-
pageFiles:
|
|
124
|
+
testFiles: testFiles,
|
|
125
|
+
pageFiles: pageFiles,
|
|
159
126
|
scenarioName,
|
|
160
127
|
scenarioSteps: testCase.steps.join("\n"),
|
|
161
|
-
scenarioFile:
|
|
128
|
+
scenarioFile: testFilePath,
|
|
162
129
|
currentScenarioCodeBlock,
|
|
163
130
|
});
|
|
164
131
|
promptSpan?.end({ output: { instruction } });
|
|
165
132
|
const llm = new llm_1.LLM({
|
|
166
|
-
trace
|
|
133
|
+
trace,
|
|
167
134
|
provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
168
135
|
defaultModel: options?.model || constants_1.DEFAULT_MODEL,
|
|
169
136
|
providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
|
|
@@ -176,6 +143,43 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
|
|
|
176
143
|
},
|
|
177
144
|
});
|
|
178
145
|
let response = firstShotMessage?.content || "";
|
|
146
|
+
return response;
|
|
147
|
+
}
|
|
148
|
+
exports.getUpdateTestCodeCompletion = getUpdateTestCodeCompletion;
|
|
149
|
+
async function updateTest(testCase, file, options, logging = true, validate = true, trace) {
|
|
150
|
+
const logger = new logger_1.CustomLogger({ useReporter: logging });
|
|
151
|
+
const context = await (0, context_1.contextForGeneration)(file);
|
|
152
|
+
const { codePrompt, pomPrompt, testFileContent } = context;
|
|
153
|
+
const generatedTestCases = [];
|
|
154
|
+
logger.logEmptyLine();
|
|
155
|
+
const session = (0, session_1.getSessionDetails)();
|
|
156
|
+
trace =
|
|
157
|
+
trace ||
|
|
158
|
+
llm_1.langfuseInstance?.trace({
|
|
159
|
+
name: "update-test",
|
|
160
|
+
id: crypto_1.default.randomUUID(),
|
|
161
|
+
release: session.version,
|
|
162
|
+
tags: [
|
|
163
|
+
options?.metadata.projectName || "",
|
|
164
|
+
options?.metadata.environment || "",
|
|
165
|
+
].filter((s) => !!s),
|
|
166
|
+
});
|
|
167
|
+
const request = {
|
|
168
|
+
testCase,
|
|
169
|
+
testFileContent,
|
|
170
|
+
testFiles: codePrompt,
|
|
171
|
+
pageFiles: pomPrompt,
|
|
172
|
+
testFilePath: file,
|
|
173
|
+
options,
|
|
174
|
+
};
|
|
175
|
+
const updateTestSpan = trace?.span({
|
|
176
|
+
name: "update-test",
|
|
177
|
+
input: request,
|
|
178
|
+
});
|
|
179
|
+
const response = await getUpdateTestCodeCompletion({
|
|
180
|
+
...request,
|
|
181
|
+
trace: updateTestSpan,
|
|
182
|
+
});
|
|
179
183
|
logger.success("Test generated successfully!");
|
|
180
184
|
const fileChanges = (0, utils_1.extractTestUpdates)(response);
|
|
181
185
|
await applyFileChanges({
|
package/dist/agent/master/run.js
CHANGED
|
@@ -40,7 +40,7 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
|
|
|
40
40
|
failedActions: failedActions.map((a) => a).join("\n"),
|
|
41
41
|
executedActions: executedActions.map((a) => a).join("\n"),
|
|
42
42
|
pageUrl,
|
|
43
|
-
},
|
|
43
|
+
}, 18);
|
|
44
44
|
// assuming there is only one user message in the prompt. if there is a change in langfuse prompt format, this will need to be updated
|
|
45
45
|
const userMessage = promptMessages.filter((m) => m.role === "user")[0];
|
|
46
46
|
const systemMessage = promptMessages.filter((m) => m.role === "system")[0];
|
|
@@ -16,7 +16,7 @@ async function verificationAgent({ trace, task, conversation, }) {
|
|
|
16
16
|
const messages = await (0, llm_1.getPrompt)("agent-steps-verification", {
|
|
17
17
|
task,
|
|
18
18
|
conversation: conversation.join("\n"),
|
|
19
|
-
},
|
|
19
|
+
}, 5);
|
|
20
20
|
const llm = new llm_1.LLM({ provider: "openai" });
|
|
21
21
|
const response = await llm.createChatCompletion({
|
|
22
22
|
trace: verificationAgentSpan,
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
2
|
+
|
|
3
|
+
import { test } from "@playwright/test";
|
|
4
|
+
import path from "path";
|
|
5
|
+
|
|
6
|
+
test("should annotate all links on empirical landing page", async ({
|
|
7
|
+
page,
|
|
8
|
+
}) => {
|
|
9
|
+
await page.goto(
|
|
10
|
+
"https://assets-test.empirical.run/selector-hints-testing/dom-1.html",
|
|
11
|
+
);
|
|
12
|
+
|
|
13
|
+
await page.addScriptTag({
|
|
14
|
+
path: path.resolve(__dirname, "./annotate-elements.js"),
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
const annotations = await page.evaluate(() => {
|
|
18
|
+
const { annotations } = window.annotateClickableElements();
|
|
19
|
+
|
|
20
|
+
return Object.entries(annotations).map(([hint, config]) => ({
|
|
21
|
+
hint,
|
|
22
|
+
innerText: config.node.innerText,
|
|
23
|
+
tagName: config.node.tagName,
|
|
24
|
+
href: config.node.href,
|
|
25
|
+
}));
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
test.expect(annotations).toEqual([
|
|
29
|
+
{
|
|
30
|
+
hint: "A",
|
|
31
|
+
innerText: "Empirical",
|
|
32
|
+
tagName: "A",
|
|
33
|
+
href: "https://assets-test.empirical.run/",
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
hint: "B",
|
|
37
|
+
innerText: "Blog",
|
|
38
|
+
tagName: "A",
|
|
39
|
+
href: "https://assets-test.empirical.run/blog",
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
hint: "C",
|
|
43
|
+
innerText: "Contact us",
|
|
44
|
+
tagName: "A",
|
|
45
|
+
href: "https://assets-test.empirical.run/contact",
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
hint: "D",
|
|
49
|
+
href: "https://dash.empirical.run/",
|
|
50
|
+
innerText: "Login ↗\n(opens in a new tab)",
|
|
51
|
+
tagName: "A",
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
hint: "E",
|
|
55
|
+
innerText: "Get early access",
|
|
56
|
+
tagName: "A",
|
|
57
|
+
href: "https://assets-test.empirical.run/contact",
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
hint: "F",
|
|
61
|
+
innerText: "Playwright\n(opens in a new tab)",
|
|
62
|
+
tagName: "A",
|
|
63
|
+
href: "https://github.com/microsoft/playwright",
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
hint: "G",
|
|
67
|
+
innerText: "Meet with us",
|
|
68
|
+
tagName: "A",
|
|
69
|
+
href: "https://assets-test.empirical.run/contact",
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
hint: "H",
|
|
73
|
+
innerText: "Privacy Policy",
|
|
74
|
+
tagName: "A",
|
|
75
|
+
href: "https://assets-test.empirical.run/privacy.html",
|
|
76
|
+
},
|
|
77
|
+
]);
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
test("should annotate all important items on quizizz page", async ({
|
|
81
|
+
page,
|
|
82
|
+
}) => {
|
|
83
|
+
await page.goto(
|
|
84
|
+
"https://assets-test.empirical.run/selector-hints-testing/dom-2/index.html",
|
|
85
|
+
);
|
|
86
|
+
|
|
87
|
+
await page.addScriptTag({
|
|
88
|
+
path: path.resolve(__dirname, "./annotate-elements.js"),
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
const annotations = await page.evaluate(() => {
|
|
92
|
+
const { annotations } = window.annotateClickableElements();
|
|
93
|
+
|
|
94
|
+
return Object.entries(annotations).map(([hint, config]) => ({
|
|
95
|
+
hint,
|
|
96
|
+
innerText: config.node.innerText.toLowerCase().trim(),
|
|
97
|
+
tagName: config.node.tagName,
|
|
98
|
+
testId: config.node.getAttribute("data-testid"),
|
|
99
|
+
href: config.node.href,
|
|
100
|
+
}));
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
test
|
|
104
|
+
.expect(annotations.find((item) => item.innerText.includes("enter code")))
|
|
105
|
+
.toBeTruthy();
|
|
106
|
+
test
|
|
107
|
+
.expect(annotations.find((item) => item.innerText.includes("get help")))
|
|
108
|
+
.toBeTruthy();
|
|
109
|
+
test
|
|
110
|
+
.expect(
|
|
111
|
+
annotations.find(
|
|
112
|
+
(item) =>
|
|
113
|
+
item.innerText.includes("create") &&
|
|
114
|
+
item.testId === "create-content-button",
|
|
115
|
+
),
|
|
116
|
+
)
|
|
117
|
+
.toBeTruthy();
|
|
118
|
+
test
|
|
119
|
+
.expect(
|
|
120
|
+
annotations.find(
|
|
121
|
+
(item) =>
|
|
122
|
+
item.innerText.includes("explore") &&
|
|
123
|
+
item.href === "https://quizizz.com/admin",
|
|
124
|
+
),
|
|
125
|
+
)
|
|
126
|
+
.toBeTruthy();
|
|
127
|
+
test
|
|
128
|
+
.expect(
|
|
129
|
+
annotations.find(
|
|
130
|
+
(item) =>
|
|
131
|
+
item.innerText.includes("library") &&
|
|
132
|
+
item.href === "https://quizizz.com/admin/my-library/createdByMe",
|
|
133
|
+
),
|
|
134
|
+
)
|
|
135
|
+
.toBeTruthy();
|
|
136
|
+
test
|
|
137
|
+
.expect(
|
|
138
|
+
annotations.find(
|
|
139
|
+
(item) =>
|
|
140
|
+
item.innerText.includes("reports") &&
|
|
141
|
+
item.href === "https://quizizz.com/admin/reports",
|
|
142
|
+
),
|
|
143
|
+
)
|
|
144
|
+
.toBeTruthy();
|
|
145
|
+
test
|
|
146
|
+
.expect(
|
|
147
|
+
annotations.find(
|
|
148
|
+
(item) =>
|
|
149
|
+
item.innerText.includes("classes") &&
|
|
150
|
+
item.href === "https://quizizz.com/admin/classes",
|
|
151
|
+
),
|
|
152
|
+
)
|
|
153
|
+
.toBeTruthy();
|
|
154
|
+
test
|
|
155
|
+
.expect(
|
|
156
|
+
annotations.find(
|
|
157
|
+
(item) =>
|
|
158
|
+
item.innerText.includes("accommodations") &&
|
|
159
|
+
item.href ===
|
|
160
|
+
"https://quizizz.com/admin/differentiation/accommodations",
|
|
161
|
+
),
|
|
162
|
+
)
|
|
163
|
+
.toBeTruthy();
|
|
164
|
+
test
|
|
165
|
+
.expect(
|
|
166
|
+
annotations.find(
|
|
167
|
+
(item) =>
|
|
168
|
+
item.innerText.includes("quizizz ai") &&
|
|
169
|
+
item.href === "https://quizizz.com/admin/quizizz-ai",
|
|
170
|
+
),
|
|
171
|
+
)
|
|
172
|
+
.toBeTruthy();
|
|
173
|
+
test
|
|
174
|
+
.expect(
|
|
175
|
+
annotations.find(
|
|
176
|
+
(item) =>
|
|
177
|
+
item.innerText.includes("start your free trial") &&
|
|
178
|
+
item.href === "https://quizizz.com/super-pricing",
|
|
179
|
+
),
|
|
180
|
+
)
|
|
181
|
+
.toBeTruthy();
|
|
182
|
+
test
|
|
183
|
+
.expect(
|
|
184
|
+
annotations.find(
|
|
185
|
+
(item) =>
|
|
186
|
+
item.innerText.includes("upgrade") &&
|
|
187
|
+
item.href === "https://quizizz.com/super-pricing?backto=/admin",
|
|
188
|
+
),
|
|
189
|
+
)
|
|
190
|
+
.toBeTruthy();
|
|
191
|
+
|
|
192
|
+
test
|
|
193
|
+
.expect(
|
|
194
|
+
annotations.find(
|
|
195
|
+
(item) =>
|
|
196
|
+
item.tagName === "INPUT" &&
|
|
197
|
+
item.testId === "emphasized-search-bar-input",
|
|
198
|
+
),
|
|
199
|
+
)
|
|
200
|
+
.toBeTruthy();
|
|
201
|
+
|
|
202
|
+
test
|
|
203
|
+
.expect(
|
|
204
|
+
annotations.find(
|
|
205
|
+
(item) =>
|
|
206
|
+
item.tagName === "BUTTON" &&
|
|
207
|
+
item.innerText.includes("verify details") &&
|
|
208
|
+
item.testId === "verify-profile-cta",
|
|
209
|
+
),
|
|
210
|
+
)
|
|
211
|
+
.toBeTruthy();
|
|
212
|
+
|
|
213
|
+
test
|
|
214
|
+
.expect(
|
|
215
|
+
annotations.find(
|
|
216
|
+
(item) =>
|
|
217
|
+
item.tagName === "BUTTON" && item.innerText.includes("for you"),
|
|
218
|
+
),
|
|
219
|
+
)
|
|
220
|
+
.toBeTruthy();
|
|
221
|
+
|
|
222
|
+
test
|
|
223
|
+
.expect(
|
|
224
|
+
annotations.find(
|
|
225
|
+
(item) =>
|
|
226
|
+
item.tagName === "BUTTON" && item.innerText.includes("assessments"),
|
|
227
|
+
),
|
|
228
|
+
)
|
|
229
|
+
.toBeTruthy();
|
|
230
|
+
|
|
231
|
+
test
|
|
232
|
+
.expect(
|
|
233
|
+
annotations.find(
|
|
234
|
+
(item) =>
|
|
235
|
+
item.tagName === "BUTTON" && item.innerText.includes("lessons"),
|
|
236
|
+
),
|
|
237
|
+
)
|
|
238
|
+
.toBeTruthy();
|
|
239
|
+
|
|
240
|
+
test
|
|
241
|
+
.expect(
|
|
242
|
+
annotations.find(
|
|
243
|
+
(item) =>
|
|
244
|
+
item.tagName === "BUTTON" &&
|
|
245
|
+
item.innerText.includes("interactive videos"),
|
|
246
|
+
),
|
|
247
|
+
)
|
|
248
|
+
.toBeTruthy();
|
|
249
|
+
|
|
250
|
+
test
|
|
251
|
+
.expect(
|
|
252
|
+
annotations.find(
|
|
253
|
+
(item) =>
|
|
254
|
+
item.tagName === "BUTTON" && item.innerText.includes("passages"),
|
|
255
|
+
),
|
|
256
|
+
)
|
|
257
|
+
.toBeTruthy();
|
|
258
|
+
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"add-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/add-scenario-agent.evals.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,QAAA,MAAM,4BAA4B,EAAE,UAkBnC,CAAC;AAEF,eAAe,4BAA4B,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const run_1 = require("../agent/codegen/run");
|
|
4
|
+
const addScenarioCodeAgentEvaluate = async ({ item, trace }) => {
|
|
5
|
+
const { testCase, testFiles, pageFiles, testFilePath } = item.input;
|
|
6
|
+
const response = await (0, run_1.getAddScenarioCompletion)({
|
|
7
|
+
testCase,
|
|
8
|
+
testFiles,
|
|
9
|
+
pageFiles,
|
|
10
|
+
testFilePath,
|
|
11
|
+
trace,
|
|
12
|
+
});
|
|
13
|
+
return {
|
|
14
|
+
scores: [
|
|
15
|
+
{
|
|
16
|
+
name: "equality",
|
|
17
|
+
value: item.expectedOutput === response ? 1 : 0,
|
|
18
|
+
},
|
|
19
|
+
],
|
|
20
|
+
output: response,
|
|
21
|
+
};
|
|
22
|
+
};
|
|
23
|
+
exports.default = addScenarioCodeAgentEvaluate;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"update-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/update-scenario-agent.evals.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAUpC,QAAA,MAAM,+BAA+B,EAAE,UAiDtC,CAAC;AAEF,eAAe,+BAA+B,CAAC"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const js_levenshtein_1 = __importDefault(require("js-levenshtein"));
|
|
7
|
+
const update_flow_1 = require("../agent/codegen/update-flow");
|
|
8
|
+
const utils_1 = require("../agent/codegen/utils");
|
|
9
|
+
const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
|
|
10
|
+
const { testCase, testFiles, pageFiles, testFilePath, testFileContent } = item.input;
|
|
11
|
+
const response = await (0, update_flow_1.getUpdateTestCodeCompletion)({
|
|
12
|
+
testCase,
|
|
13
|
+
testFiles,
|
|
14
|
+
pageFiles,
|
|
15
|
+
testFilePath,
|
|
16
|
+
testFileContent,
|
|
17
|
+
trace,
|
|
18
|
+
});
|
|
19
|
+
const fileChanges = (0, utils_1.extractTestUpdates)(response);
|
|
20
|
+
const expectedFileChanges = (0, utils_1.extractTestUpdates)(item.expectedOutput);
|
|
21
|
+
const fileChangeCount = fileChanges.length;
|
|
22
|
+
const expectedFileChangeCount = expectedFileChanges.length;
|
|
23
|
+
const correctFilePathChanges = expectedFileChanges.every((ef) => fileChanges.some((f) => f.filePath === ef.filePath));
|
|
24
|
+
const distanceScores = [];
|
|
25
|
+
expectedFileChanges.forEach((ef) => fileChanges.forEach((f) => {
|
|
26
|
+
if (f.filePath === ef.filePath && f.newCode && ef.newCode) {
|
|
27
|
+
const maxLength = ef.newCode.length > f.newCode.length
|
|
28
|
+
? ef.newCode.length
|
|
29
|
+
: f.newCode.length;
|
|
30
|
+
distanceScores.push(1 - (0, js_levenshtein_1.default)(f.newCode || "", ef.newCode || "") / maxLength);
|
|
31
|
+
}
|
|
32
|
+
}));
|
|
33
|
+
let score = 0;
|
|
34
|
+
if (fileChangeCount === expectedFileChangeCount && correctFilePathChanges) {
|
|
35
|
+
score = distanceScores.length
|
|
36
|
+
? distanceScores.reduce((agg, s) => agg * s)
|
|
37
|
+
: 0;
|
|
38
|
+
}
|
|
39
|
+
return {
|
|
40
|
+
scores: [
|
|
41
|
+
{
|
|
42
|
+
name: "score",
|
|
43
|
+
value: score,
|
|
44
|
+
},
|
|
45
|
+
],
|
|
46
|
+
output: response,
|
|
47
|
+
};
|
|
48
|
+
};
|
|
49
|
+
exports.default = updateScenarioCodeAgentEvaluate;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.35.4",
|
|
3
|
+
"version": "0.35.6",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -52,8 +52,10 @@
|
|
|
52
52
|
"@types/detect-port": "^1.3.5",
|
|
53
53
|
"@types/express": "^4.17.21",
|
|
54
54
|
"@types/fs-extra": "^11.0.4",
|
|
55
|
+
"@types/js-levenshtein": "^1.1.3",
|
|
55
56
|
"@types/lodash.isequal": "^4.5.8",
|
|
56
|
-
"@types/md5": "^2.3.5"
|
|
57
|
+
"@types/md5": "^2.3.5",
|
|
58
|
+
"js-levenshtein": "^1.1.6"
|
|
57
59
|
},
|
|
58
60
|
"scripts": {
|
|
59
61
|
"dev": "tsc --build --watch",
|
|
@@ -61,6 +63,7 @@
|
|
|
61
63
|
"clean": "tsc --build --clean",
|
|
62
64
|
"lint": "eslint .",
|
|
63
65
|
"test": "vitest run",
|
|
66
|
+
"e2e-test": "npx playwright test",
|
|
64
67
|
"test:watch": "vitest",
|
|
65
68
|
"test:watch-files": "vitest $0 --watch"
|
|
66
69
|
}
|