@empiricalrun/test-gen 0.35.4 → 0.35.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.35.6
4
+
5
+ ### Patch Changes
6
+
7
+ - 948f534: fix: update verification agent prompt
8
+
9
+ ## 0.35.5
10
+
11
+ ### Patch Changes
12
+
13
+ - 8e18e5b: feat: add scenario code agent evals
14
+ - d6f9de2: fix: add tests for annotation script
15
+
3
16
  ## 0.35.4
4
17
 
5
18
  ### Patch Changes
@@ -0,0 +1,258 @@
1
+ // @ts-nocheck
2
+
3
+ import { test } from "@playwright/test";
4
+ import path from "path";
5
+
6
+ test("should annotate all links on empirical landing page", async ({
7
+ page,
8
+ }) => {
9
+ await page.goto(
10
+ "https://assets-test.empirical.run/selector-hints-testing/dom-1.html",
11
+ );
12
+
13
+ await page.addScriptTag({
14
+ path: path.resolve(__dirname, "./annotate-elements.js"),
15
+ });
16
+
17
+ const annotations = await page.evaluate(() => {
18
+ const { annotations } = window.annotateClickableElements();
19
+
20
+ return Object.entries(annotations).map(([hint, config]) => ({
21
+ hint,
22
+ innerText: config.node.innerText,
23
+ tagName: config.node.tagName,
24
+ href: config.node.href,
25
+ }));
26
+ });
27
+
28
+ test.expect(annotations).toEqual([
29
+ {
30
+ hint: "A",
31
+ innerText: "Empirical",
32
+ tagName: "A",
33
+ href: "https://assets-test.empirical.run/",
34
+ },
35
+ {
36
+ hint: "B",
37
+ innerText: "Blog",
38
+ tagName: "A",
39
+ href: "https://assets-test.empirical.run/blog",
40
+ },
41
+ {
42
+ hint: "C",
43
+ innerText: "Contact us",
44
+ tagName: "A",
45
+ href: "https://assets-test.empirical.run/contact",
46
+ },
47
+ {
48
+ hint: "D",
49
+ href: "https://dash.empirical.run/",
50
+ innerText: "Login ↗\n(opens in a new tab)",
51
+ tagName: "A",
52
+ },
53
+ {
54
+ hint: "E",
55
+ innerText: "Get early access",
56
+ tagName: "A",
57
+ href: "https://assets-test.empirical.run/contact",
58
+ },
59
+ {
60
+ hint: "F",
61
+ innerText: "Playwright\n(opens in a new tab)",
62
+ tagName: "A",
63
+ href: "https://github.com/microsoft/playwright",
64
+ },
65
+ {
66
+ hint: "G",
67
+ innerText: "Meet with us",
68
+ tagName: "A",
69
+ href: "https://assets-test.empirical.run/contact",
70
+ },
71
+ {
72
+ hint: "H",
73
+ innerText: "Privacy Policy",
74
+ tagName: "A",
75
+ href: "https://assets-test.empirical.run/privacy.html",
76
+ },
77
+ ]);
78
+ });
79
+
80
+ test("should annotate all important items on quizizz page", async ({
81
+ page,
82
+ }) => {
83
+ await page.goto(
84
+ "https://assets-test.empirical.run/selector-hints-testing/dom-2/index.html",
85
+ );
86
+
87
+ await page.addScriptTag({
88
+ path: path.resolve(__dirname, "./annotate-elements.js"),
89
+ });
90
+
91
+ const annotations = await page.evaluate(() => {
92
+ const { annotations } = window.annotateClickableElements();
93
+
94
+ return Object.entries(annotations).map(([hint, config]) => ({
95
+ hint,
96
+ innerText: config.node.innerText.toLowerCase().trim(),
97
+ tagName: config.node.tagName,
98
+ testId: config.node.getAttribute("data-testid"),
99
+ href: config.node.href,
100
+ }));
101
+ });
102
+
103
+ test
104
+ .expect(annotations.find((item) => item.innerText.includes("enter code")))
105
+ .toBeTruthy();
106
+ test
107
+ .expect(annotations.find((item) => item.innerText.includes("get help")))
108
+ .toBeTruthy();
109
+ test
110
+ .expect(
111
+ annotations.find(
112
+ (item) =>
113
+ item.innerText.includes("create") &&
114
+ item.testId === "create-content-button",
115
+ ),
116
+ )
117
+ .toBeTruthy();
118
+ test
119
+ .expect(
120
+ annotations.find(
121
+ (item) =>
122
+ item.innerText.includes("explore") &&
123
+ item.href === "https://quizizz.com/admin",
124
+ ),
125
+ )
126
+ .toBeTruthy();
127
+ test
128
+ .expect(
129
+ annotations.find(
130
+ (item) =>
131
+ item.innerText.includes("library") &&
132
+ item.href === "https://quizizz.com/admin/my-library/createdByMe",
133
+ ),
134
+ )
135
+ .toBeTruthy();
136
+ test
137
+ .expect(
138
+ annotations.find(
139
+ (item) =>
140
+ item.innerText.includes("reports") &&
141
+ item.href === "https://quizizz.com/admin/reports",
142
+ ),
143
+ )
144
+ .toBeTruthy();
145
+ test
146
+ .expect(
147
+ annotations.find(
148
+ (item) =>
149
+ item.innerText.includes("classes") &&
150
+ item.href === "https://quizizz.com/admin/classes",
151
+ ),
152
+ )
153
+ .toBeTruthy();
154
+ test
155
+ .expect(
156
+ annotations.find(
157
+ (item) =>
158
+ item.innerText.includes("accommodations") &&
159
+ item.href ===
160
+ "https://quizizz.com/admin/differentiation/accommodations",
161
+ ),
162
+ )
163
+ .toBeTruthy();
164
+ test
165
+ .expect(
166
+ annotations.find(
167
+ (item) =>
168
+ item.innerText.includes("quizizz ai") &&
169
+ item.href === "https://quizizz.com/admin/quizizz-ai",
170
+ ),
171
+ )
172
+ .toBeTruthy();
173
+ test
174
+ .expect(
175
+ annotations.find(
176
+ (item) =>
177
+ item.innerText.includes("start your free trial") &&
178
+ item.href === "https://quizizz.com/super-pricing",
179
+ ),
180
+ )
181
+ .toBeTruthy();
182
+ test
183
+ .expect(
184
+ annotations.find(
185
+ (item) =>
186
+ item.innerText.includes("upgrade") &&
187
+ item.href === "https://quizizz.com/super-pricing?backto=/admin",
188
+ ),
189
+ )
190
+ .toBeTruthy();
191
+
192
+ test
193
+ .expect(
194
+ annotations.find(
195
+ (item) =>
196
+ item.tagName === "INPUT" &&
197
+ item.testId === "emphasized-search-bar-input",
198
+ ),
199
+ )
200
+ .toBeTruthy();
201
+
202
+ test
203
+ .expect(
204
+ annotations.find(
205
+ (item) =>
206
+ item.tagName === "BUTTON" &&
207
+ item.innerText.includes("verify details") &&
208
+ item.testId === "verify-profile-cta",
209
+ ),
210
+ )
211
+ .toBeTruthy();
212
+
213
+ test
214
+ .expect(
215
+ annotations.find(
216
+ (item) =>
217
+ item.tagName === "BUTTON" && item.innerText.includes("for you"),
218
+ ),
219
+ )
220
+ .toBeTruthy();
221
+
222
+ test
223
+ .expect(
224
+ annotations.find(
225
+ (item) =>
226
+ item.tagName === "BUTTON" && item.innerText.includes("assessments"),
227
+ ),
228
+ )
229
+ .toBeTruthy();
230
+
231
+ test
232
+ .expect(
233
+ annotations.find(
234
+ (item) =>
235
+ item.tagName === "BUTTON" && item.innerText.includes("lessons"),
236
+ ),
237
+ )
238
+ .toBeTruthy();
239
+
240
+ test
241
+ .expect(
242
+ annotations.find(
243
+ (item) =>
244
+ item.tagName === "BUTTON" &&
245
+ item.innerText.includes("interactive videos"),
246
+ ),
247
+ )
248
+ .toBeTruthy();
249
+
250
+ test
251
+ .expect(
252
+ annotations.find(
253
+ (item) =>
254
+ item.tagName === "BUTTON" && item.innerText.includes("passages"),
255
+ ),
256
+ )
257
+ .toBeTruthy();
258
+ });
@@ -101,7 +101,7 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
101
101
  await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments), toolCallsSpan);
102
102
  executedActions.push({
103
103
  isError: false,
104
- action: JSON.stringify(toolCall),
104
+ action: JSON.parse(toolCall.function.arguments)?.reason,
105
105
  });
106
106
  lastActionExecTrace = "";
107
107
  }
@@ -109,7 +109,7 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
109
109
  // TODO: implement feedback loop to llm
110
110
  executedActions.push({
111
111
  isError: true,
112
- action: JSON.stringify(toolCall.function.arguments)?.reason,
112
+ action: JSON.parse(toolCall.function.arguments)?.reason,
113
113
  });
114
114
  lastActionExecTrace = e.message;
115
115
  void testgenUpdatesReporter.sendMessage(e.message);
@@ -1,4 +1,12 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
2
  import { TestCase, TestGenConfigOptions } from "../../types";
3
+ export declare function getAddScenarioCompletion({ testCase, testFiles, pageFiles, testFilePath, trace, options, }: {
4
+ testCase: TestCase;
5
+ testFiles: string;
6
+ pageFiles: string;
7
+ testFilePath: string;
8
+ trace?: TraceClient;
9
+ options?: TestGenConfigOptions;
10
+ }): Promise<string>;
3
11
  export declare function generateTest(testCase: TestCase, file: string, options: TestGenConfigOptions, trace?: TraceClient): Promise<TestCase[]>;
4
12
  //# sourceMappingURL=run.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkC,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAkBhF,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,QAAQ,EAAE,CAAC,CA0GrB"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkC,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAkBhF,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,wBAAsB,wBAAwB,CAAC,EAC7C,QAAQ,EACR,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,mBA4BA;AAED,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAgFrB"}
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.generateTest = void 0;
6
+ exports.generateTest = exports.getAddScenarioCompletion = void 0;
7
7
  const llm_1 = require("@empiricalrun/llm");
8
8
  const fs_extra_1 = __importDefault(require("fs-extra"));
9
9
  const logger_1 = require("../../bin/logger");
@@ -12,6 +12,35 @@ const web_1 = require("../../bin/utils/platform/web");
12
12
  const constants_1 = require("../../constants");
13
13
  const fix_ts_errors_1 = require("./fix-ts-errors");
14
14
  const update_flow_1 = require("./update-flow");
15
+ async function getAddScenarioCompletion({ testCase, testFiles, pageFiles, testFilePath, trace, options, }) {
16
+ const promptSpan = trace?.span({
17
+ name: "add-scenario-prompt",
18
+ });
19
+ const instruction = await (0, llm_1.getPrompt)("add-scenario", {
20
+ testFiles: testFiles,
21
+ pageFiles: pageFiles,
22
+ scenarioName: testCase.name,
23
+ scenarioSteps: testCase.steps.join("\n"),
24
+ scenarioFile: testFilePath,
25
+ });
26
+ promptSpan?.end({ output: { instruction } });
27
+ const llm = new llm_1.LLM({
28
+ trace,
29
+ provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
30
+ defaultModel: options?.model || constants_1.DEFAULT_MODEL,
31
+ providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
32
+ });
33
+ const firstShotMessage = await llm.createChatCompletion({
34
+ messages: instruction,
35
+ modelParameters: {
36
+ ...constants_1.DEFAULT_MODEL_PARAMETERS,
37
+ ...options?.modelParameters,
38
+ },
39
+ });
40
+ let response = firstShotMessage?.content || "";
41
+ return response;
42
+ }
43
+ exports.getAddScenarioCompletion = getAddScenarioCompletion;
15
44
  async function generateTest(testCase, file, options, trace) {
16
45
  const logger = new logger_1.CustomLogger();
17
46
  if (!fs_extra_1.default.existsSync(file)) {
@@ -35,43 +64,19 @@ async function generateTest(testCase, file, options, trace) {
35
64
  name: "create-test",
36
65
  input: {
37
66
  testCase,
38
- file,
39
- options,
67
+ testFiles: codePrompt,
68
+ pageFiles: pomPrompt,
69
+ testFilePath: file,
40
70
  },
41
71
  });
42
- createTestSpan?.event({
43
- name: "collate-files-as-text",
44
- output: {
45
- codePrompt,
46
- pomPrompt,
47
- testFileContent,
48
- },
49
- });
50
- const promptSpan = createTestSpan?.span({
51
- name: "add-scenario-prompt",
52
- });
53
- const instruction = await (0, llm_1.getPrompt)("add-scenario", {
72
+ const response = await getAddScenarioCompletion({
73
+ testCase,
54
74
  testFiles: codePrompt,
55
75
  pageFiles: pomPrompt,
56
- scenarioName: testCase.name,
57
- scenarioSteps: testCase.steps.join("\n"),
58
- scenarioFile: file,
59
- });
60
- promptSpan?.end({ output: { instruction } });
61
- const llm = new llm_1.LLM({
62
- trace,
63
- provider: options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
64
- defaultModel: options.model || constants_1.DEFAULT_MODEL,
65
- providerApiKey: constants_1.MODEL_API_KEYS[options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
66
- });
67
- const firstShotMessage = await llm.createChatCompletion({
68
- messages: instruction,
69
- modelParameters: {
70
- ...constants_1.DEFAULT_MODEL_PARAMETERS,
71
- ...options.modelParameters,
72
- },
76
+ testFilePath: file,
77
+ trace: createTestSpan,
78
+ options,
73
79
  });
74
- let response = firstShotMessage?.content || "";
75
80
  logger.success("Test generated successfully!");
76
81
  const readWriteFileSpan = trace?.span({ name: "write-to-file" });
77
82
  let contents = fs_extra_1.default.readFileSync(file, "utf-8");
@@ -3,6 +3,15 @@ import { TestCase, TestGenConfigOptions } from "../../types";
3
3
  type UpdatedTestCase = TestCase & {
4
4
  updatedFiles: string[];
5
5
  };
6
+ export declare function getUpdateTestCodeCompletion({ testCase, testFileContent, testFiles, pageFiles, testFilePath, trace, options, }: {
7
+ testCase: TestCase;
8
+ testFiles: string;
9
+ pageFiles: string;
10
+ testFilePath: string;
11
+ testFileContent: string;
12
+ trace?: TraceClient;
13
+ options?: TestGenConfigOptions;
14
+ }): Promise<string>;
6
15
  export declare function updateTest(testCase: TestCase, file: string, options: TestGenConfigOptions | undefined, logging?: boolean, validate?: boolean, trace?: TraceClient): Promise<UpdatedTestCase[]>;
7
16
  export declare function appendCreateTestBlock({ testCase, file, options, trace, validateTypes, }: {
8
17
  testCase: TestCase;
@@ -1 +1 @@
1
- {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAsB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CAsG5B;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA+E7B"}
1
+ {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAsB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CAAC,MAAM,CAAC,CA6ClB;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CA6D5B;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA+E7B"}
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.appendCreateTestBlock = exports.updateTest = void 0;
6
+ exports.appendCreateTestBlock = exports.updateTest = exports.getUpdateTestCodeCompletion = void 0;
7
7
  const llm_1 = require("@empiricalrun/llm");
8
8
  const crypto_1 = __importDefault(require("crypto"));
9
9
  const fs_extra_1 = __importDefault(require("fs-extra"));
@@ -103,41 +103,8 @@ async function applyFileChanges({ validateTypes = true, trace, testCase, fileCha
103
103
  logger.success(`${fileChange.filePath} file formatted successfully!`);
104
104
  }));
105
105
  }
106
- async function updateTest(testCase, file, options, logging = true, validate = true, trace) {
107
- const logger = new logger_1.CustomLogger({ useReporter: logging });
108
- const context = await (0, context_1.contextForGeneration)(file);
109
- const { codePrompt, pomPrompt, testFileContent } = context;
110
- const generatedTestCases = [];
111
- logger.logEmptyLine();
112
- const session = (0, session_1.getSessionDetails)();
113
- trace =
114
- trace ||
115
- llm_1.langfuseInstance?.trace({
116
- name: "update-test",
117
- id: crypto_1.default.randomUUID(),
118
- release: session.version,
119
- tags: [
120
- options?.metadata.projectName || "",
121
- options?.metadata.environment || "",
122
- ].filter((s) => !!s),
123
- });
124
- const updateTestSpan = trace?.span({
125
- name: "update-test",
126
- input: {
127
- testCase,
128
- file,
129
- options,
130
- },
131
- });
132
- updateTestSpan?.event({
133
- name: "collate-files-as-text",
134
- output: {
135
- codePrompt,
136
- pomPrompt,
137
- testFileContent,
138
- },
139
- });
140
- const promptSpan = updateTestSpan?.span({
106
+ async function getUpdateTestCodeCompletion({ testCase, testFileContent, testFiles, pageFiles, testFilePath, trace, options, }) {
107
+ const promptSpan = trace?.span({
141
108
  name: "update-scenario-prompt",
142
109
  });
143
110
  const promptName = "update-scenario";
@@ -154,16 +121,16 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
154
121
  suites: testCase?.suites || [],
155
122
  });
156
123
  const instruction = await (0, llm_1.getPrompt)(promptName, {
157
- testFiles: codePrompt,
158
- pageFiles: pomPrompt,
124
+ testFiles: testFiles,
125
+ pageFiles: pageFiles,
159
126
  scenarioName,
160
127
  scenarioSteps: testCase.steps.join("\n"),
161
- scenarioFile: file,
128
+ scenarioFile: testFilePath,
162
129
  currentScenarioCodeBlock,
163
130
  });
164
131
  promptSpan?.end({ output: { instruction } });
165
132
  const llm = new llm_1.LLM({
166
- trace: updateTestSpan,
133
+ trace,
167
134
  provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
168
135
  defaultModel: options?.model || constants_1.DEFAULT_MODEL,
169
136
  providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
@@ -176,6 +143,43 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
176
143
  },
177
144
  });
178
145
  let response = firstShotMessage?.content || "";
146
+ return response;
147
+ }
148
+ exports.getUpdateTestCodeCompletion = getUpdateTestCodeCompletion;
149
+ async function updateTest(testCase, file, options, logging = true, validate = true, trace) {
150
+ const logger = new logger_1.CustomLogger({ useReporter: logging });
151
+ const context = await (0, context_1.contextForGeneration)(file);
152
+ const { codePrompt, pomPrompt, testFileContent } = context;
153
+ const generatedTestCases = [];
154
+ logger.logEmptyLine();
155
+ const session = (0, session_1.getSessionDetails)();
156
+ trace =
157
+ trace ||
158
+ llm_1.langfuseInstance?.trace({
159
+ name: "update-test",
160
+ id: crypto_1.default.randomUUID(),
161
+ release: session.version,
162
+ tags: [
163
+ options?.metadata.projectName || "",
164
+ options?.metadata.environment || "",
165
+ ].filter((s) => !!s),
166
+ });
167
+ const request = {
168
+ testCase,
169
+ testFileContent,
170
+ testFiles: codePrompt,
171
+ pageFiles: pomPrompt,
172
+ testFilePath: file,
173
+ options,
174
+ };
175
+ const updateTestSpan = trace?.span({
176
+ name: "update-test",
177
+ input: request,
178
+ });
179
+ const response = await getUpdateTestCodeCompletion({
180
+ ...request,
181
+ trace: updateTestSpan,
182
+ });
179
183
  logger.success("Test generated successfully!");
180
184
  const fileChanges = (0, utils_1.extractTestUpdates)(response);
181
185
  await applyFileChanges({
@@ -40,7 +40,7 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
40
40
  failedActions: failedActions.map((a) => a).join("\n"),
41
41
  executedActions: executedActions.map((a) => a).join("\n"),
42
42
  pageUrl,
43
- }, useHints ? 16 : 14);
43
+ }, 18);
44
44
  // assuming there is only one user message in the prompt. if there is a change in langfuse prompt format, this will need to be updated
45
45
  const userMessage = promptMessages.filter((m) => m.role === "user")[0];
46
46
  const systemMessage = promptMessages.filter((m) => m.role === "system")[0];
@@ -16,7 +16,7 @@ async function verificationAgent({ trace, task, conversation, }) {
16
16
  const messages = await (0, llm_1.getPrompt)("agent-steps-verification", {
17
17
  task,
18
18
  conversation: conversation.join("\n"),
19
- }, 4);
19
+ }, 5);
20
20
  const llm = new llm_1.LLM({ provider: "openai" });
21
21
  const response = await llm.createChatCompletion({
22
22
  trace: verificationAgentSpan,
@@ -0,0 +1,258 @@
1
+ // @ts-nocheck
2
+
3
+ import { test } from "@playwright/test";
4
+ import path from "path";
5
+
6
+ test("should annotate all links on empirical landing page", async ({
7
+ page,
8
+ }) => {
9
+ await page.goto(
10
+ "https://assets-test.empirical.run/selector-hints-testing/dom-1.html",
11
+ );
12
+
13
+ await page.addScriptTag({
14
+ path: path.resolve(__dirname, "./annotate-elements.js"),
15
+ });
16
+
17
+ const annotations = await page.evaluate(() => {
18
+ const { annotations } = window.annotateClickableElements();
19
+
20
+ return Object.entries(annotations).map(([hint, config]) => ({
21
+ hint,
22
+ innerText: config.node.innerText,
23
+ tagName: config.node.tagName,
24
+ href: config.node.href,
25
+ }));
26
+ });
27
+
28
+ test.expect(annotations).toEqual([
29
+ {
30
+ hint: "A",
31
+ innerText: "Empirical",
32
+ tagName: "A",
33
+ href: "https://assets-test.empirical.run/",
34
+ },
35
+ {
36
+ hint: "B",
37
+ innerText: "Blog",
38
+ tagName: "A",
39
+ href: "https://assets-test.empirical.run/blog",
40
+ },
41
+ {
42
+ hint: "C",
43
+ innerText: "Contact us",
44
+ tagName: "A",
45
+ href: "https://assets-test.empirical.run/contact",
46
+ },
47
+ {
48
+ hint: "D",
49
+ href: "https://dash.empirical.run/",
50
+ innerText: "Login ↗\n(opens in a new tab)",
51
+ tagName: "A",
52
+ },
53
+ {
54
+ hint: "E",
55
+ innerText: "Get early access",
56
+ tagName: "A",
57
+ href: "https://assets-test.empirical.run/contact",
58
+ },
59
+ {
60
+ hint: "F",
61
+ innerText: "Playwright\n(opens in a new tab)",
62
+ tagName: "A",
63
+ href: "https://github.com/microsoft/playwright",
64
+ },
65
+ {
66
+ hint: "G",
67
+ innerText: "Meet with us",
68
+ tagName: "A",
69
+ href: "https://assets-test.empirical.run/contact",
70
+ },
71
+ {
72
+ hint: "H",
73
+ innerText: "Privacy Policy",
74
+ tagName: "A",
75
+ href: "https://assets-test.empirical.run/privacy.html",
76
+ },
77
+ ]);
78
+ });
79
+
80
+ test("should annotate all important items on quizizz page", async ({
81
+ page,
82
+ }) => {
83
+ await page.goto(
84
+ "https://assets-test.empirical.run/selector-hints-testing/dom-2/index.html",
85
+ );
86
+
87
+ await page.addScriptTag({
88
+ path: path.resolve(__dirname, "./annotate-elements.js"),
89
+ });
90
+
91
+ const annotations = await page.evaluate(() => {
92
+ const { annotations } = window.annotateClickableElements();
93
+
94
+ return Object.entries(annotations).map(([hint, config]) => ({
95
+ hint,
96
+ innerText: config.node.innerText.toLowerCase().trim(),
97
+ tagName: config.node.tagName,
98
+ testId: config.node.getAttribute("data-testid"),
99
+ href: config.node.href,
100
+ }));
101
+ });
102
+
103
+ test
104
+ .expect(annotations.find((item) => item.innerText.includes("enter code")))
105
+ .toBeTruthy();
106
+ test
107
+ .expect(annotations.find((item) => item.innerText.includes("get help")))
108
+ .toBeTruthy();
109
+ test
110
+ .expect(
111
+ annotations.find(
112
+ (item) =>
113
+ item.innerText.includes("create") &&
114
+ item.testId === "create-content-button",
115
+ ),
116
+ )
117
+ .toBeTruthy();
118
+ test
119
+ .expect(
120
+ annotations.find(
121
+ (item) =>
122
+ item.innerText.includes("explore") &&
123
+ item.href === "https://quizizz.com/admin",
124
+ ),
125
+ )
126
+ .toBeTruthy();
127
+ test
128
+ .expect(
129
+ annotations.find(
130
+ (item) =>
131
+ item.innerText.includes("library") &&
132
+ item.href === "https://quizizz.com/admin/my-library/createdByMe",
133
+ ),
134
+ )
135
+ .toBeTruthy();
136
+ test
137
+ .expect(
138
+ annotations.find(
139
+ (item) =>
140
+ item.innerText.includes("reports") &&
141
+ item.href === "https://quizizz.com/admin/reports",
142
+ ),
143
+ )
144
+ .toBeTruthy();
145
+ test
146
+ .expect(
147
+ annotations.find(
148
+ (item) =>
149
+ item.innerText.includes("classes") &&
150
+ item.href === "https://quizizz.com/admin/classes",
151
+ ),
152
+ )
153
+ .toBeTruthy();
154
+ test
155
+ .expect(
156
+ annotations.find(
157
+ (item) =>
158
+ item.innerText.includes("accommodations") &&
159
+ item.href ===
160
+ "https://quizizz.com/admin/differentiation/accommodations",
161
+ ),
162
+ )
163
+ .toBeTruthy();
164
+ test
165
+ .expect(
166
+ annotations.find(
167
+ (item) =>
168
+ item.innerText.includes("quizizz ai") &&
169
+ item.href === "https://quizizz.com/admin/quizizz-ai",
170
+ ),
171
+ )
172
+ .toBeTruthy();
173
+ test
174
+ .expect(
175
+ annotations.find(
176
+ (item) =>
177
+ item.innerText.includes("start your free trial") &&
178
+ item.href === "https://quizizz.com/super-pricing",
179
+ ),
180
+ )
181
+ .toBeTruthy();
182
+ test
183
+ .expect(
184
+ annotations.find(
185
+ (item) =>
186
+ item.innerText.includes("upgrade") &&
187
+ item.href === "https://quizizz.com/super-pricing?backto=/admin",
188
+ ),
189
+ )
190
+ .toBeTruthy();
191
+
192
+ test
193
+ .expect(
194
+ annotations.find(
195
+ (item) =>
196
+ item.tagName === "INPUT" &&
197
+ item.testId === "emphasized-search-bar-input",
198
+ ),
199
+ )
200
+ .toBeTruthy();
201
+
202
+ test
203
+ .expect(
204
+ annotations.find(
205
+ (item) =>
206
+ item.tagName === "BUTTON" &&
207
+ item.innerText.includes("verify details") &&
208
+ item.testId === "verify-profile-cta",
209
+ ),
210
+ )
211
+ .toBeTruthy();
212
+
213
+ test
214
+ .expect(
215
+ annotations.find(
216
+ (item) =>
217
+ item.tagName === "BUTTON" && item.innerText.includes("for you"),
218
+ ),
219
+ )
220
+ .toBeTruthy();
221
+
222
+ test
223
+ .expect(
224
+ annotations.find(
225
+ (item) =>
226
+ item.tagName === "BUTTON" && item.innerText.includes("assessments"),
227
+ ),
228
+ )
229
+ .toBeTruthy();
230
+
231
+ test
232
+ .expect(
233
+ annotations.find(
234
+ (item) =>
235
+ item.tagName === "BUTTON" && item.innerText.includes("lessons"),
236
+ ),
237
+ )
238
+ .toBeTruthy();
239
+
240
+ test
241
+ .expect(
242
+ annotations.find(
243
+ (item) =>
244
+ item.tagName === "BUTTON" &&
245
+ item.innerText.includes("interactive videos"),
246
+ ),
247
+ )
248
+ .toBeTruthy();
249
+
250
+ test
251
+ .expect(
252
+ annotations.find(
253
+ (item) =>
254
+ item.tagName === "BUTTON" && item.innerText.includes("passages"),
255
+ ),
256
+ )
257
+ .toBeTruthy();
258
+ });
@@ -0,0 +1,4 @@
1
+ import { EvaluateFn } from "./type";
2
+ declare const addScenarioCodeAgentEvaluate: EvaluateFn;
3
+ export default addScenarioCodeAgentEvaluate;
4
+ //# sourceMappingURL=add-scenario-agent.evals.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"add-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/add-scenario-agent.evals.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,QAAA,MAAM,4BAA4B,EAAE,UAkBnC,CAAC;AAEF,eAAe,4BAA4B,CAAC"}
@@ -0,0 +1,23 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const run_1 = require("../agent/codegen/run");
4
+ const addScenarioCodeAgentEvaluate = async ({ item, trace }) => {
5
+ const { testCase, testFiles, pageFiles, testFilePath } = item.input;
6
+ const response = await (0, run_1.getAddScenarioCompletion)({
7
+ testCase,
8
+ testFiles,
9
+ pageFiles,
10
+ testFilePath,
11
+ trace,
12
+ });
13
+ return {
14
+ scores: [
15
+ {
16
+ name: "equality",
17
+ value: item.expectedOutput === response ? 1 : 0,
18
+ },
19
+ ],
20
+ output: response,
21
+ };
22
+ };
23
+ exports.default = addScenarioCodeAgentEvaluate;
@@ -0,0 +1,4 @@
1
+ import { EvaluateFn } from "./type";
2
+ declare const updateScenarioCodeAgentEvaluate: EvaluateFn;
3
+ export default updateScenarioCodeAgentEvaluate;
4
+ //# sourceMappingURL=update-scenario-agent.evals.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"update-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/update-scenario-agent.evals.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAUpC,QAAA,MAAM,+BAA+B,EAAE,UAiDtC,CAAC;AAEF,eAAe,+BAA+B,CAAC"}
@@ -0,0 +1,49 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ const js_levenshtein_1 = __importDefault(require("js-levenshtein"));
7
+ const update_flow_1 = require("../agent/codegen/update-flow");
8
+ const utils_1 = require("../agent/codegen/utils");
9
+ const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
10
+ const { testCase, testFiles, pageFiles, testFilePath, testFileContent } = item.input;
11
+ const response = await (0, update_flow_1.getUpdateTestCodeCompletion)({
12
+ testCase,
13
+ testFiles,
14
+ pageFiles,
15
+ testFilePath,
16
+ testFileContent,
17
+ trace,
18
+ });
19
+ const fileChanges = (0, utils_1.extractTestUpdates)(response);
20
+ const expectedFileChanges = (0, utils_1.extractTestUpdates)(item.expectedOutput);
21
+ const fileChangeCount = fileChanges.length;
22
+ const expectedFileChangeCount = expectedFileChanges.length;
23
+ const correctFilePathChanges = expectedFileChanges.every((ef) => fileChanges.some((f) => f.filePath === ef.filePath));
24
+ const distanceScores = [];
25
+ expectedFileChanges.forEach((ef) => fileChanges.forEach((f) => {
26
+ if (f.filePath === ef.filePath && f.newCode && ef.newCode) {
27
+ const maxLength = ef.newCode.length > f.newCode.length
28
+ ? ef.newCode.length
29
+ : f.newCode.length;
30
+ distanceScores.push(1 - (0, js_levenshtein_1.default)(f.newCode || "", ef.newCode || "") / maxLength);
31
+ }
32
+ }));
33
+ let score = 0;
34
+ if (fileChangeCount === expectedFileChangeCount && correctFilePathChanges) {
35
+ score = distanceScores.length
36
+ ? distanceScores.reduce((agg, s) => agg * s)
37
+ : 0;
38
+ }
39
+ return {
40
+ scores: [
41
+ {
42
+ name: "score",
43
+ value: score,
44
+ },
45
+ ],
46
+ output: response,
47
+ };
48
+ };
49
+ exports.default = updateScenarioCodeAgentEvaluate;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.35.4",
3
+ "version": "0.35.6",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -52,8 +52,10 @@
52
52
  "@types/detect-port": "^1.3.5",
53
53
  "@types/express": "^4.17.21",
54
54
  "@types/fs-extra": "^11.0.4",
55
+ "@types/js-levenshtein": "^1.1.3",
55
56
  "@types/lodash.isequal": "^4.5.8",
56
- "@types/md5": "^2.3.5"
57
+ "@types/md5": "^2.3.5",
58
+ "js-levenshtein": "^1.1.6"
57
59
  },
58
60
  "scripts": {
59
61
  "dev": "tsc --build --watch",
@@ -61,6 +63,7 @@
61
63
  "clean": "tsc --build --clean",
62
64
  "lint": "eslint .",
63
65
  "test": "vitest run",
66
+ "e2e-test": "npx playwright test",
64
67
  "test:watch": "vitest",
65
68
  "test:watch-files": "vitest $0 --watch"
66
69
  }
@@ -0,0 +1,5 @@
1
+ import { defineConfig } from "@playwright/test";
2
+
3
+ export default defineConfig({
4
+ testDir: "./browser-injected-scripts",
5
+ });