@empiricalrun/test-gen 0.46.2 → 0.46.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.46.4
4
+
5
+ ### Patch Changes
6
+
7
+ - 0c29798: test: fix assertion for icon registry test
8
+
9
+ ## 0.46.3
10
+
11
+ ### Patch Changes
12
+
13
+ - f26142f: fix: browsing agent giving wrong executed action
14
+ - 112b429: test: enable parallel test execution in playwright
15
+
3
16
  ## 0.46.2
4
17
 
5
18
  ### Patch Changes
@@ -7,6 +7,10 @@ export type BrowsingAgentOptions = Partial<TestGenConfigOptions> & {
7
7
  disallowedStrings?: string[];
8
8
  };
9
9
  };
10
+ export type BrowserAgentAction = {
11
+ action: string;
12
+ code: string | undefined;
13
+ };
10
14
  export declare function executeTaskUsingBrowsingAgent({ action, page, actions, llm, options, trace, }: {
11
15
  action: string;
12
16
  page: Page;
@@ -14,5 +18,5 @@ export declare function executeTaskUsingBrowsingAgent({ action, page, actions, l
14
18
  llm: LLM;
15
19
  trace?: TraceClient;
16
20
  options: BrowsingAgentOptions;
17
- }): Promise<string[] | undefined>;
21
+ }): Promise<BrowserAgentAction | undefined>;
18
22
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAQlD,MAAM,MAAM,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IACjE,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,MAAM,EACN,IAAI,EACJ,OAAO,EACP,GAAG,EACH,OAAO,EACP,KAAK,GACN,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,IAAI,CAAC;IACX,OAAO,EAAE,iBAAiB,CAAC;IAC3B,GAAG,EAAE,GAAG,CAAC;IACT,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,EAAE,oBAAoB,CAAC;CAC/B,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,SAAS,CAAC,CAiEhC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAQlD,MAAM,MAAM,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IACjE,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1B,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,MAAM,EACN,IAAI,EACJ,OAAO,EACP,GAAG,EACH,OAAO,EACP,KAAK,GACN,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,IAAI,CAAC;IACX,OAAO,EAAE,iBAAiB,CAAC;IAC3B,GAAG,EAAE,GAAG,CAAC;IACT,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,EAAE,oBAAoB,CAAC;CAC/B,GAAG,OAAO,CAAC,kBAAkB,GAAG,SAAS,CAAC,CAoE1C"}
@@ -9,7 +9,7 @@ const html_1 = require("../../utils/html");
9
9
  const utils_1 = require("../utils");
10
10
  const o1_completion_1 = require("./o1-completion");
11
11
  async function executeTaskUsingBrowsingAgent({ action, page, actions, llm, options, trace, }) {
12
- let generatedCodeSteps = [];
12
+ let generatedCodeSteps;
13
13
  const tools = actions.getBrowsingActionSchemas();
14
14
  const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
15
15
  const browsingAgentSpan = trace?.span({
@@ -55,11 +55,14 @@ async function executeTaskUsingBrowsingAgent({ action, page, actions, llm, optio
55
55
  const toolCallsSpan = browsingAgentSpan?.span({ name: "tool-calls" });
56
56
  for (const i in toolCalls) {
57
57
  const toolCall = toolCalls[i];
58
+ const args = (0, utils_1.parseJson)(toolCall.function.arguments);
58
59
  try {
59
60
  const code = await actions.executeAction(toolCall.function.name, (0, utils_1.parseJson)(toolCall.function.arguments), toolCallsSpan);
60
- if (code) {
61
- generatedCodeSteps.push(code);
62
- }
61
+ generatedCodeSteps = {
62
+ // Passing reason as action, in order to pass the correct action that took place to runtime planner
63
+ action: args.reason,
64
+ code,
65
+ };
63
66
  }
64
67
  catch (e) {
65
68
  void testgenUpdatesReporter.sendMessage(e.message);
@@ -10,27 +10,31 @@ const path_1 = __importDefault(require("path"));
10
10
  const utils_1 = require("../../browsing/utils");
11
11
  const element_annotation_1 = require("../element-annotation");
12
12
  const run_1 = require("../run");
13
- let server;
14
- let PORT = 2345;
15
- test_1.test.beforeAll(async () => {
16
- server = http_server_1.default.createServer({
17
- root: path_1.default.join(process.cwd(), "test-data"),
18
- });
19
- // Start the server
20
- await new Promise((resolve) => {
21
- server.listen(PORT, () => {
22
- console.log(`Server running at http://localhost:${PORT}`);
23
- resolve(true);
24
- });
25
- });
26
- });
27
- test_1.test.afterAll(() => {
28
- if (server) {
29
- server.close();
30
- }
13
+ const test = test_1.test.extend({
14
+ server: [
15
+ // eslint-disable-next-line no-empty-pattern
16
+ async ({}, use, workerInfo) => {
17
+ const port = 2345 + workerInfo.workerIndex;
18
+ const server = http_server_1.default.createServer({
19
+ root: path_1.default.join(process.cwd(), "test-data"),
20
+ });
21
+ await new Promise((resolve) => {
22
+ server.listen(port, () => {
23
+ console.log(`Server running at http://localhost:${port}`);
24
+ resolve();
25
+ });
26
+ });
27
+ await use({
28
+ port,
29
+ baseURL: `http://localhost:${port}`,
30
+ });
31
+ server.close();
32
+ },
33
+ { scope: "worker" },
34
+ ],
31
35
  });
32
- (0, test_1.test)("able to scroll and interact with elements", async ({ page }) => {
33
- await page.goto(`http://localhost:${PORT}/blog-page.html`);
36
+ test("able to scroll and interact with elements", async ({ page, server }) => {
37
+ await page.goto(`${server.baseURL}/blog-page.html`);
34
38
  const response = await (0, run_1.createTestUsingMasterAgent)({
35
39
  task: `fill test@test.com into the email field and click the submit`,
36
40
  page,
@@ -41,8 +45,8 @@ test_1.test.afterAll(() => {
41
45
  (0, test_1.expect)(response.code).toContain("await page.getByPlaceholder('Enter your email').fill(\"test@test.com\")");
42
46
  (0, test_1.expect)(response.code).toContain("await page.getByRole('button', { name: 'Subscribe' }).click()");
43
47
  });
44
- (0, test_1.test)("scroll when element does not exist", async ({ page }) => {
45
- await page.goto(`http://localhost:${PORT}/blog-page.html`);
48
+ test("scroll when element does not exist", async ({ page, server }) => {
49
+ await page.goto(`${server.baseURL}/blog-page.html`);
46
50
  const response = await (0, run_1.createTestUsingMasterAgent)({
47
51
  task: `click search button`,
48
52
  page,
@@ -52,8 +56,8 @@ test_1.test.afterAll(() => {
52
56
  (0, test_1.expect)(response.importPaths.length).toBe(0);
53
57
  (0, test_1.expect)(response.code.length).toBe(0);
54
58
  });
55
- (0, test_1.test)("agent can click icons accurately", async ({ page }) => {
56
- await page.goto(`http://localhost:${PORT}/icons-navbar.html`);
59
+ test("agent can click icons accurately", async ({ page, server }) => {
60
+ await page.goto(`${server.baseURL}/icons-navbar.html`);
57
61
  await (0, test_1.expect)(page.getByText("select an icon")).toBeVisible();
58
62
  const response = await (0, run_1.createTestUsingMasterAgent)({
59
63
  task: `click on the gear icon`,
@@ -69,12 +73,15 @@ test_1.test.afterAll(() => {
69
73
  // Validate icons registry
70
74
  const iconsRegistryFile = path_1.default.join(process.cwd(), "icons.json");
71
75
  const icons = JSON.parse(fs_1.default.readFileSync(iconsRegistryFile, "utf-8"));
72
- (0, test_1.expect)(icons.length).toBe(4); // 1 for each unique icon
76
+ (0, test_1.expect)(icons.length).toBeGreaterThan(0);
77
+ // Commenting out this check since with parallel test executions,
78
+ // we can have icons from other tests in the same file
79
+ // expect(icons.length).toBe(4); // 1 for each unique icon
73
80
  fs_1.default.unlinkSync(iconsRegistryFile);
74
81
  });
75
- (0, test_1.test)("annotate and enrich annotations correctly", async ({ page }) => {
82
+ test("annotate and enrich annotations correctly", async ({ page, server }) => {
76
83
  await (0, utils_1.injectPwLocatorGenerator)(page);
77
- await page.goto(`http://localhost:${PORT}/iframe-elements.html`);
84
+ await page.goto(`${server.baseURL}/iframe-elements.html`);
78
85
  const { annotationKeys: keys } = await (0, element_annotation_1.getAnnotationKeys)({
79
86
  page,
80
87
  preference: {
@@ -92,7 +99,7 @@ test_1.test.afterAll(() => {
92
99
  // 2 clickable divs: 1 in main frame, 1 in iframe
93
100
  (0, test_1.expect)(keys.filter((k) => k.text.includes("Lorem Ipsum")).length).toBe(2);
94
101
  });
95
- (0, test_1.test)("fill action with multiple pages", async ({ context }) => {
102
+ test("fill action with multiple pages", async ({ context }) => {
96
103
  const page1 = await context.newPage();
97
104
  const page2 = await context.newPage();
98
105
  const response = await (0, run_1.createTestUsingMasterAgent)({
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAqBlC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAExC,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AA6BrB,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GA8XA"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAqBlC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAExC,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AA6BrB,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAmYA"}
@@ -282,7 +282,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
282
282
  }
283
283
  }
284
284
  else {
285
- generatedCodeSteps = await (0, browsing_1.executeTaskUsingBrowsingAgent)({
285
+ const browserAction = await (0, browsing_1.executeTaskUsingBrowsingAgent)({
286
286
  trace: masterAgentActionSpan,
287
287
  action: output.action,
288
288
  page,
@@ -290,6 +290,12 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
290
290
  llm,
291
291
  actions,
292
292
  });
293
+ if (browserAction) {
294
+ output.action = browserAction.action;
295
+ if (browserAction.code) {
296
+ generatedCodeSteps.push(browserAction.code);
297
+ }
298
+ }
293
299
  }
294
300
  }
295
301
  // resetting error count as there is a successful action
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.46.2",
3
+ "version": "0.46.4",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -9,4 +9,6 @@ export default defineConfig({
9
9
  testMatch: "**/*.spec.js",
10
10
  // Setting large timeout as master agent takes time to run.
11
11
  timeout: 240_000,
12
+ fullyParallel: true,
13
+ workers: "50%",
12
14
  });