@empiricalrun/test-gen 0.51.3 → 0.51.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.51.4
4
+
5
+ ### Patch Changes
6
+
7
+ - 7613f20: fix: update test-run-fetcher tool call response to include project
8
+ - f57ab88: fix: add playwright best practices to system prompt
9
+ - 83a4788: feat: use cua in chat agent tool call with feature flag
10
+ - a8c11eb: fix: tool call log should show error
11
+ - Updated dependencies [307a1cf]
12
+ - @empiricalrun/llm@0.11.3
13
+
3
14
  ## 0.51.3
4
15
 
5
16
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AA+EA,wBAAsB,SAAS,CAAC,EAC9B,SAAwC,EACxC,mBAAmB,GACpB,EAAE;IACD,SAAS,CAAC,EAAE,4BAA4B,GAAG,4BAA4B,CAAC;IACxE,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B,mBA4GA"}
1
+ {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AA6FA,wBAAsB,SAAS,CAAC,EAC9B,SAAwC,EACxC,mBAAmB,GACpB,EAAE;IACD,SAAS,CAAC,EAAE,4BAA4B,GAAG,4BAA4B,CAAC;IACxE,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B,mBAkHA"}
@@ -8,9 +8,9 @@ const claude_1 = require("@empiricalrun/llm/claude");
8
8
  const path_1 = __importDefault(require("path"));
9
9
  const picocolors_1 = require("picocolors");
10
10
  const human_in_the_loop_1 = require("../human-in-the-loop");
11
- const browser_agent_1 = require("../tools/browser-agent");
12
11
  const diagnosis_fetcher_1 = require("../tools/diagnosis-fetcher");
13
12
  const grep_1 = require("../tools/grep");
13
+ const test_gen_browser_1 = require("../tools/test-gen-browser");
14
14
  const test_run_1 = require("../tools/test-run");
15
15
  const test_run_fetcher_1 = require("../tools/test-run-fetcher");
16
16
  const zod_schema_1 = require("../tools/zod-schema");
@@ -20,6 +20,8 @@ You are a helpful assistant that can answer questions and help with tasks.
20
20
  You are given a set of tools to use to fulfill the user's request. Read their descriptions to
21
21
  understand what each tool does.
22
22
 
23
+ # Tools
24
+
23
25
  For example, if the user asks you to run a test, you could use the runTest tool.
24
26
  Once the test is run, you will receive the results in the form of a JSON object.
25
27
  Summarize the results in a few sentences.
@@ -46,6 +48,7 @@ the actual code to click on the login button. If you are fixing a failing test,
46
48
  around the failing line of code, so that it can be replaced/modified.
47
49
 
48
50
  # Repo context
51
+
49
52
  You are running as a CLI tool inside the directory of the repo where this test file is located. Here is
50
53
  the repo directory structure:
51
54
 
@@ -55,15 +58,26 @@ While specifying paths to files, use relative paths from the current working dir
55
58
  - Correct path: "tests/lesson.spec.ts"
56
59
  - Incorrect path: "/repo/tests/lesson.spec.ts" or "${path_1.default.basename(process.cwd())}/tests/lesson.spec.ts"
57
60
 
61
+ # Rules for fixing Playwright tests
62
+
63
+ You must follow these rules while adding new tests or modifying existing tests. There can be exceptions to these rules, but
64
+ ONLY when explicitly asked for by the user.
65
+
66
+ 1. Do not add any conditional logic or try catch blocks in a test. A good test deterministically tests a user scenario
67
+ 2. Trust Playwright's ability to auto-wait while taking actions on elements. For example, do not add checks on locator.isVisible() before clicking on it: Playwright already does this
68
+ 3. Do not add waitForTimeout or waitForLoadState in a test. Playwright will automatically wait for the page to load.
69
+ 4. You can't delete some steps from the test to make it pass. The test needs to accomplish its objective (which is to validate a particular user scenario)
70
+
58
71
  # Proactiveness
72
+
59
73
  You are allowed to be proactive, but only when the user asks you to do something. You should strive to
60
74
  strike a balance between:
61
75
  1. Doing the right thing when asked, including taking actions and follow-up actions
62
- 2. Not surprising the user with actions you take without asking
76
+ 2. Not surprising the user with actions you take without asking. It is okay to ask the user for confirmation before taking actions.
63
77
  `;
64
78
  const tools = [
65
79
  test_run_1.runTestTool,
66
- browser_agent_1.browserAgentTool,
80
+ test_gen_browser_1.generateTestWithBrowserAgent,
67
81
  diagnosis_fetcher_1.diagnosisTool,
68
82
  grep_1.grepTool,
69
83
  test_run_fetcher_1.testRunTool,
@@ -129,7 +143,12 @@ async function chatAgent({ chatModel = "claude-3-7-sonnet-20250219", useDiskForC
129
143
  throw new Error(`Tool ${toolUse.name} not found`);
130
144
  }
131
145
  const toolResult = await toolExecutor(toolUse.input);
132
- spinner.succeed(`Tool ${toolUse.name} completed`);
146
+ if (toolResult.isError) {
147
+ spinner.fail(`Tool ${toolUse.name} failed with error: ${toolResult.result}`);
148
+ }
149
+ else {
150
+ spinner.succeed(`Tool ${toolUse.name} completed`);
151
+ }
133
152
  chatState.pushMessage({
134
153
  role: "user",
135
154
  content: [
@@ -3,7 +3,7 @@ export declare function startPlaywrightCodegen(page: Page): Promise<void>;
3
3
  /**
4
4
  * Run the loop that executes computer actions until no 'computer_call' is found.
5
5
  */
6
- export declare function executeUsingComputerUseAgent({ page, task, }: {
6
+ export declare function createTestUsingComputerUseAgent({ page, task, }: {
7
7
  page: Page;
8
8
  task: string;
9
9
  }): Promise<{
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAWlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED;;GAEG;AACH,wBAAsB,4BAA4B,CAAC,EACjD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;CACvB,CAAC,CAkGD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAWlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED;;GAEG;AACH,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;CACvB,CAAC,CAkGD"}
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.executeUsingComputerUseAgent = exports.startPlaywrightCodegen = void 0;
6
+ exports.createTestUsingComputerUseAgent = exports.startPlaywrightCodegen = void 0;
7
7
  const openai_1 = __importDefault(require("openai"));
8
8
  const utils_1 = require("../browsing/utils");
9
9
  const computer_1 = require("./computer");
@@ -37,7 +37,7 @@ exports.startPlaywrightCodegen = startPlaywrightCodegen;
37
37
  /**
38
38
  * Run the loop that executes computer actions until no 'computer_call' is found.
39
39
  */
40
- async function executeUsingComputerUseAgent({ page, task, }) {
40
+ async function createTestUsingComputerUseAgent({ page, task, }) {
41
41
  let generatedCode = "";
42
42
  await (0, utils_1.injectPwLocatorGenerator)(page);
43
43
  const screenshotBytes = await (0, computer_1.getScreenshot)(page);
@@ -129,4 +129,4 @@ async function executeUsingComputerUseAgent({ page, task, }) {
129
129
  importPaths: [],
130
130
  };
131
131
  }
132
- exports.executeUsingComputerUseAgent = executeUsingComputerUseAgent;
132
+ exports.createTestUsingComputerUseAgent = createTestUsingComputerUseAgent;
@@ -75,7 +75,7 @@ click on maverick inside ford dropdown`,
75
75
  (0, fixtures_1.test)("cua agent can click icons accurately", async ({ page, server }) => {
76
76
  await page.goto(`${server.baseURL}/icons-navbar.html`);
77
77
  await (0, fixtures_1.expect)(page.getByText("select an icon")).toBeVisible();
78
- const response = await (0, run_1.executeUsingComputerUseAgent)({
78
+ const response = await (0, run_1.createTestUsingComputerUseAgent)({
79
79
  task: `click on the gear icon`,
80
80
  page,
81
81
  });
@@ -1,7 +1,7 @@
1
1
  import type { TestCase, TestGenConfigOptions } from "@empiricalrun/shared-types";
2
2
  import { Page } from "playwright";
3
3
  import { ScopeVars } from "../../types";
4
- export { executeUsingComputerUseAgent } from "../cua";
4
+ export { createTestUsingComputerUseAgent } from "../cua";
5
5
  export declare const IS_ALLOWED_TO_USE_SKILLS = false;
6
6
  export declare function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }: {
7
7
  task: string;
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,OAAO,EAAE,4BAA4B,EAAE,MAAM,QAAQ,CAAC;AAKtD,eAAO,MAAM,wBAAwB,QAAQ,CAAC;AAqB9C,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACvC,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAwRA"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,OAAO,EAAE,+BAA+B,EAAE,MAAM,QAAQ,CAAC;AAKzD,eAAO,MAAM,wBAAwB,QAAQ,CAAC;AAqB9C,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACvC,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAwRA"}
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.createTestUsingMasterAgent = exports.IS_ALLOWED_TO_USE_SKILLS = exports.executeUsingComputerUseAgent = void 0;
3
+ exports.createTestUsingMasterAgent = exports.IS_ALLOWED_TO_USE_SKILLS = exports.createTestUsingComputerUseAgent = void 0;
4
4
  const llm_1 = require("@empiricalrun/llm");
5
5
  const actions_1 = require("../../actions");
6
6
  const skill_1 = require("../../actions/skill");
@@ -20,7 +20,7 @@ const execute_browser_action_1 = require("./execute-browser-action");
20
20
  const execute_skill_action_1 = require("./execute-skill-action");
21
21
  const next_action_1 = require("./next-action");
22
22
  var cua_1 = require("../cua");
23
- Object.defineProperty(exports, "executeUsingComputerUseAgent", { enumerable: true, get: function () { return cua_1.executeUsingComputerUseAgent; } });
23
+ Object.defineProperty(exports, "createTestUsingComputerUseAgent", { enumerable: true, get: function () { return cua_1.createTestUsingComputerUseAgent; } });
24
24
  const MAX_ERROR_COUNT = 2;
25
25
  // Disabling skills as we're seeing false usage with chat agent
26
26
  exports.IS_ALLOWED_TO_USE_SKILLS = false;
@@ -1,6 +1,8 @@
1
1
  import type { TestGenConfig } from "@empiricalrun/shared-types";
2
2
  import { CliOptions } from "../index";
3
- export declare function buildTokenFromOptions(options: Omit<CliOptions, "token">): string;
3
+ export declare function buildTokenFromOptions(options: Omit<CliOptions, "token">, featureFlags?: {
4
+ useComputerUseAgent?: boolean;
5
+ }): string;
4
6
  export declare function buildTestConfigFromOptions(options: Omit<CliOptions, "token">): TestGenConfig;
5
7
  export declare function loadTestConfigs(testGenToken: string): TestGenConfig;
6
8
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/bin/utils/scenarios/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAIV,aAAa,EAEd,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAetC,wBAAgB,qBAAqB,CACnC,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE,OAAO,CAAC,GACjC,MAAM,CAgBR;AAED,wBAAgB,0BAA0B,CACxC,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE,OAAO,CAAC,GACjC,aAAa,CAaf;AAED,wBAAgB,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,aAAa,CAmBnE"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/bin/utils/scenarios/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAIV,aAAa,EAEd,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAetC,wBAAgB,qBAAqB,CACnC,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE,OAAO,CAAC,EAClC,YAAY,CAAC,EAAE;IACb,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B,GACA,MAAM,CAqBR;AAED,wBAAgB,0BAA0B,CACxC,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE,OAAO,CAAC,GACjC,aAAa,CAaf;AAED,wBAAgB,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,aAAa,CAmBnE"}
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.loadTestConfigs = exports.buildTestConfigFromOptions = exports.buildTokenFromOptions = void 0;
4
- function buildTokenFromOptions(options) {
4
+ function buildTokenFromOptions(options, featureFlags) {
5
5
  const genConfig = buildTestConfigFromOptions(options);
6
6
  const requestConfig = {
7
7
  specPath: genConfig.specPath,
@@ -12,7 +12,12 @@ function buildTokenFromOptions(options) {
12
12
  suites: genConfig.testCase.suites,
13
13
  ai_gist: genConfig.testCase.ai_gist,
14
14
  build: genConfig.build,
15
- options: genConfig.options,
15
+ options: genConfig.options
16
+ ? {
17
+ ...genConfig.options,
18
+ ...featureFlags,
19
+ }
20
+ : featureFlags,
16
21
  environment: genConfig.environment,
17
22
  testErrorDiagnosis: genConfig.testErrorDiagnosis,
18
23
  };
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAepC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAyC3E"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAepC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAsD3E"}
package/dist/index.js CHANGED
@@ -5,6 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.createTest = void 0;
7
7
  const llm_1 = require("@empiricalrun/llm");
8
+ const cua_1 = require("./agent/cua");
8
9
  const run_1 = require("./agent/master/run");
9
10
  const scenarios_1 = require("./bin/utils/scenarios");
10
11
  const client_1 = __importDefault(require("./file/client"));
@@ -26,29 +27,41 @@ async function createTest(task, page, scope) {
26
27
  try {
27
28
  const testConfigArg = process.env.TEST_GEN_TOKEN;
28
29
  const testGenConfig = (0, scenarios_1.loadTestConfigs)(testConfigArg);
29
- (0, reporter_1.setReporterConfig)({
30
- projectRepoName: testGenConfig.options?.metadata.projectRepoName,
31
- testSessionId: testGenConfig.options?.metadata.testSessionId,
32
- generationId: testGenConfig.options?.metadata.generationId,
33
- });
34
- (0, session_1.setSessionDetails)({
35
- sessionId: testGenConfig.options?.metadata.testSessionId,
36
- generationId: testGenConfig.options?.metadata.generationId,
37
- testCaseId: testGenConfig.testCase.id,
38
- projectRepoName: testGenConfig.options?.metadata.projectRepoName,
39
- });
30
+ if (testGenConfig.options && testGenConfig.options.metadata) {
31
+ (0, reporter_1.setReporterConfig)({
32
+ projectRepoName: testGenConfig.options?.metadata.projectRepoName,
33
+ testSessionId: testGenConfig.options?.metadata.testSessionId,
34
+ generationId: testGenConfig.options?.metadata.generationId,
35
+ });
36
+ (0, session_1.setSessionDetails)({
37
+ sessionId: testGenConfig.options?.metadata.testSessionId,
38
+ generationId: testGenConfig.options?.metadata.generationId,
39
+ testCaseId: testGenConfig.testCase.id,
40
+ projectRepoName: testGenConfig.options?.metadata.projectRepoName,
41
+ });
42
+ }
40
43
  const fileService = new client_1.default();
41
- const { testCase, specPath } = testGenConfig;
42
- const { code, importPaths } = await (0, run_1.createTestUsingMasterAgent)({
43
- testCase,
44
- specPath,
45
- page,
46
- task,
47
- options: {
48
- ...testGenConfig.options,
49
- },
50
- scopeVars: scope,
51
- });
44
+ const useComputerUseAgent = testGenConfig.options?.useComputerUseAgent;
45
+ let agentResult;
46
+ if (useComputerUseAgent) {
47
+ agentResult = await (0, cua_1.createTestUsingComputerUseAgent)({
48
+ task,
49
+ page,
50
+ });
51
+ }
52
+ else {
53
+ agentResult = await (0, run_1.createTestUsingMasterAgent)({
54
+ task,
55
+ page,
56
+ testCase: testGenConfig.testCase,
57
+ specPath: testGenConfig.specPath,
58
+ options: {
59
+ ...testGenConfig.options,
60
+ },
61
+ scopeVars: scope,
62
+ });
63
+ }
64
+ const { code, importPaths } = agentResult;
52
65
  await fileService.updateTest({
53
66
  task,
54
67
  generatedCode: code,
@@ -0,0 +1,3 @@
1
+ import type { Tool } from "./types";
2
+ export declare const generateTestWithBrowserAgent: Tool;
3
+ //# sourceMappingURL=test-gen-browser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAmDpC,eAAO,MAAM,4BAA4B,EAAE,IAiE1C,CAAC"}
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.browserAgentTool = void 0;
3
+ exports.generateTestWithBrowserAgent = void 0;
4
4
  const zod_1 = require("zod");
5
5
  const run_1 = require("../agent/browsing/run");
6
6
  const utils_1 = require("../agent/browsing/utils");
@@ -49,7 +49,7 @@ test("Example test code", async ({ page }) => {
49
49
  });
50
50
  \`\`\`
51
51
  `;
52
- exports.browserAgentTool = {
52
+ exports.generateTestWithBrowserAgent = {
53
53
  schema: {
54
54
  name: "generateTestWithBrowserAgent",
55
55
  description: BROWSER_AGENT_DESCRIPTION,
@@ -57,6 +57,14 @@ exports.browserAgentTool = {
57
57
  },
58
58
  execute: async (input) => {
59
59
  const { testName, testSuites, fileName, changeToMake, project } = input;
60
+ const playwrightConfig = await (0, utils_1.readPlaywrightConfig)(process.cwd());
61
+ const validProjectNames = await (0, utils_1.getValidProjectNames)(playwrightConfig);
62
+ if (!validProjectNames.includes(project)) {
63
+ return {
64
+ isError: true,
65
+ result: `Invalid project name: ${project}. Valid project names are: ${validProjectNames.join(", ")}`,
66
+ };
67
+ }
60
68
  try {
61
69
  await (0, utils_1.replaceTodoWithCreateTest)({
62
70
  testCaseName: testName,
@@ -70,14 +78,6 @@ exports.browserAgentTool = {
70
78
  result: `Error running tool: ${error}`,
71
79
  };
72
80
  }
73
- const playwrightConfig = await (0, utils_1.readPlaywrightConfig)(process.cwd());
74
- const validProjectNames = await (0, utils_1.getValidProjectNames)(playwrightConfig);
75
- if (!validProjectNames.includes(project)) {
76
- return {
77
- isError: true,
78
- result: `Invalid project name: ${project}. Valid project names are: ${validProjectNames.join(", ")}`,
79
- };
80
- }
81
81
  const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
82
82
  testFilePath: fileName,
83
83
  filePathToUpdate: fileName,
@@ -86,6 +86,8 @@ exports.browserAgentTool = {
86
86
  name: testName,
87
87
  file: fileName,
88
88
  prompt: changeToMake,
89
+ }, {
90
+ useComputerUseAgent: true,
89
91
  }),
90
92
  repoDir: process.cwd(),
91
93
  });
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/test-run-fetcher/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,UAAU,CAAC;AAWrC,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAOnE;AAED,eAAO,MAAM,WAAW,EAAE,IAwHzB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/test-run-fetcher/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,UAAU,CAAC;AAWrC,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAOnE;AAED,eAAO,MAAM,WAAW,EAAE,IA4HzB,CAAC"}
@@ -52,55 +52,57 @@ exports.testRunTool = {
52
52
  // 1. Truncate stack trace to last 300 characters
53
53
  // 2. Remove request/response headers from network metadata
54
54
  // 3. Focus on failed tests only
55
- if (data.data?.test_run?.flattenedSummaryDetails) {
56
- data.data.test_run.flattenedSummaryDetails =
57
- data.data.test_run.flattenedSummaryDetails.map((detail) => {
58
- // Truncate stack trace to last 300 characters
59
- if (detail.failed_run_metadata?.stack) {
60
- const stack = detail.failed_run_metadata.stack;
61
- // TODO: change to last 10 lines
62
- detail.failed_run_metadata.stack =
63
- stack.length > 300 ? `...${stack.slice(-300)}` : stack;
64
- }
65
- // Remove headers from network metadata
66
- if (detail.network_metadata?.failed_calls) {
67
- detail.network_metadata.failed_calls =
68
- detail.network_metadata.failed_calls.map((call) => ({
69
- ...call,
70
- request: { ...call.request, headers: [] },
71
- response: { ...call.response, headers: [] },
72
- }));
73
- }
74
- if (detail.network_metadata?.failed_calls_within_time_range) {
75
- detail.network_metadata.failed_calls_within_time_range =
76
- detail.network_metadata.failed_calls_within_time_range.map((call) => ({
77
- ...call,
78
- request: { ...call.request, headers: [] },
79
- response: { ...call.response, headers: [] },
80
- }));
81
- }
82
- return detail;
83
- });
55
+ const testRun = data.data?.test_run;
56
+ if (testRun?.flattenedSummaryDetails) {
57
+ testRun.flattenedSummaryDetails = testRun.flattenedSummaryDetails.map((detail) => {
58
+ // Truncate stack trace to last 300 characters
59
+ if (detail.failed_run_metadata?.stack) {
60
+ const stack = detail.failed_run_metadata.stack;
61
+ // TODO: change to last 10 lines
62
+ detail.failed_run_metadata.stack =
63
+ stack.length > 300 ? `...${stack.slice(-300)}` : stack;
64
+ }
65
+ // Remove headers from network metadata
66
+ if (detail.network_metadata?.failed_calls) {
67
+ detail.network_metadata.failed_calls =
68
+ detail.network_metadata.failed_calls.map((call) => ({
69
+ ...call,
70
+ request: { ...call.request, headers: [] },
71
+ response: { ...call.response, headers: [] },
72
+ }));
73
+ }
74
+ if (detail.network_metadata?.failed_calls_within_time_range) {
75
+ detail.network_metadata.failed_calls_within_time_range =
76
+ detail.network_metadata.failed_calls_within_time_range.map((call) => ({
77
+ ...call,
78
+ request: { ...call.request, headers: [] },
79
+ response: { ...call.response, headers: [] },
80
+ }));
81
+ }
82
+ return detail;
83
+ });
84
84
  }
85
- const failedTests = data.data?.test_run?.flattenedSummaryDetails.filter((detail) => detail.status === "failed");
85
+ const failedTests = testRun?.flattenedSummaryDetails.filter((detail) => detail.status === "failed");
86
86
  const failedTestInMarkdown = failedTests
87
87
  ?.map((detail) => {
88
88
  const { nesting } = detail.failed_run_metadata;
89
89
  return {
90
90
  testName: nesting.slice(1).join(" > "),
91
91
  fileName: extractPathAfterSourceRepo(detail.failed_run_metadata.location.file),
92
+ project: detail.test_project,
92
93
  detail,
93
94
  };
94
95
  })
95
- .map((test) => {
96
+ .map((res) => {
96
97
  return `
97
- ### Test name: ${test.testName}
98
- File: ${test.fileName}
98
+ ### Test name: ${res.testName}
99
+ File: ${res.fileName}
100
+ Project: ${res.project}
99
101
 
100
102
  Failure data below
101
103
 
102
104
  \`\`\`json
103
- ${JSON.stringify(test.detail, null, 2)}
105
+ ${JSON.stringify(res.detail, null, 2)}
104
106
  \`\`\`
105
107
  `;
106
108
  })
@@ -111,10 +113,10 @@ ${JSON.stringify(test.detail, null, 2)}
111
113
  ## Run info
112
114
  - Run ID: ${runId}
113
115
  - Repository: ${repoName}
114
- - Started at: ${data.data?.test_run?.testRun.run_started_at}
115
- - Ended at: ${data.data?.test_run?.testRun.run_ended_at}
116
- - Duration: ${data.data?.test_run?.testRun.duration} seconds
117
- - Environment name: ${data.data?.test_run?.testRun.environment_name}
116
+ - Started at: ${testRun?.testRun.run_started_at}
117
+ - Ended at: ${testRun?.testRun.run_ended_at}
118
+ - Duration: ${testRun?.testRun.duration} seconds
119
+ - Environment name: ${testRun?.testRun.environment_name}
118
120
 
119
121
  ## Failed tests
120
122
  ${failedTestInMarkdown}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.51.3",
3
+ "version": "0.51.4",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -74,7 +74,7 @@
74
74
  "tsx": "^4.16.2",
75
75
  "typescript": "^5.3.3",
76
76
  "zod": "^3.23.8",
77
- "@empiricalrun/llm": "^0.11.2",
77
+ "@empiricalrun/llm": "^0.11.3",
78
78
  "@empiricalrun/r2-uploader": "^0.3.8",
79
79
  "@empiricalrun/reporter": "^0.23.2",
80
80
  "@empiricalrun/test-run": "^0.7.6"
@@ -92,7 +92,7 @@
92
92
  "js-levenshtein": "^1.1.6",
93
93
  "playwright": "1.47.1",
94
94
  "ts-patch": "^3.3.0",
95
- "@empiricalrun/shared-types": "0.0.6"
95
+ "@empiricalrun/shared-types": "0.0.7"
96
96
  },
97
97
  "scripts": {
98
98
  "dev": "tspc --build --watch",
@@ -1,3 +0,0 @@
1
- import type { Tool } from "./types";
2
- export declare const browserAgentTool: Tool;
3
- //# sourceMappingURL=browser-agent.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"browser-agent.d.ts","sourceRoot":"","sources":["../../src/tools/browser-agent.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAmDpC,eAAO,MAAM,gBAAgB,EAAE,IA4D9B,CAAC"}