@empiricalrun/test-gen 0.49.0 → 0.50.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.50.1
4
+
5
+ ### Patch Changes
6
+
7
+ - b070af3: fix: error handling in test run tool
8
+ - a94ef14: fix: chat agent system prompt for proactiveness
9
+ - Updated dependencies [b070af3]
10
+ - @empiricalrun/test-run@0.7.4
11
+
12
+ ## 0.50.0
13
+
14
+ ### Minor Changes
15
+
16
+ - 0eeff70: feat: add test-run-fetcher tool call, response body changes
17
+
18
+ ### Patch Changes
19
+
20
+ - b14d5bf: feat: support headed executions of test run tool
21
+ - Updated dependencies [b14d5bf]
22
+ - @empiricalrun/test-run@0.7.3
23
+
3
24
  ## 0.49.0
4
25
 
5
26
  ### Minor Changes
@@ -1 +1 @@
1
- {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AA6D1D,wBAAsB,SAAS,CAAC,EAC9B,MAAM,GACP,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,8CAqEA"}
1
+ {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AA8E1D,wBAAsB,SAAS,CAAC,EAC9B,MAAM,GACP,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,8CAqEA"}
@@ -11,6 +11,7 @@ const browser_agent_1 = require("../tools/browser-agent");
11
11
  const diagnosis_fetcher_1 = require("../tools/diagnosis-fetcher");
12
12
  const grep_1 = require("../tools/grep");
13
13
  const test_run_1 = require("../tools/test-run");
14
+ const test_run_fetcher_1 = require("../tools/test-run-fetcher");
14
15
  const zod_schema_1 = require("../tools/zod-schema");
15
16
  const repo_tree_1 = require("../utils/repo-tree");
16
17
  const systemPrompt = `
@@ -25,6 +26,9 @@ Summarize the results in a few sentences.
25
26
  If the user provides a diagnosis URL, you can use the fetchDiagnosisDetails tool
26
27
  to get more information about the test case and its results.
27
28
 
29
+ If the user provides a test run URL, you can use the fetchTestRunDetails tool
30
+ to get detailed information about a specific test run.
31
+
28
32
  Or if the user asks you to modify a test, you could use the generateTestWithBrowserAgent tool. If you suspect
29
33
  that a UI selector needs to be updated, using the browser agent is a good idea.
30
34
 
@@ -40,6 +44,7 @@ The position of the comment is important: the browser agent will look for this c
40
44
  the actual code to click on the login button. If you are fixing a failing test, your comment should be
41
45
  around the failing line of code, so that it can be replaced/modified.
42
46
 
47
+ # Repo context
43
48
  You are running as a CLI tool inside the directory of the repo where this test file is located. Here is
44
49
  the repo directory structure:
45
50
 
@@ -48,8 +53,20 @@ ${(0, repo_tree_1.generateAsciiTree)(process.cwd())}
48
53
  While specifying paths to files, use relative paths from the current working directory. For example:
49
54
  - Correct path: "tests/lesson.spec.ts"
50
55
  - Incorrect path: "/repo/tests/lesson.spec.ts" or "${path_1.default.basename(process.cwd())}/tests/lesson.spec.ts"
56
+
57
+ # Proactiveness
58
+ You are allowed to be proactive, but only when the user asks you to do something. You should strive to
59
+ strike a balance between:
60
+ 1. Doing the right thing when asked, including taking actions and follow-up actions
61
+ 2. Not surprising the user with actions you take without asking
51
62
  `;
52
- const tools = [test_run_1.runTestTool, browser_agent_1.browserAgentTool, diagnosis_fetcher_1.diagnosisTool, grep_1.grepTool];
63
+ const tools = [
64
+ test_run_1.runTestTool,
65
+ browser_agent_1.browserAgentTool,
66
+ diagnosis_fetcher_1.diagnosisTool,
67
+ grep_1.grepTool,
68
+ test_run_fetcher_1.testRunTool,
69
+ ];
53
70
  const toolExecutors = {
54
71
  ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
55
72
  str_replace_editor: claude_1.strReplaceEditorTool,
@@ -1 +1 @@
1
- {"version":3,"file":"diagnosis-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/diagnosis-fetcher.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAUpC,eAAO,MAAM,aAAa,EAAE,IA+E3B,CAAC"}
1
+ {"version":3,"file":"diagnosis-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/diagnosis-fetcher.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAUpC,eAAO,MAAM,aAAa,EAAE,IAgF3B,CAAC"}
@@ -25,11 +25,10 @@ exports.diagnosisTool = {
25
25
  if (!slug) {
26
26
  throw new Error("Invalid diagnosis URL - could not extract slug");
27
27
  }
28
- // Make the API call to fetch diagnosis details
29
28
  const response = await fetch(`https://dash.empirical.run/api/diagnosis/${slug}/detailed`, {
30
29
  method: "GET",
31
30
  headers: {
32
- Authorization: "weQPMWKT", // Using the auth token from test-endpoint.mdc
31
+ Authorization: "weQPMWKT",
33
32
  },
34
33
  });
35
34
  if (!response.ok) {
@@ -40,8 +39,10 @@ exports.diagnosisTool = {
40
39
  }
41
40
  const data = await response.json();
42
41
  const { test_case, diagnosis } = data.data;
43
- const project = diagnosis[0]?.test_project || "unknown";
42
+ const project = diagnosis?.test_project || "unknown";
44
43
  const sourceContext = await promises_1.default.readFile(path_1.default.join("tests", test_case.file_path), "utf-8");
44
+ const repoName = path_1.default.basename(process.cwd());
45
+ const cleanErrorStack = diagnosis?.failed_run_metadata?.stack?.replace(`"/runner/_work/${repoName}/${repoName}/source-repo/"`, "");
45
46
  // Format the response as markdown
46
47
  const markdownResponse = `
47
48
  # Test Case Diagnosis
@@ -58,21 +59,21 @@ ${sourceContext}
58
59
  ## What Happened in the Test Run
59
60
 
60
61
  ### Failure Details
61
- - **Failing Line**: ${diagnosis[0]?.failing_line || "No failing line available"}
62
+ - **Failing Line**: ${diagnosis?.failing_line || "No failing line available"}
62
63
 
63
64
  #### Error Stack
64
65
  \`\`\`
65
- ${diagnosis[0]?.failed_run_metadata?.stack?.replace("/runner/_work/shopflo-tests/shopflo-tests/source-repo/", "") || "No error stack available"}
66
+ ${cleanErrorStack || "No error stack available"}
66
67
  \`\`\`
67
68
 
68
69
  #### Error Summary
69
- ${diagnosis[0]?.error_stack_summary?.content || "No error summary available"}
70
+ ${diagnosis?.error_stack_summary?.content || "No error summary available"}
70
71
 
71
72
  #### Visual Analysis
72
- ${diagnosis[0]?.visual_diff_summary?.summary || "No visual analysis available"}
73
+ ${diagnosis?.visual_diff_summary?.summary || "No visual analysis available"}
73
74
 
74
75
  #### Merged Summary
75
- ${diagnosis[0]?.merged_summary?.content || "No merged summary available"}
76
+ ${diagnosis?.merged_summary?.content || "No merged summary available"}
76
77
  `;
77
78
  return {
78
79
  result: markdownResponse,
@@ -0,0 +1,3 @@
1
+ import type { Tool } from "./types";
2
+ export declare const testRunTool: Tool;
3
+ //# sourceMappingURL=test-run-fetcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"test-run-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/test-run-fetcher.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAUpC,eAAO,MAAM,WAAW,EAAE,IA0DzB,CAAC"}
@@ -0,0 +1,59 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.testRunTool = void 0;
4
+ const zod_1 = require("zod");
5
+ const TestRunSchema = zod_1.z.object({
6
+ testRunUrl: zod_1.z
7
+ .string()
8
+ .describe("The full URL of the test run (e.g. https://dash.empirical.run/sortment-tests/test-runs/20269 or with query params like ?status=failed)"),
9
+ });
10
+ exports.testRunTool = {
11
+ schema: {
12
+ name: "fetchTestRunDetails",
13
+ description: "Fetch details about a test run using its URL",
14
+ parameters: TestRunSchema,
15
+ },
16
+ execute: async (input) => {
17
+ const { testRunUrl } = input;
18
+ // Remove query parameters if they exist
19
+ const urlWithoutParams = testRunUrl.split("?")[0] || testRunUrl;
20
+ // Extract the run ID and repo name from the URL
21
+ const urlParts = urlWithoutParams.split("/");
22
+ const runId = urlParts.pop(); // Last part is the run ID
23
+ const repoName = urlParts[urlParts.length - 2]; // Second to last part is the repo name
24
+ if (!runId || !repoName) {
25
+ throw new Error("Invalid test run URL - could not extract run ID or repo name");
26
+ }
27
+ // Make the API call to fetch test run details
28
+ const response = await fetch(`https://dash.empirical.run/api/test-runs/${runId}?repo_name=${repoName}`, {
29
+ method: "GET",
30
+ headers: {
31
+ Authorization: "weQPMWKT",
32
+ },
33
+ });
34
+ if (!response.ok) {
35
+ return {
36
+ result: `Failed to fetch test run details: ${response.statusText}`,
37
+ isError: true,
38
+ };
39
+ }
40
+ const data = await response.json();
41
+ // Format the response as markdown
42
+ const markdownResponse = `
43
+ # Test Run Details
44
+
45
+ ## Run Information
46
+ - **Run ID**: ${runId}
47
+ - **Repository**: ${repoName}
48
+
49
+ ## Test Run Data
50
+ \`\`\`json
51
+ ${JSON.stringify(data, null, 2)}
52
+ \`\`\`
53
+ `;
54
+ return {
55
+ result: markdownResponse,
56
+ isError: false,
57
+ };
58
+ },
59
+ };
@@ -1 +1 @@
1
- {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAepC,eAAO,MAAM,WAAW,EAAE,IAmBzB,CAAC"}
1
+ {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAsBpC,eAAO,MAAM,WAAW,EAAE,IA8BzB,CAAC"}
@@ -12,6 +12,11 @@ const RunTestSchema = zod_1.z.object({
12
12
  .string()
13
13
  .describe("The name of the file where the test is located. File name must end with .spec.ts"),
14
14
  project: zod_1.z.string().describe("The project to run the test on"),
15
+ headed: zod_1.z
16
+ .boolean()
17
+ .describe("Whether to run the test in headed mode (default is false, which is headless)")
18
+ .optional()
19
+ .default(false),
15
20
  });
16
21
  exports.runTestTool = {
17
22
  schema: {
@@ -20,16 +25,27 @@ exports.runTestTool = {
20
25
  parameters: RunTestSchema,
21
26
  },
22
27
  execute: async (input) => {
23
- const { testName, suites, fileName, project } = input;
24
- const result = await (0, test_run_1.runSingleTest)({
25
- testName,
26
- suites,
27
- fileName,
28
- projects: [project],
29
- });
30
- return {
31
- result: JSON.stringify(result),
32
- isError: false,
33
- };
28
+ const { testName, suites, fileName, project, headed } = input;
29
+ try {
30
+ const result = await (0, test_run_1.runSingleTest)({
31
+ testName,
32
+ suites,
33
+ fileName,
34
+ projects: [project],
35
+ headed,
36
+ });
37
+ return {
38
+ result: JSON.stringify(result),
39
+ isError: false,
40
+ };
41
+ }
42
+ catch (error) {
43
+ // Ensure we capture the full error message regardless of error type
44
+ const errorMessage = error instanceof Error ? error.message : String(error);
45
+ return {
46
+ result: JSON.stringify({ error: errorMessage }),
47
+ isError: true,
48
+ };
49
+ }
34
50
  },
35
51
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.49.0",
3
+ "version": "0.50.1",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -74,10 +74,10 @@
74
74
  "tsx": "^4.16.2",
75
75
  "typescript": "^5.3.3",
76
76
  "zod": "^3.23.8",
77
- "@empiricalrun/llm": "^0.10.1",
78
77
  "@empiricalrun/r2-uploader": "^0.3.8",
79
78
  "@empiricalrun/reporter": "^0.23.1",
80
- "@empiricalrun/test-run": "^0.7.2"
79
+ "@empiricalrun/test-run": "^0.7.4",
80
+ "@empiricalrun/llm": "^0.10.1"
81
81
  },
82
82
  "devDependencies": {
83
83
  "@playwright/test": "1.47.1",