@empiricalrun/test-gen 0.49.0 → 0.50.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/dist/agent/chat.d.ts.map +1 -1
- package/dist/agent/chat.js +18 -1
- package/dist/tools/diagnosis-fetcher.d.ts.map +1 -1
- package/dist/tools/diagnosis-fetcher.js +9 -8
- package/dist/tools/test-run-fetcher.d.ts +3 -0
- package/dist/tools/test-run-fetcher.d.ts.map +1 -0
- package/dist/tools/test-run-fetcher.js +59 -0
- package/dist/tools/test-run.d.ts.map +1 -1
- package/dist/tools/test-run.js +27 -11
- package/package.json +3 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,26 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.50.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- b070af3: fix: error handling in test run tool
|
|
8
|
+
- a94ef14: fix: chat agent system prompt for proactiveness
|
|
9
|
+
- Updated dependencies [b070af3]
|
|
10
|
+
- @empiricalrun/test-run@0.7.4
|
|
11
|
+
|
|
12
|
+
## 0.50.0
|
|
13
|
+
|
|
14
|
+
### Minor Changes
|
|
15
|
+
|
|
16
|
+
- 0eeff70: feat: add test-run-fetcher tool call, response body changes
|
|
17
|
+
|
|
18
|
+
### Patch Changes
|
|
19
|
+
|
|
20
|
+
- b14d5bf: feat: support headed executions of test run tool
|
|
21
|
+
- Updated dependencies [b14d5bf]
|
|
22
|
+
- @empiricalrun/test-run@0.7.3
|
|
23
|
+
|
|
3
24
|
## 0.49.0
|
|
4
25
|
|
|
5
26
|
### Minor Changes
|
package/dist/agent/chat.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AA8E1D,wBAAsB,SAAS,CAAC,EAC9B,MAAM,GACP,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,8CAqEA"}
|
package/dist/agent/chat.js
CHANGED
|
@@ -11,6 +11,7 @@ const browser_agent_1 = require("../tools/browser-agent");
|
|
|
11
11
|
const diagnosis_fetcher_1 = require("../tools/diagnosis-fetcher");
|
|
12
12
|
const grep_1 = require("../tools/grep");
|
|
13
13
|
const test_run_1 = require("../tools/test-run");
|
|
14
|
+
const test_run_fetcher_1 = require("../tools/test-run-fetcher");
|
|
14
15
|
const zod_schema_1 = require("../tools/zod-schema");
|
|
15
16
|
const repo_tree_1 = require("../utils/repo-tree");
|
|
16
17
|
const systemPrompt = `
|
|
@@ -25,6 +26,9 @@ Summarize the results in a few sentences.
|
|
|
25
26
|
If the user provides a diagnosis URL, you can use the fetchDiagnosisDetails tool
|
|
26
27
|
to get more information about the test case and its results.
|
|
27
28
|
|
|
29
|
+
If the user provides a test run URL, you can use the fetchTestRunDetails tool
|
|
30
|
+
to get detailed information about a specific test run.
|
|
31
|
+
|
|
28
32
|
Or if the user asks you to modify a test, you could use the generateTestWithBrowserAgent tool. If you suspect
|
|
29
33
|
that a UI selector needs to be updated, using the browser agent is a good idea.
|
|
30
34
|
|
|
@@ -40,6 +44,7 @@ The position of the comment is important: the browser agent will look for this c
|
|
|
40
44
|
the actual code to click on the login button. If you are fixing a failing test, your comment should be
|
|
41
45
|
around the failing line of code, so that it can be replaced/modified.
|
|
42
46
|
|
|
47
|
+
# Repo context
|
|
43
48
|
You are running as a CLI tool inside the directory of the repo where this test file is located. Here is
|
|
44
49
|
the repo directory structure:
|
|
45
50
|
|
|
@@ -48,8 +53,20 @@ ${(0, repo_tree_1.generateAsciiTree)(process.cwd())}
|
|
|
48
53
|
While specifying paths to files, use relative paths from the current working directory. For example:
|
|
49
54
|
- Correct path: "tests/lesson.spec.ts"
|
|
50
55
|
- Incorrect path: "/repo/tests/lesson.spec.ts" or "${path_1.default.basename(process.cwd())}/tests/lesson.spec.ts"
|
|
56
|
+
|
|
57
|
+
# Proactiveness
|
|
58
|
+
You are allowed to be proactive, but only when the user asks you to do something. You should strive to
|
|
59
|
+
strike a balance between:
|
|
60
|
+
1. Doing the right thing when asked, including taking actions and follow-up actions
|
|
61
|
+
2. Not surprising the user with actions you take without asking
|
|
51
62
|
`;
|
|
52
|
-
const tools = [
|
|
63
|
+
const tools = [
|
|
64
|
+
test_run_1.runTestTool,
|
|
65
|
+
browser_agent_1.browserAgentTool,
|
|
66
|
+
diagnosis_fetcher_1.diagnosisTool,
|
|
67
|
+
grep_1.grepTool,
|
|
68
|
+
test_run_fetcher_1.testRunTool,
|
|
69
|
+
];
|
|
53
70
|
const toolExecutors = {
|
|
54
71
|
...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
|
|
55
72
|
str_replace_editor: claude_1.strReplaceEditorTool,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"diagnosis-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/diagnosis-fetcher.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAUpC,eAAO,MAAM,aAAa,EAAE,
|
|
1
|
+
{"version":3,"file":"diagnosis-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/diagnosis-fetcher.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAUpC,eAAO,MAAM,aAAa,EAAE,IAgF3B,CAAC"}
|
|
@@ -25,11 +25,10 @@ exports.diagnosisTool = {
|
|
|
25
25
|
if (!slug) {
|
|
26
26
|
throw new Error("Invalid diagnosis URL - could not extract slug");
|
|
27
27
|
}
|
|
28
|
-
// Make the API call to fetch diagnosis details
|
|
29
28
|
const response = await fetch(`https://dash.empirical.run/api/diagnosis/${slug}/detailed`, {
|
|
30
29
|
method: "GET",
|
|
31
30
|
headers: {
|
|
32
|
-
Authorization: "weQPMWKT",
|
|
31
|
+
Authorization: "weQPMWKT",
|
|
33
32
|
},
|
|
34
33
|
});
|
|
35
34
|
if (!response.ok) {
|
|
@@ -40,8 +39,10 @@ exports.diagnosisTool = {
|
|
|
40
39
|
}
|
|
41
40
|
const data = await response.json();
|
|
42
41
|
const { test_case, diagnosis } = data.data;
|
|
43
|
-
const project = diagnosis
|
|
42
|
+
const project = diagnosis?.test_project || "unknown";
|
|
44
43
|
const sourceContext = await promises_1.default.readFile(path_1.default.join("tests", test_case.file_path), "utf-8");
|
|
44
|
+
const repoName = path_1.default.basename(process.cwd());
|
|
45
|
+
const cleanErrorStack = diagnosis?.failed_run_metadata?.stack?.replace(`"/runner/_work/${repoName}/${repoName}/source-repo/"`, "");
|
|
45
46
|
// Format the response as markdown
|
|
46
47
|
const markdownResponse = `
|
|
47
48
|
# Test Case Diagnosis
|
|
@@ -58,21 +59,21 @@ ${sourceContext}
|
|
|
58
59
|
## What Happened in the Test Run
|
|
59
60
|
|
|
60
61
|
### Failure Details
|
|
61
|
-
- **Failing Line**: ${diagnosis
|
|
62
|
+
- **Failing Line**: ${diagnosis?.failing_line || "No failing line available"}
|
|
62
63
|
|
|
63
64
|
#### Error Stack
|
|
64
65
|
\`\`\`
|
|
65
|
-
${
|
|
66
|
+
${cleanErrorStack || "No error stack available"}
|
|
66
67
|
\`\`\`
|
|
67
68
|
|
|
68
69
|
#### Error Summary
|
|
69
|
-
${diagnosis
|
|
70
|
+
${diagnosis?.error_stack_summary?.content || "No error summary available"}
|
|
70
71
|
|
|
71
72
|
#### Visual Analysis
|
|
72
|
-
${diagnosis
|
|
73
|
+
${diagnosis?.visual_diff_summary?.summary || "No visual analysis available"}
|
|
73
74
|
|
|
74
75
|
#### Merged Summary
|
|
75
|
-
${diagnosis
|
|
76
|
+
${diagnosis?.merged_summary?.content || "No merged summary available"}
|
|
76
77
|
`;
|
|
77
78
|
return {
|
|
78
79
|
result: markdownResponse,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test-run-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/test-run-fetcher.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAUpC,eAAO,MAAM,WAAW,EAAE,IA0DzB,CAAC"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.testRunTool = void 0;
|
|
4
|
+
const zod_1 = require("zod");
|
|
5
|
+
const TestRunSchema = zod_1.z.object({
|
|
6
|
+
testRunUrl: zod_1.z
|
|
7
|
+
.string()
|
|
8
|
+
.describe("The full URL of the test run (e.g. https://dash.empirical.run/sortment-tests/test-runs/20269 or with query params like ?status=failed)"),
|
|
9
|
+
});
|
|
10
|
+
exports.testRunTool = {
|
|
11
|
+
schema: {
|
|
12
|
+
name: "fetchTestRunDetails",
|
|
13
|
+
description: "Fetch details about a test run using its URL",
|
|
14
|
+
parameters: TestRunSchema,
|
|
15
|
+
},
|
|
16
|
+
execute: async (input) => {
|
|
17
|
+
const { testRunUrl } = input;
|
|
18
|
+
// Remove query parameters if they exist
|
|
19
|
+
const urlWithoutParams = testRunUrl.split("?")[0] || testRunUrl;
|
|
20
|
+
// Extract the run ID and repo name from the URL
|
|
21
|
+
const urlParts = urlWithoutParams.split("/");
|
|
22
|
+
const runId = urlParts.pop(); // Last part is the run ID
|
|
23
|
+
const repoName = urlParts[urlParts.length - 2]; // Second to last part is the repo name
|
|
24
|
+
if (!runId || !repoName) {
|
|
25
|
+
throw new Error("Invalid test run URL - could not extract run ID or repo name");
|
|
26
|
+
}
|
|
27
|
+
// Make the API call to fetch test run details
|
|
28
|
+
const response = await fetch(`https://dash.empirical.run/api/test-runs/${runId}?repo_name=${repoName}`, {
|
|
29
|
+
method: "GET",
|
|
30
|
+
headers: {
|
|
31
|
+
Authorization: "weQPMWKT",
|
|
32
|
+
},
|
|
33
|
+
});
|
|
34
|
+
if (!response.ok) {
|
|
35
|
+
return {
|
|
36
|
+
result: `Failed to fetch test run details: ${response.statusText}`,
|
|
37
|
+
isError: true,
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
const data = await response.json();
|
|
41
|
+
// Format the response as markdown
|
|
42
|
+
const markdownResponse = `
|
|
43
|
+
# Test Run Details
|
|
44
|
+
|
|
45
|
+
## Run Information
|
|
46
|
+
- **Run ID**: ${runId}
|
|
47
|
+
- **Repository**: ${repoName}
|
|
48
|
+
|
|
49
|
+
## Test Run Data
|
|
50
|
+
\`\`\`json
|
|
51
|
+
${JSON.stringify(data, null, 2)}
|
|
52
|
+
\`\`\`
|
|
53
|
+
`;
|
|
54
|
+
return {
|
|
55
|
+
result: markdownResponse,
|
|
56
|
+
isError: false,
|
|
57
|
+
};
|
|
58
|
+
},
|
|
59
|
+
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAsBpC,eAAO,MAAM,WAAW,EAAE,IA8BzB,CAAC"}
|
package/dist/tools/test-run.js
CHANGED
|
@@ -12,6 +12,11 @@ const RunTestSchema = zod_1.z.object({
|
|
|
12
12
|
.string()
|
|
13
13
|
.describe("The name of the file where the test is located. File name must end with .spec.ts"),
|
|
14
14
|
project: zod_1.z.string().describe("The project to run the test on"),
|
|
15
|
+
headed: zod_1.z
|
|
16
|
+
.boolean()
|
|
17
|
+
.describe("Whether to run the test in headed mode (default is false, which is headless)")
|
|
18
|
+
.optional()
|
|
19
|
+
.default(false),
|
|
15
20
|
});
|
|
16
21
|
exports.runTestTool = {
|
|
17
22
|
schema: {
|
|
@@ -20,16 +25,27 @@ exports.runTestTool = {
|
|
|
20
25
|
parameters: RunTestSchema,
|
|
21
26
|
},
|
|
22
27
|
execute: async (input) => {
|
|
23
|
-
const { testName, suites, fileName, project } = input;
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
28
|
+
const { testName, suites, fileName, project, headed } = input;
|
|
29
|
+
try {
|
|
30
|
+
const result = await (0, test_run_1.runSingleTest)({
|
|
31
|
+
testName,
|
|
32
|
+
suites,
|
|
33
|
+
fileName,
|
|
34
|
+
projects: [project],
|
|
35
|
+
headed,
|
|
36
|
+
});
|
|
37
|
+
return {
|
|
38
|
+
result: JSON.stringify(result),
|
|
39
|
+
isError: false,
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
catch (error) {
|
|
43
|
+
// Ensure we capture the full error message regardless of error type
|
|
44
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
45
|
+
return {
|
|
46
|
+
result: JSON.stringify({ error: errorMessage }),
|
|
47
|
+
isError: true,
|
|
48
|
+
};
|
|
49
|
+
}
|
|
34
50
|
},
|
|
35
51
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.49.0",
|
|
3
|
+
"version": "0.50.1",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -74,10 +74,10 @@
|
|
|
74
74
|
"tsx": "^4.16.2",
|
|
75
75
|
"typescript": "^5.3.3",
|
|
76
76
|
"zod": "^3.23.8",
|
|
77
|
-
"@empiricalrun/llm": "^0.10.1",
|
|
78
77
|
"@empiricalrun/r2-uploader": "^0.3.8",
|
|
79
78
|
"@empiricalrun/reporter": "^0.23.1",
|
|
80
|
-
"@empiricalrun/test-run": "^0.7.
|
|
79
|
+
"@empiricalrun/test-run": "^0.7.4",
|
|
80
|
+
"@empiricalrun/llm": "^0.10.1"
|
|
81
81
|
},
|
|
82
82
|
"devDependencies": {
|
|
83
83
|
"@playwright/test": "1.47.1",
|