@empiricalrun/test-gen 0.50.0 → 0.50.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/dist/agent/chat.d.ts +2 -1
- package/dist/agent/chat.d.ts.map +1 -1
- package/dist/agent/chat.js +14 -8
- package/dist/bin/index.js +13 -2
- package/dist/bin/utils/index.d.ts +1 -0
- package/dist/bin/utils/index.d.ts.map +1 -1
- package/dist/reporter/index.d.ts +0 -7
- package/dist/reporter/index.d.ts.map +1 -1
- package/dist/reporter/index.js +4 -15
- package/dist/tools/test-run.d.ts.map +1 -1
- package/dist/tools/test-run.js +21 -11
- package/package.json +4 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.50.2
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- d808dda: feat: support claude3.5 as chat agent model with cli flag
|
|
8
|
+
- 8e13e16: fix: move away from assets.empirical.run for test run reports
|
|
9
|
+
- Updated dependencies [d808dda]
|
|
10
|
+
- Updated dependencies [8e13e16]
|
|
11
|
+
- @empiricalrun/llm@0.10.2
|
|
12
|
+
- @empiricalrun/reporter@0.23.2
|
|
13
|
+
|
|
14
|
+
## 0.50.1
|
|
15
|
+
|
|
16
|
+
### Patch Changes
|
|
17
|
+
|
|
18
|
+
- b070af3: fix: error handling in test run tool
|
|
19
|
+
- a94ef14: fix: chat agent system prompt for proactiveness
|
|
20
|
+
- Updated dependencies [b070af3]
|
|
21
|
+
- @empiricalrun/test-run@0.7.4
|
|
22
|
+
|
|
3
23
|
## 0.50.0
|
|
4
24
|
|
|
5
25
|
### Minor Changes
|
package/dist/agent/chat.d.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
2
|
import type { Anthropic } from "@empiricalrun/llm/claude";
|
|
3
|
-
export declare function chatAgent({ prompt, }: {
|
|
3
|
+
export declare function chatAgent({ prompt, chatModel, }: {
|
|
4
4
|
prompt: string;
|
|
5
5
|
trace?: TraceClient;
|
|
6
|
+
chatModel?: "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022";
|
|
6
7
|
}): Promise<Anthropic.Messages.MessageParam[]>;
|
|
7
8
|
//# sourceMappingURL=chat.d.ts.map
|
package/dist/agent/chat.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AA8E1D,wBAAsB,SAAS,CAAC,EAC9B,MAAM,EACN,SAAwC,GACzC,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,4BAA4B,GAAG,4BAA4B,CAAC;CACzE,8CAiEA"}
|
package/dist/agent/chat.js
CHANGED
|
@@ -53,6 +53,12 @@ ${(0, repo_tree_1.generateAsciiTree)(process.cwd())}
|
|
|
53
53
|
While specifying paths to files, use relative paths from the current working directory. For example:
|
|
54
54
|
- Correct path: "tests/lesson.spec.ts"
|
|
55
55
|
- Incorrect path: "/repo/tests/lesson.spec.ts" or "${path_1.default.basename(process.cwd())}/tests/lesson.spec.ts"
|
|
56
|
+
|
|
57
|
+
# Proactiveness
|
|
58
|
+
You are allowed to be proactive, but only when the user asks you to do something. You should strive to
|
|
59
|
+
strike a balance between:
|
|
60
|
+
1. Doing the right thing when asked, including taking actions and follow-up actions
|
|
61
|
+
2. Not surprising the user with actions you take without asking
|
|
56
62
|
`;
|
|
57
63
|
const tools = [
|
|
58
64
|
test_run_1.runTestTool,
|
|
@@ -65,7 +71,7 @@ const toolExecutors = {
|
|
|
65
71
|
...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
|
|
66
72
|
str_replace_editor: claude_1.strReplaceEditorTool,
|
|
67
73
|
};
|
|
68
|
-
async function chatAgent({ prompt, }) {
|
|
74
|
+
async function chatAgent({ prompt, chatModel = "claude-3-7-sonnet-20250219", }) {
|
|
69
75
|
let userPrompt = prompt;
|
|
70
76
|
let chatState = new claude_1.ChatState();
|
|
71
77
|
chatState.pushTextMessage({ message: { role: "user", content: userPrompt } });
|
|
@@ -96,13 +102,13 @@ async function chatAgent({ prompt, }) {
|
|
|
96
102
|
shouldAskUserForInput = false;
|
|
97
103
|
continue;
|
|
98
104
|
}
|
|
99
|
-
const response = await (0, claude_1.createChatCompletion)(
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
105
|
+
const response = await (0, claude_1.createChatCompletion)({
|
|
106
|
+
systemPrompt,
|
|
107
|
+
messages: chatState.getMessages(),
|
|
108
|
+
tools: tools.map((tool) => (0, claude_1.convertOpenAISchemaToAnthropic)((0, zod_schema_1.zodToOpenAITool)(tool.schema))),
|
|
109
|
+
model: chatModel,
|
|
110
|
+
withStrReplaceEditor: true,
|
|
111
|
+
});
|
|
106
112
|
if (!response) {
|
|
107
113
|
throw new Error("No response from LLM");
|
|
108
114
|
}
|
package/dist/bin/index.js
CHANGED
|
@@ -32,9 +32,19 @@ process.on("beforeExit", async () => await flushEvents());
|
|
|
32
32
|
process.on("exit", async () => await flushEvents());
|
|
33
33
|
process.on("SIGINT", async () => await flushEvents());
|
|
34
34
|
process.on("SIGTERM", async () => await flushEvents());
|
|
35
|
-
async function runChatAgent(prompt) {
|
|
35
|
+
async function runChatAgent(prompt, modelInput) {
|
|
36
|
+
const MODEL_MAPPING = {
|
|
37
|
+
"claude-3-7": "claude-3-7-sonnet-20250219",
|
|
38
|
+
"3-7": "claude-3-7-sonnet-20250219",
|
|
39
|
+
"claude-3-5": "claude-3-5-sonnet-20241022",
|
|
40
|
+
"3-5": "claude-3-5-sonnet-20241022",
|
|
41
|
+
};
|
|
42
|
+
if (modelInput && !MODEL_MAPPING[modelInput]) {
|
|
43
|
+
throw new Error(`Invalid chat model: ${modelInput}`);
|
|
44
|
+
}
|
|
36
45
|
return await (0, chat_1.chatAgent)({
|
|
37
46
|
prompt,
|
|
47
|
+
chatModel: modelInput ? MODEL_MAPPING[modelInput] : undefined,
|
|
38
48
|
});
|
|
39
49
|
}
|
|
40
50
|
async function runAgentsWorkflow(testGenConfig, testGenToken) {
|
|
@@ -171,6 +181,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
|
|
|
171
181
|
.option("--file <test-file>", "File path of the test case (inside tests dir)")
|
|
172
182
|
.option("--suites <suites>", "Comma separated list of describe blocks")
|
|
173
183
|
.option("--use-chat", "Use chat agent (and not the workflow)")
|
|
184
|
+
.option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022)")
|
|
174
185
|
.parse(process.argv);
|
|
175
186
|
const options = program.opts();
|
|
176
187
|
const completedOptions = await (0, utils_2.validateAndCompleteCliOptions)(options);
|
|
@@ -197,7 +208,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
|
|
|
197
208
|
await (0, test_build_1.downloadBuild)(testGenConfig.build || {});
|
|
198
209
|
try {
|
|
199
210
|
if (completedOptions.useChat) {
|
|
200
|
-
await runChatAgent(completedOptions.prompt);
|
|
211
|
+
await runChatAgent(completedOptions.prompt, completedOptions.chatModel);
|
|
201
212
|
return;
|
|
202
213
|
}
|
|
203
214
|
else {
|
|
@@ -5,6 +5,7 @@ export interface CliOptions {
|
|
|
5
5
|
prompt?: string;
|
|
6
6
|
suites?: string;
|
|
7
7
|
useChat?: boolean;
|
|
8
|
+
chatModel?: "claude-3-7" | "3-7" | "claude-3-5" | "3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022";
|
|
8
9
|
}
|
|
9
10
|
export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;
|
|
10
11
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,CAAC;CAClC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}
|
package/dist/reporter/index.d.ts
CHANGED
|
@@ -8,18 +8,11 @@ export declare function getReporter(): Reporter | undefined;
|
|
|
8
8
|
/**
|
|
9
9
|
* function will upload videos and json summary of test results to r2 and report them to reporter.
|
|
10
10
|
* method won't throw error if it fails to report
|
|
11
|
-
* @param {{
|
|
12
|
-
* projectRepoName: string;
|
|
13
|
-
* }} {
|
|
14
|
-
* projectRepoName
|
|
15
|
-
* }
|
|
16
|
-
* @returns Promise<void> returns void
|
|
17
11
|
*/
|
|
18
12
|
export declare function setReporterConfig(config: ReporterConfigType): void;
|
|
19
13
|
export declare class TestGenUpdatesReporter {
|
|
20
14
|
private repoDir;
|
|
21
15
|
constructor();
|
|
22
|
-
sendGenTrace(trace: string): Promise<void>;
|
|
23
16
|
reportGenAssets({ projectRepoName, testName, }: {
|
|
24
17
|
projectRepoName: string;
|
|
25
18
|
testName: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporter/index.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAa5E,KAAK,kBAAkB,GAAG;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAKF,wBAAgB,WAAW,IAAI,QAAQ,GAAG,SAAS,CAUlD;AAED
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporter/index.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAa5E,KAAK,kBAAkB,GAAG;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAKF,wBAAgB,WAAW,IAAI,QAAQ,GAAG,SAAS,CAUlD;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI,CAGlE;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,OAAO,CAAS;;IAKlB,eAAe,CAAC,EACpB,eAAe,EACf,QAAQ,GACT,EAAE;QACD,eAAe,EAAE,MAAM,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;KAClB;IAgDK,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA8C9C,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY3C,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY1C,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAWxD"}
|
package/dist/reporter/index.js
CHANGED
|
@@ -27,12 +27,6 @@ exports.getReporter = getReporter;
|
|
|
27
27
|
/**
|
|
28
28
|
* function will upload videos and json summary of test results to r2 and report them to reporter.
|
|
29
29
|
* method won't throw error if it fails to report
|
|
30
|
-
* @param {{
|
|
31
|
-
* projectRepoName: string;
|
|
32
|
-
* }} {
|
|
33
|
-
* projectRepoName
|
|
34
|
-
* }
|
|
35
|
-
* @returns Promise<void> returns void
|
|
36
30
|
*/
|
|
37
31
|
function setReporterConfig(config) {
|
|
38
32
|
console.info("initialised reporter config");
|
|
@@ -44,10 +38,6 @@ class TestGenUpdatesReporter {
|
|
|
44
38
|
constructor() {
|
|
45
39
|
this.repoDir = process.cwd();
|
|
46
40
|
}
|
|
47
|
-
async sendGenTrace(trace) {
|
|
48
|
-
console.log("trace", trace);
|
|
49
|
-
// upload trace to r2 and report it to reporter
|
|
50
|
-
}
|
|
51
41
|
async reportGenAssets({ projectRepoName, testName, }) {
|
|
52
42
|
const logger = new logger_1.CustomLogger();
|
|
53
43
|
try {
|
|
@@ -62,16 +52,15 @@ class TestGenUpdatesReporter {
|
|
|
62
52
|
repoDir: this.repoDir,
|
|
63
53
|
});
|
|
64
54
|
const reporter = getReporter();
|
|
65
|
-
const message = {
|
|
66
|
-
type: "video",
|
|
67
|
-
videoUrls,
|
|
68
|
-
};
|
|
69
55
|
await Promise.allSettled([
|
|
70
56
|
...(videoUrls.length
|
|
71
57
|
? [
|
|
72
58
|
reporter?.report(new reporter_1.ProcessLogMessageBuilder({
|
|
73
59
|
type: "video",
|
|
74
|
-
message: JSON.stringify(
|
|
60
|
+
message: JSON.stringify({
|
|
61
|
+
type: "video",
|
|
62
|
+
videoUrls,
|
|
63
|
+
}),
|
|
75
64
|
})),
|
|
76
65
|
]
|
|
77
66
|
: []),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAsBpC,eAAO,MAAM,WAAW,EAAE,
|
|
1
|
+
{"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAsBpC,eAAO,MAAM,WAAW,EAAE,IA8BzB,CAAC"}
|
package/dist/tools/test-run.js
CHANGED
|
@@ -26,16 +26,26 @@ exports.runTestTool = {
|
|
|
26
26
|
},
|
|
27
27
|
execute: async (input) => {
|
|
28
28
|
const { testName, suites, fileName, project, headed } = input;
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
29
|
+
try {
|
|
30
|
+
const result = await (0, test_run_1.runSingleTest)({
|
|
31
|
+
testName,
|
|
32
|
+
suites,
|
|
33
|
+
fileName,
|
|
34
|
+
projects: [project],
|
|
35
|
+
headed,
|
|
36
|
+
});
|
|
37
|
+
return {
|
|
38
|
+
result: JSON.stringify(result),
|
|
39
|
+
isError: false,
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
catch (error) {
|
|
43
|
+
// Ensure we capture the full error message regardless of error type
|
|
44
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
45
|
+
return {
|
|
46
|
+
result: JSON.stringify({ error: errorMessage }),
|
|
47
|
+
isError: true,
|
|
48
|
+
};
|
|
49
|
+
}
|
|
40
50
|
},
|
|
41
51
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.50.0",
|
|
3
|
+
"version": "0.50.2",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -74,10 +74,10 @@
|
|
|
74
74
|
"tsx": "^4.16.2",
|
|
75
75
|
"typescript": "^5.3.3",
|
|
76
76
|
"zod": "^3.23.8",
|
|
77
|
-
"@empiricalrun/llm": "^0.10.
|
|
77
|
+
"@empiricalrun/llm": "^0.10.2",
|
|
78
78
|
"@empiricalrun/r2-uploader": "^0.3.8",
|
|
79
|
-
"@empiricalrun/reporter": "^0.23.
|
|
80
|
-
"@empiricalrun/test-run": "^0.7.
|
|
79
|
+
"@empiricalrun/reporter": "^0.23.2",
|
|
80
|
+
"@empiricalrun/test-run": "^0.7.4"
|
|
81
81
|
},
|
|
82
82
|
"devDependencies": {
|
|
83
83
|
"@playwright/test": "1.47.1",
|