@empiricalrun/test-gen 0.50.0 → 0.50.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.50.2
4
+
5
+ ### Patch Changes
6
+
7
+ - d808dda: feat: support claude3.5 as chat agent model with cli flag
8
+ - 8e13e16: fix: move away from assets.empirical.run for test run reports
9
+ - Updated dependencies [d808dda]
10
+ - Updated dependencies [8e13e16]
11
+ - @empiricalrun/llm@0.10.2
12
+ - @empiricalrun/reporter@0.23.2
13
+
14
+ ## 0.50.1
15
+
16
+ ### Patch Changes
17
+
18
+ - b070af3: fix: error handling in test run tool
19
+ - a94ef14: fix: chat agent system prompt for proactiveness
20
+ - Updated dependencies [b070af3]
21
+ - @empiricalrun/test-run@0.7.4
22
+
3
23
  ## 0.50.0
4
24
 
5
25
  ### Minor Changes
@@ -1,7 +1,8 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
2
  import type { Anthropic } from "@empiricalrun/llm/claude";
3
- export declare function chatAgent({ prompt, }: {
3
+ export declare function chatAgent({ prompt, chatModel, }: {
4
4
  prompt: string;
5
5
  trace?: TraceClient;
6
+ chatModel?: "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022";
6
7
  }): Promise<Anthropic.Messages.MessageParam[]>;
7
8
  //# sourceMappingURL=chat.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAwE1D,wBAAsB,SAAS,CAAC,EAC9B,MAAM,GACP,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,8CAqEA"}
1
+ {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AA8E1D,wBAAsB,SAAS,CAAC,EAC9B,MAAM,EACN,SAAwC,GACzC,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,4BAA4B,GAAG,4BAA4B,CAAC;CACzE,8CAiEA"}
@@ -53,6 +53,12 @@ ${(0, repo_tree_1.generateAsciiTree)(process.cwd())}
53
53
  While specifying paths to files, use relative paths from the current working directory. For example:
54
54
  - Correct path: "tests/lesson.spec.ts"
55
55
  - Incorrect path: "/repo/tests/lesson.spec.ts" or "${path_1.default.basename(process.cwd())}/tests/lesson.spec.ts"
56
+
57
+ # Proactiveness
58
+ You are allowed to be proactive, but only when the user asks you to do something. You should strive to
59
+ strike a balance between:
60
+ 1. Doing the right thing when asked, including taking actions and follow-up actions
61
+ 2. Not surprising the user with actions you take without asking
56
62
  `;
57
63
  const tools = [
58
64
  test_run_1.runTestTool,
@@ -65,7 +71,7 @@ const toolExecutors = {
65
71
  ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
66
72
  str_replace_editor: claude_1.strReplaceEditorTool,
67
73
  };
68
- async function chatAgent({ prompt, }) {
74
+ async function chatAgent({ prompt, chatModel = "claude-3-7-sonnet-20250219", }) {
69
75
  let userPrompt = prompt;
70
76
  let chatState = new claude_1.ChatState();
71
77
  chatState.pushTextMessage({ message: { role: "user", content: userPrompt } });
@@ -96,13 +102,13 @@ async function chatAgent({ prompt, }) {
96
102
  shouldAskUserForInput = false;
97
103
  continue;
98
104
  }
99
- const response = await (0, claude_1.createChatCompletion)(systemPrompt, chatState.getMessages(), [
100
- ...tools.map((tool) => (0, claude_1.convertOpenAISchemaToAnthropic)((0, zod_schema_1.zodToOpenAITool)(tool.schema))),
101
- {
102
- type: "text_editor_20250124",
103
- name: "str_replace_editor",
104
- },
105
- ]);
105
+ const response = await (0, claude_1.createChatCompletion)({
106
+ systemPrompt,
107
+ messages: chatState.getMessages(),
108
+ tools: tools.map((tool) => (0, claude_1.convertOpenAISchemaToAnthropic)((0, zod_schema_1.zodToOpenAITool)(tool.schema))),
109
+ model: chatModel,
110
+ withStrReplaceEditor: true,
111
+ });
106
112
  if (!response) {
107
113
  throw new Error("No response from LLM");
108
114
  }
package/dist/bin/index.js CHANGED
@@ -32,9 +32,19 @@ process.on("beforeExit", async () => await flushEvents());
32
32
  process.on("exit", async () => await flushEvents());
33
33
  process.on("SIGINT", async () => await flushEvents());
34
34
  process.on("SIGTERM", async () => await flushEvents());
35
- async function runChatAgent(prompt) {
35
+ async function runChatAgent(prompt, modelInput) {
36
+ const MODEL_MAPPING = {
37
+ "claude-3-7": "claude-3-7-sonnet-20250219",
38
+ "3-7": "claude-3-7-sonnet-20250219",
39
+ "claude-3-5": "claude-3-5-sonnet-20241022",
40
+ "3-5": "claude-3-5-sonnet-20241022",
41
+ };
42
+ if (modelInput && !MODEL_MAPPING[modelInput]) {
43
+ throw new Error(`Invalid chat model: ${modelInput}`);
44
+ }
36
45
  return await (0, chat_1.chatAgent)({
37
46
  prompt,
47
+ chatModel: modelInput ? MODEL_MAPPING[modelInput] : undefined,
38
48
  });
39
49
  }
40
50
  async function runAgentsWorkflow(testGenConfig, testGenToken) {
@@ -171,6 +181,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
171
181
  .option("--file <test-file>", "File path of the test case (inside tests dir)")
172
182
  .option("--suites <suites>", "Comma separated list of describe blocks")
173
183
  .option("--use-chat", "Use chat agent (and not the workflow)")
184
+ .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022)")
174
185
  .parse(process.argv);
175
186
  const options = program.opts();
176
187
  const completedOptions = await (0, utils_2.validateAndCompleteCliOptions)(options);
@@ -197,7 +208,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
197
208
  await (0, test_build_1.downloadBuild)(testGenConfig.build || {});
198
209
  try {
199
210
  if (completedOptions.useChat) {
200
- await runChatAgent(completedOptions.prompt);
211
+ await runChatAgent(completedOptions.prompt, completedOptions.chatModel);
201
212
  return;
202
213
  }
203
214
  else {
@@ -5,6 +5,7 @@ export interface CliOptions {
5
5
  prompt?: string;
6
6
  suites?: string;
7
7
  useChat?: boolean;
8
+ chatModel?: "claude-3-7" | "3-7" | "claude-3-5" | "3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022";
8
9
  }
9
10
  export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;
10
11
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,CAAC;CAClC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}
@@ -8,18 +8,11 @@ export declare function getReporter(): Reporter | undefined;
8
8
  /**
9
9
  * Uploads videos and a JSON summary of test results to R2 and reports them to the reporter.
10
10
  * Does not throw if reporting fails. NOTE(review): this description appears to belong to reportGenAssets, not setReporterConfig — confirm placement.
11
- * @param {{
12
- * projectRepoName: string;
13
- * }} {
14
- * projectRepoName
15
- * }
16
- * @returns Promise<void> returns void
17
11
  */
18
12
  export declare function setReporterConfig(config: ReporterConfigType): void;
19
13
  export declare class TestGenUpdatesReporter {
20
14
  private repoDir;
21
15
  constructor();
22
- sendGenTrace(trace: string): Promise<void>;
23
16
  reportGenAssets({ projectRepoName, testName, }: {
24
17
  projectRepoName: string;
25
18
  testName: string;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporter/index.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAa5E,KAAK,kBAAkB,GAAG;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAKF,wBAAgB,WAAW,IAAI,QAAQ,GAAG,SAAS,CAUlD;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI,CAGlE;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,OAAO,CAAS;;IAKlB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK1C,eAAe,CAAC,EACpB,eAAe,EACf,QAAQ,GACT,EAAE;QACD,eAAe,EAAE,MAAM,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;KAClB;IAiDK,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA8C9C,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY3C,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY1C,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAWxD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporter/index.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAa5E,KAAK,kBAAkB,GAAG;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAKF,wBAAgB,WAAW,IAAI,QAAQ,GAAG,SAAS,CAUlD;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI,CAGlE;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,OAAO,CAAS;;IAKlB,eAAe,CAAC,EACpB,eAAe,EACf,QAAQ,GACT,EAAE;QACD,eAAe,EAAE,MAAM,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;KAClB;IAgDK,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA8C9C,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY3C,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY1C,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAWxD"}
@@ -27,12 +27,6 @@ exports.getReporter = getReporter;
27
27
  /**
28
28
  * Uploads videos and a JSON summary of test results to R2 and reports them to the reporter.
29
29
  * Does not throw if reporting fails. NOTE(review): this description appears to belong to reportGenAssets, not setReporterConfig — confirm placement.
30
- * @param {{
31
- * projectRepoName: string;
32
- * }} {
33
- * projectRepoName
34
- * }
35
- * @returns Promise<void> returns void
36
30
  */
37
31
  function setReporterConfig(config) {
38
32
  console.info("initialised reporter config");
@@ -44,10 +38,6 @@ class TestGenUpdatesReporter {
44
38
  constructor() {
45
39
  this.repoDir = process.cwd();
46
40
  }
47
- async sendGenTrace(trace) {
48
- console.log("trace", trace);
49
- // upload trace to r2 and report it to reporter
50
- }
51
41
  async reportGenAssets({ projectRepoName, testName, }) {
52
42
  const logger = new logger_1.CustomLogger();
53
43
  try {
@@ -62,16 +52,15 @@ class TestGenUpdatesReporter {
62
52
  repoDir: this.repoDir,
63
53
  });
64
54
  const reporter = getReporter();
65
- const message = {
66
- type: "video",
67
- videoUrls,
68
- };
69
55
  await Promise.allSettled([
70
56
  ...(videoUrls.length
71
57
  ? [
72
58
  reporter?.report(new reporter_1.ProcessLogMessageBuilder({
73
59
  type: "video",
74
- message: JSON.stringify(message),
60
+ message: JSON.stringify({
61
+ type: "video",
62
+ videoUrls,
63
+ }),
75
64
  })),
76
65
  ]
77
66
  : []),
@@ -1 +1 @@
1
- {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAsBpC,eAAO,MAAM,WAAW,EAAE,IAoBzB,CAAC"}
1
+ {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAsBpC,eAAO,MAAM,WAAW,EAAE,IA8BzB,CAAC"}
@@ -26,16 +26,26 @@ exports.runTestTool = {
26
26
  },
27
27
  execute: async (input) => {
28
28
  const { testName, suites, fileName, project, headed } = input;
29
- const result = await (0, test_run_1.runSingleTest)({
30
- testName,
31
- suites,
32
- fileName,
33
- projects: [project],
34
- headed,
35
- });
36
- return {
37
- result: JSON.stringify(result),
38
- isError: false,
39
- };
29
+ try {
30
+ const result = await (0, test_run_1.runSingleTest)({
31
+ testName,
32
+ suites,
33
+ fileName,
34
+ projects: [project],
35
+ headed,
36
+ });
37
+ return {
38
+ result: JSON.stringify(result),
39
+ isError: false,
40
+ };
41
+ }
42
+ catch (error) {
43
+ // Ensure we capture the full error message regardless of error type
44
+ const errorMessage = error instanceof Error ? error.message : String(error);
45
+ return {
46
+ result: JSON.stringify({ error: errorMessage }),
47
+ isError: true,
48
+ };
49
+ }
40
50
  },
41
51
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.50.0",
3
+ "version": "0.50.2",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -74,10 +74,10 @@
74
74
  "tsx": "^4.16.2",
75
75
  "typescript": "^5.3.3",
76
76
  "zod": "^3.23.8",
77
- "@empiricalrun/llm": "^0.10.1",
77
+ "@empiricalrun/llm": "^0.10.2",
78
78
  "@empiricalrun/r2-uploader": "^0.3.8",
79
- "@empiricalrun/reporter": "^0.23.1",
80
- "@empiricalrun/test-run": "^0.7.3"
79
+ "@empiricalrun/reporter": "^0.23.2",
80
+ "@empiricalrun/test-run": "^0.7.4"
81
81
  },
82
82
  "devDependencies": {
83
83
  "@playwright/test": "1.47.1",