npm - @empiricalrun/test-gen - Versions diffs - 0.50.0 → 0.50.2 - Mend

@empiricalrun/test-gen 0.50.0 → 0.50.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/CHANGELOG.md +20 -0
package/dist/agent/chat.d.ts +2 -1
package/dist/agent/chat.d.ts.map +1 -1
package/dist/agent/chat.js +14 -8
package/dist/bin/index.js +13 -2
package/dist/bin/utils/index.d.ts +1 -0
package/dist/bin/utils/index.d.ts.map +1 -1
package/dist/reporter/index.d.ts +0 -7
package/dist/reporter/index.d.ts.map +1 -1
package/dist/reporter/index.js +4 -15
package/dist/tools/test-run.d.ts.map +1 -1
package/dist/tools/test-run.js +21 -11
package/package.json +4 -4

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,25 @@
 # @empiricalrun/test-gen
+## 0.50.2
+### Patch Changes
+- d808dda: feat: support claude3.5 as chat agent model with cli flag
+- 8e13e16: fix: move away from assets.empirical.run for test run reports
+- Updated dependencies [d808dda]
+- Updated dependencies [8e13e16]
+  - @empiricalrun/llm@0.10.2
+  - @empiricalrun/reporter@0.23.2
+## 0.50.1
+### Patch Changes
+- b070af3: fix: error handling in test run tool
+- a94ef14: fix: chat agent system prompt for proactiveness
+- Updated dependencies [b070af3]
+  - @empiricalrun/test-run@0.7.4
 ## 0.50.0
 ### Minor Changes

package/dist/agent/chat.d.ts CHANGED Viewed

@@ -1,7 +1,8 @@
 import { TraceClient } from "@empiricalrun/llm";
 import type { Anthropic } from "@empiricalrun/llm/claude";
-export declare function chatAgent({ prompt, }: {
+export declare function chatAgent({ prompt, chatModel, }: {
     prompt: string;
     trace?: TraceClient;
+    chatModel?: "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022";
 }): Promise<Anthropic.Messages.MessageParam[]>;
 //# sourceMappingURL=chat.d.ts.map

package/dist/agent/chat.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;~~AAwE1D~~,wBAAsB,SAAS,CAAC,EAC9B,MAAM,~~GACP~~,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;~~CACrB~~,~~8CAqEA~~"}
1	+ {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AA8E1D,wBAAsB,SAAS,CAAC,EAC9B,MAAM,EACN,SAAwC,GACzC,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,4BAA4B,GAAG,4BAA4B,CAAC;CACzE,8CAiEA"}

package/dist/agent/chat.js CHANGED Viewed

@@ -53,6 +53,12 @@ ${(0, repo_tree_1.generateAsciiTree)(process.cwd())}
 While specifying paths to files, use relative paths from the current working directory. For example:
 - Correct path: "tests/lesson.spec.ts"
 - Incorrect path: "/repo/tests/lesson.spec.ts" or "${path_1.default.basename(process.cwd())}/tests/lesson.spec.ts"
+# Proactiveness
+You are allowed to be proactive, but only when the user asks you to do something. You should strive to
+strike a balance between:
+1. Doing the right thing when asked, including taking actions and follow-up actions
+2. Not surprising the user with actions you take without asking
 `;
 const tools = [
     test_run_1.runTestTool,
@@ -65,7 +71,7 @@ const toolExecutors = {
     ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
     str_replace_editor: claude_1.strReplaceEditorTool,
 };
-async function chatAgent({ prompt, }) {
+async function chatAgent({ prompt, chatModel = "claude-3-7-sonnet-20250219", }) {
     let userPrompt = prompt;
     let chatState = new claude_1.ChatState();
     chatState.pushTextMessage({ message: { role: "user", content: userPrompt } });
@@ -96,13 +102,13 @@ async function chatAgent({ prompt, }) {
             shouldAskUserForInput = false;
             continue;
         }
-        const response = await (0, claude_1.createChatCompletion)(systemPrompt, chatState.getMessages(), [
-            ...tools.map((tool) => (0, claude_1.convertOpenAISchemaToAnthropic)((0, zod_schema_1.zodToOpenAITool)(tool.schema))),
-            {
-                type: "text_editor_20250124",
-                name: "str_replace_editor",
-            },
-        ]);
+        const response = await (0, claude_1.createChatCompletion)({
+            systemPrompt,
+            messages: chatState.getMessages(),
+            tools: tools.map((tool) => (0, claude_1.convertOpenAISchemaToAnthropic)((0, zod_schema_1.zodToOpenAITool)(tool.schema))),
+            model: chatModel,
+            withStrReplaceEditor: true,
+        });
         if (!response) {
             throw new Error("No response from LLM");
         }

package/dist/bin/index.js CHANGED Viewed

@@ -32,9 +32,19 @@ process.on("beforeExit", async () => await flushEvents());
 process.on("exit", async () => await flushEvents());
 process.on("SIGINT", async () => await flushEvents());
 process.on("SIGTERM", async () => await flushEvents());
-async function runChatAgent(prompt) {
+async function runChatAgent(prompt, modelInput) {
+    const MODEL_MAPPING = {
+        "claude-3-7": "claude-3-7-sonnet-20250219",
+        "3-7": "claude-3-7-sonnet-20250219",
+        "claude-3-5": "claude-3-5-sonnet-20241022",
+        "3-5": "claude-3-5-sonnet-20241022",
+    };
+    if (modelInput && !MODEL_MAPPING[modelInput]) {
+        throw new Error(`Invalid chat model: ${modelInput}`);
+    }
     return await (0, chat_1.chatAgent)({
         prompt,
+        chatModel: modelInput ? MODEL_MAPPING[modelInput] : undefined,
     });
 }
 async function runAgentsWorkflow(testGenConfig, testGenToken) {
@@ -171,6 +181,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
         .option("--file <test-file>", "File path of the test case (inside tests dir)")
         .option("--suites <suites>", "Comma separated list of describe blocks")
         .option("--use-chat", "Use chat agent (and not the workflow)")
+        .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022)")
         .parse(process.argv);
     const options = program.opts();
     const completedOptions = await (0, utils_2.validateAndCompleteCliOptions)(options);
@@ -197,7 +208,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
     await (0, test_build_1.downloadBuild)(testGenConfig.build || {});
     try {
         if (completedOptions.useChat) {
-            await runChatAgent(completedOptions.prompt);
+            await runChatAgent(completedOptions.prompt, completedOptions.chatModel);
             return;
         }
         else {

package/dist/bin/utils/index.d.ts CHANGED Viewed

@@ -5,6 +5,7 @@ export interface CliOptions {
     prompt?: string;
     suites?: string;
     useChat?: boolean;
+    chatModel?: "claude-3-7" | "3-7" | "claude-3-5" | "3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022";
 }
 export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;
 //# sourceMappingURL=index.d.ts.map

package/dist/bin/utils/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;~~CACnB~~;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,CAAC;CAClC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}

package/dist/reporter/index.d.ts CHANGED Viewed

@@ -8,18 +8,11 @@ export declare function getReporter(): Reporter | undefined;
 /**
  * function will upload videos and json summary of test results to r2 and report them to reporter.
  * method won't throw error if it fails to report
- * @param {{
- *   projectRepoName: string;
- * }} {
- *   projectRepoName
- * }
- * @returns Promise<void> returns void
  */
 export declare function setReporterConfig(config: ReporterConfigType): void;
 export declare class TestGenUpdatesReporter {
     private repoDir;
     constructor();
-    sendGenTrace(trace: string): Promise<void>;
     reportGenAssets({ projectRepoName, testName, }: {
         projectRepoName: string;
         testName: string;

package/dist/reporter/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporter/index.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAa5E,KAAK,kBAAkB,GAAG;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAKF,wBAAgB,WAAW,IAAI,QAAQ,GAAG,SAAS,CAUlD;AAED~~;;;;;;;;;GASG~~;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI,CAGlE;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,OAAO,CAAS;;IAKlB,~~YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK1C,~~eAAe,CAAC,EACpB,eAAe,EACf,QAAQ,GACT,EAAE;QACD,eAAe,EAAE,MAAM,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;KAClB;~~IAiDK~~,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA8C9C,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY3C,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY1C,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAWxD"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporter/index.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAa5E,KAAK,kBAAkB,GAAG;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAKF,wBAAgB,WAAW,IAAI,QAAQ,GAAG,SAAS,CAUlD;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI,CAGlE;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,OAAO,CAAS;;IAKlB,eAAe,CAAC,EACpB,eAAe,EACf,QAAQ,GACT,EAAE;QACD,eAAe,EAAE,MAAM,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;KAClB;IAgDK,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA8C9C,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY3C,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY1C,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAWxD"}

package/dist/reporter/index.js CHANGED Viewed

@@ -27,12 +27,6 @@ exports.getReporter = getReporter;
 /**
  * function will upload videos and json summary of test results to r2 and report them to reporter.
  * method won't throw error if it fails to report
- * @param {{
- *   projectRepoName: string;
- * }} {
- *   projectRepoName
- * }
- * @returns Promise<void> returns void
  */
 function setReporterConfig(config) {
     console.info("initialised reporter config");
@@ -44,10 +38,6 @@ class TestGenUpdatesReporter {
     constructor() {
         this.repoDir = process.cwd();
     }
-    async sendGenTrace(trace) {
-        console.log("trace", trace);
-        // upload trace to r2 and report it to reporter
-    }
     async reportGenAssets({ projectRepoName, testName, }) {
         const logger = new logger_1.CustomLogger();
         try {
@@ -62,16 +52,15 @@ class TestGenUpdatesReporter {
                 repoDir: this.repoDir,
             });
             const reporter = getReporter();
-            const message = {
-                type: "video",
-                videoUrls,
-            };
             await Promise.allSettled([
                 ...(videoUrls.length
                     ? [
                         reporter?.report(new reporter_1.ProcessLogMessageBuilder({
                             type: "video",
-                            message: JSON.stringify(message),
+                            message: JSON.stringify({
+                                type: "video",
+                                videoUrls,
+                            }),
                         })),
                     ]
                     : []),

package/dist/tools/test-run.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAsBpC,eAAO,MAAM,WAAW,EAAE,~~IAoBzB~~,CAAC"}
1	+ {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAsBpC,eAAO,MAAM,WAAW,EAAE,IA8BzB,CAAC"}

package/dist/tools/test-run.js CHANGED Viewed

@@ -26,16 +26,26 @@ exports.runTestTool = {
     },
     execute: async (input) => {
         const { testName, suites, fileName, project, headed } = input;
-        const result = await (0, test_run_1.runSingleTest)({
-            testName,
-            suites,
-            fileName,
-            projects: [project],
-            headed,
-        });
-        return {
-            result: JSON.stringify(result),
-            isError: false,
-        };
+        try {
+            const result = await (0, test_run_1.runSingleTest)({
+                testName,
+                suites,
+                fileName,
+                projects: [project],
+                headed,
+            });
+            return {
+                result: JSON.stringify(result),
+                isError: false,
+            };
+        }
+        catch (error) {
+            // Ensure we capture the full error message regardless of error type
+            const errorMessage = error instanceof Error ? error.message : String(error);
+            return {
+                result: JSON.stringify({ error: errorMessage }),
+                isError: true,
+            };
+        }
     },
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.50.0",
+  "version": "0.50.2",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"
@@ -74,10 +74,10 @@
     "tsx": "^4.16.2",
     "typescript": "^5.3.3",
     "zod": "^3.23.8",
-    "@empiricalrun/llm": "^0.10.1",
+    "@empiricalrun/llm": "^0.10.2",
     "@empiricalrun/r2-uploader": "^0.3.8",
-    "@empiricalrun/reporter": "^0.23.1",
-    "@empiricalrun/test-run": "^0.7.3"
+    "@empiricalrun/reporter": "^0.23.2",
+    "@empiricalrun/test-run": "^0.7.4"
   },
   "devDependencies": {
     "@playwright/test": "1.47.1",