npm - @empiricalrun/test-gen - Versions diffs - 0.50.4 → 0.51.1 - Mend

@empiricalrun/test-gen 0.50.4 → 0.51.1

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (20)

package/CHANGELOG.md +24 -0
package/dist/agent/browsing/run.d.ts.map +1 -1
package/dist/agent/browsing/run.js +7 -4
package/dist/agent/browsing/utils.d.ts +2 -1
package/dist/agent/browsing/utils.d.ts.map +1 -1
package/dist/agent/browsing/utils.js +25 -14
package/dist/agent/chat.d.ts +7 -6
package/dist/agent/chat.d.ts.map +1 -1
package/dist/agent/chat.js +60 -33
package/dist/bin/index.js +19 -16
package/dist/bin/utils/index.d.ts +1 -0
package/dist/bin/utils/index.d.ts.map +1 -1
package/dist/bin/utils/index.js +2 -2
package/dist/index.d.ts.map +1 -1
package/dist/index.js +48 -36
package/dist/tools/browser-agent.d.ts.map +1 -1
package/dist/tools/browser-agent.js +21 -5
package/dist/utils/repo-tree.d.ts.map +1 -1
package/dist/utils/repo-tree.js +1 -0
package/package.json +3 -3

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,29 @@
 # @empiricalrun/test-gen
+## 0.51.1
+### Patch Changes
+- d04190f: fix: remove process.on listeners to avoid leaks
+- 75c7921: fix: show chat usage summary on ctrl+C
+- Updated dependencies [d04190f]
+  - @empiricalrun/test-run@0.7.6
+  - @empiricalrun/llm@0.11.1
+## 0.51.0
+### Minor Changes
+- ac754ae: feat: enable disk persistence for chat state
+- 561aa8e: feat: add usage summary (tokens, cost) for chat agent
+### Patch Changes
+- 3e3d937: fix: add some validations for browser agent tool call
+- Updated dependencies [ac754ae]
+- Updated dependencies [561aa8e]
+  - @empiricalrun/llm@0.11.0
 ## 0.50.4
 ### Patch Changes

package/dist/agent/browsing/run.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,GACR,EAAE,iBAAiB;;;GA8EnB"}
1	+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,GACR,EAAE,iBAAiB;;;GAgFnB"}

package/dist/agent/browsing/run.js CHANGED Viewed

@@ -32,13 +32,14 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
     const testsDirectory = `${repoDir}/tests`;
     const isTestRunTriggeredForTeardown = teardownFileRegex.test(testFilePath);
     const teardowns = new utils_1.TeardownManager(testsDirectory);
-    if (!isTestRunTriggeredForTeardown) {
-        await teardowns.skipAll();
-    }
+    let removeListeners;
     const command = `npx playwright test ${testFilePath} --retries 0 --project ${project} --timeout 0 --headed`;
     let isError = false;
     let error = "";
     try {
+        if (!isTestRunTriggeredForTeardown) {
+            removeListeners = await teardowns.skipAll();
+        }
         await (0, exec_1.cmd)(command.split(" "), {
             env: {
                 APP_PORT: port.toString(),
@@ -55,7 +56,9 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
         console.error(error);
         isError = true;
     }
-    if (!isTestRunTriggeredForTeardown) {
+    finally {
+        // Remove process listeners before unskipping files
+        removeListeners?.();
         teardowns.unskipAll();
     }
     // clean up the file if there is any error

package/dist/agent/browsing/utils.d.ts CHANGED Viewed

@@ -25,6 +25,7 @@ export declare function injectPwLocatorGenerator(page: Page): Promise<void>;
  * @return {*}  {Promise<PlaywrightTestConfig>}
  */
 export declare function readPlaywrightConfig(repoDir: string): Promise<PlaywrightTestConfig>;
+export declare function getValidProjectNames(playwrightConfig: PlaywrightTestConfig): Promise<string[]>;
 /**
  * detect the project name for the given file in playwright test repo
  * if project and test file path for running test don't match, then playwright throws error
@@ -38,7 +39,7 @@ export declare class TeardownManager {
     private teardownFiles;
     private getAllTeardownFiles;
     private skipTeardownFile;
-    skipAll(): Promise<void>;
+    skipAll(): Promise<() => void>;
     unskipAll(): void;
 }
 //# sourceMappingURL=utils.d.ts.map

package/dist/agent/browsing/utils.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,~~iBAkBA~~;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;~~IAuBb~~,SAAS;CAKjB"}
1	+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,iBAyBA;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD,wBAAsB,oBAAoB,CACxC,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,EAAE,CAAC,CAQnB;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAoBb,SAAS;CAKjB"}

package/dist/agent/browsing/utils.js CHANGED Viewed

@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.TeardownManager = exports.detectProjectName = exports.readPlaywrightConfig = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.markTestAsOnly = exports.replaceTodoWithCreateTest = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
+exports.TeardownManager = exports.detectProjectName = exports.getValidProjectNames = exports.readPlaywrightConfig = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.markTestAsOnly = exports.replaceTodoWithCreateTest = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
 const fs_extra_1 = __importDefault(require("fs-extra"));
 const minimatch_1 = require("minimatch");
 const path_1 = __importDefault(require("path"));
@@ -92,7 +92,12 @@ async function replaceTodoWithCreateTest({ testFilePath, testCaseName, testCaseS
     // This method is an alternative to prepareFileForUpdateScenario
     // TODO: Does not support multiple pages, scoped variables, updates in POM files
     const fileContent = await fs_extra_1.default.readFile(testFilePath, "utf-8");
-    await fs_extra_1.default.writeFile(testFilePath, fileContent.replace(/\/\/ TODO\(agent\): (.*)/, (_, todoText) => `await createTest("${todoText.replace(/"/g, '\\"')}", page);`));
+    const todoRegex = /\/\/ TODO\(agent\): (.*)/;
+    const todoMatch = fileContent.match(todoRegex);
+    if (!todoMatch) {
+        throw new Error(`No "// TODO(agent):" comment found in file: ${testFilePath}`);
+    }
+    await fs_extra_1.default.writeFile(testFilePath, fileContent.replace(todoRegex, (_, todoText) => `await createTest("${todoText.replace(/"/g, '\\"')}", page);`));
     await addImportForCreateTest(testFilePath);
     await markTestAsOnly({
         testCaseName,
@@ -302,6 +307,16 @@ function matchAgainstPattern(pattern, filePathToTest) {
         return (0, minimatch_1.minimatch)(filePathToTest, pattern);
     }
 }
+async function getValidProjectNames(playwrightConfig) {
+    if (!playwrightConfig.projects) {
+        return [];
+    }
+    const filteredProjectNames = playwrightConfig.projects
+        .map((p) => p.name)
+        .filter((p) => !!p);
+    return filteredProjectNames;
+}
+exports.getValidProjectNames = getValidProjectNames;
 /**
  * detect the project name for the given file in playwright test repo
  * if project and test file path for running test don't match, then playwright throws error
@@ -385,18 +400,14 @@ class TeardownManager {
     async skipAll() {
         this.teardownFiles = await this.getAllTeardownFiles();
         await Promise.all(this.teardownFiles.map(async ({ filePath }) => await this.skipTeardownFile(filePath)));
-        process.on("beforeExit", () => {
-            this.unskipAll();
-        });
-        process.on("exit", () => {
-            this.unskipAll();
-        });
-        process.on("SIGINT", () => {
-            this.unskipAll();
-        });
-        process.on("SIGTERM", () => {
-            this.unskipAll();
-        });
+        const setupProcessListeners = (cleanup) => {
+            const events = ["beforeExit", "exit", "SIGINT", "SIGTERM"];
+            events.forEach((event) => process.on(event, cleanup));
+            return () => {
+                events.forEach((event) => process.removeListener(event, cleanup));
+            };
+        };
+        return setupProcessListeners(this.unskipAll.bind(this));
     }
     unskipAll() {
         this.teardownFiles.forEach(({ filePath, content }) => {

package/dist/agent/chat.d.ts CHANGED Viewed

@@ -1,8 +1,9 @@
-import { TraceClient } from "@empiricalrun/llm";
-import type { Anthropic } from "@empiricalrun/llm/claude";
-export declare function chatAgent({ prompt, chatModel, }: {
-    prompt: string;
-    trace?: TraceClient;
+export declare function chatAgent({ chatModel, useDiskForChatState, }: {
     chatModel?: "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022";
-}): Promise<Anthropic.Messages.MessageParam[]>;
+    useDiskForChatState?: boolean;
+}): Promise<{
+    input: number;
+    output: number;
+    cost: number;
+}>;
 //# sourceMappingURL=chat.d.ts.map

package/dist/agent/chat.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"~~AAAA~~,~~OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AA8E1D,~~wBAAsB,SAAS,CAAC,EAC9B,~~MAAM~~,~~EACN~~,~~SAAwC~~,~~GACzC~~,EAAE;IACD,~~MAAM~~,~~EAAE,MAAM,~~CAAC~~;IACf~~,~~KAAK~~,~~CAAC~~,~~EAAE~~,~~WAAW~~,CAAC;~~IACpB~~,~~SAAS~~,CAAC,EAAE,~~4BAA4B~~,~~GAAG,4BAA4B,~~CAAC;~~CACzE,8CAiEA~~"}
1	+ {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AA8EA,wBAAsB,SAAS,CAAC,EAC9B,SAAwC,EACxC,mBAAmB,GACpB,EAAE;IACD,SAAS,CAAC,EAAE,4BAA4B,GAAG,4BAA4B,CAAC;IACxE,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B;;;;GA6FA"}

package/dist/agent/chat.js CHANGED Viewed

@@ -71,12 +71,48 @@ const toolExecutors = {
     ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
     str_replace_editor: claude_1.strReplaceEditorTool,
 };
-async function chatAgent({ prompt, chatModel = "claude-3-7-sonnet-20250219", }) {
-    let userPrompt = prompt;
-    let chatState = new claude_1.ChatState();
-    chatState.pushTextMessage({ message: { role: "user", content: userPrompt } });
-    let shouldAskUserForInput = false;
-    while (!userPrompt.toLowerCase().includes("stop")) {
+async function chatAgent({ chatModel = "claude-3-7-sonnet-20250219", useDiskForChatState, }) {
+    let userPrompt = undefined;
+    let chatState = useDiskForChatState ? claude_1.ChatState.load() : new claude_1.ChatState(false);
+    if (chatState.askUserForInput) {
+        // Show last message to the user for context when we loaded from disk
+        const messages = chatState.messages;
+        const lastMessage = messages[messages.length - 1];
+        if (lastMessage && Array.isArray(lastMessage.content)) {
+            const textContent = lastMessage.content.find((b) => b.type === "text");
+            if (textContent) {
+                const role = lastMessage.role.charAt(0).toUpperCase() + lastMessage.role.slice(1);
+                console.log(`${role}: ${textContent.text}`);
+            }
+        }
+    }
+    while (!userPrompt?.toLowerCase().includes("stop")) {
+        chatState.saveToDisk();
+        if (chatState.askUserForInput) {
+            try {
+                userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
+                    message: "User:",
+                });
+            }
+            catch (e) {
+                // https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
+                if (e instanceof Error && e.name === "ExitPromptError") {
+                    console.log("Exiting. Usage summary:", chatState.getUsageSummary());
+                    process.exit(0);
+                }
+                throw e;
+            }
+            chatState.pushMessage({
+                role: "user",
+                content: [
+                    {
+                        type: "text",
+                        text: userPrompt,
+                    },
+                ],
+            });
+            continue;
+        }
         const toolUse = chatState.getPendingToolCall();
         if (toolUse) {
             console.log("Executing tool:", toolUse.name, "with args:", toolUse.input);
@@ -85,26 +121,22 @@ async function chatAgent({ prompt, chatModel = "claude-3-7-sonnet-20250219", })
                 throw new Error(`Tool ${toolUse.name} not found`);
             }
             const toolResult = await toolExecutor(toolUse.input);
-            chatState.pushToolResultToMessages({
-                toolCall: toolUse,
-                isError: toolResult.isError,
-                result: toolResult.result,
+            chatState.pushMessage({
+                role: "user",
+                content: [
+                    {
+                        type: "tool_result",
+                        tool_use_id: toolUse.id,
+                        content: toolResult.result,
+                        is_error: toolResult.isError,
+                    },
+                ],
             });
             continue;
         }
-        if (shouldAskUserForInput) {
-            userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
-                message: "Your response?",
-            });
-            chatState.pushTextMessage({
-                message: { role: "user", content: userPrompt },
-            });
-            shouldAskUserForInput = false;
-            continue;
-        }
-        const response = await (0, claude_1.createChatCompletion)({
+        const response = await (0, claude_1.createClaudeMessage)({
             systemPrompt,
-            messages: chatState.getMessages(),
+            messages: chatState.getMessagesForCreateCompletion(),
             tools: tools.map((tool) => (0, claude_1.convertOpenAISchemaToAnthropic)((0, zod_schema_1.zodToOpenAITool)(tool.schema))),
             model: chatModel,
             withStrReplaceEditor: true,
@@ -112,19 +144,14 @@ async function chatAgent({ prompt, chatModel = "claude-3-7-sonnet-20250219", })
         if (!response) {
             throw new Error("No response from LLM");
         }
-        chatState.pushTextMessage({
-            message: { role: "assistant", content: response.content },
-        });
+        chatState.pushMessage(response);
         const textBlock = response.content.find((b) => b.type === "text");
-        const toolUseBlock = response.content.find((b) => b.type === "tool_use");
-        console.log("Assistant response:", textBlock?.text);
-        if (toolUseBlock) {
-            chatState.addPendingToolCall({ toolCall: toolUseBlock });
-        }
-        else {
-            shouldAskUserForInput = true;
+        if (textBlock) {
+            console.log("Assistant:", textBlock.text);
         }
     }
-    return chatState.getMessages();
+    const usageSummary = chatState.getUsageSummary();
+    console.log("Usage summary:", usageSummary);
+    return usageSummary;
 }
 exports.chatAgent = chatAgent;

package/dist/bin/index.js CHANGED Viewed

@@ -28,11 +28,14 @@ dotenv_1.default.config({
 const flushEvents = async () => {
     await (0, llm_1.flushAllTraces)();
 };
-process.on("beforeExit", async () => await flushEvents());
-process.on("exit", async () => await flushEvents());
-process.on("SIGINT", async () => await flushEvents());
-process.on("SIGTERM", async () => await flushEvents());
-async function runChatAgent(prompt, modelInput) {
+function setupProcessListeners(cleanup) {
+    const events = ["beforeExit", "exit", "SIGINT", "SIGTERM"];
+    events.forEach((event) => process.on(event, cleanup));
+    return () => {
+        events.forEach((event) => process.removeListener(event, cleanup));
+    };
+}
+async function runChatAgent(modelInput, useDiskForChatState) {
     const MODEL_MAPPING = {
         "claude-3-7": "claude-3-7-sonnet-20250219",
         "3-7": "claude-3-7-sonnet-20250219",
@@ -43,8 +46,8 @@ async function runChatAgent(prompt, modelInput) {
         throw new Error(`Invalid chat model: ${modelInput}`);
     }
     return await (0, chat_1.chatAgent)({
-        prompt,
         chatModel: modelInput ? MODEL_MAPPING[modelInput] : undefined,
+        useDiskForChatState,
     });
 }
 async function runAgentsWorkflow(testGenConfig, testGenToken) {
@@ -172,6 +175,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
     return agent;
 }
 (async function main() {
+    const removeListeners = setupProcessListeners(flushEvents);
     console.log(`Running test-gen v${require("../../package.json").version} from ${__dirname}`);
     const program = new commander_1.Command();
     program
@@ -181,6 +185,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
         .option("--file <test-file>", "File path of the test case (inside tests dir)")
         .option("--suites <suites>", "Comma separated list of describe blocks")
         .option("--use-chat", "Use chat agent (and not the workflow)")
+        .option("--use-disk-for-chat-state", "Save and load chat state from disk")
         .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022)")
         .parse(process.argv);
     const options = program.opts();
@@ -202,18 +207,16 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
         generationId: testGenConfig.options?.metadata.generationId,
         projectRepoName: testGenConfig.options?.metadata.projectRepoName,
     });
-    let testGenFailed = false;
-    let agentUsed;
     // Download the build if repo has a download script
     await (0, test_build_1.downloadBuild)(testGenConfig.build || {});
+    if (completedOptions.useChat) {
+        await runChatAgent(completedOptions.chatModel, completedOptions.useDiskForChatState);
+        return;
+    }
+    let agentUsed;
+    let testGenFailed = false;
     try {
-        if (completedOptions.useChat) {
-            await runChatAgent(completedOptions.prompt, completedOptions.chatModel);
-            return;
-        }
-        else {
-            agentUsed = await runAgentsWorkflow(testGenConfig, testGenToken);
-        }
+        agentUsed = await runAgentsWorkflow(testGenConfig, testGenToken);
     }
     catch (e) {
         testGenFailed = true;
@@ -229,7 +232,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
             testName: testGenConfig.testCase.name,
         });
     }
-    // TODO: move these reporters to a better lifecycle
+    removeListeners();
     await (0, llm_1.flushAllTraces)();
     await (0, logger_1.waitForLogsToFlush)();
     await (0, session_1.endSession)();

package/dist/bin/utils/index.d.ts CHANGED Viewed

@@ -5,6 +5,7 @@ export interface CliOptions {
     prompt?: string;
     suites?: string;
     useChat?: boolean;
+    useDiskForChatState?: boolean;
     chatModel?: "claude-3-7" | "3-7" | "claude-3-5" | "3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022";
 }
 export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;

package/dist/bin/utils/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,CAAC;CAClC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,CAAC;CAClC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}

package/dist/bin/utils/index.js CHANGED Viewed

@@ -12,9 +12,9 @@ async function validateAndCompleteCliOptions(options) {
         return options;
     }
     let requiredFields = ["name", "file", "prompt"];
-    // For new chat flow in local CLI usage, only prompt is required
     if (options.useChat) {
-        requiredFields = ["prompt"];
+        // Chat agent can prompt the user directly, nothing is required in CLI args
+        requiredFields = [];
     }
     const questions = [];
     if (!options.name && requiredFields.includes("name")) {

package/dist/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAYpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAiC3E"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAepC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAyC3E"}

package/dist/index.js CHANGED Viewed

@@ -14,42 +14,54 @@ const pw_test_1 = require("./utils/pw-test");
 const flushEvents = async () => {
     await (0, llm_1.flushAllTraces)();
 };
-process.on("beforeExit", async () => await flushEvents());
-process.on("exit", async () => await flushEvents());
-process.on("SIGINT", async () => await flushEvents());
-process.on("SIGTERM", async () => await flushEvents());
+function setupProcessListeners(cleanup) {
+    const events = ["beforeExit", "exit", "SIGINT", "SIGTERM"];
+    events.forEach((event) => process.on(event, cleanup));
+    return () => {
+        events.forEach((event) => process.removeListener(event, cleanup));
+    };
+}
 async function createTest(task, page, scope) {
-    const testConfigArg = process.env.TEST_GEN_TOKEN;
-    const testGenConfig = (0, scenarios_1.loadTestConfigs)(testConfigArg);
-    (0, reporter_1.setReporterConfig)({
-        projectRepoName: testGenConfig.options?.metadata.projectRepoName,
-        testSessionId: testGenConfig.options?.metadata.testSessionId,
-        generationId: testGenConfig.options?.metadata.generationId,
-    });
-    (0, session_1.setSessionDetails)({
-        sessionId: testGenConfig.options?.metadata.testSessionId,
-        generationId: testGenConfig.options?.metadata.generationId,
-        testCaseId: testGenConfig.testCase.id,
-        projectRepoName: testGenConfig.options?.metadata.projectRepoName,
-    });
-    const fileService = new client_1.default();
-    const { testCase, specPath } = testGenConfig;
-    const { code, importPaths } = await (0, run_1.createTestUsingMasterAgent)({
-        testCase,
-        specPath,
-        page,
-        task,
-        options: {
-            ...testGenConfig.options,
-        },
-        scopeVars: scope,
-    });
-    await fileService.updateTest({
-        task,
-        generatedCode: code,
-        importPaths,
-    });
-    // skip the rest of the test once generation is over
-    await (0, pw_test_1.skipTest)();
+    const removeListeners = setupProcessListeners(flushEvents);
+    try {
+        const testConfigArg = process.env.TEST_GEN_TOKEN;
+        const testGenConfig = (0, scenarios_1.loadTestConfigs)(testConfigArg);
+        (0, reporter_1.setReporterConfig)({
+            projectRepoName: testGenConfig.options?.metadata.projectRepoName,
+            testSessionId: testGenConfig.options?.metadata.testSessionId,
+            generationId: testGenConfig.options?.metadata.generationId,
+        });
+        (0, session_1.setSessionDetails)({
+            sessionId: testGenConfig.options?.metadata.testSessionId,
+            generationId: testGenConfig.options?.metadata.generationId,
+            testCaseId: testGenConfig.testCase.id,
+            projectRepoName: testGenConfig.options?.metadata.projectRepoName,
+        });
+        const fileService = new client_1.default();
+        const { testCase, specPath } = testGenConfig;
+        const { code, importPaths } = await (0, run_1.createTestUsingMasterAgent)({
+            testCase,
+            specPath,
+            page,
+            task,
+            options: {
+                ...testGenConfig.options,
+            },
+            scopeVars: scope,
+        });
+        await fileService.updateTest({
+            task,
+            generatedCode: code,
+            importPaths,
+        });
+        // skip the rest of the test once generation is over
+        await (0, pw_test_1.skipTest)();
+    }
+    finally {
+        // Ensure listeners are removed even if an error occurs
+        removeListeners();
+        // Flush events one final time before removing listeners
+        await flushEvents();
+    }
 }
 exports.createTest = createTest;

package/dist/tools/browser-agent.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"browser-agent.d.ts","sourceRoot":"","sources":["../../src/tools/browser-agent.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAmDpC,eAAO,MAAM,gBAAgB,EAAE,IA6C9B,CAAC"}
1	+ {"version":3,"file":"browser-agent.d.ts","sourceRoot":"","sources":["../../src/tools/browser-agent.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAmDpC,eAAO,MAAM,gBAAgB,EAAE,IA4D9B,CAAC"}

package/dist/tools/browser-agent.js CHANGED Viewed

@@ -57,11 +57,27 @@ exports.browserAgentTool = {
     },
     execute: async (input) => {
         const { testName, testSuites, fileName, changeToMake, project } = input;
-        await (0, utils_1.replaceTodoWithCreateTest)({
-            testCaseName: testName,
-            testCaseSuites: testSuites,
-            testFilePath: fileName,
-        });
+        try {
+            await (0, utils_1.replaceTodoWithCreateTest)({
+                testCaseName: testName,
+                testCaseSuites: testSuites,
+                testFilePath: fileName,
+            });
+        }
+        catch (error) {
+            return {
+                isError: true,
+                result: `Error running tool: ${error}`,
+            };
+        }
+        const playwrightConfig = await (0, utils_1.readPlaywrightConfig)(process.cwd());
+        const validProjectNames = await (0, utils_1.getValidProjectNames)(playwrightConfig);
+        if (!validProjectNames.includes(project)) {
+            return {
+                isError: true,
+                result: `Invalid project name: ${project}. Valid project names are: ${validProjectNames.join(", ")}`,
+            };
+        }
         const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
             testFilePath: fileName,
             filePathToUpdate: fileName,

package/dist/utils/repo-tree.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"repo-tree.d.ts","sourceRoot":"","sources":["../../src/utils/repo-tree.ts"],"names":[],"mappings":"AAGA,eAAO,MAAM,eAAe,qBAO3B,CAAC;AAEF,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,KAAK,UAsE9D"}
1	+ {"version":3,"file":"repo-tree.d.ts","sourceRoot":"","sources":["../../src/utils/repo-tree.ts"],"names":[],"mappings":"AAGA,eAAO,MAAM,eAAe,qBAQ3B,CAAC;AAEF,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,KAAK,UAsE9D"}

package/dist/utils/repo-tree.js CHANGED Viewed

@@ -13,6 +13,7 @@ exports.DEFAULT_EXCLUDE = [
     /\.git/,
     ".DS_Store",
     "playwright-report",
+    ".empiricalrun",
 ];
 function generateAsciiTree(dirPath, options = {}) {
     const defaultOptions = {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.50.4",
+  "version": "0.51.1",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"
@@ -73,10 +73,10 @@
     "tsx": "^4.16.2",
     "typescript": "^5.3.3",
     "zod": "^3.23.8",
-    "@empiricalrun/llm": "^0.10.3",
+    "@empiricalrun/llm": "^0.11.1",
     "@empiricalrun/r2-uploader": "^0.3.8",
     "@empiricalrun/reporter": "^0.23.2",
-    "@empiricalrun/test-run": "^0.7.5"
+    "@empiricalrun/test-run": "^0.7.6"
   },
   "devDependencies": {
     "@playwright/test": "1.47.1",