npm - @empiricalrun/test-gen - Versions diffs - 0.52.1 → 0.52.3 - Mend

@empiricalrun/test-gen 0.52.1 → 0.52.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (48)

package/CHANGELOG.md +29 -0
package/dist/agent/browsing/run.d.ts +3 -1
package/dist/agent/browsing/run.d.ts.map +1 -1
package/dist/agent/browsing/run.js +11 -6
package/dist/agent/chat/index.d.ts +3 -2
package/dist/agent/chat/index.d.ts.map +1 -1
package/dist/agent/chat/index.js +15 -18
package/dist/agent/chat/prompt.js +2 -2
package/dist/agent/cua/computer.d.ts +4 -1
package/dist/agent/cua/computer.d.ts.map +1 -1
package/dist/agent/cua/computer.js +12 -2
package/dist/agent/cua/index.d.ts +1 -3
package/dist/agent/cua/index.d.ts.map +1 -1
package/dist/agent/cua/index.js +75 -20
package/dist/agent/cua/model.d.ts.map +1 -1
package/dist/agent/cua/model.js +5 -2
package/dist/bin/index.js +17 -4
package/dist/bin/utils/index.d.ts +2 -1
package/dist/bin/utils/index.d.ts.map +1 -1
package/dist/file/client.d.ts +5 -4
package/dist/file/client.d.ts.map +1 -1
package/dist/file/client.js +10 -17
package/dist/file/server.d.ts +6 -2
package/dist/file/server.d.ts.map +1 -1
package/dist/file/server.js +15 -5
package/dist/index.d.ts.map +1 -1
package/dist/index.js +4 -3
package/dist/tools/codegen-agent.d.ts +1 -1
package/dist/tools/codegen-agent.d.ts.map +1 -1
package/dist/tools/diagnosis-fetcher.d.ts +1 -1
package/dist/tools/diagnosis-fetcher.d.ts.map +1 -1
package/dist/tools/grep.d.ts +1 -1
package/dist/tools/grep.d.ts.map +1 -1
package/dist/tools/test-gen-browser.d.ts +1 -1
package/dist/tools/test-gen-browser.d.ts.map +1 -1
package/dist/tools/test-gen-browser.js +23 -21
package/dist/tools/test-run-fetcher/index.d.ts +1 -1
package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
package/dist/tools/test-run.d.ts +1 -1
package/dist/tools/test-run.d.ts.map +1 -1
package/dist/tools/test-run.js +1 -2
package/package.json +2 -2
package/dist/tools/types.d.ts +0 -38
package/dist/tools/types.d.ts.map +0 -1
package/dist/tools/types.js +0 -12
package/dist/tools/zod-schema.d.ts +0 -19
package/dist/tools/zod-schema.d.ts.map +0 -1
package/dist/tools/zod-schema.js +0 -95

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,34 @@
 # @empiricalrun/test-gen
+## 0.52.3
+### Patch Changes
+- 6a19298: feat: changed gemini-2.5pro from exp to preview
+- cbe5823: fix: removed headed default from runTest tool schema and added function in role of toolRes Gemini
+- Updated dependencies [f4f4c5d]
+- Updated dependencies [6a19298]
+- Updated dependencies [cbe5823]
+  - @empiricalrun/llm@0.13.1
+## 0.52.2
+### Patch Changes
+- c490603: feat: input initial prompt with markdown file
+- 68640d2: feat: handover from test-gen tool to chat agent with a summary message
+- ae91e37: fix: cap cua iterations, add tracing and improve logging
+- 0704b28: feat: zod schema for str_replace_editor for gemini to use this tool
+- 02a2439: feat: summarize actions done by cua and rename fileservice
+- 01fa143: feat: custom tool grep added for gemini
+- Updated dependencies [c490603]
+- Updated dependencies [486264f]
+- Updated dependencies [ae91e37]
+- Updated dependencies [0704b28]
+- Updated dependencies [3ed20a3]
+- Updated dependencies [01fa143]
+  - @empiricalrun/llm@0.13.0
 ## 0.52.1
 ### Patch Changes

package/dist/agent/browsing/run.d.ts CHANGED Viewed

@@ -4,10 +4,12 @@ type GenerateTestsType = {
     pwProjectsFilter: string[];
     testGenToken: string;
     repoDir: string;
+    editFileWithGeneratedCode: boolean;
 };
-export declare function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, pwProjectsFilter, testGenToken, repoDir, }: GenerateTestsType): Promise<{
+export declare function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, pwProjectsFilter, testGenToken, repoDir, editFileWithGeneratedCode, }: GenerateTestsType): Promise<{
     isError: boolean;
     error: string;
+    actionsSummary?: string;
 }>;
 export {};
 //# sourceMappingURL=run.d.ts.map

package/dist/agent/browsing/run.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;~~CACjB~~,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,~~GACR~~,EAAE,iBAAiB,GAAG,OAAO,CAAC;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;~~CACf~~,CAAC,~~CAgFD~~"}
1	+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,yBAAyB,EAAE,OAAO,CAAC;CACpC,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,EACP,yBAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB,CAAC,CAqFD"}

package/dist/agent/browsing/run.js CHANGED Viewed

@@ -10,7 +10,7 @@ const web_1 = require("../../bin/utils/platform/web");
 const server_1 = require("../../file/server");
 const exec_1 = require("../../utils/exec");
 const utils_1 = require("./utils");
-async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, pwProjectsFilter, testGenToken, repoDir, }) {
+async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, pwProjectsFilter, testGenToken, repoDir, editFileWithGeneratedCode, }) {
     if (!fs_extra_1.default.existsSync(testFilePath)) {
         throw new Error(`File for master agent to run not found: ${testFilePath}`);
     }
@@ -18,9 +18,13 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
     const port = await (0, detect_port_1.default)(3030);
     // start a file service to handle file updates from agent
     // - also update the file path with updates when agent is done spitting out code
-    const fileService = new server_1.FileService({ port, repoDir });
-    await fileService.startFileService();
-    fileService.setFilePath(filePathToUpdate);
+    const fileServer = new server_1.FileServiceServer({
+        port,
+        repoDir,
+        updateFile: editFileWithGeneratedCode,
+    });
+    await fileServer.startFileService();
+    fileServer.setFilePath(filePathToUpdate);
     // read playwright config from ./playwright.config.ts of source repo
     const playwrightConfig = await (0, utils_1.readPlaywrightConfig)(repoDir);
     // detect the playwright project name for the given test file and playwright config
@@ -42,7 +46,7 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
         }
         await (0, exec_1.cmd)(command.split(" "), {
             env: {
-                APP_PORT: port.toString(),
+                IPC_FILE_SERVICE_PORT: port.toString(),
                 PW_TEST_HTML_REPORT_OPEN: "never",
                 // pass the test gen token so that the agent has the same configuration as cli
                 TEST_GEN_TOKEN: testGenToken,
@@ -75,10 +79,11 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
     }
     // remove the test only from the file
     await (0, web_1.removeTestOnly)(testFilePath);
-    await fileService.stop();
+    await fileServer.stop();
     return {
         isError,
         error,
+        actionsSummary: fileServer.getActionsSummary(),
     };
 }
 exports.generateTestsUsingMasterAgent = generateTestsUsingMasterAgent;

package/dist/agent/chat/index.d.ts CHANGED Viewed

@@ -1,5 +1,6 @@
-export declare function chatAgent({ selectedModel, useDiskForChatState, }: {
-    selectedModel?: "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-exp-03-25";
+export declare function chatAgent({ selectedModel, useDiskForChatState, initialPromptContent, }: {
+    selectedModel?: "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
     useDiskForChatState?: boolean;
+    initialPromptContent?: string;
 }): Promise<string>;
 //# sourceMappingURL=index.d.ts.map

package/dist/agent/chat/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"~~AA4DA~~,wBAAsB,SAAS,CAAC,EAC9B,aAA4C,EAC5C,mBAA2B,~~GAC5B~~,EAAE;IACD,aAAa,CAAC,EACV,4BAA4B,GAC5B,4BAA4B,GAC5B,~~0BAA0B~~,CAAC;~~IAC/B~~,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B,~~mBA2FA~~"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAgEA,wBAAsB,SAAS,CAAC,EAC9B,aAA4C,EAC5C,mBAA2B,EAC3B,oBAAoB,GACrB,EAAE;IACD,aAAa,CAAC,EACV,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;IACnC,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,oBAAoB,CAAC,EAAE,MAAM,CAAC;CAC/B,mBAsFA"}

package/dist/agent/chat/index.js CHANGED Viewed

@@ -10,8 +10,8 @@ const grep_1 = require("../../tools/grep");
 const test_gen_browser_1 = require("../../tools/test-gen-browser");
 const test_run_1 = require("../../tools/test-run");
 const test_run_fetcher_1 = require("../../tools/test-run-fetcher");
-const zod_schema_1 = require("../../tools/zod-schema");
 const prompt_1 = require("./prompt");
+// TODO: Add strReplaceEditor for non-Claude models
 const tools = [
     test_run_1.runTestTool,
     test_gen_browser_1.generateTestWithBrowserAgent,
@@ -21,7 +21,7 @@ const tools = [
 ];
 const toolExecutors = {
     ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
-    str_replace_editor: (input) => (0, chat_1.strReplaceEditorTool)(input, web_1.validateTypescript),
+    str_replace_editor: (input) => (0, chat_1.strReplaceEditorExecutor)(input, web_1.validateTypescript),
 };
 function createChatModel(useDiskForChatState, selectedModel) {
     if (selectedModel.startsWith("claude")) {
@@ -43,9 +43,16 @@ function concludeAgent(usageSummary) {
     console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
     (0, chat_1.cleanupBackupFiles)(process.cwd());
 }
-async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDiskForChatState = false, }) {
+async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDiskForChatState = false, initialPromptContent, }) {
     let chatModel = createChatModel(useDiskForChatState, selectedModel);
     let userPrompt = undefined;
+    if (initialPromptContent && chatModel.messages.length === 0) {
+        chatModel.pushUserMessage(initialPromptContent);
+        chatModel.askUserForInput = false;
+    }
+    else if (initialPromptContent && chatModel.messages.length > 0) {
+        console.warn(`Ignoring initial prompt because we have existing messages.`);
+    }
     const handleSigInt = () => {
         concludeAgent(chatModel.getUsageSummary());
         process.exit(0);
@@ -81,35 +88,25 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
         }
         const toolUse = chatModel.getPendingToolCall();
         if (toolUse) {
-            const spinner = ora(`Executing tool ${toolUse.name} with args: ${JSON.stringify(toolUse.input)}`).start();
+            console.log(`Executing tool ${toolUse.name} with args: ${JSON.stringify(toolUse.input)}`);
             const toolExecutor = toolExecutors[toolUse.name];
             if (!toolExecutor) {
                 throw new Error(`Tool ${toolUse.name} not found`);
             }
             const toolResult = await toolExecutor(toolUse.input);
             if (toolResult.isError) {
-                spinner.fail(`Tool ${toolUse.name} failed with error: ${toolResult.result}`);
+                ora(`Tool ${toolUse.name} failed: ${toolResult.result}`).fail();
             }
             else {
-                spinner.succeed(`Tool ${toolUse.name} completed`);
+                ora(`Tool ${toolUse.name} completed`).succeed();
             }
-            chatModel.pushMessage({
-                role: "user",
-                content: [
-                    {
-                        type: "tool_result",
-                        tool_use_id: toolUse.id,
-                        content: toolResult.result,
-                        is_error: toolResult.isError,
-                    },
-                ],
-            });
+            chatModel.pushToolResultMessage(toolUse, toolResult);
             continue;
         }
         const spinner = ora(`${getModelName(selectedModel)} is working...`).start();
         const response = await chatModel.getLLMResponse({
             systemPrompt,
-            tools: tools.map((tool) => (0, zod_schema_1.zodToOpenAITool)(tool.schema)),
+            tools: tools.map((tool) => (0, chat_1.zodToOpenAITool)(tool.schema)),
             selectedModel,
         });
         spinner.stop();

package/dist/agent/chat/prompt.js CHANGED Viewed

@@ -41,8 +41,8 @@ Or if the user asks you to modify a test, you could use the generateTestWithBrow
 that a UI selector needs to be updated, using the browser agent is a good idea.
 Before using generateTestWithBrowserAgent, you need to prepare the test code for the browser agent.
-You can do this by using the str_replace_editor tool to add a TODO comment to the test code. This
-comment should explain to the browser agent what to do.
+You can do this by using the strReplaceEditor or the text editor tool to add a TODO comment to the test
+code. This comment explains to the browser agent what it needs to do.
 For example, if the expected modification is to click on a login button, you could add the following comment.

package/dist/agent/cua/computer.d.ts CHANGED Viewed

@@ -2,6 +2,9 @@ import { ResponseComputerToolCall } from "openai/resources/responses/responses.m
 import type { Page } from "playwright";
 type ComputerAction = ResponseComputerToolCall.Click | ResponseComputerToolCall.DoubleClick | ResponseComputerToolCall.Drag | ResponseComputerToolCall.Keypress | ResponseComputerToolCall.Move | ResponseComputerToolCall.Screenshot | ResponseComputerToolCall.Scroll | ResponseComputerToolCall.Type | ResponseComputerToolCall.Wait;
 export declare function getScreenshot(page: Page): Promise<string>;
-export declare function handleModelAction(page: Page, action: ComputerAction): Promise<string>;
+export declare function handleModelAction(page: Page, action: ComputerAction): Promise<{
+    actionSummary: string;
+    actionCode: string;
+}>;
 export {};
 //# sourceMappingURL=computer.d.ts.map

package/dist/agent/cua/computer.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"computer.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/computer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,wBAAwB,EAAE,MAAM,0CAA0C,CAAC;AACpF,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAEvC,KAAK,cAAc,GACf,wBAAwB,CAAC,KAAK,GAC9B,wBAAwB,CAAC,WAAW,GACpC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,QAAQ,GACjC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,UAAU,GACnC,wBAAwB,CAAC,MAAM,GAC/B,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,IAAI,CAAC;AAElC,wBAAsB,aAAa,CAAC,IAAI,EAAE,IAAI,mBAG7C;AAgCD,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC,MAAM,CAAC,~~CA2HjB~~"}
1	+ {"version":3,"file":"computer.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/computer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,wBAAwB,EAAE,MAAM,0CAA0C,CAAC;AACpF,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAEvC,KAAK,cAAc,GACf,wBAAwB,CAAC,KAAK,GAC9B,wBAAwB,CAAC,WAAW,GACpC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,QAAQ,GACjC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,UAAU,GACnC,wBAAwB,CAAC,MAAM,GAC/B,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,IAAI,CAAC;AAElC,wBAAsB,aAAa,CAAC,IAAI,EAAE,IAAI,mBAG7C;AAgCD,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC;IACT,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAqID"}

package/dist/agent/cua/computer.js CHANGED Viewed

@@ -38,11 +38,13 @@ const CUA_KEY_TO_PLAYWRIGHT_KEY = {
 async function handleModelAction(page, action) {
     const actionType = action.type;
     let actionCode = "";
+    let actionSummary = "";
     try {
         switch (actionType) {
             case "click": {
                 const { x, y, button = "left" } = action;
                 console.log(`Action: click at (${x}, ${y}) with button '${button}'`);
+                actionSummary = `Click at (${x}, ${y}) with button '${button}'`;
                 let pwButton = undefined;
                 if (button === "left" || button === "right") {
                     pwButton = button;
@@ -72,18 +74,21 @@ async function handleModelAction(page, action) {
             case "double_click": {
                 const { x, y } = action;
                 console.log(`Action: doubleclick at (${x}, ${y})`);
+                actionSummary = `Double click at (${x}, ${y})`;
                 await page.mouse.dblclick(x, y, { button: "left" });
                 break;
             }
             case "move": {
                 const { x, y } = action;
-                console.log(`Action: move to (${x}, ${y})`);
+                console.log(`Action: mouse move to (${x}, ${y})`);
+                actionSummary = `Mouse move to (${x}, ${y})`;
                 await page.mouse.move(x, y);
                 break;
             }
             case "drag": {
                 const { path } = action;
                 console.log(`Action: drag along path ${path}`);
+                actionSummary = `Drag along path ${path}`;
                 if (!path || path.length === 0) {
                     break;
                 }
@@ -98,6 +103,7 @@ async function handleModelAction(page, action) {
             case "scroll": {
                 const { x, y, scroll_x, scroll_y } = action;
                 console.log(`Action: scroll at (${x}, ${y}) with offsets (scroll_x=${scroll_x}, scroll_y=${scroll_y})`);
+                actionSummary = `Scroll at (${x}, ${y}) with offsets (scroll_x=${scroll_x}, scroll_y=${scroll_y})`;
                 await page.mouse.move(x, y);
                 await page.evaluate(`window.scrollBy(${scroll_x}, ${scroll_y})`);
                 break;
@@ -109,6 +115,7 @@ async function handleModelAction(page, action) {
                 });
                 const mappedKey = mappedKeys.join("+"); // ["CTRL", "A"] becomes ControlOrMeta+A
                 console.log(`Action: keypress for keys ${keys} -> '${mappedKey}'`);
+                actionSummary = `Keypress for keys ${keys} (mapped to '${mappedKey}' for Playwright)`;
                 try {
                     await page.keyboard.press(mappedKey);
                     actionCode = `await page.keyboard.press('${mappedKey}');\n`;
@@ -121,6 +128,7 @@ async function handleModelAction(page, action) {
             case "type": {
                 const { text } = action;
                 console.log(`Action: type text '${text}'`);
+                actionSummary = `Type text '${text}'`;
                 await page.keyboard.type(text);
                 const locator = await page.evaluate(() => {
                     const element = document.activeElement;
@@ -131,12 +139,14 @@ async function handleModelAction(page, action) {
             }
             case "wait": {
                 console.log(`Action: wait`);
+                actionSummary = `Wait for 2 seconds`;
                 await page.waitForTimeout(2000);
                 break;
             }
             case "screenshot": {
                 // Nothing to do as screenshot is taken at each turn
                 console.log(`Action: screenshot`);
+                actionSummary = `Screenshot`;
                 break;
             }
             default:
@@ -146,6 +156,6 @@ async function handleModelAction(page, action) {
     catch (e) {
         console.error("Error handling action", action, ":", e);
     }
-    return actionCode;
+    return { actionSummary, actionCode };
 }
 exports.handleModelAction = handleModelAction;

package/dist/agent/cua/index.d.ts CHANGED Viewed

@@ -1,13 +1,11 @@
 import { Page } from "playwright";
 export declare function startPlaywrightCodegen(page: Page): Promise<void>;
-/**
- * Run the loop that executes computer actions until no 'computer_call' is found.
- */
 export declare function createTestUsingComputerUseAgent({ page, task, }: {
     page: Page;
     task: string;
 }): Promise<{
     code: string;
     importPaths: string[];
+    actionsSummary: string;
 }>;
 //# sourceMappingURL=index.d.ts.map

package/dist/agent/cua/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"~~AACA~~,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;~~AAMlC~~,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED~~;;GAEG;AACH~~,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;~~CACvB~~,CAAC,~~CAkFD~~"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CA2JD"}

package/dist/agent/cua/index.js CHANGED Viewed

@@ -1,6 +1,12 @@
 "use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.createTestUsingComputerUseAgent = exports.startPlaywrightCodegen = void 0;
+const llm_1 = require("@empiricalrun/llm");
+const crypto_1 = __importDefault(require("crypto"));
+const logger_1 = require("../../bin/logger");
 const utils_1 = require("../browsing/utils");
 const computer_1 = require("./computer");
 const model_1 = require("./model");
@@ -26,16 +32,25 @@ async function startPlaywrightCodegen(page) {
     await page.pause();
 }
 exports.startPlaywrightCodegen = startPlaywrightCodegen;
-/**
- * Run the loop that executes computer actions until no 'computer_call' is found.
- */
 async function createTestUsingComputerUseAgent({ page, task, }) {
-    let generatedCode = "";
     await (0, utils_1.injectPwLocatorGenerator)(page);
     const screenshotBytes = await (0, computer_1.getScreenshot)(page);
     const viewport = page.viewportSize();
     let screenWidth = viewport?.width || 1280;
     let screenHeight = viewport?.height || 720;
+    const logger = new logger_1.CustomLogger({ useReporter: false });
+    const trace = llm_1.langfuseInstance?.trace({
+        name: "computer-use-agent",
+        id: crypto_1.default.randomUUID(),
+        input: { task },
+    });
+    if (trace) {
+        const traceUrl = trace.getTraceUrl();
+        logger.log(`Starting computer use agent: ${traceUrl}`);
+    }
+    const span = trace?.span({
+        name: "initial-model-call",
+    });
     let response = await (0, model_1.callComputerUseModel)({
         input: [
             {
@@ -56,32 +71,63 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
         screenWidth,
         screenHeight,
     });
-    // eslint-disable-next-line no-constant-condition
-    while (true) {
+    span?.end({ output: response });
+    let isTaskDone = false;
+    let maxIterations = 15;
+    let generatedCode = "";
+    let actionsSummary = [];
+    let iterationIndex = 0;
+    while (!isTaskDone && iterationIndex < maxIterations) {
+        actionsSummary.push(`\n# Agent iteration ${iterationIndex}`);
+        iterationIndex++;
+        const iterationSpan = trace?.span({
+            name: `iteration-${iterationIndex}`,
+            input: { response },
+        });
         const computerCalls = response.output.filter((item) => item.type === "computer_call");
         if (computerCalls.length === 0) {
-            console.log("No computer call found. Output from model:");
-            /**
-             * TODO: Sometimes the mdoel will ask for a user confirmation - handle this flow
-             * item.type is "message", status is "completed", item.content.type is "output_text"
-             */
-            response.output.forEach((item) => {
-                console.log(JSON.stringify(item, null, 2));
-            });
-            break; // Exit when no computer calls are issued.
+            const assistantOutput = response.output.find((item) => item.type === "message");
+            if (assistantOutput) {
+                const content = assistantOutput.content.find((item) => item.type === "output_text");
+                if (content && "text" in content) {
+                    // TODO: This ignores `ResponseOutputRefusal` type (refusal from assistant)
+                    actionsSummary.push(`Agent summary: ${content.text}`);
+                }
+            }
+            isTaskDone = true;
+            continue;
+        }
+        const reasoning = response.output.find(() => (item) => item.type === "reasoning");
+        if (reasoning) {
+            const reasoningItem = reasoning;
+            const summaryText = reasoningItem.summary?.find((item) => item.type === "summary_text")?.text;
+            if (summaryText) {
+                actionsSummary.push(`Action reasoning: ${summaryText}`);
+            }
         }
         // We expect at most one computer call per response.
         const computerCall = computerCalls[0];
         const lastCallId = computerCall.call_id;
         const action = computerCall.action;
         const pendingSafetyChecks = computerCall.pending_safety_checks;
-        // Execute the action (function defined in step 3)
-        const actionCode = await (0, computer_1.handleModelAction)(page, action);
-        generatedCode += actionCode;
-        await new Promise((resolve) => setTimeout(resolve, 1000)); // Allow time for changes to take effect.
-        // Take a screenshot after the action (function defined in step 4)
+        // Execute the action and take a screenshot
+        const { actionSummary, actionCode } = await (0, computer_1.handleModelAction)(page, action);
+        actionsSummary.push(`Action executed: ${actionSummary}`);
+        if (actionCode) {
+            actionsSummary.push(`Generated code: ${actionCode}`);
+            generatedCode += actionCode;
+        }
+        else {
+            actionsSummary.push(`No code generated: Will rely on Playwright's ability to auto-wait or auto-scroll`);
+        }
+        // Allow time for changes to take effect.
+        await new Promise((resolve) => setTimeout(resolve, 1000));
         const screenshotBytes = await (0, computer_1.getScreenshot)(page);
         // Send the screenshot back as a computer_call_output
+        const computerCallSpan = iterationSpan?.span({
+            name: "computer-call-output",
+            input: { lastCallId, acknowledged_safety_checks: pendingSafetyChecks },
+        });
         response = await (0, model_1.callComputerUseModel)({
             previousResponseId: response.id,
             input: [
@@ -98,8 +144,17 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
             screenWidth,
             screenHeight,
         });
+        computerCallSpan?.end({ output: response });
+        iterationSpan?.end({ output: response });
+    }
+    if (!isTaskDone) {
+        actionsSummary.push(`Max iteration limit hit: Task not done after ${maxIterations} iterations`);
     }
+    trace?.update({
+        output: { code: generatedCode, actionsSummary: actionsSummary.join("\n") },
+    });
     return {
+        actionsSummary: actionsSummary.join("\n"),
         code: generatedCode,
         // TODO: Does not support skills, so import paths are empty
         importPaths: [],

package/dist/agent/cua/model.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AACA,OAAO,EACL,QAAQ,EACR,aAAa,EACd,MAAM,0CAA0C,CAAC;~~AAQlD~~,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,GACb,EAAE;IACD,KAAK,EAAE,aAAa,CAAC;IACrB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAqBpB"}
1	+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AACA,OAAO,EACL,QAAQ,EACR,aAAa,EACd,MAAM,0CAA0C,CAAC;AAWlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,GACb,EAAE;IACD,KAAK,EAAE,aAAa,CAAC;IACrB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAqBpB"}

package/dist/agent/cua/model.js CHANGED Viewed

@@ -9,11 +9,14 @@ const INSTRUCTIONS = `You will be asked to execute some actions in a browser con
 Don't ask the user for confirmations - just execute the actions.
 For example, if the user message says "Click on Submit button", then
-you click on the submit button -- even if it looks like a scary action.`;
+you click on the submit button -- even if it looks like a scary action.
+If you have been asked to retrieve text or verify something on the UI, then communicate
+that in your responses so that the user can see your thinking process in its entirety.`;
 async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }) {
     const openai = new openai_1.default();
     return await openai.responses.create({
-        model: "computer-use-preview",
+        model: "computer-use-preview-2025-03-11",
         previous_response_id: previousResponseId,
         tools: [
             {

package/dist/bin/index.js CHANGED Viewed

@@ -35,20 +35,31 @@ function setupProcessListeners(cleanup) {
         events.forEach((event) => process.removeListener(event, cleanup));
     };
 }
-async function runChatAgent(modelInput, useDiskForChatState) {
+async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath) {
     const MODEL_MAPPING = {
         "claude-3-7": "claude-3-7-sonnet-20250219",
         "3-7": "claude-3-7-sonnet-20250219",
         "claude-3-5": "claude-3-5-sonnet-20241022",
         "3-5": "claude-3-5-sonnet-20241022",
-        "gemini-2.5-pro-exp-03-25": "gemini-2.5-pro-exp-03-25",
+        "gemini-2.5-pro-preview-03-25": "gemini-2.5-pro-preview-03-25",
     };
     if (modelInput && !MODEL_MAPPING[modelInput]) {
         throw new Error(`Invalid chat model: ${modelInput}`);
     }
+    let initialPromptContent = undefined;
+    if (initialPromptPath) {
+        try {
+            const fs = await import("fs");
+            initialPromptContent = fs.readFileSync(initialPromptPath, "utf-8");
+        }
+        catch (error) {
+            throw new Error(`Failed to read initial prompt file at ${initialPromptPath}: ${error.message}`);
+        }
+    }
     return await (0, chat_1.chatAgent)({
         selectedModel: modelInput ? MODEL_MAPPING[modelInput] : undefined,
         useDiskForChatState,
+        initialPromptContent,
     });
 }
 async function runAgentsWorkflow(testGenConfig, testGenToken) {
@@ -168,6 +179,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
             pwProjectsFilter: testGenConfig.environment?.playwrightProjects,
             testGenToken,
             repoDir: process.cwd(),
+            editFileWithGeneratedCode: true,
         });
         if (isError) {
             throw new Error(error);
@@ -187,7 +199,8 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
         .option("--suites <suites>", "Comma separated list of describe blocks")
         .option("--use-chat", "Use chat agent (and not the workflow)")
         .option("--use-disk-for-chat-state", "Save and load chat state from disk")
-        .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-exp-03-25)")
+        .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-03-25)")
+        .option("--initial-prompt <path>", "Path to an initial prompt file (e.g. prompt.md)")
         .parse(process.argv);
     const options = program.opts();
     const completedOptions = await (0, utils_2.validateAndCompleteCliOptions)(options);
@@ -211,7 +224,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
     // Download the build if repo has a download script
     await (0, test_build_1.downloadBuild)(testGenConfig.build || {});
     if (completedOptions.useChat) {
-        await runChatAgent(completedOptions.chatModel, completedOptions.useDiskForChatState);
+        await runChatAgent(completedOptions.chatModel, completedOptions.useDiskForChatState, completedOptions.initialPrompt);
         return;
     }
     let agentUsed;

package/dist/bin/utils/index.d.ts CHANGED Viewed

@@ -6,7 +6,8 @@ export interface CliOptions {
     suites?: string;
     useChat?: boolean;
     useDiskForChatState?: boolean;
-    chatModel?: "claude-3-7" | "3-7" | "claude-3-5" | "3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-exp-03-25";
+    initialPrompt?: string;
+    chatModel?: "claude-3-7" | "3-7" | "claude-3-5" | "3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
 }
 export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;
 //# sourceMappingURL=index.d.ts.map

package/dist/bin/utils/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,GAC5B,~~0BAA0B~~,CAAC;~~CAChC~~;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;CACpC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}

package/dist/file/client.d.ts CHANGED Viewed

@@ -1,14 +1,15 @@
-declare class TestFileService {
+declare class FileServiceClient {
     baseUrl: string;
     port: number | undefined;
     constructor();
     static isAvailable(): boolean;
-    updateTest({ generatedCode, task, importPaths, }: {
+    updateTest({ generatedCode, task, importPaths, actionsSummary, }: {
         generatedCode: string;
         task: string;
         importPaths: string[];
-    }): Promise<void>;
+        actionsSummary?: string;
+    }): Promise<any>;
     post(path: string, body: any): Promise<any>;
 }
-export default TestFileService;
+export default FileServiceClient;
 //# sourceMappingURL=client.d.ts.map

package/dist/file/client.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/file/client.ts"],"names":[],"mappings":"AAAA,cAAM,~~eAAe~~;~~IACnB~~,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;;~~IASzB~~,MAAM,CAAC,WAAW;IAIZ,UAAU,CAAC,EACf,aAAa,EACb,IAAI,EACJ,WAAW,~~GACZ~~,EAAE;QACD,aAAa,EAAE,MAAM,CAAC;QACtB,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,EAAE,CAAC;~~KACvB~~;~~IAgBK~~,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG;CAgBnC;AAED,eAAe,~~eAAe~~,CAAC"}
1	+ {"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/file/client.ts"],"names":[],"mappings":"AAAA,cAAM,iBAAiB;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;;IAUzB,MAAM,CAAC,WAAW;IAIZ,UAAU,CAAC,EACf,aAAa,EACb,IAAI,EACJ,WAAW,EACX,cAAc,GACf,EAAE;QACD,aAAa,EAAE,MAAM,CAAC;QACtB,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,EAAE,CAAC;QACtB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB;IASK,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG;CAgBnC;AAED,eAAe,iBAAiB,CAAC"}

package/dist/file/client.js CHANGED Viewed

@@ -1,32 +1,25 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-class TestFileService {
+class FileServiceClient {
     baseUrl;
     port;
     constructor() {
-        const port = Number(process.env.APP_PORT);
+        const port = Number(process.env.IPC_FILE_SERVICE_PORT);
         if (port && !isNaN(port)) {
             this.port = port;
         }
         this.baseUrl = `http://localhost:${port}`;
     }
     static isAvailable() {
-        return !!Number(process.env.APP_PORT);
+        return !!Number(process.env.IPC_FILE_SERVICE_PORT);
     }
-    async updateTest({ generatedCode, task, importPaths, }) {
-        const resp = await fetch(`${this.baseUrl}/test`, {
-            method: "POST",
-            headers: {
-                "Content-Type": "application/json",
-            },
-            body: JSON.stringify({ generatedCode, task, importPaths }),
+    async updateTest({ generatedCode, task, importPaths, actionsSummary, }) {
+        return this.post("/test", {
+            generatedCode,
+            task,
+            importPaths,
+            actionsSummary,
         });
-        if (!resp.ok) {
-            throw new Error(resp.statusText);
-        }
-        else {
-            console.log("Generated and updated test successfully");
-        }
     }
     async post(path, body) {
         const resp = await fetch(`${this.baseUrl}${path}`, {
@@ -45,4 +38,4 @@ class TestFileService {
         }
     }
 }
-exports.default = TestFileService;
+exports.default = FileServiceClient;

package/dist/file/server.d.ts CHANGED Viewed

@@ -1,12 +1,16 @@
-export declare class FileService {
+export declare class FileServiceServer {
     private port;
     private filePath;
     private repoDir;
     private server;
-    constructor({ port, repoDir }: {
+    private actionsSummary;
+    private updateFile;
+    constructor({ port, repoDir, updateFile, }: {
         port: number;
         repoDir: string;
+        updateFile: boolean;
     });
+    getActionsSummary(): string | undefined;
     setFilePath(filePath: string): void;
     startFileService(): Promise<number>;
     stop(): Promise<void>;

package/dist/file/server.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/file/server.ts"],"names":[],"mappings":"AAWA,qBAAa,~~WAAW~~;~~IACtB~~,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,QAAQ,CAAc;IAC9B,OAAO,CAAC,OAAO,CAAc;IAC7B,OAAO,CAAC,MAAM,CAA4C;~~gBAE9C~~,~~EAAE~~,IAAI,~~EAAE~~,OAAO,~~EAAE~~,EAAE;~~QAAE~~,IAAI,EAAE,MAAM,CAAC;~~QAAC~~,OAAO,EAAE,MAAM,~~CAAA~~;~~KAAE~~;~~IAKhE~~,WAAW,CAAC,QAAQ,EAAE,MAAM;IAItB,gBAAgB,IAAI,OAAO,CAAC,MAAM,CAAC;~~IA0CnC~~,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAgB5B;AAED,wBAAsB,gBAAgB,kBAAK"}
1	+ {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/file/server.ts"],"names":[],"mappings":"AAWA,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,QAAQ,CAAc;IAC9B,OAAO,CAAC,OAAO,CAAc;IAC7B,OAAO,CAAC,MAAM,CAA4C;IAC1D,OAAO,CAAC,cAAc,CAAqB;IAC3C,OAAO,CAAC,UAAU,CAAkB;gBAExB,EACV,IAAI,EACJ,OAAO,EACP,UAAU,GACX,EAAE;QACD,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,EAAE,OAAO,CAAC;KACrB;IAMD,iBAAiB;IAIjB,WAAW,CAAC,QAAQ,EAAE,MAAM;IAItB,gBAAgB,IAAI,OAAO,CAAC,MAAM,CAAC;IA+CnC,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAgB5B;AAED,wBAAsB,gBAAgB,kBAAK"}

package/dist/file/server.js CHANGED Viewed

@@ -3,20 +3,26 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.startFileService = exports.FileService = void 0;
+exports.startFileService = exports.FileServiceServer = void 0;
 const express_1 = __importDefault(require("express"));
 const fs_1 = __importDefault(require("fs"));
 const path_1 = __importDefault(require("path"));
 const web_1 = require("../bin/utils/platform/web");
 const ipc_1 = require("../human-in-the-loop/ipc");
-class FileService {
+class FileServiceServer {
     port = 0;
     filePath = "";
     repoDir = "";
     server;
-    constructor({ port, repoDir }) {
+    actionsSummary;
+    updateFile = false;
+    constructor({ port, repoDir, updateFile, }) {
         this.port = port;
         this.repoDir = repoDir;
+        this.updateFile = updateFile;
+    }
+    getActionsSummary() {
+        return this.actionsSummary;
     }
     setFilePath(filePath) {
         this.filePath = filePath;
@@ -26,7 +32,11 @@ class FileService {
         app.use(express_1.default.json());
         (0, ipc_1.humanLoopRoute)(app);
         app.post("/test", async (req, res) => {
-            const { generatedCode, importPaths } = req.body;
+            const { generatedCode, importPaths, actionsSummary } = req.body;
+            this.actionsSummary = actionsSummary;
+            if (!this.updateFile) {
+                return res.send({ success: true });
+            }
             try {
                 const testFilePath = path_1.default.resolve(this.repoDir, this.filePath);
                 if (testFilePath) {
@@ -64,6 +74,6 @@ class FileService {
         });
     }
 }
-exports.FileService = FileService;
+exports.FileServiceServer = FileServiceServer;
 async function startFileService() { }
 exports.startFileService = startFileService;

package/dist/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;~~AAepC~~,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,~~iBAsD3E~~"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAqBpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAyD3E"}

package/dist/index.js CHANGED Viewed

@@ -40,7 +40,7 @@ async function createTest(task, page, scope) {
                 projectRepoName: testGenConfig.options?.metadata.projectRepoName,
             });
         }
-        const fileService = new client_1.default();
+        const fileServiceClient = new client_1.default();
         const useComputerUseAgent = testGenConfig.options?.useComputerUseAgent;
         let agentResult;
         if (useComputerUseAgent) {
@@ -61,11 +61,12 @@ async function createTest(task, page, scope) {
                 scopeVars: scope,
             });
         }
-        const { code, importPaths } = agentResult;
-        await fileService.updateTest({
+        const { code, importPaths, actionsSummary } = agentResult;
+        await fileServiceClient.updateTest({
             task,
             generatedCode: code,
             importPaths,
+            actionsSummary,
         });
         // skip the rest of the test once generation is over
         await (0, pw_test_1.skipTest)();

package/dist/tools/codegen-agent.d.ts CHANGED Viewed

@@ -1,3 +1,3 @@
-import type { Tool } from "./types";
+import type { Tool } from "@empiricalrun/llm/chat";
 export declare const codegenTool: Tool;
 //# sourceMappingURL=codegen-agent.d.ts.map

package/dist/tools/codegen-agent.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"codegen-agent.d.ts","sourceRoot":"","sources":["../../src/tools/codegen-agent.ts"],"names":[],"mappings":"~~AAIA~~,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,~~SAAS~~,CAAC;~~AAepC~~,eAAO,MAAM,WAAW,EAAE,IAyBzB,CAAC"}
1	+ {"version":3,"file":"codegen-agent.d.ts","sourceRoot":"","sources":["../../src/tools/codegen-agent.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAmBnD,eAAO,MAAM,WAAW,EAAE,IAyBzB,CAAC"}

package/dist/tools/diagnosis-fetcher.d.ts CHANGED Viewed

@@ -1,3 +1,3 @@
-import type { Tool } from "./types";
+import type { Tool } from "@empiricalrun/llm/chat";
 export declare const diagnosisTool: Tool;
 //# sourceMappingURL=diagnosis-fetcher.d.ts.map

package/dist/tools/diagnosis-fetcher.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"diagnosis-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/diagnosis-fetcher.ts"],"names":[],"mappings":"~~AAIA~~,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,~~SAAS~~,CAAC;~~AAUpC~~,eAAO,MAAM,aAAa,EAAE,IAgF3B,CAAC"}
1	+ {"version":3,"file":"diagnosis-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/diagnosis-fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAanD,eAAO,MAAM,aAAa,EAAE,IAgF3B,CAAC"}

package/dist/tools/grep.d.ts CHANGED Viewed

@@ -1,3 +1,3 @@
-import { Tool } from "./types";
+import type { Tool } from "@empiricalrun/llm/chat";
 export declare const grepTool: Tool;
 //# sourceMappingURL=grep.d.ts.map

package/dist/tools/grep.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"grep.d.ts","sourceRoot":"","sources":["../../src/tools/grep.ts"],"names":[],"mappings":"~~AAKA~~,OAAO,EAAE,IAAI,EAAc,MAAM,~~SAAS~~,CAAC;~~AAgB3C~~,eAAO,MAAM,QAAQ,EAAE,IA+CtB,CAAC"}
1	+ {"version":3,"file":"grep.d.ts","sourceRoot":"","sources":["../../src/tools/grep.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAc,MAAM,wBAAwB,CAAC;AAqB/D,eAAO,MAAM,QAAQ,EAAE,IA+CtB,CAAC"}

package/dist/tools/test-gen-browser.d.ts CHANGED Viewed

@@ -1,3 +1,3 @@
-import type { Tool } from "./types";
+import type { Tool } from "@empiricalrun/llm/chat";
 export declare const generateTestWithBrowserAgent: Tool;
 //# sourceMappingURL=test-gen-browser.d.ts.map

package/dist/tools/test-gen-browser.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"~~AAWA~~,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,~~SAAS~~,CAAC;~~AAmDpC~~,eAAO,MAAM,4BAA4B,EAAE,~~IAuE1C~~,CAAC"}
1	+ {"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AA0DnD,eAAO,MAAM,4BAA4B,EAAE,IA0E1C,CAAC"}

package/dist/tools/test-gen-browser.js CHANGED Viewed

@@ -1,12 +1,15 @@
 "use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.generateTestWithBrowserAgent = void 0;
+const promises_1 = __importDefault(require("fs/promises"));
 const zod_1 = require("zod");
 const run_1 = require("../agent/browsing/run");
 const utils_1 = require("../agent/browsing/utils");
 const web_1 = require("../bin/utils/platform/web");
 const scenarios_1 = require("../bin/utils/scenarios");
-const git_1 = require("../utils/git");
 const BrowserAgentSchema = zod_1.z.object({
     testName: zod_1.z.string().describe("The name of the test to create or modify"),
     testSuites: zod_1.z
@@ -27,10 +30,10 @@ locator/selector for an element on the page.
 IMPORTANT: Before you invoke this tool, you need to ensure that the test code is correctly prepared for this
 agent. Preparation involves adding a TODO comment that describes the change that needs to be made. A good
-comment calls out the element and browser interactions sto take on them. The TODO comment also has (agent) next to it, to
+comment calls out the element and browser interactions it must take. The TODO comment also has (agent) next to it, to
 clearly label that the change is for the agent to make.
-For example: This is a good TODO comment
+For example, this is a good TODO comment:
 \`\`\`
 test("Example test code", async ({ page }) => {
@@ -39,16 +42,12 @@ test("Example test code", async ({ page }) => {
 });
 \`\`\`
-The browser agent will execute the steps before the TODO comment and replace the TODO comment with the Playwright
-code that performs the actions described in the comment. For instance, on the running the tool, the agent will
-output the following final code:
+For the above file, the browser environment will execute the steps before the TODO comment and hand-over the control
+to the browser agent. The agent will do the actions described in the TODO comment and then resume control back to the
+test code.
-\`\`\`
-test("Example test code", async ({ page }) => {
-  await page.goto("https://example.com");
-  await page.getByRole("button", { name: "Login" }).click();
-});
-\`\`\`
+The browser agent will return a summary of actions that it took, and the generated Playwright code for them. You can
+then use the text editor tool to replace the TODO comment with the generated Playwright code.
 `;
 exports.generateTestWithBrowserAgent = {
     schema: {
@@ -72,6 +71,7 @@ exports.generateTestWithBrowserAgent = {
                 result: `Test block not found for test name: "${testName}" in file: "${fileName}" with describe blocks: "${testSuites.join(", ")}"`,
             };
         }
+        const fileBackup = await promises_1.default.readFile(fileName, "utf-8");
         try {
             await (0, utils_1.replaceTodoWithCreateTest)({
                 testCaseName: testName,
@@ -85,7 +85,7 @@ exports.generateTestWithBrowserAgent = {
                 result: `Error running tool: ${error}`,
             };
         }
-        const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
+        const toolResult = await (0, run_1.generateTestsUsingMasterAgent)({
             testFilePath: fileName,
             filePathToUpdate: fileName,
             pwProjectsFilter: [project],
@@ -97,25 +97,27 @@ exports.generateTestWithBrowserAgent = {
                 useComputerUseAgent: true,
             }),
             repoDir: process.cwd(),
+            editFileWithGeneratedCode: false,
         });
+        // Undo the TODO -> createTest change
+        await promises_1.default.writeFile(fileName, fileBackup, "utf-8");
+        const { isError, error, actionsSummary } = toolResult;
         if (!isError) {
-            const gitPatch = (0, git_1.getGitDiff)(fileName);
             return {
                 isError,
-                result: `Test was generated successfully. Here is the git patch:
-\`\`\`
-${gitPatch}
-\`\`\`
+                result: `Browser agent has finished running. Here is the summary of actions it took
+and the generated Playwright code:
+${actionsSummary}
 `,
             };
         }
         else {
             return {
                 isError,
-                result: `Test was not generated successfully. Here is the error:
-\`\`\`
+                result: `Browser agent failed to run successfully. Here is the error:
 ${error}
-\`\`\`
 `,
             };
         }

package/dist/tools/test-run-fetcher/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { Tool } from "../types";
+import type { Tool } from "@empiricalrun/llm/chat";
 export declare function extractPathAfterSourceRepo(fullPath: string): string;
 export declare const testRunTool: Tool;
 //# sourceMappingURL=index.d.ts.map

package/dist/tools/test-run-fetcher/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/test-run-fetcher/index.ts"],"names":[],"mappings":"~~AAEA~~,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,~~UAAU~~,CAAC;~~AAWrC~~,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAOnE;AAED,eAAO,MAAM,WAAW,EAAE,IA4HzB,CAAC"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/test-run-fetcher/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAanD,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAOnE;AAED,eAAO,MAAM,WAAW,EAAE,IA4HzB,CAAC"}

package/dist/tools/test-run.d.ts CHANGED Viewed

@@ -1,3 +1,3 @@
-import type { Tool } from "./types";
+import type { Tool } from "@empiricalrun/llm/chat";
 export declare const runTestTool: Tool;
 //# sourceMappingURL=test-run.d.ts.map

package/dist/tools/test-run.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"~~AAGA~~,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,~~SAAS~~,CAAC;~~AAsBpC~~,eAAO,MAAM,WAAW,EAAE,IA8BzB,CAAC"}
1	+ {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAuBnD,eAAO,MAAM,WAAW,EAAE,IA8BzB,CAAC"}

package/dist/tools/test-run.js CHANGED Viewed

@@ -15,8 +15,7 @@ const RunTestSchema = zod_1.z.object({
     headed: zod_1.z
         .boolean()
         .describe("Whether to run the test in headed mode (default is false, which is headless)")
-        .optional()
-        .default(false),
+        .optional(),
 });
 exports.runTestTool = {
     schema: {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.52.1",
+  "version": "0.52.3",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"
@@ -75,7 +75,7 @@
     "tsx": "^4.16.2",
     "typescript": "^5.3.3",
     "zod": "^3.23.8",
-    "@empiricalrun/llm": "^0.12.0",
+    "@empiricalrun/llm": "^0.13.1",
     "@empiricalrun/r2-uploader": "^0.3.8",
     "@empiricalrun/test-run": "^0.7.6"
   },

package/dist/tools/types.d.ts DELETED Viewed

@@ -1,38 +0,0 @@
-import { z } from "zod";
-/**
- * Base schema for all tools. Each tool should extend this with their specific parameters.
- */
-export declare const BaseToolSchema: z.ZodObject<{
-    name: z.ZodString;
-    description: z.ZodString;
-    parameters: z.ZodObject<{}, "passthrough", z.ZodTypeAny, z.objectOutputType<{}, z.ZodTypeAny, "passthrough">, z.objectInputType<{}, z.ZodTypeAny, "passthrough">>;
-}, "strip", z.ZodTypeAny, {
-    name: string;
-    description: string;
-    parameters: {} & {
-        [k: string]: unknown;
-    };
-}, {
-    name: string;
-    description: string;
-    parameters: {} & {
-        [k: string]: unknown;
-    };
-}>;
-export type ToolSchema = z.infer<typeof BaseToolSchema>;
-/**
- * Interface for creating a tool with its schema and execute function
- */
-export interface Tool {
-    schema: {
-        name: string;
-        description: string;
-        parameters: z.ZodType;
-    };
-    execute: (input: any) => Promise<ToolResult>;
-}
-export interface ToolResult {
-    isError: boolean;
-    result: string;
-}
-//# sourceMappingURL=types.d.ts.map

package/dist/tools/types.d.ts.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/tools/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB;;GAEG;AACH,eAAO,MAAM,cAAc;;;;;;;;;;;;;;;;EAIzB,CAAC;AAEH,MAAM,MAAM,UAAU,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,cAAc,CAAC,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,IAAI;IACnB,MAAM,EAAE;QACN,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,CAAC,CAAC,OAAO,CAAC;KACvB,CAAC;IACF,OAAO,EAAE,CAAC,KAAK,EAAE,GAAG,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CAC9C;AAED,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;CAChB"}

package/dist/tools/types.js DELETED Viewed

@@ -1,12 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.BaseToolSchema = void 0;
-const zod_1 = require("zod");
-/**
- * Base schema for all tools. Each tool should extend this with their specific parameters.
- */
-exports.BaseToolSchema = zod_1.z.object({
-    name: zod_1.z.string(),
-    description: zod_1.z.string(),
-    parameters: zod_1.z.object({}).passthrough(),
-});

package/dist/tools/zod-schema.d.ts DELETED Viewed

@@ -1,19 +0,0 @@
-import type OpenAI from "openai";
-import { z } from "zod";
-/**
- * Convert a tool schema to OpenAI tool format
- */
-export declare function zodToOpenAITool(schema: {
-    name: string;
-    description: string;
-    parameters: z.ZodType;
-}): OpenAI.Chat.Completions.ChatCompletionTool;
-/**
- * Convert Zod schema to JSON Schema
- */
-export declare function zodToJsonSchema(schema: z.ZodType): any;
-/**
- * Convert specific Zod type to JSON Schema
- */
-export declare function zodTypeToJsonSchema(zodType: z.ZodType): any;
-//# sourceMappingURL=zod-schema.d.ts.map

package/dist/tools/zod-schema.d.ts.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"file":"zod-schema.d.ts","sourceRoot":"","sources":["../../src/tools/zod-schema.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,MAAM,QAAQ,CAAC;AACjC,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB;;GAEG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE;IACtC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,CAAC,CAAC,OAAO,CAAC;CACvB,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAS7C;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,CAAC,CAAC,OAAO,GAAG,GAAG,CAuBtD;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,CAAC,CAAC,OAAO,GAAG,GAAG,CAoD3D"}

package/dist/tools/zod-schema.js DELETED Viewed

@@ -1,95 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.zodTypeToJsonSchema = exports.zodToJsonSchema = exports.zodToOpenAITool = void 0;
-const zod_1 = require("zod");
-/**
- * Convert a tool schema to OpenAI tool format
- */
-function zodToOpenAITool(schema) {
-    return {
-        type: "function",
-        function: {
-            name: schema.name,
-            description: schema.description,
-            parameters: zodToJsonSchema(schema.parameters),
-        },
-    };
-}
-exports.zodToOpenAITool = zodToOpenAITool;
-/**
- * Convert Zod schema to JSON Schema
- */
-function zodToJsonSchema(schema) {
-    if (schema instanceof zod_1.z.ZodObject) {
-        const shape = schema._def.shape();
-        const properties = {};
-        const required = [];
-        Object.entries(shape).forEach(([key, value]) => {
-            properties[key] = zodTypeToJsonSchema(value);
-            // Check if this field is required
-            if (!(value instanceof zod_1.z.ZodOptional)) {
-                required.push(key);
-            }
-        });
-        return {
-            type: "object",
-            properties,
-            ...(required.length > 0 ? { required } : {}),
-        };
-    }
-    return { type: "string" }; // Fallback
-}
-exports.zodToJsonSchema = zodToJsonSchema;
-/**
- * Convert specific Zod type to JSON Schema
- */
-function zodTypeToJsonSchema(zodType) {
-    // Handle string types
-    if (zodType instanceof zod_1.z.ZodString) {
-        const schema = { type: "string" };
-        if (zodType.description)
-            schema.description = zodType.description;
-        return schema;
-    }
-    // Handle number types
-    if (zodType instanceof zod_1.z.ZodNumber) {
-        const schema = { type: "number" };
-        if (zodType.description)
-            schema.description = zodType.description;
-        return schema;
-    }
-    // Handle boolean
-    if (zodType instanceof zod_1.z.ZodBoolean) {
-        const schema = { type: "boolean" };
-        if (zodType.description)
-            schema.description = zodType.description;
-        return schema;
-    }
-    // Handle arrays
-    if (zodType instanceof zod_1.z.ZodArray) {
-        return {
-            type: "array",
-            items: zodTypeToJsonSchema(zodType._def.type),
-            ...(zodType.description ? { description: zodType.description } : {}),
-        };
-    }
-    // Handle objects
-    if (zodType instanceof zod_1.z.ZodObject) {
-        return zodToJsonSchema(zodType);
-    }
-    // Handle enums
-    if (zodType instanceof zod_1.z.ZodEnum) {
-        return {
-            type: "string",
-            enum: zodType._def.values,
-            ...(zodType.description ? { description: zodType.description } : {}),
-        };
-    }
-    // Handle optional types
-    if (zodType instanceof zod_1.z.ZodOptional) {
-        return zodTypeToJsonSchema(zodType._def.innerType);
-    }
-    // Default fallback
-    return { type: "string" };
-}
-exports.zodTypeToJsonSchema = zodTypeToJsonSchema;