npm - @empiricalrun/test-gen - Versions diffs - 0.53.2 → 0.53.4 - Mend

@empiricalrun/test-gen 0.53.2 → 0.53.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/CHANGELOG.md +16 -0
package/dist/agent/browsing/utils.d.ts.map +1 -1
package/dist/agent/browsing/utils.js +6 -5
package/dist/agent/chat/index.d.ts.map +1 -1
package/dist/agent/chat/index.js +2 -0
package/dist/agent/cua/computer.d.ts +4 -0
package/dist/agent/cua/computer.d.ts.map +1 -1
package/dist/agent/cua/computer.js +9 -1
package/dist/agent/cua/index.d.ts.map +1 -1
package/dist/agent/cua/index.js +60 -30
package/dist/agent/cua/model.d.ts +2 -2
package/dist/agent/cua/model.d.ts.map +1 -1
package/dist/agent/cua/model.js +19 -0
package/dist/bin/utils/index.js +1 -1
package/dist/tools/commit-and-create-pr.d.ts +3 -0
package/dist/tools/commit-and-create-pr.d.ts.map +1 -0
package/dist/tools/commit-and-create-pr.js +102 -0
package/dist/tools/diagnosis-fetcher.d.ts.map +1 -1
package/dist/tools/diagnosis-fetcher.js +13 -10
package/dist/tools/test-gen-browser.d.ts.map +1 -1
package/dist/tools/test-gen-browser.js +11 -5
package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
package/dist/tools/test-run-fetcher/index.js +13 -11
package/dist/tools/utils/index.d.ts +11 -0
package/dist/tools/utils/index.d.ts.map +1 -0
package/dist/tools/utils/index.js +36 -0
package/package.json +2 -2

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,21 @@
 # @empiricalrun/test-gen
+## 0.53.4
+### Patch Changes
+- 1426372: fix: remove stray console.log
+- 7efc3dc: feat: add page.goto to cua implementation + prompt edits
+- Updated dependencies [7efc3dc]
+  - @empiricalrun/llm@0.14.3
+## 0.53.3
+### Patch Changes
+- 094b9f7: feat: add tool call for commit and push changes from chat agent
+- cc64ff1: feat: enable browser tool call to pick the right page to interact with
 ## 0.53.2
 ### Patch Changes

package/dist/agent/browsing/utils.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,~~iBA0BA~~;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD,wBAAsB,oBAAoB,CACxC,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,EAAE,CAAC,CAQnB;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAoBb,SAAS;CAKjB"}
1	+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,iBA2BA;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD,wBAAsB,oBAAoB,CACxC,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,EAAE,CAAC,CAQnB;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAoBb,SAAS;CAKjB"}

package/dist/agent/browsing/utils.js CHANGED Viewed

@@ -90,15 +90,16 @@ async function prepareFileForUpdateScenario({ testCase, specPath, trace, }) {
 }
 async function replaceTodoWithCreateTest({ testFilePath, testCaseName, testCaseSuites, }) {
     // This method is an alternative to prepareFileForUpdateScenario
-    // TODO: Does not support multiple pages, scoped variables, updates in POM files
+    // TODO: Does not support scoped variables and updates in POM files
     const fileContent = await fs_extra_1.default.readFile(testFilePath, "utf-8");
-    const todoRegex = /\/\/ TODO\(agent\): (.*)/;
+    const todoRegex = /\/\/ TODO\(agent(?:\s+on\s+(\w+))?\):\s*(.*)/;
     const todoMatch = fileContent.match(todoRegex);
     if (!todoMatch) {
-        throw new Error(`No "// TODO(agent):" comment found in file: ${testFilePath}`);
+        throw new Error(`No "// TODO(agent):" or "// TODO(agent on pageName):" comment found in file: ${testFilePath}`);
     }
-    // TODO: figure out correct page variable name
-    await fs_extra_1.default.writeFile(testFilePath, fileContent.replace(todoRegex, (_, todoText) => `await createTest("${todoText.replace(/"/g, '\\"')}", page);`));
+    const [, pageVarName] = todoMatch;
+    const pageVariable = pageVarName || "page"; // Default to "page" if not specified
+    await fs_extra_1.default.writeFile(testFilePath, fileContent.replace(todoRegex, (_, __, todoText) => `await createTest("${todoText.replace(/"/g, '\\"')}", ${pageVariable});`));
     await addImportForCreateTest(testFilePath);
     await markTestAsOnly({
         testCaseName,

package/dist/agent/chat/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"~~AAyFA~~,wBAAsB,SAAS,CAAC,EAC9B,aAA4C,EAC5C,mBAA2B,EAC3B,oBAAoB,GACrB,EAAE;IACD,aAAa,CAAC,EACV,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;IACnC,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,oBAAoB,CAAC,EAAE,MAAM,CAAC;CAC/B,mBAyFA"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AA2FA,wBAAsB,SAAS,CAAC,EAC9B,aAA4C,EAC5C,mBAA2B,EAC3B,oBAAoB,GACrB,EAAE;IACD,aAAa,CAAC,EACV,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;IACnC,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,oBAAoB,CAAC,EAAE,MAAM,CAAC;CAC/B,mBAyFA"}

package/dist/agent/chat/index.js CHANGED Viewed

@@ -5,6 +5,7 @@ const chat_1 = require("@empiricalrun/llm/chat");
 const picocolors_1 = require("picocolors");
 const web_1 = require("../../bin/utils/platform/web");
 const human_in_the_loop_1 = require("../../human-in-the-loop");
+const commit_and_create_pr_1 = require("../../tools/commit-and-create-pr");
 const diagnosis_fetcher_1 = require("../../tools/diagnosis-fetcher");
 const grep_1 = require("../../tools/grep");
 const test_gen_browser_1 = require("../../tools/test-gen-browser");
@@ -18,6 +19,7 @@ function getTools(selectedModel) {
         test_run_fetcher_1.fetchTestRunReportTool,
         diagnosis_fetcher_1.fetchDiagnosisReportTool,
         test_gen_browser_1.generateTestWithBrowserAgent,
+        commit_and_create_pr_1.commitAndPushChangesTool,
     ];
     if (selectedModel.startsWith("gemini")) {
         // Claude will have its own built-in text editor tools

package/dist/agent/cua/computer.d.ts CHANGED Viewed

@@ -2,6 +2,10 @@ import { ResponseComputerToolCall } from "openai/resources/responses/responses.m
 import type { Page } from "playwright";
 type ComputerAction = ResponseComputerToolCall.Click | ResponseComputerToolCall.DoubleClick | ResponseComputerToolCall.Drag | ResponseComputerToolCall.Keypress | ResponseComputerToolCall.Move | ResponseComputerToolCall.Screenshot | ResponseComputerToolCall.Scroll | ResponseComputerToolCall.Type | ResponseComputerToolCall.Wait;
 export declare function getScreenshot(page: Page): Promise<string>;
+export declare function handlePageGoto(page: Page, url: string): Promise<{
+    actionSummary: string;
+    actionCode: string;
+}>;
 export declare function handleModelAction(page: Page, action: ComputerAction): Promise<{
     actionSummary: string;
     actionCode: string;

package/dist/agent/cua/computer.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"computer.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/computer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,wBAAwB,EAAE,MAAM,0CAA0C,CAAC;AACpF,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAEvC,KAAK,cAAc,GACf,wBAAwB,CAAC,KAAK,GAC9B,wBAAwB,CAAC,WAAW,GACpC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,QAAQ,GACjC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,UAAU,GACnC,wBAAwB,CAAC,MAAM,GAC/B,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,IAAI,CAAC;AAElC,wBAAsB,aAAa,CAAC,IAAI,EAAE,IAAI,mBAG7C;AAgCD,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC;IACT,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAqID"}
1	+ {"version":3,"file":"computer.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/computer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,wBAAwB,EAAE,MAAM,0CAA0C,CAAC;AACpF,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAEvC,KAAK,cAAc,GACf,wBAAwB,CAAC,KAAK,GAC9B,wBAAwB,CAAC,WAAW,GACpC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,QAAQ,GACjC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,UAAU,GACnC,wBAAwB,CAAC,MAAM,GAC/B,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,IAAI,CAAC;AAElC,wBAAsB,aAAa,CAAC,IAAI,EAAE,IAAI,mBAG7C;AAgCD,wBAAsB,cAAc,CAClC,IAAI,EAAE,IAAI,EACV,GAAG,EAAE,MAAM,GACV,OAAO,CAAC;IACT,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAMD;AAED,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC;IACT,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAqID"}

package/dist/agent/cua/computer.js CHANGED Viewed

@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.handleModelAction = exports.getScreenshot = void 0;
+exports.handleModelAction = exports.handlePageGoto = exports.getScreenshot = void 0;
 async function getScreenshot(page) {
     const screenshotBytes = await page.screenshot();
     return Buffer.from(screenshotBytes).toString("base64");
@@ -35,6 +35,14 @@ const CUA_KEY_TO_PLAYWRIGHT_KEY = {
     tab: "Tab",
     win: "Meta",
 };
+async function handlePageGoto(page, url) {
+    await page.goto(url);
+    return {
+        actionSummary: `Navigated page to ${url}`,
+        actionCode: `await page.goto("${url}");\n`,
+    };
+}
+exports.handlePageGoto = handlePageGoto;
 async function handleModelAction(page, action) {
     const actionType = action.type;
     let actionCode = "";

package/dist/agent/cua/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"~~AAOA~~,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,~~CA2JD~~"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AASA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAmMD"}

package/dist/agent/cua/index.js CHANGED Viewed

@@ -58,7 +58,7 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
                 content: [
                     {
                         type: "input_text",
-                        text: task,
+                        text: `Task to execute: ${task}\n\nCurrent page URL: ${page.url()}`,
                     },
                     {
                         type: "input_image",
@@ -85,7 +85,8 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
             input: { response },
         });
         const computerCalls = response.output.filter((item) => item.type === "computer_call");
-        if (computerCalls.length === 0) {
+        const functionCalls = response.output.filter((item) => item.type === "function_call");
+        if (computerCalls.length === 0 && functionCalls.length === 0) {
             const assistantOutput = response.output.find((item) => item.type === "message");
             if (assistantOutput) {
                 const content = assistantOutput.content.find((item) => item.type === "output_text");
@@ -105,46 +106,75 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
                 actionsSummary.push(`Action reasoning: ${summaryText}`);
             }
         }
-        // We expect at most one computer call per response.
-        const computerCall = computerCalls[0];
-        const lastCallId = computerCall.call_id;
-        const action = computerCall.action;
-        const pendingSafetyChecks = computerCall.pending_safety_checks;
-        // Execute the action and take a screenshot
-        const { actionSummary, actionCode } = await (0, computer_1.handleModelAction)(page, action);
-        actionsSummary.push(`Action executed: ${actionSummary}`);
-        if (actionCode) {
-            actionsSummary.push(`Generated code: ${actionCode}`);
-            generatedCode += actionCode;
+        // We expect either a function call or a computer call in the response.
+        let toolCallOutput;
+        let executedActionSummary = "";
+        // We are assuming only one function call per response
+        const functionCall = functionCalls[0];
+        if (functionCall) {
+            const args = JSON.parse(functionCall.arguments);
+            const { actionSummary, actionCode } = await (0, computer_1.handlePageGoto)(page, args.url);
+            executedActionSummary = actionSummary;
+            actionsSummary.push(`Action executed: ${actionSummary}`);
+            if (actionCode) {
+                actionsSummary.push(`Generated code: ${actionCode}`);
+                generatedCode += actionCode;
+            }
+            toolCallOutput = {
+                type: "function_call_output",
+                call_id: functionCall.call_id,
+                output: `Navigating page to ${args.url}`,
+            };
+        }
+        else if (computerCalls.length >= 1) {
+            // We expect at most one computer call per response.
+            const computerCall = computerCalls[0];
+            const action = computerCall.action;
+            // Execute the action and take a screenshot
+            const { actionSummary, actionCode } = await (0, computer_1.handleModelAction)(page, action);
+            executedActionSummary = actionSummary;
+            actionsSummary.push(`Action executed: ${actionSummary}`);
+            if (actionCode) {
+                actionsSummary.push(`Generated code: ${actionCode}`);
+                generatedCode += actionCode;
+            }
+            else {
+                actionsSummary.push(`No code generated: Will rely on Playwright's ability to auto-wait or auto-scroll`);
+            }
+            // Allow time for changes to take effect.
+            await new Promise((resolve) => setTimeout(resolve, 1000));
+            const screenshotBytes = await (0, computer_1.getScreenshot)(page);
+            // Populate toolCallOutput
+            toolCallOutput = {
+                type: "computer_call_output",
+                call_id: computerCall.call_id,
+                output: {
+                    type: "computer_screenshot",
+                    image_url: `data:image/png;base64,${screenshotBytes}`,
+                },
+                acknowledged_safety_checks: computerCall.pending_safety_checks,
+            };
         }
         else {
-            actionsSummary.push(`No code generated: Will rely on Playwright's ability to auto-wait or auto-scroll`);
+            throw new Error("No tool call found in response.");
         }
-        // Allow time for changes to take effect.
-        await new Promise((resolve) => setTimeout(resolve, 1000));
-        const screenshotBytes = await (0, computer_1.getScreenshot)(page);
-        // Send the screenshot back as a computer_call_output
-        const computerCallSpan = iterationSpan?.span({
-            name: "computer-call-output",
-            input: { lastCallId, acknowledged_safety_checks: pendingSafetyChecks },
-        });
         response = await (0, model_1.callComputerUseModel)({
             previousResponseId: response.id,
             input: [
+                toolCallOutput,
                 {
-                    call_id: lastCallId,
-                    type: "computer_call_output",
-                    output: {
-                        type: "computer_screenshot",
-                        image_url: `data:image/png;base64,${screenshotBytes}`,
-                    },
-                    acknowledged_safety_checks: pendingSafetyChecks,
+                    role: "user",
+                    content: [
+                        {
+                            type: "input_text",
+                            text: `Action executed: ${executedActionSummary || "None"}\nCurrent page URL: ${page.url()}`,
+                        },
+                    ],
                 },
             ],
             screenWidth,
             screenHeight,
         });
-        computerCallSpan?.end({ output: response });
         iterationSpan?.end({ output: response });
     }
     if (!isTaskDone) {

package/dist/agent/cua/model.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
-import { Response, ResponseInput } from "openai/resources/responses/responses.mjs";
+import { Response, ResponseInputItem } from "openai/resources/responses/responses.mjs";
 export declare function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }: {
-    input: ResponseInput;
+    input: ResponseInputItem[];
     previousResponseId?: string;
     screenWidth: number;
     screenHeight: number;

package/dist/agent/cua/model.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AACA,OAAO,~~EACL~~,QAAQ,EACR,~~aAAa~~,~~EACd~~,MAAM,0CAA0C,CAAC;~~AAWlD~~,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,GACb,EAAE;IACD,KAAK,EAAE,~~aAAa~~,CAAC;~~IACrB~~,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,~~CAqBpB~~"}
1	+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}

package/dist/agent/cua/model.js CHANGED Viewed

@@ -13,12 +13,31 @@ you click on the submit button -- even if it looks like a scary action.
 If you have been asked to retrieve text or verify something on the UI, then communicate
 that in your responses so that the user can see your thinking process in its entirety.`;
+const pageGotoTool = {
+    type: "function",
+    name: "page_goto",
+    description: "Navigate to a given URL (e.g. https://www.openai.com). Call this if you are looking at a blank page or a new page.",
+    parameters: {
+        type: "object",
+        properties: {
+            url: {
+                type: "string",
+                description: "The URL to navigate to",
+            },
+        },
+        additionalProperties: false,
+        required: ["url"],
+    },
+    strict: true,
+};
 async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }) {
     const openai = new openai_1.default();
     return await openai.responses.create({
         model: "computer-use-preview-2025-03-11",
         previous_response_id: previousResponseId,
+        parallel_tool_calls: false,
         tools: [
+            pageGotoTool,
             {
                 type: "computer-preview",
                 display_width: screenWidth,

package/dist/bin/utils/index.js CHANGED Viewed

@@ -71,7 +71,7 @@ function printBanner() {
 --"-"-`;
     const version = require("../../../package.json").version;
     const logLine1 = `Running test-gen v${version}`;
-    const logLine2 = `from ${__dirname}`;
+    const logLine2 = `from ${__dirname.split("/bin/utils")[0]}`;
     // Process ASCII art
     const asciiLines = asciiArtRaw
         .split("\n")

package/dist/tools/commit-and-create-pr.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { Tool } from "@empiricalrun/llm/chat";
+export declare const commitAndPushChangesTool: Tool;
+//# sourceMappingURL=commit-and-create-pr.d.ts.map

package/dist/tools/commit-and-create-pr.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"commit-and-create-pr.d.ts","sourceRoot":"","sources":["../../src/tools/commit-and-create-pr.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAyBnD,eAAO,MAAM,wBAAwB,EAAE,IAwFtC,CAAC"}

package/dist/tools/commit-and-create-pr.js ADDED Viewed

@@ -0,0 +1,102 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.commitAndPushChangesTool = void 0;
+const child_process_1 = require("child_process");
+const crypto_1 = __importDefault(require("crypto"));
+const zod_1 = require("zod");
+const utils_1 = require("./utils");
+const GIT_USER_NAME = "empiricalrun[bot]";
+const GIT_USER_EMAIL = "180257021+empiricalrun[bot]@users.noreply.github.com";
+const CommitAndPushChangesSchema = zod_1.z.object({
+    commitMessage: zod_1.z
+        .string()
+        .describe("A short message to use for the commit. Should not be more than 8 words. Should follow conventional commit format."),
+});
+exports.commitAndPushChangesTool = {
+    schema: {
+        name: "commitAndPushChanges",
+        description: `Creates a commit with all modified files and pushes them to the current branch.
+If currently on main branch, creates a new branch with a random name.
+If the current branch already has an open PR, commits and pushes changes to that PR.
+Uses the empiricalrun[bot] credentials for git operations.
+Returns the URL of the created or updated pull request.`,
+        parameters: CommitAndPushChangesSchema,
+    },
+    execute: async (input) => {
+        try {
+            const { commitMessage } = input;
+            const currentBranch = (0, child_process_1.execSync)("git rev-parse --abbrev-ref HEAD")
+                .toString()
+                .trim();
+            let branchName = currentBranch;
+            if (currentBranch === "main") {
+                // If on main, create a new branch
+                const randomId = crypto_1.default.randomUUID().substring(0, 8);
+                branchName = `branch-${randomId}`;
+                (0, child_process_1.execSync)(`git checkout -b ${branchName}`);
+            }
+            const modifiedFiles = (0, child_process_1.execSync)("git status --porcelain")
+                .toString()
+                .split("\n")
+                .filter((line) => line && !line.includes(".bak"))
+                .map((line) => line.substring(3)); // Remove status prefix
+            if (modifiedFiles.length === 0) {
+                return {
+                    isError: true,
+                    result: "No modified files to commit",
+                };
+            }
+            for (const file of modifiedFiles) {
+                (0, child_process_1.execSync)(`git add "${file}"`);
+            }
+            // Use -c flag to set config just for this commit
+            (0, child_process_1.execSync)(`git -c user.name="${GIT_USER_NAME}" -c user.email="${GIT_USER_EMAIL}" commit -m "${commitMessage} [skip ci]"`);
+            const repoUrl = (0, child_process_1.execSync)("git config --get remote.origin.url")
+                .toString()
+                .trim();
+            const [owner, repo] = repoUrl
+                .replace("https://github.com/", "")
+                .replace(".git", "")
+                .split("/");
+            const existingPRs = (await (0, utils_1.callGitHubProxy)({
+                method: "GET",
+                url: `https://api.github.com/repos/${owner}/${repo}/pulls`,
+                body: {
+                    head: `${owner}:${branchName}`,
+                    state: "open",
+                },
+            }));
+            (0, child_process_1.execSync)(`git push origin ${branchName} --set-upstream`);
+            const existingPR = existingPRs?.find((pr) => pr.head.ref === branchName);
+            if (existingPR) {
+                return {
+                    isError: false,
+                    result: `Committed and pushed changes to existing PR: ${existingPR.html_url}`,
+                };
+            }
+            const pr = (await (0, utils_1.callGitHubProxy)({
+                method: "POST",
+                url: `https://api.github.com/repos/${owner}/${repo}/pulls`,
+                body: {
+                    title: commitMessage,
+                    head: branchName,
+                    base: "main",
+                    body: "Created via CommitAndPushChanges tool",
+                },
+            }));
+            return {
+                isError: false,
+                result: `Committed and pushed changes to new PR: ${pr.html_url}`,
+            };
+        }
+        catch (error) {
+            return {
+                isError: true,
+                result: `Failed to commit and push changes: ${error instanceof Error ? error.message : String(error)}`,
+            };
+        }
+    },
+};

package/dist/tools/diagnosis-fetcher.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"diagnosis-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/diagnosis-fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;~~AAanD~~,eAAO,MAAM,wBAAwB,EAAE,IAgFtC,CAAC"}
1	+ {"version":3,"file":"diagnosis-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/diagnosis-fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAenD,eAAO,MAAM,wBAAwB,EAAE,IAgFtC,CAAC"}

package/dist/tools/diagnosis-fetcher.js CHANGED Viewed

@@ -7,6 +7,7 @@ exports.fetchDiagnosisReportTool = void 0;
 const promises_1 = __importDefault(require("fs/promises"));
 const path_1 = __importDefault(require("path"));
 const zod_1 = require("zod");
+const utils_1 = require("./utils");
 const DiagnosisSchema = zod_1.z.object({
     diagnosisUrl: zod_1.z
         .string()
@@ -23,21 +24,23 @@ exports.fetchDiagnosisReportTool = {
         // Extract the slug from the URL - it's the part after the last '--'
         const slug = diagnosisUrl.split("--").pop();
         if (!slug) {
-            throw new Error("Invalid diagnosis URL - could not extract slug");
+            return {
+                isError: true,
+                result: "Invalid diagnosis URL - could not extract slug",
+            };
+        }
+        let data = null;
+        try {
+            data = await (0, utils_1.makeDashboardRequest)({
+                path: `/api/diagnosis/${slug}/detailed`,
+            });
         }
-        const response = await fetch(`https://dash.empirical.run/api/diagnosis/${slug}/detailed`, {
-            method: "GET",
-            headers: {
-                Authorization: "weQPMWKT",
-            },
-        });
-        if (!response.ok) {
+        catch (error) {
             return {
-                result: `Failed to fetch diagnosis details: ${response.statusText}`,
                 isError: true,
+                result: `Failed to fetch diagnosis details: ${error instanceof Error ? error.message : String(error)}`,
             };
         }
-        const data = await response.json();
         const { test_case, diagnosis } = data.data;
         const project = diagnosis?.test_project || "unknown";
         const sourceContext = await promises_1.default.readFile(path_1.default.join("tests", test_case.file_path), "utf-8");

package/dist/tools/test-gen-browser.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;~~AAwEnD~~,eAAO,MAAM,4BAA4B,EAAE,IA0E1C,CAAC"}
1	+ {"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AA8EnD,eAAO,MAAM,4BAA4B,EAAE,IA0E1C,CAAC"}

package/dist/tools/test-gen-browser.js CHANGED Viewed

@@ -29,16 +29,22 @@ and generate Playwright code for that actions. This is a useful tool when the mo
 locator/selector for an element on the page.
 IMPORTANT: Before you invoke this tool, you need to ensure that the test code is correctly prepared for this
-agent. Preparation involves adding a TODO comment that describes the change that needs to be made. A good
-comment calls out the element and browser interactions it must take. The TODO comment also has (agent) next to it, to
-clearly label that the change is for the agent to make.
+agent. Preparation involves adding a TODO comment that describes the change that needs to be made, and the page
+variable name where the actions must be performed. The content of the TODO comment calls out the element and browser
+interactions it must take. The TODO comment also has (agent on pageName) next to it, to clearly label that the change
+is for the agent to make on the given page (pageName in this case).
+To choose the page variable name, go through the test code and find available page variables. If you are replacing some
+existing test code, use the same page variable name as in the existing test code. If you are adding steps to the test,
+use the page variable name that is appropriate for the new steps. The page variable represents the browser page (or tab) that
+the agent is supposed to interact with.
 For example, this is a good TODO comment:
 \`\`\`
 test("Example test code", async ({ page }) => {
   await page.goto("https://example.com");
-  // TODO(agent): Click on the login button
+  // TODO(agent on page): Click on the login button
 });
 \`\`\`
@@ -53,7 +59,7 @@ For example, this is invalid:
 \`\`\`
 await extPage
   .getByTestId("virtuoso-item-list")
-  // TODO(agent): Click on the STARS button
+  // TODO(agent on extPage): Click on the STARS button
   .getByText("STARS", { exact: true })
   .click();
 // This is invalid, because the TODO is in the middle of a multi-line statement

package/dist/tools/test-run-fetcher/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/test-run-fetcher/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;~~AAanD~~,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAOnE;AAED,eAAO,MAAM,sBAAsB,EAAE,~~IA4HpC~~,CAAC"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/test-run-fetcher/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAcnD,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAOnE;AAED,eAAO,MAAM,sBAAsB,EAAE,IAwHpC,CAAC"}

package/dist/tools/test-run-fetcher/index.js CHANGED Viewed

@@ -2,6 +2,7 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.fetchTestRunReportTool = exports.extractPathAfterSourceRepo = void 0;
 const zod_1 = require("zod");
+const utils_1 = require("../utils");
 const TestRunSchema = zod_1.z.object({
     testRunUrl: zod_1.z
         .string()
@@ -32,22 +33,23 @@ exports.fetchTestRunReportTool = {
         const runId = urlParts.pop(); // Last part is the run ID
         const repoName = urlParts[urlParts.length - 2]; // Second to last part is the repo name
         if (!runId || !repoName) {
-            throw new Error("Invalid test run URL - could not extract run ID or repo name");
+            return {
+                isError: true,
+                result: "Invalid test run URL - could not extract run ID or repo name",
+            };
+        }
+        let data = null;
+        try {
+            data = await (0, utils_1.makeDashboardRequest)({
+                path: `/api/test-runs/${runId}?repo_name=${repoName}`,
+            });
         }
-        // Make the API call to fetch test run details
-        const response = await fetch(`https://dash.empirical.run/api/test-runs/${runId}?repo_name=${repoName}`, {
-            method: "GET",
-            headers: {
-                Authorization: "weQPMWKT",
-            },
-        });
-        if (!response.ok) {
+        catch (error) {
             return {
-                result: `Failed to fetch test run details: ${response.statusText}`,
                 isError: true,
+                result: `Failed to fetch test run details: ${error instanceof Error ? error.message : String(error)}`,
             };
         }
-        const data = (await response.json());
         // To efficiently use input_tokens, we
         //   1. Truncate stack trace to last 300 characters
         //   2. Remove request/response headers from network metadata

package/dist/tools/utils/index.d.ts ADDED Viewed

@@ -0,0 +1,11 @@
+/**
+ * Sends an HTTP request to the dashboard API and resolves with the response
+ * parsed as JSON. The implementation rejects with an Error when the response
+ * status is not ok.
+ *
+ * `path` is appended to the dashboard base URL, `method` defaults to "GET" in
+ * the implementation, and `body` (when provided) is JSON-serialized before
+ * being sent.
+ */
+export declare function makeDashboardRequest<T>({ path, method, body, }: {
+    path: string;
+    method?: string;
+    body?: any;
+}): Promise<T>;
+/**
+ * Relays a GitHub API call through the dashboard's "/api/github/proxy"
+ * endpoint (sent as a POST via makeDashboardRequest).
+ *
+ * `method` and `body` are forwarded in the proxy payload; `url` is forwarded
+ * with the "https://api.github.com" prefix removed.
+ */
+export declare function callGitHubProxy({ method, url, body, }: {
+    method: string;
+    url: string;
+    body?: any;
+}): Promise<unknown>;
+//# sourceMappingURL=index.d.ts.map

package/dist/tools/utils/index.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/index.ts"],"names":[],"mappings":"AAAA,wBAAsB,oBAAoB,CAAC,CAAC,EAAE,EAC5C,IAAI,EACJ,MAAc,EACd,IAAI,GACL,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,GAAG,CAAC;CACZ,GAAG,OAAO,CAAC,CAAC,CAAC,CAoBb;AAED,wBAAsB,eAAe,CAAC,EACpC,MAAM,EACN,GAAG,EACH,IAAI,GACL,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,GAAG,CAAC;CACZ,oBAWA"}

package/dist/tools/utils/index.js ADDED Viewed

@@ -0,0 +1,36 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.callGitHubProxy = exports.makeDashboardRequest = void 0;
+/**
+ * Sends a JSON request to the dashboard API and returns the parsed JSON
+ * response.
+ *
+ * @param {object} options
+ * @param {string} options.path - Path (including any query string) appended
+ *   to the dashboard base URL.
+ * @param {string} [options.method="GET"] - HTTP method.
+ * @param {*} [options.body] - Payload; JSON-serialized and sent whenever it is
+ *   not null/undefined.
+ * @returns {Promise<*>} The response body parsed as JSON.
+ * @throws {Error} When the response status is not ok; the message carries the
+ *   method, path, status code, and raw response body text.
+ */
+async function makeDashboardRequest({ path, method = "GET", body, }) {
+    const requestHeaders = {
+        "Content-Type": "application/json",
+        // TODO: Move to env variable for authentication
+        // NOTE(review): this credential is hard-coded in the published bundle.
+        Authorization: "weQPMWKT",
+        "User-Agent": "empiricalrun/test-gen",
+    };
+    const baseUrl = "https://dash.empirical.run";
+    // Check for null/undefined instead of truthiness so that valid JSON
+    // payloads such as 0, false, or "" are not silently dropped.
+    const hasBody = body !== undefined && body !== null;
+    const response = await fetch(`${baseUrl}${path}`, {
+        method,
+        headers: requestHeaders,
+        ...(hasBody && { body: JSON.stringify(body) }),
+    });
+    if (!response.ok) {
+        // Read the body as text: error responses are not guaranteed to be JSON.
+        const errorBody = await response.text();
+        throw new Error(`API request failed for ${method} ${path} (Status: ${response.status}). Body: ${errorBody}`);
+    }
+    return await response.json();
+}
+exports.makeDashboardRequest = makeDashboardRequest;
+/**
+ * Relays a GitHub API call through the dashboard proxy endpoint.
+ *
+ * @param {object} options
+ * @param {string} options.method - HTTP method to use for the GitHub call.
+ * @param {string} options.url - GitHub API URL; the "https://api.github.com"
+ *   prefix is removed before forwarding.
+ * @param {*} [options.body] - Payload forwarded with the proxied call.
+ * @returns {Promise<*>} Whatever makeDashboardRequest resolves with.
+ */
+async function callGitHubProxy({ method, url, body, }) {
+    // The proxy receives the GitHub path without the API origin.
+    const proxyPayload = {
+        method,
+        url: url.replace("https://api.github.com", ""),
+        body,
+    };
+    const proxiedResult = await makeDashboardRequest({
+        path: "/api/github/proxy",
+        method: "POST",
+        body: proxyPayload,
+    });
+    return proxiedResult;
+}
+exports.callGitHubProxy = callGitHubProxy;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.53.2",
+  "version": "0.53.4",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"
@@ -68,7 +68,7 @@
     "tsx": "^4.16.2",
     "typescript": "^5.3.3",
     "zod": "^3.23.8",
-    "@empiricalrun/llm": "^0.14.2",
+    "@empiricalrun/llm": "^0.14.3",
     "@empiricalrun/r2-uploader": "^0.3.8",
     "@empiricalrun/test-run": "^0.7.6"
   },