npm - @empiricalrun/test-gen - Versions diffs - 0.58.0 → 0.59.0 - Mend

@empiricalrun/test-gen 0.58.0 → 0.59.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/CHANGELOG.md +53 -0
package/dist/agent/browsing/run.d.ts +3 -1
package/dist/agent/browsing/run.d.ts.map +1 -1
package/dist/agent/browsing/run.js +23 -25
package/dist/agent/browsing/utils.d.ts +1 -14
package/dist/agent/browsing/utils.d.ts.map +1 -1
package/dist/agent/browsing/utils.js +1 -58
package/dist/agent/chat/agent-loop.d.ts +2 -1
package/dist/agent/chat/agent-loop.d.ts.map +1 -1
package/dist/agent/chat/agent-loop.js +41 -24
package/dist/agent/chat/exports.d.ts +4 -5
package/dist/agent/chat/exports.d.ts.map +1 -1
package/dist/agent/chat/exports.js +12 -42
package/dist/agent/chat/index.d.ts.map +1 -1
package/dist/agent/chat/index.js +10 -4
package/dist/agent/chat/models.d.ts +6 -0
package/dist/agent/chat/models.d.ts.map +1 -0
package/dist/agent/chat/models.js +37 -0
package/dist/agent/chat/prompt.d.ts.map +1 -1
package/dist/agent/chat/prompt.js +37 -8
package/dist/agent/chat/state.d.ts +14 -5
package/dist/agent/chat/state.d.ts.map +1 -1
package/dist/agent/chat/state.js +88 -25
package/dist/agent/chat/types.d.ts +0 -1
package/dist/agent/chat/types.d.ts.map +1 -1
package/dist/agent/master/browser-tests/index.spec.js +6 -6
package/dist/bin/index.js +4 -0
package/dist/bin/utils/index.d.ts +1 -0
package/dist/bin/utils/index.d.ts.map +1 -1
package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +3 -0
package/dist/test-build/index.js +1 -1
package/dist/tool-call-service/index.d.ts +2 -1
package/dist/tool-call-service/index.d.ts.map +1 -1
package/dist/tool-call-service/index.js +51 -71
package/dist/tool-call-service/utils.d.ts +10 -0
package/dist/tool-call-service/utils.d.ts.map +1 -0
package/dist/tool-call-service/utils.js +23 -0
package/dist/tools/download-build.d.ts +9 -0
package/dist/tools/download-build.d.ts.map +1 -1
package/dist/tools/download-build.js +5 -4
package/dist/tools/test-gen-browser.d.ts.map +1 -1
package/dist/tools/test-gen-browser.js +8 -13
package/dist/tools/test-run.d.ts.map +1 -1
package/dist/tools/test-run.js +8 -13
package/dist/utils/checkpoint.d.ts.map +1 -1
package/dist/utils/checkpoint.js +3 -1
package/dist/utils/exec.d.ts +2 -2
package/dist/utils/exec.d.ts.map +1 -1
package/dist/utils/exec.js +5 -4
package/package.json +4 -4
package/tsconfig.tsbuildinfo +1 -1

package/dist/agent/chat/prompt.js CHANGED Viewed

@@ -52,20 +52,49 @@ The position of the comment is important: the browser agent will look for this c
 the actual code to click on the login button. If you are fixing a failing test, your comment should be
 around the failing line of code, so that it can be replaced/modified.
-# Rules for fixing Playwright tests
+# Proactiveness
+1. You are allowed to be proactive, but ONLY for read-only tool calls: like searching for content, reading files, fetching data from tools, and
+running Playwright tests.
+2. For any read-write tool calls (e.g. modifying any file), you should share your plan and get the user's approval before proceeding.
+# Rules to follow
 You must follow these rules while adding new tests or modifying existing tests. There can be exceptions to these rules, but
 ONLY when explicitly asked for by the user.
-1. Do not add any conditional logic or try catch blocks in a test. A good test deterministically tests a user scenario
-2. Trust Playwright's ability to auto-wait while taking actions on elements. For example, do not add checks on locator.isVisible() before clicking on it: Playwright already does this
-3. Do not add waitForTimeout or waitForLoadState in a test. Playwright will automatically wait for the page to load.
-4. You can't delete some steps from the test to make it pass. The test needs to accomplish its objective (which is to validate a particular user scenario)
+1. You can't delete some steps from the test to make it pass. The test needs to accomplish its objective (which is to validate a particular user scenario)
+2. Do not add any conditional logic or try catch blocks in a test. A good test deterministically tests a user scenario
+3. Trust Playwright's ability to auto-wait while taking actions on elements. For example, do not add checks on locator.isVisible() before clicking on it: Playwright already does this
+4. Do not add waitForTimeout or waitForLoadState in a test. Playwright will automatically wait for the page to load.
+5. Try/catch blocks are a code smell for tests: you should not use them.
+6. Do not use then() or catch() syntax in a test. Use async/await only
-# Proactiveness
+There are few exceptions to these rules. BEFORE applying any of the following exceptions, you MUST share your plan with the user and get their approval.
+## Exceptions for conditional logic
+There are few exceptions where you can add conditional logic to a test. If the application UI reveals some UI elements on certain conditions, we can add conditional logic.
-You are allowed to be proactive, but ONLY for read-only actions, like searching for content, reading files, fetching data from tools, and
-running Playwright tests. For any read-write actions (e.g. modifying any file), you should share your plan and get the user's approval before proceeding.
+For example, a form view shows a "Save" button only when the form is dirty. In this case, we will have to check if the "Save" button is visible before clicking on it. To do this,
+follow this pattern:
+\`\`\`
+const saveButton = page.getByRole('button', { name: 'Save' });
+if (await saveButton.isVisible()) {
+  await saveButton.click();
+}
+\`\`\`
+Note that locator.isVisible() DOES NOT wait for the element to be visible. If the element in question shows up after a delay, we have no option but to add a waitForTimeout.
+\`\`\`
+const saveButton = page.getByRole('button', { name: 'Save' });
+await page.waitForTimeout(100); // Wait for the element to be visible -- only if necessary.
+if (await saveButton.isVisible()) {
+  await saveButton.click();
+}
+\`\`\`
 # Repo context
 ${repoContext}

package/dist/agent/chat/state.d.ts CHANGED Viewed

@@ -1,14 +1,23 @@
 import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
-export declare const CURRENT_CHAT_STATE_VERSION = "20250327.1";
+import { CanonicalMessage, ChatState } from "@empiricalrun/shared-types";
+export declare const CHAT_STATE_VERSIONS_MIGRATIONS_MAP: Record<string, (state: any) => any>;
+export declare const LATEST_CHAT_STATE_VERSION = "0.1";
 export declare const CHAT_STATE_PATH: string;
 export type ChatStateOnDisk<T> = {
     version: string;
     model: SupportedChatModels;
     messages: T[];
+    askUserForInput: boolean;
 };
-export declare function createChatState(userPrompt: string, existingState: ChatStateOnDisk<any> | undefined, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
-export declare function createChatStateForMessages<T>(messages: any, selectedModel: SupportedChatModels): ChatStateOnDisk<T>;
+export declare function createChatState(userPrompt: string | undefined, existingState: ChatStateOnDisk<any> | undefined, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
+export declare function createChatStateForMessages<T>(messages: any, selectedModel: SupportedChatModels, askUserForInput: boolean): ChatStateOnDisk<T>;
 export declare function chatStateFromModel<T>(chatModel: IChatModel<T>, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
-export declare function loadChatState<T>(): ChatStateOnDisk<T> | undefined;
-export declare function saveToDisk<T>(messages: Array<T>, selectedModel: SupportedChatModels): void;
+export declare function loadChatState(): ChatState | undefined;
+/**
+ * Migrates a chat state object from an old version to the latest version.
+ * Add migration logic for each version as needed.
+ */
+export declare function migrateChatState<T = any>(oldState: any): ChatStateOnDisk<T>;
+export declare function saveToDisk<T>(messages: Array<T>, selectedModel: SupportedChatModels, askUserForInput: boolean): void;
+export declare function getLatestDownloadBuildUrl(messages: CanonicalMessage[]): string | null;
 //# sourceMappingURL=state.d.ts.map

package/dist/agent/chat/state.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,~~EAEL~~,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;~~AAIhC~~,eAAO,MAAM,~~0BAA0B~~,~~eAAe~~,CAAC;~~AAEvD~~,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,MAAM,MAAM,eAAe,CAAC,CAAC,IAAI;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,mBAAmB,CAAC;IAC3B,QAAQ,EAAE,CAAC,EAAE,CAAC;~~CACf~~,CAAC;AAEF,wBAAgB,eAAe,CAC7B,UAAU,EAAE,MAAM,~~EAClB~~,aAAa,EAAE,eAAe,CAAC,GAAG,CAAC,GAAG,SAAS,EAC/C,aAAa,EAAE,mBAAmB,~~4BAMnC~~;AAED,wBAAgB,0BAA0B,CAAC,CAAC,EAC1C,QAAQ,EAAE,GAAG,EACb,aAAa,EAAE,mBAAmB,~~GACjC~~,eAAe,CAAC,CAAC,CAAC,~~CAOpB~~;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAClC,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,EACxB,aAAa,EAAE,mBAAmB,~~4BAGnC~~;AAED,wBAAgB,aAAa,CAAC,CAAC,~~KAAK~~,eAAe,CAAC,CAAC,CAAC,~~GAAG,SAAS,CAajE~~;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,~~QAsBnC~~"}
1	+ {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,4BAA4B,CAAC;AA+BzE,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,MAAM,MAAM,eAAe,CAAC,CAAC,IAAI;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,mBAAmB,CAAC;IAC3B,QAAQ,EAAE,CAAC,EAAE,CAAC;IACd,eAAe,EAAE,OAAO,CAAC;CAC1B,CAAC;AAEF,wBAAgB,eAAe,CAC7B,UAAU,EAAE,MAAM,GAAG,SAAS,EAC9B,aAAa,EAAE,eAAe,CAAC,GAAG,CAAC,GAAG,SAAS,EAC/C,aAAa,EAAE,mBAAmB,4BAYnC;AAED,wBAAgB,0BAA0B,CAAC,CAAC,EAC1C,QAAQ,EAAE,GAAG,EACb,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,GACvB,eAAe,CAAC,CAAC,CAAC,CAQpB;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAClC,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,EACxB,aAAa,EAAE,mBAAmB,4BAOnC;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,GAAG,GAAG,EAAE,QAAQ,EAAE,GAAG,GAAG,eAAe,CAAC,CAAC,CAAC,CAqB3E;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,QAezB;AAED,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAqBf"}

package/dist/agent/chat/state.js CHANGED Viewed

@@ -3,64 +3,127 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.CHAT_STATE_PATH = exports.CURRENT_CHAT_STATE_VERSION = void 0;
+exports.CHAT_STATE_PATH = exports.LATEST_CHAT_STATE_VERSION = exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP = void 0;
 exports.createChatState = createChatState;
 exports.createChatStateForMessages = createChatStateForMessages;
 exports.chatStateFromModel = chatStateFromModel;
 exports.loadChatState = loadChatState;
+exports.migrateChatState = migrateChatState;
 exports.saveToDisk = saveToDisk;
+exports.getLatestDownloadBuildUrl = getLatestDownloadBuildUrl;
 const chat_1 = require("@empiricalrun/llm/chat");
 const fs_1 = __importDefault(require("fs"));
 const path_1 = __importDefault(require("path"));
-exports.CURRENT_CHAT_STATE_VERSION = "20250327.1";
+// Migration wrapper for v20250327.1 -> v0.1 chat state versions
+// v20250327.1 was model-specific, but v0.1 is canonical
+function migrateToV01(oldState) {
+    if (oldState.model &&
+        typeof oldState.model === "string" &&
+        Array.isArray(oldState.messages)) {
+        const provider = (0, chat_1.getProviderForModel)(oldState.model);
+        if (provider === "google") {
+            return {
+                ...oldState,
+                version: "0.1",
+                messages: oldState.messages.map(chat_1.geminiToCanonical),
+            };
+        }
+        else {
+            throw new Error(`Unsupported state for migration with model: ${oldState.model} and version: ${oldState.version}`);
+        }
+    }
+    // If not Gemini or not matching, return the old state
+    return oldState;
+}
+exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP = {
+    "20250327.1": migrateToV01,
+    "0.1": (state) => state,
+};
+exports.LATEST_CHAT_STATE_VERSION = "0.1";
 exports.CHAT_STATE_PATH = path_1.default.join(process.cwd(), ".empiricalrun", "last-chat.json");
 function createChatState(userPrompt, existingState, selectedModel) {
     const messages = existingState?.messages || [];
     const chatModel = (0, chat_1.createChatModel)(messages, selectedModel);
-    chatModel.pushUserMessage(userPrompt);
-    return createChatStateForMessages(chatModel.messages, selectedModel);
+    if (userPrompt) {
+        chatModel.pushUserMessage(userPrompt);
+    }
+    return createChatStateForMessages(chatModel.messages, selectedModel, chatModel.askUserForInput);
 }
-function createChatStateForMessages(messages, selectedModel) {
+function createChatStateForMessages(messages, selectedModel, askUserForInput) {
     // TODO: Add better types for messages
     return {
-        version: exports.CURRENT_CHAT_STATE_VERSION,
+        version: exports.LATEST_CHAT_STATE_VERSION,
         model: selectedModel,
         messages: messages,
+        askUserForInput: askUserForInput,
     };
 }
 function chatStateFromModel(chatModel, selectedModel) {
-    return createChatStateForMessages(chatModel.messages, selectedModel);
+    return createChatStateForMessages(chatModel.messages, selectedModel, chatModel.askUserForInput);
 }
 function loadChatState() {
     if (!fs_1.default.existsSync(exports.CHAT_STATE_PATH)) {
         return undefined;
     }
     const raw = fs_1.default.readFileSync(exports.CHAT_STATE_PATH, "utf8");
-    const state = JSON.parse(raw);
-    if (state.version !== exports.CURRENT_CHAT_STATE_VERSION) {
-        throw new Error(`Unsupported chat state v${state.version}. Expected v${exports.CURRENT_CHAT_STATE_VERSION}.`);
+    let state = JSON.parse(raw);
+    // Always migrate to the latest version after loading
+    const migratedState = migrateChatState(state);
+    // Only save if migration actually changed the state
+    if (JSON.stringify(state) !== JSON.stringify(migratedState)) {
+        fs_1.default.writeFileSync(exports.CHAT_STATE_PATH, JSON.stringify(migratedState, null, 2));
     }
-    return state;
+    return migratedState;
 }
-function saveToDisk(messages, selectedModel) {
-    const statePath = exports.CHAT_STATE_PATH;
-    let existingState = {
-        version: exports.CURRENT_CHAT_STATE_VERSION,
-        model: selectedModel,
-        messages: [],
+/**
+ * Migrates a chat state object from an old version to the latest version.
+ * Add migration logic for each version as needed.
+ */
+function migrateChatState(oldState) {
+    if (!oldState || Object.keys(oldState).length === 0) {
+        return oldState;
+    }
+    if (!oldState.version) {
+        throw new Error("No version found in chat state");
+    }
+    if (!exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP[oldState.version]) {
+        throw new Error(`No migration function found for version: ${oldState.version}`);
+    }
+    if (oldState.version === exports.LATEST_CHAT_STATE_VERSION) {
+        return oldState;
+    }
+    const migrateFn = exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP[oldState.version];
+    const migrated = migrateFn(oldState);
+    return {
+        version: exports.LATEST_CHAT_STATE_VERSION,
+        ...migrated,
     };
+}
+function saveToDisk(messages, selectedModel, askUserForInput) {
+    const statePath = exports.CHAT_STATE_PATH;
     // Ensure directory exists before trying to read/write
     const dirname = path_1.default.dirname(statePath);
     if (!fs_1.default.existsSync(dirname)) {
         fs_1.default.mkdirSync(dirname, { recursive: true });
     }
-    if (fs_1.default.existsSync(statePath)) {
-        existingState = JSON.parse(fs_1.default.readFileSync(statePath, "utf8"));
-    }
-    const newState = {
-        ...existingState,
-        messages: messages,
-        model: selectedModel,
-    };
+    // Use the helper to build the new state
+    const newState = createChatStateForMessages(messages, selectedModel, askUserForInput);
     fs_1.default.writeFileSync(statePath, JSON.stringify(newState, null, 2));
 }
+function getLatestDownloadBuildUrl(messages) {
+    const toolCallMessage = messages
+        .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime())
+        .find((p) => p.parts.find((p) => {
+        if ("toolCall" in p) {
+            return p.toolCall?.name === "downloadBuild";
+        }
+        return false;
+    }));
+    if (!toolCallMessage)
+        return null;
+    const toolCallPart = toolCallMessage.parts.find((p) => "toolCall" in p && p.toolCall?.name === "downloadBuild");
+    if (!toolCallPart || !("toolCall" in toolCallPart))
+        return null;
+    const input = toolCallPart.toolCall.input;
+    return input.buildUrl;
+}

package/dist/agent/chat/types.d.ts CHANGED Viewed

@@ -1,5 +1,4 @@
 import { ChatStateOnDisk } from "./state";
-export type SupportedChatModels = "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25" | "o4-mini-2025-04-16";
 type LatestMessage = {
     role: string;
     textMessage: string;

package/dist/agent/chat/types.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1C,~~MAAM,MAAM,mBAAmB,GAC3B,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,GAC9B,oBAAoB,CAAC;AAEzB,~~KAAK,aAAa,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAC7B,KAAK,EAAE,eAAe,CAAC,GAAG,CAAC,EAC3B,0BAA0B,EAAE,aAAa,GAAG,SAAS,KAClD,OAAO,CAAC,IAAI,CAAC,CAAC"}
1	+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1C,KAAK,aAAa,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAC7B,KAAK,EAAE,eAAe,CAAC,GAAG,CAAC,EAC3B,0BAA0B,EAAE,aAAa,GAAG,SAAS,KAClD,OAAO,CAAC,IAAI,CAAC,CAAC"}

package/dist/agent/master/browser-tests/index.spec.js CHANGED Viewed

@@ -9,7 +9,7 @@ const utils_1 = require("../../browsing/utils");
 const element_annotation_1 = require("../element-annotation");
 const run_1 = require("../run");
 const fixtures_1 = require("./fixtures");
-(0, fixtures_1.test)("able to scroll and interact with elements", async ({ page, server }) => {
+fixtures_1.test.skip("able to scroll and interact with elements", async ({ page, server, }) => {
     await page.goto(`${server.baseURL}/blog-page.html`);
     const response = await (0, run_1.createTestUsingMasterAgent)({
         task: `fill test@test.com into the email field and click the submit`,
@@ -21,7 +21,7 @@ const fixtures_1 = require("./fixtures");
     (0, fixtures_1.expect)(response.code).toContain("await page.getByPlaceholder('Enter your email').fill(\"test@test.com\")");
     (0, fixtures_1.expect)(response.code).toContain("await page.getByRole('button', { name: 'Subscribe' }).click()");
 });
-(0, fixtures_1.test)("scroll when element does not exist", async ({ page, server }) => {
+fixtures_1.test.skip("scroll when element does not exist", async ({ page, server }) => {
     await page.goto(`${server.baseURL}/blog-page.html`);
     const response = await (0, run_1.createTestUsingMasterAgent)({
         task: `click search button`,
@@ -32,7 +32,7 @@ const fixtures_1 = require("./fixtures");
     (0, fixtures_1.expect)(response.importPaths.length).toBe(0);
     (0, fixtures_1.expect)(response.code.length).toBe(0);
 });
-(0, fixtures_1.test)("scroll and click inside div elements", async ({ page, server }) => {
+fixtures_1.test.skip("scroll and click inside div elements", async ({ page, server }) => {
     await page.goto(`${server.baseURL}/dropdown-scrolls.html`);
     const response = await (0, run_1.createTestUsingMasterAgent)({
         task: `click on x-3 inside bmw dropdown, and then,
@@ -49,7 +49,7 @@ click on maverick inside ford dropdown`,
     (0, fixtures_1.expect)(lines.find((l) => l.match(/^await page.+Ford.+.click/))).toBeTruthy();
     (0, fixtures_1.expect)(lines.find((l) => l.match(/^await page.+Maverick.+.click/))).toBeTruthy();
 });
-(0, fixtures_1.test)("master agent can click icons accurately", async ({ page, server }) => {
+fixtures_1.test.skip("master agent can click icons accurately", async ({ page, server, }) => {
     await page.goto(`${server.baseURL}/icons-navbar.html`);
     await (0, fixtures_1.expect)(page.getByText("select an icon")).toBeVisible();
     const response = await (0, run_1.createTestUsingMasterAgent)({
@@ -86,7 +86,7 @@ click on maverick inside ford dropdown`,
     (0, fixtures_1.expect)(response.code).toContain("page.locator");
     (0, fixtures_1.expect)(response.code).toContain("click()");
 });
-(0, fixtures_1.test)("annotate and enrich annotations correctly", async ({ page, server }) => {
+fixtures_1.test.skip("annotate and enrich annotations correctly", async ({ page, server, }) => {
     await (0, utils_1.injectPwLocatorGenerator)(page);
     await page.goto(`${server.baseURL}/iframe-elements.html`);
     const { annotationKeys: keys } = await (0, element_annotation_1.getAnnotationKeys)({
@@ -105,7 +105,7 @@ click on maverick inside ford dropdown`,
     // 2 clickable divs: 1 in main frame, 1 in iframe
     (0, fixtures_1.expect)(keys.filter((k) => k.text.includes("Lorem Ipsum")).length).toBe(2);
 });
-(0, fixtures_1.test)("fill action with multiple pages", async ({ context }) => {
+fixtures_1.test.skip("fill action with multiple pages", async ({ context }) => {
     const page1 = await context.newPage();
     const page2 = await context.newPage();
     const response = await (0, run_1.createTestUsingMasterAgent)({

package/dist/bin/index.js CHANGED Viewed

@@ -185,6 +185,8 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
             status: "agent_live_session_started",
         });
         const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
+            testCaseName: testCase.name,
+            testCaseSuites: testCase.suites,
             testFilePath: specPath,
             filePathToUpdate,
             pwProjectsFilter: testGenConfig.environment?.playwrightProjects,
@@ -213,6 +215,7 @@ async function main() {
         .option("--use-disk-for-chat-state", "Save and load chat state from disk")
         .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-03-25)")
         .option("--initial-prompt <path>", "Path to an initial prompt file (e.g. prompt.md)")
+        .option("--use-transform", "Use the new message transform strategy")
         .parse(process.argv);
     const options = program.opts();
     const completedOptions = await (0, utils_2.validateAndCompleteCliOptions)(options);
@@ -243,6 +246,7 @@ async function main() {
             modelInput: completedOptions.chatModel,
             useDiskForChatState: completedOptions.useDiskForChatState,
             initialPromptPath: completedOptions.initialPrompt,
+            useTransform: completedOptions.useTransform,
         });
         return;
     }

package/dist/bin/utils/index.d.ts CHANGED Viewed

@@ -9,6 +9,7 @@ export interface CliOptions {
     initialPrompt?: string;
     chatSessionId?: string;
     chatModel?: "claude-3-7" | "claude-3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro" | "gemini-2.5-pro-preview-03-25" | "o4-mini" | "o4-mini-2025-04-16";
+    useTransform?: boolean;
 }
 export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;
 export declare function printBanner(): void;

package/dist/bin/utils/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,YAAY,GACZ,4BAA4B,GAC5B,4BAA4B,GAC5B,gBAAgB,GAChB,8BAA8B,GAC9B,SAAS,GACT,oBAAoB,CAAC;~~CAC1B~~;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,YAAY,GACZ,4BAA4B,GAC5B,4BAA4B,GAC5B,gBAAgB,GAChB,8BAA8B,GAC9B,SAAS,GACT,oBAAoB,CAAC;IACzB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}

package/dist/index.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { FrameLocator, Page } from "playwright";
 import { ScopeVars } from "./types";
+export { downloadBuild } from "./test-build";
 export declare function createTest(task: string, pageRef: Page | FrameLocator, scope?: ScopeVars): Promise<void>;
 //# sourceMappingURL=index.d.ts.map

package/dist/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQhD,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;~~AAoBpC~~,wBAAsB,UAAU,CAC9B,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,IAAI,GAAG,YAAY,EAC5B,KAAK,CAAC,EAAE,SAAS,iBA0ElB"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQhD,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAoB7C,wBAAsB,UAAU,CAC9B,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,IAAI,GAAG,YAAY,EAC5B,KAAK,CAAC,EAAE,SAAS,iBA0ElB"}

package/dist/index.js CHANGED Viewed

@@ -3,6 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
+exports.downloadBuild = void 0;
 exports.createTest = createTest;
 const llm_1 = require("@empiricalrun/llm");
 const cua_1 = require("./agent/cua");
@@ -11,6 +12,8 @@ const scenarios_1 = require("./bin/utils/scenarios");
 const client_1 = __importDefault(require("./file/client"));
 const reporter_1 = require("./reporter");
 const session_1 = require("./session");
+var test_build_1 = require("./test-build");
+Object.defineProperty(exports, "downloadBuild", { enumerable: true, get: function () { return test_build_1.downloadBuild; } });
 const flushEvents = async () => {
     await (0, llm_1.flushAllTraces)();
 };

package/dist/test-build/index.js CHANGED Viewed

@@ -23,7 +23,7 @@ async function downloadBuild(buildUrl) {
     const buildDownloadScript = packageJSON.scripts["download"];
     if (buildDownloadScript && buildUrl) {
         logger.log(`Downloading build from ${buildUrl}`);
-        await (0, exec_1.cmd)(`npm run download ${buildUrl}`.split(" "), {
+        await (0, exec_1.cmd)(`npm`, ["run", "download", buildUrl], {
             env: { ...Object(process.env) },
         });
     }

package/dist/tool-call-service/index.d.ts CHANGED Viewed

@@ -14,6 +14,7 @@ export declare class ToolCallService {
     getTools(): Promise<{
         tools: Tool[];
     }>;
-    execute(toolCalls: PendingToolCall[], isRemote: boolean, trace?: TraceClient): Promise<ToolResult[]>;
+    sendToQueue(toolCalls: PendingToolCall[]): Promise<void>;
+    execute(toolCalls: PendingToolCall[], trace?: TraceClient): Promise<ToolResult[]>;
 }
 //# sourceMappingURL=index.d.ts.map

package/dist/tool-call-service/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tool-call-service/index.ts"],"names":[],"mappings":"~~AACA~~,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,~~EACL~~,eAAe,EACf,mBAAmB,EACnB,IAAI,EACJ,UAAU,EACX,MAAM,wBAAwB,CAAC;~~AAgBhC~~,YAAY,EAAE,mBAAmB,EAAE,CAAC;AAEpC,KAAK,aAAa,GAAG;IACnB,CAAC,GAAG,EAAE,MAAM,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CACzE,CAAC;~~AA6BF~~,qBAAa,eAAe;IAC1B,KAAK,EAAE,IAAI,EAAE,CAAM;IACnB,aAAa,EAAE,aAAa,CAAM;IAClC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,aAAa,EAAE,mBAAmB,CAAC;IACnC,UAAU,EAAE,MAAM,CAAC;gBAEjB,aAAa,EAAE,MAAM,GAAG,IAAI,EAC5B,aAAa,EAAE,mBAAmB,EAClC,UAAU,EAAE,MAAM;IAiBd,QAAQ;;;IAaR,~~OAAO~~,~~CACX~~,SAAS,EAAE,eAAe,EAAE,~~EAC5B~~,~~QAAQ~~,~~EAAE~~,OAAO,~~EACjB~~,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,UAAU,EAAE,CAAC;~~CAyDzB~~"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tool-call-service/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAEL,eAAe,EACf,mBAAmB,EACnB,IAAI,EACJ,UAAU,EACX,MAAM,wBAAwB,CAAC;AAiBhC,YAAY,EAAE,mBAAmB,EAAE,CAAC;AAEpC,KAAK,aAAa,GAAG;IACnB,CAAC,GAAG,EAAE,MAAM,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CACzE,CAAC;AAEF,qBAAa,eAAe;IAC1B,KAAK,EAAE,IAAI,EAAE,CAAM;IACnB,aAAa,EAAE,aAAa,CAAM;IAClC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,aAAa,EAAE,mBAAmB,CAAC;IACnC,UAAU,EAAE,MAAM,CAAC;gBAEjB,aAAa,EAAE,MAAM,GAAG,IAAI,EAC5B,aAAa,EAAE,mBAAmB,EAClC,UAAU,EAAE,MAAM;IAiBd,QAAQ;;;IAaR,WAAW,CAAC,SAAS,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAcxD,OAAO,CACX,SAAS,EAAE,eAAe,EAAE,EAC5B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,UAAU,EAAE,CAAC;CA2CzB"}

package/dist/tool-call-service/index.js CHANGED Viewed

@@ -1,7 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ToolCallService = void 0;
-const client_sqs_1 = require("@aws-sdk/client-sqs");
+const chat_1 = require("@empiricalrun/llm/chat");
 const commit_and_create_pr_1 = require("../tools/commit-and-create-pr");
 const diagnosis_fetcher_1 = require("../tools/diagnosis-fetcher");
 const download_build_1 = require("../tools/download-build");
@@ -12,25 +12,7 @@ const test_gen_browser_1 = require("../tools/test-gen-browser");
 const test_run_1 = require("../tools/test-run");
 const test_run_fetcher_1 = require("../tools/test-run-fetcher");
 const checkpoint_1 = require("../utils/checkpoint");
-async function sendToolRequestToRemoteQueue(payload) {
-    const sqs = new client_sqs_1.SQSClient({
-        region: process.env.AWS_REGION,
-        credentials: {
-            accessKeyId: process.env.AWS_ACCESS_KEY_ID,
-            secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
-        },
-    });
-    const queueUrl = process.env.TOOL_EXECUTION_SQS_URL;
-    if (!queueUrl) {
-        throw new Error("TOOL_EXECUTION_SQS_URL is required for remote execution.");
-    }
-    await sqs.send(new client_sqs_1.SendMessageCommand({
-        QueueUrl: queueUrl,
-        MessageBody: JSON.stringify(payload),
-        MessageGroupId: payload.requestId,
-        MessageDeduplicationId: payload.requestId, // unique id for the tool request
-    }));
-}
+const utils_1 = require("./utils");
 class ToolCallService {
     tools = [];
     toolExecutors = {};
@@ -53,70 +35,68 @@ class ToolCallService {
         ];
     }
     async getTools() {
-        if (!this.selectedModel.startsWith("claude")) {
+        if ((0, chat_1.getProviderForModel)(this.selectedModel) !== "claude") {
             this.tools.push(...str_replace_editor_1.textEditorTools);
         }
         this.tools.forEach((tool) => {
             this.toolExecutors[tool.schema.name] = tool.execute;
         });
-        if (this.selectedModel.startsWith("claude")) {
+        if ((0, chat_1.getProviderForModel)(this.selectedModel) === "claude") {
             this.toolExecutors["str_replace_editor"] = str_replace_editor_1.strReplaceEditorExecutor;
         }
         return { tools: this.tools };
     }
-    async execute(toolCalls, isRemote, trace) {
-        if (isRemote && this.chatSessionId) {
-            await sendToolRequestToRemoteQueue({
-                toolCalls,
-                requestId: crypto.randomUUID(),
-                chatSessionId: this.chatSessionId,
-                selectedModel: this.selectedModel,
-                branchName: this.branchName,
-            });
-            return toolCalls.map(() => ({
-                isError: false,
-                result: `Tool request sent to remote queue to execute.`,
-            }));
+    async sendToQueue(toolCalls) {
+        const requestId = toolCalls[0]?.id;
+        if (!requestId) {
+            throw new Error("Could not find an id for the tool call.");
         }
-        else {
-            const executeSpan = trace?.span({
-                name: "execute_tools",
-                input: { toolCalls: toolCalls.map((tc) => ({ name: tc.name })) },
+        await (0, utils_1.sendToolRequestToRemoteQueue)({
+            toolCalls,
+            requestId,
+            chatSessionId: this.chatSessionId,
+            selectedModel: this.selectedModel,
+            branchName: this.branchName,
+        });
+    }
+    async execute(toolCalls, trace) {
+        const executeSpan = trace?.span({
+            name: "execute_tools",
+            input: { toolCalls: toolCalls.map((tc) => ({ name: tc.name })) },
+        });
+        const toolResults = [];
+        for (const toolCall of toolCalls) {
+            const span = executeSpan?.span({
+                name: `tool: ${toolCall.name}`,
+                input: toolCall.input,
             });
-            const toolResults = [];
-            for (const toolCall of toolCalls) {
-                const span = executeSpan?.span({
-                    name: `tool: ${toolCall.name}`,
-                    input: toolCall.input,
-                });
-                const toolExecutor = this.toolExecutors[toolCall.name];
-                if (!toolExecutor) {
-                    const errorResult = {
-                        isError: true,
-                        result: `Invalid function/tool call: invalid_tool_call not found`,
-                    };
-                    toolResults.push(errorResult);
-                    span?.end({ output: errorResult });
-                    continue;
-                }
-                try {
-                    const result = await toolExecutor(toolCall.input, trace);
-                    toolResults.push(result);
-                    span?.end({ output: result });
-                }
-                catch (error) {
-                    const errorResult = {
-                        isError: true,
-                        result: error instanceof Error ? error.message : String(error),
-                    };
-                    toolResults.push(errorResult);
-                    span?.end({ output: errorResult });
-                }
+            const toolExecutor = this.toolExecutors[toolCall.name];
+            if (!toolExecutor) {
+                const errorResult = {
+                    isError: true,
+                    result: `Invalid function/tool call: ${toolCall.name} not found`,
+                };
+                toolResults.push(errorResult);
+                span?.end({ output: errorResult });
+                continue;
+            }
+            try {
+                const result = await toolExecutor(toolCall.input, trace);
+                toolResults.push(result);
+                span?.end({ output: result });
+            }
+            catch (error) {
+                const errorResult = {
+                    isError: true,
+                    result: error instanceof Error ? error.message : String(error),
+                };
+                toolResults.push(errorResult);
+                span?.end({ output: errorResult });
             }
-            await (0, checkpoint_1.createCheckpoint)(toolCalls, this.branchName);
-            executeSpan?.end({ output: { toolResults } });
-            return toolResults;
         }
+        await (0, checkpoint_1.createCheckpoint)(toolCalls, this.branchName);
+        executeSpan?.end({ output: { toolResults } });
+        return toolResults;
     }
 }
 exports.ToolCallService = ToolCallService;

package/dist/tool-call-service/utils.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+import { SupportedChatModels } from "@empiricalrun/llm/chat";
+import { PendingToolCall } from "@empiricalrun/shared-types";
+export declare function sendToolRequestToRemoteQueue(payload: {
+    toolCalls: PendingToolCall[];
+    requestId: string;
+    chatSessionId: number;
+    selectedModel: SupportedChatModels;
+    branchName: string;
+}): Promise<void>;
+//# sourceMappingURL=utils.d.ts.map

package/dist/tool-call-service/utils.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/tool-call-service/utils.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAE7D,wBAAsB,4BAA4B,CAAC,OAAO,EAAE;IAC1D,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,mBAAmB,CAAC;IACnC,UAAU,EAAE,MAAM,CAAC;CACpB,iBAoBA"}

package/dist/tool-call-service/utils.js ADDED Viewed

@@ -0,0 +1,23 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.sendToolRequestToRemoteQueue = sendToolRequestToRemoteQueue;
+const client_sqs_1 = require("@aws-sdk/client-sqs");
+async function sendToolRequestToRemoteQueue(payload) {
+    const sqs = new client_sqs_1.SQSClient({
+        region: process.env.AWS_REGION,
+        credentials: {
+            accessKeyId: process.env.AWS_ACCESS_KEY_ID,
+            secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
+        },
+    });
+    const queueUrl = process.env.TOOL_EXECUTION_SQS_URL;
+    if (!queueUrl) {
+        throw new Error("TOOL_EXECUTION_SQS_URL is required for remote execution.");
+    }
+    await sqs.send(new client_sqs_1.SendMessageCommand({
+        QueueUrl: queueUrl,
+        MessageBody: JSON.stringify(payload),
+        MessageGroupId: payload.requestId,
+        MessageDeduplicationId: payload.requestId, // unique id for the tool request
+    }));
+}

package/dist/tools/download-build.d.ts CHANGED Viewed

@@ -1,3 +1,12 @@
 import type { Tool } from "@empiricalrun/llm/chat";
+import { z } from "zod";
+export declare const downloadBuildToolSchema: z.ZodObject<{
+    buildUrl: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+    buildUrl: string;
+}, {
+    buildUrl: string;
+}>;
+export type DownloadBuildToolInput = z.infer<typeof downloadBuildToolSchema>;
 export declare const downloadBuildTool: Tool;
 //# sourceMappingURL=download-build.d.ts.map

package/dist/tools/download-build.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"download-build.d.ts","sourceRoot":"","sources":["../../src/tools/download-build.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;~~AAKnD~~,eAAO,MAAM,iBAAiB,EAAE,~~IAkC~~/B,CAAC"}
1	+ {"version":3,"file":"download-build.d.ts","sourceRoot":"","sources":["../../src/tools/download-build.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AACnD,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,eAAO,MAAM,uBAAuB;;;;;;EAElC,CAAC;AAEH,MAAM,MAAM,sBAAsB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,uBAAuB,CAAC,CAAC;AAE7E,eAAO,MAAM,iBAAiB,EAAE,IA8B/B,CAAC"}