npm - @empiricalrun/test-gen - Versions diffs - 0.58.0 → 0.60.0 - Mend

@empiricalrun/test-gen 0.58.0 → 0.60.0

This diff shows the content changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (59)

package/CHANGELOG.md +73 -0
package/dist/agent/browsing/run.d.ts +9 -2
package/dist/agent/browsing/run.d.ts.map +1 -1
package/dist/agent/browsing/run.js +30 -30
package/dist/agent/browsing/utils.d.ts +1 -14
package/dist/agent/browsing/utils.d.ts.map +1 -1
package/dist/agent/browsing/utils.js +1 -58
package/dist/agent/chat/agent-loop.d.ts +2 -1
package/dist/agent/chat/agent-loop.d.ts.map +1 -1
package/dist/agent/chat/agent-loop.js +42 -34
package/dist/agent/chat/exports.d.ts +5 -6
package/dist/agent/chat/exports.d.ts.map +1 -1
package/dist/agent/chat/exports.js +13 -42
package/dist/agent/chat/index.d.ts +2 -1
package/dist/agent/chat/index.d.ts.map +1 -1
package/dist/agent/chat/index.js +23 -8
package/dist/agent/chat/models.d.ts +6 -0
package/dist/agent/chat/models.d.ts.map +1 -0
package/dist/agent/chat/models.js +37 -0
package/dist/agent/chat/prompt.d.ts.map +1 -1
package/dist/agent/chat/prompt.js +37 -8
package/dist/agent/chat/state.d.ts +31 -10
package/dist/agent/chat/state.d.ts.map +1 -1
package/dist/agent/chat/state.js +132 -27
package/dist/agent/chat/types.d.ts +2 -3
package/dist/agent/chat/types.d.ts.map +1 -1
package/dist/agent/chat/utils.d.ts +14 -0
package/dist/agent/chat/utils.d.ts.map +1 -0
package/dist/agent/chat/utils.js +50 -0
package/dist/agent/master/browser-tests/index.spec.js +6 -6
package/dist/bin/index.js +12 -2
package/dist/bin/utils/index.d.ts +1 -0
package/dist/bin/utils/index.d.ts.map +1 -1
package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +3 -0
package/dist/test-build/index.js +1 -1
package/dist/tool-call-service/index.d.ts +2 -1
package/dist/tool-call-service/index.d.ts.map +1 -1
package/dist/tool-call-service/index.js +51 -71
package/dist/tool-call-service/utils.d.ts +10 -0
package/dist/tool-call-service/utils.d.ts.map +1 -0
package/dist/tool-call-service/utils.js +23 -0
package/dist/tools/download-build.d.ts +9 -0
package/dist/tools/download-build.d.ts.map +1 -1
package/dist/tools/download-build.js +5 -4
package/dist/tools/str_replace_editor.d.ts.map +1 -1
package/dist/tools/str_replace_editor.js +24 -7
package/dist/tools/test-gen-browser.d.ts.map +1 -1
package/dist/tools/test-gen-browser.js +26 -19
package/dist/tools/test-run.d.ts.map +1 -1
package/dist/tools/test-run.js +8 -13
package/dist/utils/checkpoint.d.ts.map +1 -1
package/dist/utils/checkpoint.js +3 -1
package/dist/utils/exec.d.ts +2 -2
package/dist/utils/exec.d.ts.map +1 -1
package/dist/utils/exec.js +5 -4
package/package.json +5 -4
package/tsconfig.tsbuildinfo +1 -1

package/dist/agent/chat/index.js CHANGED Viewed

@@ -11,16 +11,17 @@ const file_tree_1 = require("../../utils/file-tree");
 const git_1 = require("../../utils/git");
 const agent_loop_1 = require("./agent-loop");
 const state_1 = require("./state");
+const DASHBOARD_DOMAIN = process.env.DASHBOARD_DOMAIN || "https://dash.empirical.run";
 function stopCriteria(userPrompt) {
     return userPrompt?.toLowerCase() === "stop";
 }
-function concludeAgent(chatModel, useDiskForChatState, selectedModel) {
+function concludeAgent(chatModel, useDiskForChatState, selectedModel, error) {
     console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + chatModel.getUsageSummary())}`);
     if (useDiskForChatState) {
-        (0, state_1.saveToDisk)(chatModel.messages, selectedModel);
+        (0, state_1.saveToDisk)(chatModel.messages, selectedModel, chatModel.askUserForInput, error);
     }
 }
-async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }) {
+async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, withRetry, }) {
     let chatState;
     if (useDiskForChatState) {
         chatState = (0, state_1.loadChatState)();
@@ -45,8 +46,12 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
             console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
         }
     }
+    // if withRetry set the chatState error null
+    if (withRetry && chatState) {
+        chatState.error = null;
+    }
     const handleSigInt = () => {
-        concludeAgent(chatModel, useDiskForChatState, selectedModel);
+        concludeAgent(chatModel, useDiskForChatState, selectedModel, null);
         process.exit(0);
     };
     process.once("SIGINT", handleSigInt);
@@ -54,7 +59,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
     let userPrompt;
     let reporterFunc = async (chatState, latest) => {
         if (useDiskForChatState) {
-            (0, state_1.saveToDisk)(chatState.messages, selectedModel);
+            (0, state_1.saveToDisk)(chatState.messages, selectedModel, chatState.askUserForInput, chatState.error);
         }
         if (latest) {
             console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
@@ -79,9 +84,14 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
             catch (e) {
                 // https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
                 if (e instanceof Error && e.name === "ExitPromptError") {
-                    concludeAgent(chatModel, useDiskForChatState, selectedModel);
+                    concludeAgent(chatModel, useDiskForChatState, selectedModel, null);
                     process.exit(0);
                 }
+                concludeAgent(chatModel, useDiskForChatState, selectedModel, {
+                    message: e.message,
+                    stack: e.stack || "Stack trace not available",
+                    timestamp: new Date().toISOString(),
+                });
                 throw e;
             }
             if (!stopCriteria(userPrompt)) {
@@ -99,6 +109,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
                 trace,
                 toolCallService,
                 fileInfo,
+                isToolExecutionRemote: false,
             });
         }
     }
@@ -111,7 +122,6 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
     const usageSummary = chatModel.getUsageSummary();
     console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
 }
-const DASHBOARD_DOMAIN = process.env.DASHBOARD_DOMAIN || "https://dash.empirical.run";
 async function getChatSessionFromDashboard(chatSessionId) {
     const response = await fetch(`${DASHBOARD_DOMAIN}/api/chat-sessions/${chatSessionId}`, {
         headers: {
@@ -127,7 +137,11 @@ async function getChatSessionFromDashboard(chatSessionId) {
 }
 async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
     const chatSession = await getChatSessionFromDashboard(chatSessionId);
-    const chatState = chatSession.chat_state;
+    let chatState = chatSession.chat_state;
+    // If not already canonical, migrate to canonical format
+    if (!chatState.version || chatState.version !== state_1.LATEST_CHAT_STATE_VERSION) {
+        chatState = (0, state_1.migrateChatState)(chatState);
+    }
     const branchName = chatSession.branch_name;
     const trace = llm_1.langfuseInstance?.trace({
         id: chatSession.langfuse_trace_id,
@@ -164,5 +178,6 @@ async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
         trace,
         toolCallService,
         fileInfo,
+        isToolExecutionRemote: false,
     });
 }

package/dist/agent/chat/models.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+import type { ModelInfo } from "@empiricalrun/shared-types";
+export declare const SUPPORTED_CHAT_MODELS: readonly ModelInfo[];
+export type SupportedChatModels = (typeof SUPPORTED_CHAT_MODELS)[number]["id"];
+export declare const defaultModel: SupportedChatModels;
+export declare const modelLabels: Record<SupportedChatModels, string>;
+//# sourceMappingURL=models.d.ts.map

package/dist/agent/chat/models.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,4BAA4B,CAAC;AAE5D,eAAO,MAAM,qBAAqB,EAAE,SAAS,SAAS,EAqB5C,CAAC;AAEX,MAAM,MAAM,mBAAmB,GAAG,CAAC,OAAO,qBAAqB,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC;AAW/E,eAAO,MAAM,YAAY,EAAE,mBAA6C,CAAC;AAEzE,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAOzD,CAAC"}

package/dist/agent/chat/models.js ADDED Viewed

@@ -0,0 +1,37 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.modelLabels = exports.defaultModel = exports.SUPPORTED_CHAT_MODELS = void 0;
+exports.SUPPORTED_CHAT_MODELS = [
+    {
+        id: "gemini-2.5-pro-preview-03-25",
+        label: "Gemini 2.5 Pro",
+        provider: "google",
+    },
+    {
+        id: "o4-mini-2025-04-16",
+        label: "OpenAI O4 Mini",
+        provider: "openai",
+    },
+    {
+        id: "claude-3-7-sonnet-20250219",
+        label: "Claude 3.7 Sonnet",
+        provider: "claude",
+    },
+    {
+        id: "claude-3-5-sonnet-20241022",
+        label: "Claude 3.5 Sonnet",
+        provider: "claude",
+    },
+];
+const DEFAULT_CHAT_MODEL_ID = "gemini-2.5-pro-preview-03-25";
+function getDefaultChatModelId() {
+    if (!exports.SUPPORTED_CHAT_MODELS.some((m) => m.id === DEFAULT_CHAT_MODEL_ID)) {
+        throw new Error("Default chat model is not in SUPPORTED_CHAT_MODELS");
+    }
+    return DEFAULT_CHAT_MODEL_ID;
+}
+exports.defaultModel = getDefaultChatModelId();
+exports.modelLabels = exports.SUPPORTED_CHAT_MODELS.reduce((acc, model) => ({
+    ...acc,
+    [model.id]: model.label,
+}), {});

package/dist/agent/chat/prompt.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,~~mBAoEzD~~"}
1	+ {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,mBAiGzD"}

package/dist/agent/chat/prompt.js CHANGED Viewed

@@ -52,20 +52,49 @@ The position of the comment is important: the browser agent will look for this c
 the actual code to click on the login button. If you are fixing a failing test, your comment should be
 around the failing line of code, so that it can be replaced/modified.
-# Rules for fixing Playwright tests
+# Proactiveness
+1. You are allowed to be proactive, but ONLY for read-only tool calls: like searching for content, reading files, fetching data from tools, and
+running Playwright tests.
+2. For any read-write tool calls (e.g. modifying any file), you should share your plan and get the user's approval before proceeding.
+# Rules to follow
 You must follow these rules while adding new tests or modifying existing tests. There can be exceptions to these rules, but
 ONLY when explicitly asked for by the user.
-1. Do not add any conditional logic or try catch blocks in a test. A good test deterministically tests a user scenario
-2. Trust Playwright's ability to auto-wait while taking actions on elements. For example, do not add checks on locator.isVisible() before clicking on it: Playwright already does this
-3. Do not add waitForTimeout or waitForLoadState in a test. Playwright will automatically wait for the page to load.
-4. You can't delete some steps from the test to make it pass. The test needs to accomplish its objective (which is to validate a particular user scenario)
+1. You can't delete some steps from the test to make it pass. The test needs to accomplish its objective (which is to validate a particular user scenario)
+2. Do not add any conditional logic or try catch blocks in a test. A good test deterministically tests a user scenario
+3. Trust Playwright's ability to auto-wait while taking actions on elements. For example, do not add checks on locator.isVisible() before clicking on it: Playwright already does this
+4. Do not add waitForTimeout or waitForLoadState in a test. Playwright will automatically wait for the page to load.
+5. Try/catch blocks are a code smell for tests: you should not use them.
+6. Do not use then() or catch() syntax in a test. Use async/await only
-# Proactiveness
+There are few exceptions to these rules. BEFORE applying any of the following exceptions, you MUST share your plan with the user and get their approval.
+## Exceptions for conditional logic
+There are few exceptions where you can add conditional logic to a test. If the application UI reveals some UI elements on certain conditions, we can add conditional logic.
-You are allowed to be proactive, but ONLY for read-only actions, like searching for content, reading files, fetching data from tools, and
-running Playwright tests. For any read-write actions (e.g. modifying any file), you should share your plan and get the user's approval before proceeding.
+For example, a form view shows a "Save" button only when the form is dirty. In this case, we will have to check if the "Save" button is visible before clicking on it. To do this,
+follow this pattern:
+\`\`\`
+const saveButton = page.getByRole('button', { name: 'Save' });
+if (await saveButton.isVisible()) {
+  await saveButton.click();
+}
+\`\`\`
+Note that locator.isVisible() DOES NOT wait for the element to be visible. If the element in question shows up after a delay, we have no option but to add a waitForTimeout.
+\`\`\`
+const saveButton = page.getByRole('button', { name: 'Save' });
+await page.waitForTimeout(100); // Wait for the element to be visible -- only if necessary.
+if (await saveButton.isVisible()) {
+  await saveButton.click();
+}
+\`\`\`
 # Repo context
 ${repoContext}

package/dist/agent/chat/state.d.ts CHANGED Viewed

@@ -1,14 +1,35 @@
 import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
-export declare const CURRENT_CHAT_STATE_VERSION = "20250327.1";
+import { CanonicalMessage, ChatState, ChatStateError } from "@empiricalrun/shared-types";
+export declare const CHAT_STATE_VERSIONS_MIGRATIONS_MAP: Record<string, (state: any) => any>;
+export declare const LATEST_CHAT_STATE_VERSION = "0.1";
 export declare const CHAT_STATE_PATH: string;
-export type ChatStateOnDisk<T> = {
-    version: string;
-    model: SupportedChatModels;
-    messages: T[];
+export declare function createChatState({ userPrompt, existingState, selectedModel, error, }: {
+    userPrompt: string | undefined;
+    existingState: ChatState | undefined;
+    selectedModel: SupportedChatModels;
+    error: ChatStateError | null;
+}): ChatState;
+export declare function createChatStateForMessages({ messages, selectedModel, askUserForInput, error, }: {
+    messages: any;
+    selectedModel: SupportedChatModels;
+    askUserForInput: boolean;
+    error: ChatStateError | null;
+}): ChatState;
+export declare function chatStateFromModel<T>({ chatModel, selectedModel, error, }: {
+    chatModel: IChatModel<T>;
+    selectedModel: SupportedChatModels;
+    error: ChatStateError | null;
+}): ChatState;
+export declare function loadChatState(): ChatState | undefined;
+/**
+ * Migrates a chat state object from an old version to the latest version.
+ * Add migration logic for each version as needed.
+ */
+export declare function migrateChatState(oldState: any): ChatState;
+export declare function saveToDisk<T>(messages: Array<T>, selectedModel: SupportedChatModels, askUserForInput: boolean, error: ChatStateError | null): void;
+export declare function getLatestDownloadBuildUrl(messages: CanonicalMessage[]): string | null;
+export declare function fetchToolCallAvailability(toolRequestId: String, messages: CanonicalMessage[]): {
+    hasToolRequest: boolean;
+    hasToolResponse: boolean;
 };
-export declare function createChatState(userPrompt: string, existingState: ChatStateOnDisk<any> | undefined, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
-export declare function createChatStateForMessages<T>(messages: any, selectedModel: SupportedChatModels): ChatStateOnDisk<T>;
-export declare function chatStateFromModel<T>(chatModel: IChatModel<T>, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
-export declare function loadChatState<T>(): ChatStateOnDisk<T> | undefined;
-export declare function saveToDisk<T>(messages: Array<T>, selectedModel: SupportedChatModels): void;
 //# sourceMappingURL=state.d.ts.map

package/dist/agent/chat/state.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,~~EAEL~~,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;~~AAIhC~~,~~eAAO~~,MAAM,~~0BAA0B~~,~~eAAe,~~CAAC;~~AAEvD~~,eAAO,MAAM,~~eAAe~~,~~QAI3B~~,~~CAAC;AAEF,~~MAAM,MAAM,~~eAAe~~,CAAC,~~CAAC~~,~~IAAI;IAC/B~~,~~OAAO~~,~~EAAE~~,~~MAAM~~,CAAC;~~IAChB~~,~~KAAK~~,~~EAAE~~,~~mBAAmB~~,CAAC;~~IAC3B~~,~~QAAQ~~,~~EAAE~~,~~CAAC~~,~~EAAE~~,CAAC;~~CACf,CAAC;~~AAEF,wBAAgB,eAAe,~~CAC7B~~,UAAU,~~EAAE~~,~~MAAM~~,~~EAClB~~,aAAa,EAAE,~~eAAe~~,~~CAAC~~,GAAG,CAAC,GAAG,SAAS,~~EAC/C~~,aAAa,EAAE,mBAAmB,~~4BAMnC~~;AAED,wBAAgB,0BAA0B,CAAC,~~CAAC~~,~~EAC1C~~,QAAQ,EAAE,GAAG,~~EACb~~,aAAa,EAAE,mBAAmB,~~GACjC~~,eAAe,~~CAAC~~,CAAC,CAAC,~~CAOpB~~;AAED,wBAAgB,kBAAkB,CAAC,CAAC,~~EAClC~~,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,~~EACxB~~,aAAa,EAAE,mBAAmB,~~4BAGnC~~;AAED,wBAAgB,aAAa,~~CAAC~~,~~CAAC~~,~~KAAK~~,~~eAAe~~,~~CAAC~~,~~CAAC~~,CAAC,GAAG,SAAS,~~CAajE~~;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,~~QAsBnC~~"}
1	+ {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,gBAAgB,EAChB,SAAS,EACT,cAAc,EACf,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,GAAG,CAAC;IACd,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,GAAG,SAAS,CASZ;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,EACpC,SAAS,EACT,aAAa,EACb,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAOA;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,GAAG,GAAG,SAAS,CAqBzD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,EACxB,KAAK,EAAE,cAAc,GAAG,IAAI,QAgB7B;AA2BD,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,yBAAyB,CACvC,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,gBAAgB,EAAE;;;EAe7B"}

package/dist/agent/chat/state.js CHANGED Viewed

@@ -3,64 +3,169 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.CHAT_STATE_PATH = exports.CURRENT_CHAT_STATE_VERSION = void 0;
+exports.CHAT_STATE_PATH = exports.LATEST_CHAT_STATE_VERSION = exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP = void 0;
 exports.createChatState = createChatState;
 exports.createChatStateForMessages = createChatStateForMessages;
 exports.chatStateFromModel = chatStateFromModel;
 exports.loadChatState = loadChatState;
+exports.migrateChatState = migrateChatState;
 exports.saveToDisk = saveToDisk;
+exports.getLatestDownloadBuildUrl = getLatestDownloadBuildUrl;
+exports.fetchToolCallAvailability = fetchToolCallAvailability;
 const chat_1 = require("@empiricalrun/llm/chat");
 const fs_1 = __importDefault(require("fs"));
 const path_1 = __importDefault(require("path"));
-exports.CURRENT_CHAT_STATE_VERSION = "20250327.1";
+// Migration wrapper for v20250327.1 -> v0.1 chat state versions
+// v20250327.1 was model-specific, but v0.1 is canonical
+function migrateToV01(oldState) {
+    if (oldState.model &&
+        typeof oldState.model === "string" &&
+        Array.isArray(oldState.messages)) {
+        const provider = (0, chat_1.getProviderForModel)(oldState.model);
+        if (provider === "google") {
+            return {
+                ...oldState,
+                version: "0.1",
+                messages: oldState.messages.map(chat_1.geminiToCanonical),
+            };
+        }
+        else {
+            throw new Error(`Unsupported state for migration with model: ${oldState.model} and version: ${oldState.version}`);
+        }
+    }
+    // If not Gemini or not matching, return the old state
+    return oldState;
+}
+exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP = {
+    "20250327.1": migrateToV01,
+    "0.1": (state) => state,
+};
+exports.LATEST_CHAT_STATE_VERSION = "0.1";
 exports.CHAT_STATE_PATH = path_1.default.join(process.cwd(), ".empiricalrun", "last-chat.json");
-function createChatState(userPrompt, existingState, selectedModel) {
+function createChatState({ userPrompt, existingState, selectedModel, error, }) {
     const messages = existingState?.messages || [];
     const chatModel = (0, chat_1.createChatModel)(messages, selectedModel);
-    chatModel.pushUserMessage(userPrompt);
-    return createChatStateForMessages(chatModel.messages, selectedModel);
+    if (userPrompt) {
+        chatModel.pushUserMessage(userPrompt);
+    }
+    return createChatStateForMessages({
+        messages: chatModel.messages,
+        selectedModel,
+        askUserForInput: chatModel.askUserForInput,
+        error,
+    });
 }
-function createChatStateForMessages(messages, selectedModel) {
+function createChatStateForMessages({ messages, selectedModel, askUserForInput, error, }) {
     // TODO: Add better types for messages
     return {
-        version: exports.CURRENT_CHAT_STATE_VERSION,
+        version: exports.LATEST_CHAT_STATE_VERSION,
         model: selectedModel,
         messages: messages,
+        askUserForInput: askUserForInput,
+        error: error,
     };
 }
-function chatStateFromModel(chatModel, selectedModel) {
-    return createChatStateForMessages(chatModel.messages, selectedModel);
+function chatStateFromModel({ chatModel, selectedModel, error, }) {
+    return createChatStateForMessages({
+        messages: chatModel.messages,
+        selectedModel,
+        askUserForInput: chatModel.askUserForInput,
+        error,
+    });
 }
 function loadChatState() {
     if (!fs_1.default.existsSync(exports.CHAT_STATE_PATH)) {
         return undefined;
     }
     const raw = fs_1.default.readFileSync(exports.CHAT_STATE_PATH, "utf8");
-    const state = JSON.parse(raw);
-    if (state.version !== exports.CURRENT_CHAT_STATE_VERSION) {
-        throw new Error(`Unsupported chat state v${state.version}. Expected v${exports.CURRENT_CHAT_STATE_VERSION}.`);
+    let state = JSON.parse(raw);
+    // Always migrate to the latest version after loading
+    const migratedState = migrateChatState(state);
+    // Only save if migration actually changed the state
+    if (JSON.stringify(state) !== JSON.stringify(migratedState)) {
+        fs_1.default.writeFileSync(exports.CHAT_STATE_PATH, JSON.stringify(migratedState, null, 2));
     }
-    return state;
+    return migratedState;
 }
-function saveToDisk(messages, selectedModel) {
-    const statePath = exports.CHAT_STATE_PATH;
-    let existingState = {
-        version: exports.CURRENT_CHAT_STATE_VERSION,
-        model: selectedModel,
-        messages: [],
+/**
+ * Migrates a chat state object from an old version to the latest version.
+ * Add migration logic for each version as needed.
+ */
+function migrateChatState(oldState) {
+    if (!oldState || Object.keys(oldState).length === 0) {
+        return oldState;
+    }
+    if (!oldState.version) {
+        throw new Error("No version found in chat state");
+    }
+    if (!exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP[oldState.version]) {
+        throw new Error(`No migration function found for version: ${oldState.version}`);
+    }
+    if (oldState.version === exports.LATEST_CHAT_STATE_VERSION) {
+        return oldState;
+    }
+    const migrateFn = exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP[oldState.version];
+    const migrated = migrateFn(oldState);
+    return {
+        version: exports.LATEST_CHAT_STATE_VERSION,
+        ...migrated,
     };
+}
+function saveToDisk(messages, selectedModel, askUserForInput, error) {
+    const statePath = exports.CHAT_STATE_PATH;
     // Ensure directory exists before trying to read/write
     const dirname = path_1.default.dirname(statePath);
     if (!fs_1.default.existsSync(dirname)) {
         fs_1.default.mkdirSync(dirname, { recursive: true });
     }
-    if (fs_1.default.existsSync(statePath)) {
-        existingState = JSON.parse(fs_1.default.readFileSync(statePath, "utf8"));
-    }
-    const newState = {
-        ...existingState,
-        messages: messages,
-        model: selectedModel,
-    };
+    // Use the helper to build the new state
+    const newState = createChatStateForMessages({
+        messages,
+        selectedModel,
+        askUserForInput,
+        error,
+    });
     fs_1.default.writeFileSync(statePath, JSON.stringify(newState, null, 2));
 }
+function findLatestToolCall(messages, toolName) {
+    return messages
+        .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime())
+        .find((m) => m.parts.some((p) => "toolCall" in p && p.toolCall?.name === toolName));
+}
+function findSuccessfulToolResponse(messages, toolCallId) {
+    return messages.find((m) => m.parts.some((p) => {
+        if ("toolResult" in p) {
+            return p.toolCallId === toolCallId && !p.toolResult.isError;
+        }
+        return false;
+    }));
+}
+function getLatestDownloadBuildUrl(messages) {
+    const toolCallMessage = findLatestToolCall(messages, "downloadBuild");
+    if (!toolCallMessage)
+        return null;
+    const toolCallPart = toolCallMessage.parts.find((p) => "toolCall" in p && p.toolCall?.name === "downloadBuild");
+    if (!toolCallPart || !("toolCall" in toolCallPart))
+        return null;
+    const toolCallId = toolCallPart.toolCall.id;
+    const toolResponseMessage = findSuccessfulToolResponse(messages, toolCallId);
+    if (!toolResponseMessage)
+        return null;
+    const input = toolCallPart.toolCall.input;
+    return input.buildUrl;
+}
+function fetchToolCallAvailability(toolRequestId, messages) {
+    let hasToolRequest = false;
+    let hasToolResponse = false;
+    messages.forEach((message) => {
+        message.parts.forEach((part) => {
+            if ("toolCall" in part && part.toolCall?.id === toolRequestId) {
+                hasToolRequest = true;
+            }
+            if ("toolResult" in part && part.toolCallId === toolRequestId) {
+                hasToolResponse = true;
+            }
+        });
+    });
+    return { hasToolRequest, hasToolResponse };
+}

package/dist/agent/chat/types.d.ts CHANGED Viewed

@@ -1,9 +1,8 @@
-import { ChatStateOnDisk } from "./state";
-export type SupportedChatModels = "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25" | "o4-mini-2025-04-16";
+import { ChatState } from "@empiricalrun/shared-types";
 type LatestMessage = {
     role: string;
     textMessage: string;
 };
-export type ReporterFunction = (state: ChatStateOnDisk<any>, latestHumanReadableMessage: LatestMessage | undefined) => Promise<void>;
+export type ReporterFunction = (state: ChatState, latestHumanReadableMessage: LatestMessage | undefined) => Promise<void>;
 export {};
 //# sourceMappingURL=types.d.ts.map

package/dist/agent/chat/types.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,~~eAAe~~,EAAE,MAAM,~~SAAS,CAAC;AAE1C,MAAM,MAAM,mBAAmB,GAC3B,~~4BAA4B,~~GAC5B,4BAA4B,GAC5B,8BAA8B,GAC9B,oBAAoB,~~CAAC;~~AAEzB~~,KAAK,aAAa,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAC7B,KAAK,EAAE,~~eAAe~~,~~CAAC~~,~~GAAG,CAAC,EAC3B,~~0BAA0B,EAAE,aAAa,GAAG,SAAS,KAClD,OAAO,CAAC,IAAI,CAAC,CAAC"}
1	+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,4BAA4B,CAAC;AAEvD,KAAK,aAAa,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAC7B,KAAK,EAAE,SAAS,EAChB,0BAA0B,EAAE,aAAa,GAAG,SAAS,KAClD,OAAO,CAAC,IAAI,CAAC,CAAC"}

package/dist/agent/chat/utils.d.ts ADDED Viewed

@@ -0,0 +1,14 @@
+import { TraceClient } from "@empiricalrun/llm";
+import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
+import { ReporterFunction } from "./types";
+export declare const log: (...args: any[]) => void;
+export declare function getModelName(model: string): string;
+export declare function handleAgentError({ context, error, chatModel, selectedModel, reporter, trace, }: {
+    context: string;
+    error: unknown;
+    chatModel: IChatModel<any>;
+    selectedModel: SupportedChatModels;
+    reporter: ReporterFunction;
+    trace?: TraceClient;
+}): Promise<void>;
+//# sourceMappingURL=utils.d.ts.map

package/dist/agent/chat/utils.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAEL,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAKhC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,GAAG,EAAE,SAEjC,CAAC;AAcF,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,OAAO,EACP,KAAK,EACL,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAsBA"}

package/dist/agent/chat/utils.js ADDED Viewed

@@ -0,0 +1,50 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.log = void 0;
+exports.getModelName = getModelName;
+exports.handleAgentError = handleAgentError;
+const chat_1 = require("@empiricalrun/llm/chat");
+const picocolors_1 = require("picocolors");
+const state_1 = require("./state");
+const log = (...args) => {
+    console.log((0, picocolors_1.gray)(args.join(" ")));
+};
+exports.log = log;
+function logError(context, error, trace) {
+    console.error((0, picocolors_1.gray)(`[Error in ${context}]:`), error instanceof Error ? error.stack || error.message : error);
+    trace?.update({
+        output: {
+            error: error instanceof Error ? error.message : String(error),
+        },
+    });
+}
+function getModelName(model) {
+    const provider = (0, chat_1.getProviderForModel)(model);
+    if (provider === "claude")
+        return "Claude";
+    if (provider === "google")
+        return "Gemini";
+    if (provider === "openai")
+        return "o4";
+    return "AI";
+}
+async function handleAgentError({ context, error, chatModel, selectedModel, reporter, trace, }) {
+    const errorObject = {
+        message: error.message,
+        stack: error.stack || "Stack trace not available",
+        timestamp: new Date().toISOString(),
+    };
+    await reporter((0, state_1.chatStateFromModel)({
+        chatModel,
+        selectedModel,
+        error: errorObject,
+    }), chatModel.getHumanReadableLatestMessage());
+    trace?.update({
+        output: {
+            error: errorObject,
+        },
+    });
+    if (context) {
+        logError(context, error, trace);
+    }
+}

package/dist/agent/master/browser-tests/index.spec.js CHANGED Viewed

@@ -9,7 +9,7 @@ const utils_1 = require("../../browsing/utils");
 const element_annotation_1 = require("../element-annotation");
 const run_1 = require("../run");
 const fixtures_1 = require("./fixtures");
-(0, fixtures_1.test)("able to scroll and interact with elements", async ({ page, server }) => {
+fixtures_1.test.skip("able to scroll and interact with elements", async ({ page, server, }) => {
     await page.goto(`${server.baseURL}/blog-page.html`);
     const response = await (0, run_1.createTestUsingMasterAgent)({
         task: `fill test@test.com into the email field and click the submit`,
@@ -21,7 +21,7 @@ const fixtures_1 = require("./fixtures");
     (0, fixtures_1.expect)(response.code).toContain("await page.getByPlaceholder('Enter your email').fill(\"test@test.com\")");
     (0, fixtures_1.expect)(response.code).toContain("await page.getByRole('button', { name: 'Subscribe' }).click()");
 });
-(0, fixtures_1.test)("scroll when element does not exist", async ({ page, server }) => {
+fixtures_1.test.skip("scroll when element does not exist", async ({ page, server }) => {
     await page.goto(`${server.baseURL}/blog-page.html`);
     const response = await (0, run_1.createTestUsingMasterAgent)({
         task: `click search button`,
@@ -32,7 +32,7 @@ const fixtures_1 = require("./fixtures");
     (0, fixtures_1.expect)(response.importPaths.length).toBe(0);
     (0, fixtures_1.expect)(response.code.length).toBe(0);
 });
-(0, fixtures_1.test)("scroll and click inside div elements", async ({ page, server }) => {
+fixtures_1.test.skip("scroll and click inside div elements", async ({ page, server }) => {
     await page.goto(`${server.baseURL}/dropdown-scrolls.html`);
     const response = await (0, run_1.createTestUsingMasterAgent)({
         task: `click on x-3 inside bmw dropdown, and then,
@@ -49,7 +49,7 @@ click on maverick inside ford dropdown`,
     (0, fixtures_1.expect)(lines.find((l) => l.match(/^await page.+Ford.+.click/))).toBeTruthy();
     (0, fixtures_1.expect)(lines.find((l) => l.match(/^await page.+Maverick.+.click/))).toBeTruthy();
 });
-(0, fixtures_1.test)("master agent can click icons accurately", async ({ page, server }) => {
+fixtures_1.test.skip("master agent can click icons accurately", async ({ page, server, }) => {
     await page.goto(`${server.baseURL}/icons-navbar.html`);
     await (0, fixtures_1.expect)(page.getByText("select an icon")).toBeVisible();
     const response = await (0, run_1.createTestUsingMasterAgent)({
@@ -86,7 +86,7 @@ click on maverick inside ford dropdown`,
     (0, fixtures_1.expect)(response.code).toContain("page.locator");
     (0, fixtures_1.expect)(response.code).toContain("click()");
 });
-(0, fixtures_1.test)("annotate and enrich annotations correctly", async ({ page, server }) => {
+fixtures_1.test.skip("annotate and enrich annotations correctly", async ({ page, server, }) => {
     await (0, utils_1.injectPwLocatorGenerator)(page);
     await page.goto(`${server.baseURL}/iframe-elements.html`);
     const { annotationKeys: keys } = await (0, element_annotation_1.getAnnotationKeys)({
@@ -105,7 +105,7 @@ click on maverick inside ford dropdown`,
     // 2 clickable divs: 1 in main frame, 1 in iframe
     (0, fixtures_1.expect)(keys.filter((k) => k.text.includes("Lorem Ipsum")).length).toBe(2);
 });
-(0, fixtures_1.test)("fill action with multiple pages", async ({ context }) => {
+fixtures_1.test.skip("fill action with multiple pages", async ({ context }) => {
     const page1 = await context.newPage();
     const page2 = await context.newPage();
     const response = await (0, run_1.createTestUsingMasterAgent)({