npm - @empiricalrun/test-gen - Versions diffs - 0.53.4 → 0.53.6 - Mend

@empiricalrun/test-gen 0.53.4 → 0.53.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/CHANGELOG.md +24 -0
package/dist/agent/chat/agent-loop.d.ts +10 -0
package/dist/agent/chat/agent-loop.d.ts.map +1 -0
package/dist/agent/chat/agent-loop.js +91 -0
package/dist/agent/chat/index.d.ts +10 -5
package/dist/agent/chat/index.d.ts.map +1 -1
package/dist/agent/chat/index.js +108 -107
package/dist/agent/chat/model.d.ts +4 -0
package/dist/agent/chat/model.d.ts.map +1 -0
package/dist/agent/chat/model.js +14 -0
package/dist/agent/chat/state.d.ts +14 -0
package/dist/agent/chat/state.d.ts.map +1 -0
package/dist/agent/chat/state.js +63 -0
package/dist/agent/chat/types.d.ts +9 -0
package/dist/agent/chat/types.d.ts.map +1 -0
package/dist/agent/chat/types.js +2 -0
package/dist/agent/cua/index.d.ts +3 -1
package/dist/agent/cua/index.d.ts.map +1 -1
package/dist/agent/cua/index.js +10 -22
package/dist/agent/cua/model.d.ts +3 -1
package/dist/agent/cua/model.d.ts.map +1 -1
package/dist/agent/cua/model.js +3 -7
package/dist/bin/index.js +21 -5
package/dist/bin/utils/index.d.ts +1 -0
package/dist/bin/utils/index.d.ts.map +1 -1
package/package.json +5 -17

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,29 @@
 # @empiricalrun/test-gen
+## 0.53.6
+### Patch Changes
+- a3a1863: refactor: split chatagent into cli runner and agent loop
+- a32c076: feat: enabled LLM tracing for chat agent
+- eb89698: feat: used langfuse LLM tracing for claude and gemini usage
+- 9cc17cc: fix: import for chat state for dashboard
+- 17fcf83: feat: chat agent fetches and reports to the dashboard
+- 1c1fd00: feat: expose chatagent methods, starting with createChatState
+- c4c5a32: refactor: make chatmodels stateless and elevate state to chatagent
+- 48702e0: feat: checkout chat session branch before running chat agent
+- Updated dependencies [eb89698]
+- Updated dependencies [c4c5a32]
+  - @empiricalrun/llm@0.14.5
+## 0.53.5
+### Patch Changes
+- 9f3cb10: feat: automated tracing for LLM call overlay dismiss
+- Updated dependencies [9f3cb10]
+  - @empiricalrun/llm@0.14.4
 ## 0.53.4
 ### Patch Changes

package/dist/agent/chat/agent-loop.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+import { TraceClient } from "@empiricalrun/llm";
+import { IChatModel } from "@empiricalrun/llm/chat";
+import { ReporterFunction, SupportedChatModels } from "./types";
+export declare function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }: {
+    chatModel: IChatModel<any>;
+    selectedModel: SupportedChatModels;
+    reporter: ReporterFunction;
+    trace?: TraceClient;
+}): Promise<void>;
+//# sourceMappingURL=agent-loop.d.ts.map

package/dist/agent/chat/agent-loop.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAEL,UAAU,EAMX,MAAM,wBAAwB,CAAC;AAYhC,OAAO,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAyChE,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBA2CA"}

package/dist/agent/chat/agent-loop.js ADDED Viewed

@@ -0,0 +1,91 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.chatAgentLoop = void 0;
+const chat_1 = require("@empiricalrun/llm/chat");
+const picocolors_1 = require("picocolors");
+const web_1 = require("../../bin/utils/platform/web");
+const commit_and_create_pr_1 = require("../../tools/commit-and-create-pr");
+const diagnosis_fetcher_1 = require("../../tools/diagnosis-fetcher");
+const grep_1 = require("../../tools/grep");
+const test_gen_browser_1 = require("../../tools/test-gen-browser");
+const test_run_1 = require("../../tools/test-run");
+const test_run_fetcher_1 = require("../../tools/test-run-fetcher");
+const prompt_1 = require("./prompt");
+const state_1 = require("./state");
+function getTools(selectedModel) {
+    let tools = [
+        grep_1.grepTool,
+        test_run_1.runTestTool,
+        test_run_fetcher_1.fetchTestRunReportTool,
+        diagnosis_fetcher_1.fetchDiagnosisReportTool,
+        test_gen_browser_1.generateTestWithBrowserAgent,
+        commit_and_create_pr_1.commitAndPushChangesTool,
+    ];
+    if (selectedModel.startsWith("gemini")) {
+        // Claude will have its own built-in text editor tools
+        chat_1.textEditorTools.forEach((tool) => {
+            const originalExecute = tool.execute;
+            tool.execute = (input) => originalExecute(input, web_1.validateTypescript);
+        });
+        tools.push(...chat_1.textEditorTools);
+    }
+    const toolExecutors = {
+        ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
+    };
+    if (selectedModel.startsWith("claude")) {
+        toolExecutors.str_replace_editor = (input) => (0, chat_1.strReplaceEditorExecutor)(input, web_1.validateTypescript);
+    }
+    return { tools, toolExecutors };
+}
+function getModelName(model) {
+    if (model.startsWith("claude"))
+        return "Claude";
+    if (model.startsWith("gemini"))
+        return "Gemini";
+    return "AI";
+}
+const log = (...args) => {
+    console.log((0, picocolors_1.gray)(args.join(" ")));
+};
+async function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }) {
+    const systemPrompt = await (0, prompt_1.buildSystemPrompt)();
+    const { tools, toolExecutors } = getTools(selectedModel);
+    while (!chatModel.askUserForInput) {
+        const toolCalls = chatModel.getPendingToolCalls();
+        if (toolCalls.length) {
+            const toolResults = [];
+            for (const call of toolCalls) {
+                const args = JSON.stringify(call.input);
+                log(`Executing tool ${call.name} with args: ${args}`);
+                const toolExecutor = toolExecutors[call.name];
+                if (!toolExecutor) {
+                    throw new Error(`Tool ${call.name} not found`);
+                }
+                const callResponse = await toolExecutor(call.input);
+                if (callResponse.isError) {
+                    log(`Tool ${call.name} failed: ${callResponse.result}`);
+                }
+                else {
+                    log(`Tool ${call.name} completed`);
+                }
+                toolResults.push(callResponse);
+            }
+            chatModel.pushToolResultsMessage(toolCalls, toolResults);
+        }
+        log(`${getModelName(selectedModel)} is working...`);
+        const response = await chatModel.getLLMResponse({
+            systemPrompt,
+            tools: tools.map((tool) => (0, chat_1.zodToOpenAITool)(tool.schema)),
+            selectedModel,
+            trace,
+        });
+        if (!response) {
+            throw new Error("No response from LLM");
+        }
+        chatModel.pushMessage(response);
+        const latest = chatModel.getHumanReadableLatestMessage();
+        await reporter((0, state_1.chatStateFromModel)(chatModel), latest);
+    }
+    (0, chat_1.cleanupBackupFiles)(process.cwd());
+}
+exports.chatAgentLoop = chatAgentLoop;

package/dist/agent/chat/index.d.ts CHANGED Viewed

@@ -1,6 +1,11 @@
-export declare function chatAgent({ selectedModel, useDiskForChatState, initialPromptContent, }: {
-    selectedModel?: "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
-    useDiskForChatState?: boolean;
-    initialPromptContent?: string;
-}): Promise<string>;
+import { SupportedChatModels } from "./types";
+export declare function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }: {
+    selectedModel: SupportedChatModels;
+    useDiskForChatState: boolean;
+    initialPromptContent: string | undefined;
+}): Promise<void>;
+export declare function runChatAgentForDashboard({ chatSessionId, selectedModel, }: {
+    selectedModel: SupportedChatModels;
+    chatSessionId: number;
+}): Promise<void>;
 //# sourceMappingURL=index.d.ts.map

package/dist/agent/chat/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AA2FA,wBAAsB,SAAS,CAAC,EAC9B,aAA4C,EAC5C,mBAA2B,EAC3B,oBAAoB,GACrB,EAAE;IACD,aAAa,CAAC,EACV,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;IACnC,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,oBAAoB,CAAC,EAAE,MAAM,CAAC;CAC/B,mBAAyFA"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AASA,OAAO,EAAoB,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAgBhE,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBAoFA;AA+BD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA8BA"}

package/dist/agent/chat/index.js CHANGED Viewed

@@ -1,69 +1,29 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.chatAgent = void 0;
-const chat_1 = require("@empiricalrun/llm/chat");
+exports.runChatAgentForDashboard = exports.runChatAgentForCLI = void 0;
+const llm_1 = require("@empiricalrun/llm");
+const child_process_1 = require("child_process");
 const picocolors_1 = require("picocolors");
-const web_1 = require("../../bin/utils/platform/web");
 const human_in_the_loop_1 = require("../../human-in-the-loop");
-const commit_and_create_pr_1 = require("../../tools/commit-and-create-pr");
-const diagnosis_fetcher_1 = require("../../tools/diagnosis-fetcher");
-const grep_1 = require("../../tools/grep");
-const test_gen_browser_1 = require("../../tools/test-gen-browser");
-const test_run_1 = require("../../tools/test-run");
-const test_run_fetcher_1 = require("../../tools/test-run-fetcher");
-const prompt_1 = require("./prompt");
-function getTools(selectedModel) {
-    let tools = [
-        grep_1.grepTool,
-        test_run_1.runTestTool,
-        test_run_fetcher_1.fetchTestRunReportTool,
-        diagnosis_fetcher_1.fetchDiagnosisReportTool,
-        test_gen_browser_1.generateTestWithBrowserAgent,
-        commit_and_create_pr_1.commitAndPushChangesTool,
-    ];
-    if (selectedModel.startsWith("gemini")) {
-        // Claude will have its own built-in text editor tools
-        chat_1.textEditorTools.forEach((tool) => {
-            const originalExecute = tool.execute;
-            tool.execute = (input) => originalExecute(input, web_1.validateTypescript);
-        });
-        tools.push(...chat_1.textEditorTools);
-    }
-    const toolExecutors = {
-        // TODO: Add validateTypescript
-        ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
-    };
-    if (selectedModel.startsWith("claude")) {
-        toolExecutors.str_replace_editor = (input) => (0, chat_1.strReplaceEditorExecutor)(input, web_1.validateTypescript);
-    }
-    return { tools, toolExecutors };
-}
-function createChatModel(useDiskForChatState, selectedModel) {
-    if (selectedModel.startsWith("claude")) {
-        return new chat_1.ClaudeChatModel(useDiskForChatState);
-    }
-    if (selectedModel.startsWith("gemini")) {
-        return new chat_1.GeminiChatModel(useDiskForChatState);
-    }
-    throw new Error(`Unsupported model: ${selectedModel}`);
-}
-function getModelName(model) {
-    if (model.startsWith("claude"))
-        return "Claude";
-    if (model.startsWith("gemini"))
-        return "Gemini";
-    return "AI";
-}
-function concludeAgent(usageSummary) {
-    console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
-    (0, chat_1.cleanupBackupFiles)(process.cwd());
-}
+const agent_loop_1 = require("./agent-loop");
+const model_1 = require("./model");
+const state_1 = require("./state");
 function stopCriteria(userPrompt) {
     return userPrompt?.toLowerCase() === "stop";
 }
-async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDiskForChatState = false, initialPromptContent, }) {
-    let chatModel = createChatModel(useDiskForChatState, selectedModel);
-    let userPrompt = undefined;
+function concludeAgent(chatModel, useDiskForChatState) {
+    console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + chatModel.getUsageSummary())}`);
+    if (useDiskForChatState) {
+        (0, state_1.saveToDisk)(chatModel.messages);
+    }
+}
+async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }) {
+    let chatState;
+    if (useDiskForChatState) {
+        chatState = (0, state_1.loadChatState)();
+    }
+    let messagesLoadedFromDisk = chatState?.messages || [];
+    let chatModel = (0, model_1.createChatModel)(messagesLoadedFromDisk, selectedModel);
     if (initialPromptContent && chatModel.messages.length === 0) {
         chatModel.pushUserMessage(initialPromptContent);
         chatModel.askUserForInput = false;
@@ -71,22 +31,37 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
     else if (initialPromptContent && chatModel.messages.length > 0) {
         console.warn(`Ignoring initial prompt because we have existing messages.`);
     }
+    if (chatModel.askUserForInput) {
+        // Show last message to the user for context when we loaded from disk
+        const latest = chatModel.getHumanReadableLatestMessage();
+        if (latest) {
+            console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
+        }
+    }
     const handleSigInt = () => {
-        concludeAgent(chatModel.getUsageSummary());
+        concludeAgent(chatModel, useDiskForChatState);
         process.exit(0);
     };
     process.once("SIGINT", handleSigInt);
     process.once("SIGTERM", handleSigInt);
-    const ora = (await import("ora")).default;
-    if (chatModel.askUserForInput) {
-        // Show last message to the user for context when we loaded from disk
-        const latest = chatModel.getHumanReadableLatestMessage();
+    let userPrompt;
+    let reporterFunc = async (chatState, latest) => {
+        if (useDiskForChatState) {
+            (0, state_1.saveToDisk)(chatState.messages);
+        }
         if (latest) {
-            console.log(`${latest.role}: ${latest.textMessage}`);
+            console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
         }
+    };
+    const trace = (0, llm_1.createLangfuseTrace)({
+        name: "chat_agent",
+        input: initialPromptContent || "",
+        tags: [selectedModel, "chat_agent"],
+    });
+    if (trace) {
+        const traceUrl = trace.getTraceUrl();
+        console.log(`Starting ${selectedModel}: ${traceUrl}`);
     }
-    const systemPrompt = await (0, prompt_1.buildSystemPrompt)();
-    const { tools, toolExecutors } = getTools(selectedModel);
     while (!stopCriteria(userPrompt)) {
         if (chatModel.askUserForInput) {
             try {
@@ -97,7 +72,7 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
             catch (e) {
                 // https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
                 if (e instanceof Error && e.name === "ExitPromptError") {
-                    concludeAgent(chatModel.getUsageSummary());
+                    concludeAgent(chatModel, useDiskForChatState);
                     process.exit(0);
                 }
                 throw e;
@@ -105,47 +80,73 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
             if (!stopCriteria(userPrompt)) {
                 chatModel.pushUserMessage(userPrompt);
             }
-            continue;
         }
-        const toolCalls = chatModel.getPendingToolCalls();
-        if (toolCalls.length) {
-            const toolResults = [];
-            for (const call of toolCalls) {
-                const args = JSON.stringify(call.input);
-                console.log(`Executing tool ${call.name} with args: ${args}`);
-                const toolExecutor = toolExecutors[call.name];
-                if (!toolExecutor) {
-                    throw new Error(`Tool ${call.name} not found`);
-                }
-                const callResponse = await toolExecutor(call.input);
-                if (callResponse.isError) {
-                    ora(`Tool ${call.name} failed: ${callResponse.result}`).fail();
-                }
-                else {
-                    ora(`Tool ${call.name} completed`).succeed();
-                }
-                toolResults.push(callResponse);
-            }
-            chatModel.pushToolResultsMessage(toolCalls, toolResults);
-        }
-        const spinner = ora(`${getModelName(selectedModel)} is working...`).start();
-        const response = await chatModel.getLLMResponse({
-            systemPrompt,
-            tools: tools.map((tool) => (0, chat_1.zodToOpenAITool)(tool.schema)),
-            selectedModel,
-        });
-        spinner.stop();
-        if (!response) {
-            throw new Error("No response from LLM");
-        }
-        chatModel.pushMessage(response);
-        const latest = chatModel.getHumanReadableLatestMessage();
-        if (latest) {
-            console.log(`${latest.role}: ${latest.textMessage}`);
+        else {
+            // TODO: Should we pass a loader function? That would allow us to show a spinner
+            await (0, agent_loop_1.chatAgentLoop)({
+                chatModel,
+                selectedModel,
+                reporter: reporterFunc,
+                trace,
+            });
         }
     }
+    trace?.update({
+        output: {
+            messages: chatModel.messages,
+        },
+    });
+    await llm_1.langfuseInstance?.flushAsync();
     const usageSummary = chatModel.getUsageSummary();
-    concludeAgent(usageSummary);
-    return usageSummary;
+    console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
+}
+exports.runChatAgentForCLI = runChatAgentForCLI;
+const DASHBOARD_DOMAIN = process.env.DASHBOARD_DOMAIN || "https://dash.empirical.run";
+async function getChatSessionFromDashboard(chatSessionId) {
+    const response = await fetch(`${DASHBOARD_DOMAIN}/api/chat-sessions/${chatSessionId}`, {
+        headers: {
+            "Content-Type": "application/json",
+            Authorization: `weQPMWKT`,
+        },
+    });
+    const data = await response.json();
+    return data.data.chat_session;
+}
+async function checkoutBranch(branchName) {
+    // TODO: This assumes repoDir is process.cwd()
+    try {
+        (0, child_process_1.execSync)(`git checkout ${branchName}`);
+    }
+    catch (e) {
+        // If branch doesn't exist, create it
+        (0, child_process_1.execSync)(`git checkout -b ${branchName}`);
+    }
+}
+async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
+    const chatSession = await getChatSessionFromDashboard(chatSessionId);
+    const chatState = chatSession.chat_state;
+    const branchName = chatSession.branch_name;
+    await checkoutBranch(branchName);
+    let chatModel = (0, model_1.createChatModel)(chatState.messages, selectedModel);
+    let reporterFunc = async (chatState, latest) => {
+        const response = await fetch(`${DASHBOARD_DOMAIN}/api/chat-sessions/${chatSessionId}`, {
+            method: "PATCH",
+            body: JSON.stringify({
+                chat_state: chatState,
+                last_assistant_message: latest?.textMessage,
+            }),
+            headers: {
+                "Content-Type": "application/json",
+                Authorization: `weQPMWKT`,
+            },
+        });
+        const data = await response.json();
+        console.log(`Patch request sent for chat session: ${JSON.stringify(data)}`);
+    };
+    await (0, agent_loop_1.chatAgentLoop)({
+        chatModel,
+        selectedModel,
+        reporter: reporterFunc,
+    });
 }
-exports.chatAgent = chatAgent;
+exports.runChatAgentForDashboard = runChatAgentForDashboard;

package/dist/agent/chat/model.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+import { IChatModel } from "@empiricalrun/llm/chat";
+import { SupportedChatModels } from "./types";
+export declare function createChatModel(messages: any[], selectedModel: SupportedChatModels): IChatModel<any>;
+//# sourceMappingURL=model.d.ts.map

package/dist/agent/chat/model.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/model.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,UAAU,EACX,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE9C,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,GAAG,EAAE,EACf,aAAa,EAAE,mBAAmB,GACjC,UAAU,CAAC,GAAG,CAAC,CAQjB"}

package/dist/agent/chat/model.js ADDED Viewed

@@ -0,0 +1,14 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.createChatModel = void 0;
+const chat_1 = require("@empiricalrun/llm/chat");
+function createChatModel(messages, selectedModel) {
+    if (selectedModel.startsWith("claude")) {
+        return new chat_1.ClaudeChatModel(messages);
+    }
+    if (selectedModel.startsWith("gemini")) {
+        return new chat_1.GeminiChatModel(messages);
+    }
+    throw new Error(`Unsupported model: ${selectedModel}`);
+}
+exports.createChatModel = createChatModel;

package/dist/agent/chat/state.d.ts ADDED Viewed

@@ -0,0 +1,14 @@
+import { IChatModel } from "@empiricalrun/llm/chat";
+import { SupportedChatModels } from "./types";
+export declare const CURRENT_CHAT_STATE_VERSION = "20250327.1";
+export declare const CHAT_STATE_PATH: string;
+export type ChatStateOnDisk<T> = {
+    version: typeof CURRENT_CHAT_STATE_VERSION;
+    messages: T[];
+};
+export declare function createChatState(userPrompt: string, existingState: ChatStateOnDisk<any>, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
+export declare function createChatStateForMessages<T>(messages: any): ChatStateOnDisk<T>;
+export declare function chatStateFromModel<T>(chatModel: IChatModel<T>): ChatStateOnDisk<unknown>;
+export declare function loadChatState<T>(): ChatStateOnDisk<T> | undefined;
+export declare function saveToDisk<T>(messages: Array<T>): void;
+//# sourceMappingURL=state.d.ts.map

package/dist/agent/chat/state.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAKpD,OAAO,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE9C,eAAO,MAAM,0BAA0B,eAAe,CAAC;AAEvD,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,MAAM,MAAM,eAAe,CAAC,CAAC,IAAI;IAC/B,OAAO,EAAE,OAAO,0BAA0B,CAAC;IAC3C,QAAQ,EAAE,CAAC,EAAE,CAAC;CACf,CAAC;AAEF,wBAAgB,eAAe,CAC7B,UAAU,EAAE,MAAM,EAClB,aAAa,EAAE,eAAe,CAAC,GAAG,CAAC,EACnC,aAAa,EAAE,mBAAmB,4BAMnC;AAED,wBAAgB,0BAA0B,CAAC,CAAC,EAC1C,QAAQ,EAAE,GAAG,GACZ,eAAe,CAAC,CAAC,CAAC,CAMpB;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,4BAE7D;AAED,wBAAgB,aAAa,CAAC,CAAC,KAAK,eAAe,CAAC,CAAC,CAAC,GAAG,SAAS,CAajE;AAED,wBAAgB,UAAU,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,QAmB/C"}

package/dist/agent/chat/state.js ADDED Viewed

@@ -0,0 +1,63 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.saveToDisk = exports.loadChatState = exports.chatStateFromModel = exports.createChatStateForMessages = exports.createChatState = exports.CHAT_STATE_PATH = exports.CURRENT_CHAT_STATE_VERSION = void 0;
+const fs_1 = __importDefault(require("fs"));
+const path_1 = __importDefault(require("path"));
+const model_1 = require("./model");
+exports.CURRENT_CHAT_STATE_VERSION = "20250327.1";
+exports.CHAT_STATE_PATH = path_1.default.join(process.cwd(), ".empiricalrun", "last-chat.json");
+function createChatState(userPrompt, existingState, selectedModel) {
+    const messages = existingState.messages || [];
+    const chatModel = (0, model_1.createChatModel)(messages, selectedModel);
+    chatModel.pushUserMessage(userPrompt);
+    return createChatStateForMessages(chatModel.messages);
+}
+exports.createChatState = createChatState;
+function createChatStateForMessages(messages) {
+    // TODO: Add better types for messages
+    return {
+        version: exports.CURRENT_CHAT_STATE_VERSION,
+        messages: messages,
+    };
+}
+exports.createChatStateForMessages = createChatStateForMessages;
+function chatStateFromModel(chatModel) {
+    return createChatStateForMessages(chatModel.messages);
+}
+exports.chatStateFromModel = chatStateFromModel;
+function loadChatState() {
+    if (!fs_1.default.existsSync(exports.CHAT_STATE_PATH)) {
+        return undefined;
+    }
+    const raw = fs_1.default.readFileSync(exports.CHAT_STATE_PATH, "utf8");
+    const state = JSON.parse(raw);
+    if (state.version !== exports.CURRENT_CHAT_STATE_VERSION) {
+        throw new Error(`Unsupported chat state v${state.version}. Expected v${exports.CURRENT_CHAT_STATE_VERSION}.`);
+    }
+    return state;
+}
+exports.loadChatState = loadChatState;
+function saveToDisk(messages) {
+    const statePath = exports.CHAT_STATE_PATH;
+    let existingState = {
+        version: exports.CURRENT_CHAT_STATE_VERSION,
+        messages: [],
+    };
+    // Ensure directory exists before trying to read/write
+    const dirname = path_1.default.dirname(statePath);
+    if (!fs_1.default.existsSync(dirname)) {
+        fs_1.default.mkdirSync(dirname, { recursive: true });
+    }
+    if (fs_1.default.existsSync(statePath)) {
+        existingState = JSON.parse(fs_1.default.readFileSync(statePath, "utf8"));
+    }
+    const newState = {
+        ...existingState,
+        messages: messages,
+    };
+    fs_1.default.writeFileSync(statePath, JSON.stringify(newState, null, 2));
+}
+exports.saveToDisk = saveToDisk;

package/dist/agent/chat/types.d.ts ADDED Viewed

@@ -0,0 +1,9 @@
+import { ChatStateOnDisk } from "./state";
+export type SupportedChatModels = "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
+type LatestMessage = {
+    role: string;
+    textMessage: string;
+};
+export type ReporterFunction = (state: ChatStateOnDisk<any>, latestHumanReadableMessage: LatestMessage | undefined) => Promise<void>;
+export {};
+//# sourceMappingURL=types.d.ts.map

package/dist/agent/chat/types.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1C,MAAM,MAAM,mBAAmB,GAC3B,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;AAEnC,KAAK,aAAa,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAC7B,KAAK,EAAE,eAAe,CAAC,GAAG,CAAC,EAC3B,0BAA0B,EAAE,aAAa,GAAG,SAAS,KAClD,OAAO,CAAC,IAAI,CAAC,CAAC"}

package/dist/agent/chat/types.js ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ "use strict";
2	+ Object.defineProperty(exports, "__esModule", { value: true });

package/dist/agent/cua/index.d.ts CHANGED Viewed

@@ -1,8 +1,10 @@
+import { TraceClient } from "@empiricalrun/llm";
 import { Page } from "playwright";
 export declare function startPlaywrightCodegen(page: Page): Promise<void>;
-export declare function createTestUsingComputerUseAgent({ page, task, }: {
+export declare function createTestUsingComputerUseAgent({ page, task, trace, }: {
     page: Page;
     task: string;
+    trace?: TraceClient;
 }): Promise<{
     code: string;
     importPaths: string[];

package/dist/agent/cua/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AASA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAmMD"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAS/D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAMlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAqLD"}

package/dist/agent/cua/index.js CHANGED Viewed

@@ -5,8 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.createTestUsingComputerUseAgent = exports.startPlaywrightCodegen = void 0;
 const llm_1 = require("@empiricalrun/llm");
-const crypto_1 = __importDefault(require("crypto"));
-const logger_1 = require("../../bin/logger");
+const openai_1 = __importDefault(require("openai"));
 const utils_1 = require("../browsing/utils");
 const computer_1 = require("./computer");
 const model_1 = require("./model");
@@ -32,25 +31,18 @@ async function startPlaywrightCodegen(page) {
     await page.pause();
 }
 exports.startPlaywrightCodegen = startPlaywrightCodegen;
-async function createTestUsingComputerUseAgent({ page, task, }) {
+async function createTestUsingComputerUseAgent({ page, task, trace, }) {
     await (0, utils_1.injectPwLocatorGenerator)(page);
     const screenshotBytes = await (0, computer_1.getScreenshot)(page);
     const viewport = page.viewportSize();
     let screenWidth = viewport?.width || 1280;
     let screenHeight = viewport?.height || 720;
-    const logger = new logger_1.CustomLogger({ useReporter: false });
-    const trace = llm_1.langfuseInstance?.trace({
-        name: "computer-use-agent",
-        id: crypto_1.default.randomUUID(),
-        input: { task },
-    });
-    if (trace) {
-        const traceUrl = trace.getTraceUrl();
-        logger.log(`Starting computer use agent: ${traceUrl}`);
-    }
-    const span = trace?.span({
-        name: "initial-model-call",
-    });
+    const openAIClient = trace
+        ? (0, llm_1.observeOpenAI)(new openai_1.default(), {
+            generationName: `computer-use-agent`,
+            parent: trace,
+        })
+        : new openai_1.default();
     let response = await (0, model_1.callComputerUseModel)({
         input: [
             {
@@ -70,8 +62,8 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
         ],
         screenWidth,
         screenHeight,
+        openAIClient,
     });
-    span?.end({ output: response });
     let isTaskDone = false;
     let maxIterations = 15;
     let generatedCode = "";
@@ -80,10 +72,6 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
     while (!isTaskDone && iterationIndex < maxIterations) {
         actionsSummary.push(`\n# Agent iteration ${iterationIndex}`);
         iterationIndex++;
-        const iterationSpan = trace?.span({
-            name: `iteration-${iterationIndex}`,
-            input: { response },
-        });
         const computerCalls = response.output.filter((item) => item.type === "computer_call");
         const functionCalls = response.output.filter((item) => item.type === "function_call");
         if (computerCalls.length === 0 && functionCalls.length === 0) {
@@ -174,8 +162,8 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
             ],
             screenWidth,
             screenHeight,
+            openAIClient,
         });
-        iterationSpan?.end({ output: response });
     }
     if (!isTaskDone) {
         actionsSummary.push(`Max iteration limit hit: Task not done after ${maxIterations} iterations`);

package/dist/agent/cua/model.d.ts CHANGED Viewed

@@ -1,8 +1,10 @@
+import OpenAI from "openai";
 import { Response, ResponseInputItem } from "openai/resources/responses/responses.mjs";
-export declare function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }: {
+export declare function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, openAIClient, }: {
     input: ResponseInputItem[];
     previousResponseId?: string;
     screenWidth: number;
     screenHeight: number;
+    openAIClient: OpenAI;
 }): Promise<Response>;
 //# sourceMappingURL=model.d.ts.map

package/dist/agent/cua/model.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"~~AACA~~,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
1	+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}

package/dist/agent/cua/model.js CHANGED Viewed

@@ -1,10 +1,6 @@
 "use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.callComputerUseModel = void 0;
-const openai_1 = __importDefault(require("openai"));
 const INSTRUCTIONS = `You will be asked to execute some actions in a browser context.
 Don't ask the user for confirmations - just execute the actions.
@@ -30,9 +26,8 @@ const pageGotoTool = {
     },
     strict: true,
 };
-async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }) {
-    const openai = new openai_1.default();
-    return await openai.responses.create({
+async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, openAIClient, }) {
+    const response = await openAIClient.responses.create({
         model: "computer-use-preview-2025-03-11",
         previous_response_id: previousResponseId,
         parallel_tool_calls: false,
@@ -53,5 +48,6 @@ async function callComputerUseModel({ input, previousResponseId, screenWidth, sc
         input,
         truncation: "auto",
     });
+    return response;
 }
 exports.callComputerUseModel = callComputerUseModel;

package/dist/bin/index.js CHANGED Viewed

@@ -35,7 +35,7 @@ function setupProcessListeners(cleanup) {
         events.forEach((event) => process.removeListener(event, cleanup));
     };
 }
-async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath) {
+async function runChatAgent({ modelInput, chatSessionId, useDiskForChatState, initialPromptPath, }) {
     const MODEL_MAPPING = {
         "claude-3-7": "claude-3-7-sonnet-20250219",
         "3-7": "claude-3-7-sonnet-20250219",
@@ -46,6 +46,16 @@ async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath)
     if (modelInput && !MODEL_MAPPING[modelInput]) {
         throw new Error(`Invalid chat model: ${modelInput}`);
     }
+    const defaultModel = "claude-3-7-sonnet-20250219";
+    const specifiedModel = modelInput && MODEL_MAPPING[modelInput];
+    if (chatSessionId) {
+        // If --chat-session-id is provided, we run the chat agent for the dashboard
+        // and not CLI (where user can input their own prompt)
+        return await (0, chat_1.runChatAgentForDashboard)({
+            chatSessionId: Number(chatSessionId),
+            selectedModel: specifiedModel || defaultModel,
+        });
+    }
     let initialPromptContent = undefined;
     if (initialPromptPath) {
         try {
@@ -56,9 +66,9 @@ async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath)
             throw new Error(`Failed to read initial prompt file at ${initialPromptPath}: ${error.message}`);
         }
     }
-    return await (0, chat_1.chatAgent)({
-        selectedModel: modelInput ? MODEL_MAPPING[modelInput] : undefined,
-        useDiskForChatState,
+    return await (0, chat_1.runChatAgentForCLI)({
+        selectedModel: specifiedModel || defaultModel,
+        useDiskForChatState: useDiskForChatState || false,
         initialPromptContent,
     });
 }
@@ -198,6 +208,7 @@ async function main() {
         .option("--file <test-file>", "File path of the test case (inside tests dir)")
         .option("--suites <suites>", "Comma separated list of describe blocks")
         .option("--use-chat", "Use chat agent (and not the workflow)")
+        .option("--chat-session-id <chat-session-id>", "Identifier for chat session (fetched from dash.empirical.run)")
         .option("--use-disk-for-chat-state", "Save and load chat state from disk")
         .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-03-25)")
         .option("--initial-prompt <path>", "Path to an initial prompt file (e.g. prompt.md)")
@@ -224,7 +235,12 @@ async function main() {
     // Download the build if repo has a download script
     await (0, test_build_1.downloadBuild)(testGenConfig.build || {});
     if (completedOptions.useChat) {
-        await runChatAgent(completedOptions.chatModel, completedOptions.useDiskForChatState, completedOptions.initialPrompt);
+        await runChatAgent({
+            chatSessionId: completedOptions.chatSessionId,
+            modelInput: completedOptions.chatModel,
+            useDiskForChatState: completedOptions.useDiskForChatState,
+            initialPromptPath: completedOptions.initialPrompt,
+        });
         return;
     }
     let agentUsed;

package/dist/bin/utils/index.d.ts CHANGED Viewed

@@ -7,6 +7,7 @@ export interface CliOptions {
     useChat?: boolean;
     useDiskForChatState?: boolean;
     initialPrompt?: string;
+    chatSessionId?: string;
     chatModel?: "claude-3-7" | "3-7" | "claude-3-5" | "3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
 }
 export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;

package/dist/bin/utils/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;CACpC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;CACpC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.53.4",
+  "version": "0.53.6",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"
@@ -10,25 +10,13 @@
   },
   "main": "dist/index.js",
   "exports": {
-    "./agent/infer-agent": {
-      "types": "./dist/agent/infer-agent/index.d.ts",
-      "default": "./dist/agent/infer-agent/index.js"
-    },
     "./agent/master/run": {
       "types": "./dist/agent/master/run.d.ts",
       "default": "./dist/agent/master/run.js"
     },
-    "./agent/master/planner": {
-      "types": "./dist/agent/master/planner.d.ts",
-      "default": "./dist/agent/master/planner.js"
-    },
-    "./agent/enrich-prompt": {
-      "types": "./dist/agent/enrich-prompt/index.d.ts",
-      "default": "./dist/agent/enrich-prompt/index.js"
-    },
-    "./types": {
-      "types": "./dist/types/index.d.ts",
-      "default": "./dist/types/index.js"
+    "./chat/state": {
+      "types": "./dist/agent/chat/state.d.ts",
+      "default": "./dist/agent/chat/state.js"
     },
     "./utils": {
       "types": "./dist/utils/index.d.ts",
@@ -68,7 +56,7 @@
     "tsx": "^4.16.2",
     "typescript": "^5.3.3",
     "zod": "^3.23.8",
-    "@empiricalrun/llm": "^0.14.3",
+    "@empiricalrun/llm": "^0.14.5",
     "@empiricalrun/r2-uploader": "^0.3.8",
     "@empiricalrun/test-run": "^0.7.6"
   },