@empiricalrun/test-gen 0.53.4 → 0.53.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,29 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.53.6
4
+
5
+ ### Patch Changes
6
+
7
+ - a3a1863: refactor: split chatagent into cli runner and agent loop
8
+ - a32c076: feat: enabled LLM tracing for chat agent
9
+ - eb89698: feat: used langfuse LLM tracing for claude and gemini usage
10
+ - 9cc17cc: fix: import for chat state for dashboard
11
+ - 17fcf83: feat: chat agent fetches and reports to the dashboard
12
+ - 1c1fd00: feat: expose chatagent methods, starting with createChatState
13
+ - c4c5a32: refactor: make chatmodels stateless and elevate state to chatagent
14
+ - 48702e0: feat: checkout chat session branch before running chat agent
15
+ - Updated dependencies [eb89698]
16
+ - Updated dependencies [c4c5a32]
17
+ - @empiricalrun/llm@0.14.5
18
+
19
+ ## 0.53.5
20
+
21
+ ### Patch Changes
22
+
23
+ - 9f3cb10: feat: automated tracing for LLM call overlay dismiss
24
+ - Updated dependencies [9f3cb10]
25
+ - @empiricalrun/llm@0.14.4
26
+
3
27
  ## 0.53.4
4
28
 
5
29
  ### Patch Changes
@@ -0,0 +1,10 @@
1
+ import { TraceClient } from "@empiricalrun/llm";
2
+ import { IChatModel } from "@empiricalrun/llm/chat";
3
+ import { ReporterFunction, SupportedChatModels } from "./types";
4
+ export declare function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }: {
5
+ chatModel: IChatModel<any>;
6
+ selectedModel: SupportedChatModels;
7
+ reporter: ReporterFunction;
8
+ trace?: TraceClient;
9
+ }): Promise<void>;
10
+ //# sourceMappingURL=agent-loop.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAEL,UAAU,EAMX,MAAM,wBAAwB,CAAC;AAYhC,OAAO,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAyChE,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBA2CA"}
@@ -0,0 +1,91 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.chatAgentLoop = void 0;
4
+ const chat_1 = require("@empiricalrun/llm/chat");
5
+ const picocolors_1 = require("picocolors");
6
+ const web_1 = require("../../bin/utils/platform/web");
7
+ const commit_and_create_pr_1 = require("../../tools/commit-and-create-pr");
8
+ const diagnosis_fetcher_1 = require("../../tools/diagnosis-fetcher");
9
+ const grep_1 = require("../../tools/grep");
10
+ const test_gen_browser_1 = require("../../tools/test-gen-browser");
11
+ const test_run_1 = require("../../tools/test-run");
12
+ const test_run_fetcher_1 = require("../../tools/test-run-fetcher");
13
+ const prompt_1 = require("./prompt");
14
+ const state_1 = require("./state");
15
/**
 * Builds the tool list and the name -> executor lookup for the selected model.
 *
 * - Every model gets the grep, test-run, report-fetching, browser test-gen and
 *   commit/push tools.
 * - Models whose id starts with "gemini" additionally get the text editor tools,
 *   wrapped so that `web_1.validateTypescript` is passed as the second argument
 *   of each tool's `execute`.
 * - Models whose id starts with "claude" get a `str_replace_editor` executor
 *   only (no schema entry is added to `tools` for it here).
 *
 * The shared `chat_1.textEditorTools` objects are NOT mutated: wrapped copies
 * are created instead. Previously each call re-assigned `tool.execute` on the
 * shared objects, so calling this function repeatedly (once per agent loop
 * run) stacked another wrapper on top of the previous one every time.
 *
 * @param selectedModel Model identifier, e.g. "claude-..." or "gemini-...".
 * @returns `{ tools, toolExecutors }` where `toolExecutors` is keyed by
 *          `tool.schema.name`.
 */
function getTools(selectedModel) {
    const tools = [
        grep_1.grepTool,
        test_run_1.runTestTool,
        test_run_fetcher_1.fetchTestRunReportTool,
        diagnosis_fetcher_1.fetchDiagnosisReportTool,
        test_gen_browser_1.generateTestWithBrowserAgent,
        commit_and_create_pr_1.commitAndPushChangesTool,
    ];
    if (selectedModel.startsWith("gemini")) {
        // Claude will have its own built-in text editor tools
        const wrappedEditorTools = chat_1.textEditorTools.map((tool) => {
            // Capture the executor once so the wrapper always calls the
            // original implementation, never another wrapper.
            const originalExecute = tool.execute;
            return {
                ...tool,
                execute: (input) => originalExecute(input, web_1.validateTypescript),
            };
        });
        tools.push(...wrappedEditorTools);
    }
    const toolExecutors = {
        ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
    };
    if (selectedModel.startsWith("claude")) {
        toolExecutors.str_replace_editor = (input) => (0, chat_1.strReplaceEditorExecutor)(input, web_1.validateTypescript);
    }
    return { tools, toolExecutors };
}
40
/**
 * Returns the display label for a model identifier.
 *
 * Identifiers starting with "claude" map to "Claude", identifiers starting
 * with "gemini" map to "Gemini"; anything else falls back to "AI".
 *
 * @param model Model identifier string.
 * @returns The label used in progress messages.
 */
function getModelName(model) {
    const labelsByPrefix = [
        ["claude", "Claude"],
        ["gemini", "Gemini"],
    ];
    const match = labelsByPrefix.find(([prefix]) => model.startsWith(prefix));
    return match ? match[1] : "AI";
}
47
/**
 * Prints the given values to stdout as one gray line.
 * Arguments are joined with a single space before being colored.
 */
const log = (...args) => {
    const { gray } = picocolors_1;
    const line = args.join(" ");
    console.log(gray(line));
};
50
/**
 * Drives the chat model until it asks for user input.
 *
 * Each iteration:
 *   1. executes any pending tool calls sequentially and pushes their results
 *      back onto the chat model,
 *   2. requests the next LLM response (system prompt, tool schemas, model id
 *      and optional trace are forwarded),
 *   3. pushes that response onto the chat model and hands the resulting chat
 *      state plus the latest human-readable message to `reporter`.
 *
 * After the loop ends, `cleanupBackupFiles` is called with the current working
 * directory.
 *
 * @param chatModel Chat model holding the conversation; its `askUserForInput`
 *                  flag terminates the loop.
 * @param selectedModel Model identifier used for tool selection and the LLM call.
 * @param reporter Async callback invoked after every LLM response.
 * @param trace Optional trace client forwarded to `getLLMResponse`.
 * @throws Error when a requested tool has no executor, or when the LLM
 *         returns no response.
 */
async function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }) {
    const systemPrompt = await (0, prompt_1.buildSystemPrompt)();
    const toolkit = getTools(selectedModel);
    for (;;) {
        if (chatModel.askUserForInput) {
            break;
        }
        const pendingCalls = chatModel.getPendingToolCalls();
        if (pendingCalls.length) {
            const results = [];
            for (const pending of pendingCalls) {
                const serializedInput = JSON.stringify(pending.input);
                log(`Executing tool ${pending.name} with args: ${serializedInput}`);
                const execute = toolkit.toolExecutors[pending.name];
                if (!execute) {
                    throw new Error(`Tool ${pending.name} not found`);
                }
                const outcome = await execute(pending.input);
                const statusLine = outcome.isError
                    ? `Tool ${pending.name} failed: ${outcome.result}`
                    : `Tool ${pending.name} completed`;
                log(statusLine);
                results.push(outcome);
            }
            chatModel.pushToolResultsMessage(pendingCalls, results);
        }
        log(`${getModelName(selectedModel)} is working...`);
        const toolSchemas = toolkit.tools.map((tool) => (0, chat_1.zodToOpenAITool)(tool.schema));
        const llmResponse = await chatModel.getLLMResponse({
            systemPrompt,
            tools: toolSchemas,
            selectedModel,
            trace,
        });
        if (!llmResponse) {
            throw new Error("No response from LLM");
        }
        chatModel.pushMessage(llmResponse);
        const latestMessage = chatModel.getHumanReadableLatestMessage();
        const currentState = (0, state_1.chatStateFromModel)(chatModel);
        await reporter(currentState, latestMessage);
    }
    (0, chat_1.cleanupBackupFiles)(process.cwd());
}
91
+ exports.chatAgentLoop = chatAgentLoop;
@@ -1,6 +1,11 @@
1
- export declare function chatAgent({ selectedModel, useDiskForChatState, initialPromptContent, }: {
2
- selectedModel?: "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
3
- useDiskForChatState?: boolean;
4
- initialPromptContent?: string;
5
- }): Promise<string>;
1
+ import { SupportedChatModels } from "./types";
2
+ export declare function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }: {
3
+ selectedModel: SupportedChatModels;
4
+ useDiskForChatState: boolean;
5
+ initialPromptContent: string | undefined;
6
+ }): Promise<void>;
7
+ export declare function runChatAgentForDashboard({ chatSessionId, selectedModel, }: {
8
+ selectedModel: SupportedChatModels;
9
+ chatSessionId: number;
10
+ }): Promise<void>;
6
11
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AA2FA,wBAAsB,SAAS,CAAC,EAC9B,aAA4C,EAC5C,mBAA2B,EAC3B,oBAAoB,GACrB,EAAE;IACD,aAAa,CAAC,EACV,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;IACnC,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,oBAAoB,CAAC,EAAE,MAAM,CAAC;CAC/B,mBAyFA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AASA,OAAO,EAAoB,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAgBhE,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBAoFA;AA+BD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA8BA"}
@@ -1,69 +1,29 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.chatAgent = void 0;
4
- const chat_1 = require("@empiricalrun/llm/chat");
3
+ exports.runChatAgentForDashboard = exports.runChatAgentForCLI = void 0;
4
+ const llm_1 = require("@empiricalrun/llm");
5
+ const child_process_1 = require("child_process");
5
6
  const picocolors_1 = require("picocolors");
6
- const web_1 = require("../../bin/utils/platform/web");
7
7
  const human_in_the_loop_1 = require("../../human-in-the-loop");
8
- const commit_and_create_pr_1 = require("../../tools/commit-and-create-pr");
9
- const diagnosis_fetcher_1 = require("../../tools/diagnosis-fetcher");
10
- const grep_1 = require("../../tools/grep");
11
- const test_gen_browser_1 = require("../../tools/test-gen-browser");
12
- const test_run_1 = require("../../tools/test-run");
13
- const test_run_fetcher_1 = require("../../tools/test-run-fetcher");
14
- const prompt_1 = require("./prompt");
15
- function getTools(selectedModel) {
16
- let tools = [
17
- grep_1.grepTool,
18
- test_run_1.runTestTool,
19
- test_run_fetcher_1.fetchTestRunReportTool,
20
- diagnosis_fetcher_1.fetchDiagnosisReportTool,
21
- test_gen_browser_1.generateTestWithBrowserAgent,
22
- commit_and_create_pr_1.commitAndPushChangesTool,
23
- ];
24
- if (selectedModel.startsWith("gemini")) {
25
- // Claude will have its own built-in text editor tools
26
- chat_1.textEditorTools.forEach((tool) => {
27
- const originalExecute = tool.execute;
28
- tool.execute = (input) => originalExecute(input, web_1.validateTypescript);
29
- });
30
- tools.push(...chat_1.textEditorTools);
31
- }
32
- const toolExecutors = {
33
- // TODO: Add validateTypescript
34
- ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
35
- };
36
- if (selectedModel.startsWith("claude")) {
37
- toolExecutors.str_replace_editor = (input) => (0, chat_1.strReplaceEditorExecutor)(input, web_1.validateTypescript);
38
- }
39
- return { tools, toolExecutors };
40
- }
41
- function createChatModel(useDiskForChatState, selectedModel) {
42
- if (selectedModel.startsWith("claude")) {
43
- return new chat_1.ClaudeChatModel(useDiskForChatState);
44
- }
45
- if (selectedModel.startsWith("gemini")) {
46
- return new chat_1.GeminiChatModel(useDiskForChatState);
47
- }
48
- throw new Error(`Unsupported model: ${selectedModel}`);
49
- }
50
- function getModelName(model) {
51
- if (model.startsWith("claude"))
52
- return "Claude";
53
- if (model.startsWith("gemini"))
54
- return "Gemini";
55
- return "AI";
56
- }
57
- function concludeAgent(usageSummary) {
58
- console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
59
- (0, chat_1.cleanupBackupFiles)(process.cwd());
60
- }
8
+ const agent_loop_1 = require("./agent-loop");
9
+ const model_1 = require("./model");
10
+ const state_1 = require("./state");
61
11
  function stopCriteria(userPrompt) {
62
12
  return userPrompt?.toLowerCase() === "stop";
63
13
  }
64
- async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDiskForChatState = false, initialPromptContent, }) {
65
- let chatModel = createChatModel(useDiskForChatState, selectedModel);
66
- let userPrompt = undefined;
14
/**
 * Final bookkeeping for a CLI chat session.
 *
 * Prints the chat model's usage summary (gray, preceded by a blank line) and,
 * when disk persistence is enabled, saves the model's messages to disk.
 *
 * @param chatModel Chat model providing `getUsageSummary()` and `messages`.
 * @param useDiskForChatState Whether the messages should be saved to disk.
 */
function concludeAgent(chatModel, useDiskForChatState) {
    const summaryLine = (0, picocolors_1.gray)("Usage summary -> " + chatModel.getUsageSummary());
    console.log(`\n${summaryLine}`);
    if (!useDiskForChatState) {
        return;
    }
    (0, state_1.saveToDisk)(chatModel.messages);
}
20
+ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }) {
21
+ let chatState;
22
+ if (useDiskForChatState) {
23
+ chatState = (0, state_1.loadChatState)();
24
+ }
25
+ let messagesLoadedFromDisk = chatState?.messages || [];
26
+ let chatModel = (0, model_1.createChatModel)(messagesLoadedFromDisk, selectedModel);
67
27
  if (initialPromptContent && chatModel.messages.length === 0) {
68
28
  chatModel.pushUserMessage(initialPromptContent);
69
29
  chatModel.askUserForInput = false;
@@ -71,22 +31,37 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
71
31
  else if (initialPromptContent && chatModel.messages.length > 0) {
72
32
  console.warn(`Ignoring initial prompt because we have existing messages.`);
73
33
  }
34
+ if (chatModel.askUserForInput) {
35
+ // Show last message to the user for context when we loaded from disk
36
+ const latest = chatModel.getHumanReadableLatestMessage();
37
+ if (latest) {
38
+ console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
39
+ }
40
+ }
74
41
  const handleSigInt = () => {
75
- concludeAgent(chatModel.getUsageSummary());
42
+ concludeAgent(chatModel, useDiskForChatState);
76
43
  process.exit(0);
77
44
  };
78
45
  process.once("SIGINT", handleSigInt);
79
46
  process.once("SIGTERM", handleSigInt);
80
- const ora = (await import("ora")).default;
81
- if (chatModel.askUserForInput) {
82
- // Show last message to the user for context when we loaded from disk
83
- const latest = chatModel.getHumanReadableLatestMessage();
47
+ let userPrompt;
48
+ let reporterFunc = async (chatState, latest) => {
49
+ if (useDiskForChatState) {
50
+ (0, state_1.saveToDisk)(chatState.messages);
51
+ }
84
52
  if (latest) {
85
- console.log(`${latest.role}: ${latest.textMessage}`);
53
+ console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
86
54
  }
55
+ };
56
+ const trace = (0, llm_1.createLangfuseTrace)({
57
+ name: "chat_agent",
58
+ input: initialPromptContent || "",
59
+ tags: [selectedModel, "chat_agent"],
60
+ });
61
+ if (trace) {
62
+ const traceUrl = trace.getTraceUrl();
63
+ console.log(`Starting ${selectedModel}: ${traceUrl}`);
87
64
  }
88
- const systemPrompt = await (0, prompt_1.buildSystemPrompt)();
89
- const { tools, toolExecutors } = getTools(selectedModel);
90
65
  while (!stopCriteria(userPrompt)) {
91
66
  if (chatModel.askUserForInput) {
92
67
  try {
@@ -97,7 +72,7 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
97
72
  catch (e) {
98
73
  // https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
99
74
  if (e instanceof Error && e.name === "ExitPromptError") {
100
- concludeAgent(chatModel.getUsageSummary());
75
+ concludeAgent(chatModel, useDiskForChatState);
101
76
  process.exit(0);
102
77
  }
103
78
  throw e;
@@ -105,47 +80,73 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
105
80
  if (!stopCriteria(userPrompt)) {
106
81
  chatModel.pushUserMessage(userPrompt);
107
82
  }
108
- continue;
109
83
  }
110
- const toolCalls = chatModel.getPendingToolCalls();
111
- if (toolCalls.length) {
112
- const toolResults = [];
113
- for (const call of toolCalls) {
114
- const args = JSON.stringify(call.input);
115
- console.log(`Executing tool ${call.name} with args: ${args}`);
116
- const toolExecutor = toolExecutors[call.name];
117
- if (!toolExecutor) {
118
- throw new Error(`Tool ${call.name} not found`);
119
- }
120
- const callResponse = await toolExecutor(call.input);
121
- if (callResponse.isError) {
122
- ora(`Tool ${call.name} failed: ${callResponse.result}`).fail();
123
- }
124
- else {
125
- ora(`Tool ${call.name} completed`).succeed();
126
- }
127
- toolResults.push(callResponse);
128
- }
129
- chatModel.pushToolResultsMessage(toolCalls, toolResults);
130
- }
131
- const spinner = ora(`${getModelName(selectedModel)} is working...`).start();
132
- const response = await chatModel.getLLMResponse({
133
- systemPrompt,
134
- tools: tools.map((tool) => (0, chat_1.zodToOpenAITool)(tool.schema)),
135
- selectedModel,
136
- });
137
- spinner.stop();
138
- if (!response) {
139
- throw new Error("No response from LLM");
140
- }
141
- chatModel.pushMessage(response);
142
- const latest = chatModel.getHumanReadableLatestMessage();
143
- if (latest) {
144
- console.log(`${latest.role}: ${latest.textMessage}`);
84
+ else {
85
+ // TODO: Should we pass a loader function? That would allow us to show a spinner
86
+ await (0, agent_loop_1.chatAgentLoop)({
87
+ chatModel,
88
+ selectedModel,
89
+ reporter: reporterFunc,
90
+ trace,
91
+ });
145
92
  }
146
93
  }
94
+ trace?.update({
95
+ output: {
96
+ messages: chatModel.messages,
97
+ },
98
+ });
99
+ await llm_1.langfuseInstance?.flushAsync();
147
100
  const usageSummary = chatModel.getUsageSummary();
148
- concludeAgent(usageSummary);
149
- return usageSummary;
101
+ console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
102
+ }
103
+ exports.runChatAgentForCLI = runChatAgentForCLI;
104
+ const DASHBOARD_DOMAIN = process.env.DASHBOARD_DOMAIN || "https://dash.empirical.run";
105
/**
 * Fetches a chat session record from the dashboard API.
 *
 * Issues a GET to `${DASHBOARD_DOMAIN}/api/chat-sessions/<chatSessionId>` and
 * returns the `data.chat_session` field of the JSON response.
 *
 * @param chatSessionId Identifier of the chat session to load.
 * @returns The chat session object from the response payload.
 * @throws Error when the HTTP status is not successful, or when the payload
 *         does not contain `data.chat_session`. Previously both cases
 *         surfaced later as an unrelated TypeError.
 */
async function getChatSessionFromDashboard(chatSessionId) {
    const url = `${DASHBOARD_DOMAIN}/api/chat-sessions/${chatSessionId}`;
    const response = await fetch(url, {
        headers: {
            "Content-Type": "application/json",
            // NOTE(review): this credential-like value is hard-coded in the
            // published bundle; presumably it should come from configuration
            // or the environment - confirm and move it out of source.
            Authorization: `weQPMWKT`,
        },
    });
    if (!response.ok) {
        throw new Error(`Failed to fetch chat session ${chatSessionId}: ${response.status} ${response.statusText}`);
    }
    const data = await response.json();
    const chatSession = data?.data?.chat_session;
    if (!chatSession) {
        throw new Error(`Dashboard response for chat session ${chatSessionId} did not include a chat session`);
    }
    return chatSession;
}
115
/**
 * Checks out `branchName` in the git repository of the current working
 * directory, creating the branch when the plain checkout fails.
 *
 * The branch name originates from the dashboard response, so it is passed to
 * git as a discrete argument via `execFileSync` (no shell is involved) instead
 * of being interpolated into a command string, which allowed shell command
 * injection. Empty names and names starting with "-" are rejected so the value
 * can never be interpreted as a git option.
 *
 * @param branchName Name of the branch to check out or create.
 * @throws Error when `branchName` is not a usable branch name, or when both
 *         the checkout and the branch creation fail.
 */
async function checkoutBranch(branchName) {
    // TODO: This assumes repoDir is process.cwd()
    if (typeof branchName !== "string" ||
        branchName.length === 0 ||
        branchName.startsWith("-")) {
        throw new Error(`Invalid branch name: ${String(branchName)}`);
    }
    try {
        (0, child_process_1.execFileSync)("git", ["checkout", branchName]);
    }
    catch (e) {
        // Any checkout failure is treated as "branch doesn't exist": create it.
        // If creation fails as well, that error propagates to the caller.
        (0, child_process_1.execFileSync)("git", ["checkout", "-b", branchName]);
    }
}
125
/**
 * Runs the chat agent for a dashboard-managed chat session.
 *
 * Loads the session from the dashboard, checks out the session's branch,
 * rebuilds the chat model from the stored messages and runs the agent loop.
 * After every LLM response the reporter PATCHes the updated chat state and
 * the latest assistant text back to the dashboard and logs the JSON reply.
 *
 * @param chatSessionId Identifier of the dashboard chat session.
 * @param selectedModel Model identifier used to build and run the chat model.
 */
async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
    const session = await getChatSessionFromDashboard(chatSessionId);
    const { chat_state: storedState, branch_name: sessionBranch } = session;
    await checkoutBranch(sessionBranch);
    const chatModel = (0, model_1.createChatModel)(storedState.messages, selectedModel);
    const sessionUrl = () => `${DASHBOARD_DOMAIN}/api/chat-sessions/${chatSessionId}`;
    // Reports progress to the dashboard after each LLM response.
    const reportToDashboard = async (updatedState, latest) => {
        const payload = JSON.stringify({
            chat_state: updatedState,
            last_assistant_message: latest?.textMessage,
        });
        const patchResponse = await fetch(sessionUrl(), {
            method: "PATCH",
            body: payload,
            headers: {
                "Content-Type": "application/json",
                Authorization: `weQPMWKT`,
            },
        });
        const reply = await patchResponse.json();
        console.log(`Patch request sent for chat session: ${JSON.stringify(reply)}`);
    };
    await (0, agent_loop_1.chatAgentLoop)({
        chatModel,
        selectedModel,
        reporter: reportToDashboard,
    });
}
151
- exports.chatAgent = chatAgent;
152
+ exports.runChatAgentForDashboard = runChatAgentForDashboard;
@@ -0,0 +1,4 @@
1
+ import { IChatModel } from "@empiricalrun/llm/chat";
2
+ import { SupportedChatModels } from "./types";
3
+ export declare function createChatModel(messages: any[], selectedModel: SupportedChatModels): IChatModel<any>;
4
+ //# sourceMappingURL=model.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/model.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,UAAU,EACX,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE9C,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,GAAG,EAAE,EACf,aAAa,EAAE,mBAAmB,GACjC,UAAU,CAAC,GAAG,CAAC,CAQjB"}
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createChatModel = void 0;
4
+ const chat_1 = require("@empiricalrun/llm/chat");
5
/**
 * Instantiates the chat model implementation matching `selectedModel`.
 *
 * @param messages Existing conversation messages handed to the model constructor.
 * @param selectedModel Model identifier; the "claude" and "gemini" prefixes are supported.
 * @returns A `ClaudeChatModel` or `GeminiChatModel` seeded with `messages`.
 * @throws Error when the identifier matches neither supported prefix.
 */
function createChatModel(messages, selectedModel) {
    // Factories are lazy so only the matching constructor is ever touched.
    const factoriesByPrefix = [
        ["claude", () => new chat_1.ClaudeChatModel(messages)],
        ["gemini", () => new chat_1.GeminiChatModel(messages)],
    ];
    for (const [prefix, build] of factoriesByPrefix) {
        if (selectedModel.startsWith(prefix)) {
            return build();
        }
    }
    throw new Error(`Unsupported model: ${selectedModel}`);
}
14
+ exports.createChatModel = createChatModel;
@@ -0,0 +1,14 @@
1
+ import { IChatModel } from "@empiricalrun/llm/chat";
2
+ import { SupportedChatModels } from "./types";
3
+ export declare const CURRENT_CHAT_STATE_VERSION = "20250327.1";
4
+ export declare const CHAT_STATE_PATH: string;
5
+ export type ChatStateOnDisk<T> = {
6
+ version: typeof CURRENT_CHAT_STATE_VERSION;
7
+ messages: T[];
8
+ };
9
+ export declare function createChatState(userPrompt: string, existingState: ChatStateOnDisk<any>, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
10
+ export declare function createChatStateForMessages<T>(messages: any): ChatStateOnDisk<T>;
11
+ export declare function chatStateFromModel<T>(chatModel: IChatModel<T>): ChatStateOnDisk<unknown>;
12
+ export declare function loadChatState<T>(): ChatStateOnDisk<T> | undefined;
13
+ export declare function saveToDisk<T>(messages: Array<T>): void;
14
+ //# sourceMappingURL=state.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAKpD,OAAO,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE9C,eAAO,MAAM,0BAA0B,eAAe,CAAC;AAEvD,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,MAAM,MAAM,eAAe,CAAC,CAAC,IAAI;IAC/B,OAAO,EAAE,OAAO,0BAA0B,CAAC;IAC3C,QAAQ,EAAE,CAAC,EAAE,CAAC;CACf,CAAC;AAEF,wBAAgB,eAAe,CAC7B,UAAU,EAAE,MAAM,EAClB,aAAa,EAAE,eAAe,CAAC,GAAG,CAAC,EACnC,aAAa,EAAE,mBAAmB,4BAMnC;AAED,wBAAgB,0BAA0B,CAAC,CAAC,EAC1C,QAAQ,EAAE,GAAG,GACZ,eAAe,CAAC,CAAC,CAAC,CAMpB;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,4BAE7D;AAED,wBAAgB,aAAa,CAAC,CAAC,KAAK,eAAe,CAAC,CAAC,CAAC,GAAG,SAAS,CAajE;AAED,wBAAgB,UAAU,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,QAmB/C"}
@@ -0,0 +1,63 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.saveToDisk = exports.loadChatState = exports.chatStateFromModel = exports.createChatStateForMessages = exports.createChatState = exports.CHAT_STATE_PATH = exports.CURRENT_CHAT_STATE_VERSION = void 0;
7
+ const fs_1 = __importDefault(require("fs"));
8
+ const path_1 = __importDefault(require("path"));
9
+ const model_1 = require("./model");
10
+ exports.CURRENT_CHAT_STATE_VERSION = "20250327.1";
11
+ exports.CHAT_STATE_PATH = path_1.default.join(process.cwd(), ".empiricalrun", "last-chat.json");
12
/**
 * Produces a new chat state that extends `existingState` with a user prompt.
 *
 * A chat model is created from the existing messages (or an empty list when
 * there are none), the prompt is pushed as a user message, and the model's
 * resulting messages are wrapped into a chat state object.
 *
 * @param userPrompt Text of the user message to append.
 * @param existingState Chat state whose `messages` seed the model.
 * @param selectedModel Model identifier used to pick the chat model.
 * @returns Chat state built from the model's messages.
 */
function createChatState(userPrompt, existingState, selectedModel) {
    const { messages: storedMessages } = existingState;
    const seedMessages = storedMessages ? storedMessages : [];
    const model = (0, model_1.createChatModel)(seedMessages, selectedModel);
    model.pushUserMessage(userPrompt);
    return createChatStateForMessages(model.messages);
}
18
+ exports.createChatState = createChatState;
19
/**
 * Wraps a list of messages into a chat state object stamped with the current
 * chat state version.
 *
 * @param messages Messages to store in the state (passed through as-is).
 * @returns `{ version, messages }`.
 */
function createChatStateForMessages(messages) {
    // TODO: Add better types for messages
    const version = exports.CURRENT_CHAT_STATE_VERSION;
    return { version, messages };
}
26
+ exports.createChatStateForMessages = createChatStateForMessages;
27
/**
 * Builds a chat state object from a chat model's current messages.
 *
 * @param chatModel Chat model exposing a `messages` property.
 * @returns Chat state wrapping those messages.
 */
function chatStateFromModel(chatModel) {
    const { messages } = chatModel;
    return createChatStateForMessages(messages);
}
30
+ exports.chatStateFromModel = chatStateFromModel;
31
/**
 * Reads the persisted chat state from `CHAT_STATE_PATH`.
 *
 * @returns The parsed state, or undefined when the file does not exist.
 * @throws Error when the stored `version` differs from
 *         `CURRENT_CHAT_STATE_VERSION`; JSON parse errors propagate unchanged.
 */
function loadChatState() {
    const statePath = exports.CHAT_STATE_PATH;
    if (fs_1.default.existsSync(statePath)) {
        const contents = fs_1.default.readFileSync(statePath, "utf8");
        const parsed = JSON.parse(contents);
        const versionMatches = parsed.version === exports.CURRENT_CHAT_STATE_VERSION;
        if (versionMatches) {
            return parsed;
        }
        throw new Error(`Unsupported chat state v${parsed.version}. Expected v${exports.CURRENT_CHAT_STATE_VERSION}.`);
    }
    return undefined;
}
42
+ exports.loadChatState = loadChatState;
43
/**
 * Persists `messages` to the chat state file at `CHAT_STATE_PATH`.
 *
 * The parent directory is created when missing. If a state file already
 * exists, its other fields are kept and only `messages` is replaced;
 * otherwise a fresh state with the current version is written. Output is
 * JSON indented with two spaces.
 *
 * @param messages Messages to store.
 */
function saveToDisk(messages) {
    const target = exports.CHAT_STATE_PATH;
    // Ensure directory exists before trying to read/write
    const parentDir = path_1.default.dirname(target);
    if (!fs_1.default.existsSync(parentDir)) {
        fs_1.default.mkdirSync(parentDir, { recursive: true });
    }
    const previousState = fs_1.default.existsSync(target)
        ? JSON.parse(fs_1.default.readFileSync(target, "utf8"))
        : { version: exports.CURRENT_CHAT_STATE_VERSION, messages: [] };
    const serialized = JSON.stringify({ ...previousState, messages: messages }, null, 2);
    fs_1.default.writeFileSync(target, serialized);
}
63
+ exports.saveToDisk = saveToDisk;
@@ -0,0 +1,9 @@
1
+ import { ChatStateOnDisk } from "./state";
2
+ export type SupportedChatModels = "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
3
+ type LatestMessage = {
4
+ role: string;
5
+ textMessage: string;
6
+ };
7
+ export type ReporterFunction = (state: ChatStateOnDisk<any>, latestHumanReadableMessage: LatestMessage | undefined) => Promise<void>;
8
+ export {};
9
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1C,MAAM,MAAM,mBAAmB,GAC3B,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;AAEnC,KAAK,aAAa,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAC7B,KAAK,EAAE,eAAe,CAAC,GAAG,CAAC,EAC3B,0BAA0B,EAAE,aAAa,GAAG,SAAS,KAClD,OAAO,CAAC,IAAI,CAAC,CAAC"}
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -1,8 +1,10 @@
1
+ import { TraceClient } from "@empiricalrun/llm";
1
2
  import { Page } from "playwright";
2
3
  export declare function startPlaywrightCodegen(page: Page): Promise<void>;
3
- export declare function createTestUsingComputerUseAgent({ page, task, }: {
4
+ export declare function createTestUsingComputerUseAgent({ page, task, trace, }: {
4
5
  page: Page;
5
6
  task: string;
7
+ trace?: TraceClient;
6
8
  }): Promise<{
7
9
  code: string;
8
10
  importPaths: string[];
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AASA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAmMD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAS/D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAMlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAqLD"}
@@ -5,8 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.createTestUsingComputerUseAgent = exports.startPlaywrightCodegen = void 0;
7
7
  const llm_1 = require("@empiricalrun/llm");
8
- const crypto_1 = __importDefault(require("crypto"));
9
- const logger_1 = require("../../bin/logger");
8
+ const openai_1 = __importDefault(require("openai"));
10
9
  const utils_1 = require("../browsing/utils");
11
10
  const computer_1 = require("./computer");
12
11
  const model_1 = require("./model");
@@ -32,25 +31,18 @@ async function startPlaywrightCodegen(page) {
32
31
  await page.pause();
33
32
  }
34
33
  exports.startPlaywrightCodegen = startPlaywrightCodegen;
35
- async function createTestUsingComputerUseAgent({ page, task, }) {
34
+ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
36
35
  await (0, utils_1.injectPwLocatorGenerator)(page);
37
36
  const screenshotBytes = await (0, computer_1.getScreenshot)(page);
38
37
  const viewport = page.viewportSize();
39
38
  let screenWidth = viewport?.width || 1280;
40
39
  let screenHeight = viewport?.height || 720;
41
- const logger = new logger_1.CustomLogger({ useReporter: false });
42
- const trace = llm_1.langfuseInstance?.trace({
43
- name: "computer-use-agent",
44
- id: crypto_1.default.randomUUID(),
45
- input: { task },
46
- });
47
- if (trace) {
48
- const traceUrl = trace.getTraceUrl();
49
- logger.log(`Starting computer use agent: ${traceUrl}`);
50
- }
51
- const span = trace?.span({
52
- name: "initial-model-call",
53
- });
40
+ const openAIClient = trace
41
+ ? (0, llm_1.observeOpenAI)(new openai_1.default(), {
42
+ generationName: `computer-use-agent`,
43
+ parent: trace,
44
+ })
45
+ : new openai_1.default();
54
46
  let response = await (0, model_1.callComputerUseModel)({
55
47
  input: [
56
48
  {
@@ -70,8 +62,8 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
70
62
  ],
71
63
  screenWidth,
72
64
  screenHeight,
65
+ openAIClient,
73
66
  });
74
- span?.end({ output: response });
75
67
  let isTaskDone = false;
76
68
  let maxIterations = 15;
77
69
  let generatedCode = "";
@@ -80,10 +72,6 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
80
72
  while (!isTaskDone && iterationIndex < maxIterations) {
81
73
  actionsSummary.push(`\n# Agent iteration ${iterationIndex}`);
82
74
  iterationIndex++;
83
- const iterationSpan = trace?.span({
84
- name: `iteration-${iterationIndex}`,
85
- input: { response },
86
- });
87
75
  const computerCalls = response.output.filter((item) => item.type === "computer_call");
88
76
  const functionCalls = response.output.filter((item) => item.type === "function_call");
89
77
  if (computerCalls.length === 0 && functionCalls.length === 0) {
@@ -174,8 +162,8 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
174
162
  ],
175
163
  screenWidth,
176
164
  screenHeight,
165
+ openAIClient,
177
166
  });
178
- iterationSpan?.end({ output: response });
179
167
  }
180
168
  if (!isTaskDone) {
181
169
  actionsSummary.push(`Max iteration limit hit: Task not done after ${maxIterations} iterations`);
@@ -1,8 +1,10 @@
1
+ import OpenAI from "openai";
1
2
  import { Response, ResponseInputItem } from "openai/resources/responses/responses.mjs";
2
- export declare function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }: {
3
+ export declare function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, openAIClient, }: {
3
4
  input: ResponseInputItem[];
4
5
  previousResponseId?: string;
5
6
  screenWidth: number;
6
7
  screenHeight: number;
8
+ openAIClient: OpenAI;
7
9
  }): Promise<Response>;
8
10
  //# sourceMappingURL=model.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
1
+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
@@ -1,10 +1,6 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
3
  exports.callComputerUseModel = void 0;
7
- const openai_1 = __importDefault(require("openai"));
8
4
  const INSTRUCTIONS = `You will be asked to execute some actions in a browser context.
9
5
  Don't ask the user for confirmations - just execute the actions.
10
6
 
@@ -30,9 +26,8 @@ const pageGotoTool = {
30
26
  },
31
27
  strict: true,
32
28
  };
33
- async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }) {
34
- const openai = new openai_1.default();
35
- return await openai.responses.create({
29
+ async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, openAIClient, }) {
30
+ const response = await openAIClient.responses.create({
36
31
  model: "computer-use-preview-2025-03-11",
37
32
  previous_response_id: previousResponseId,
38
33
  parallel_tool_calls: false,
@@ -53,5 +48,6 @@ async function callComputerUseModel({ input, previousResponseId, screenWidth, sc
53
48
  input,
54
49
  truncation: "auto",
55
50
  });
51
+ return response;
56
52
  }
57
53
  exports.callComputerUseModel = callComputerUseModel;
package/dist/bin/index.js CHANGED
@@ -35,7 +35,7 @@ function setupProcessListeners(cleanup) {
35
35
  events.forEach((event) => process.removeListener(event, cleanup));
36
36
  };
37
37
  }
38
- async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath) {
38
+ async function runChatAgent({ modelInput, chatSessionId, useDiskForChatState, initialPromptPath, }) {
39
39
  const MODEL_MAPPING = {
40
40
  "claude-3-7": "claude-3-7-sonnet-20250219",
41
41
  "3-7": "claude-3-7-sonnet-20250219",
@@ -46,6 +46,16 @@ async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath)
46
46
  if (modelInput && !MODEL_MAPPING[modelInput]) {
47
47
  throw new Error(`Invalid chat model: ${modelInput}`);
48
48
  }
49
+ const defaultModel = "claude-3-7-sonnet-20250219";
50
+ const specifiedModel = modelInput && MODEL_MAPPING[modelInput];
51
+ if (chatSessionId) {
52
+ // If --chat-session-id is provided, we run the chat agent for the dashboard
53
+ // and not CLI (where user can input their own prompt)
54
+ return await (0, chat_1.runChatAgentForDashboard)({
55
+ chatSessionId: Number(chatSessionId),
56
+ selectedModel: specifiedModel || defaultModel,
57
+ });
58
+ }
49
59
  let initialPromptContent = undefined;
50
60
  if (initialPromptPath) {
51
61
  try {
@@ -56,9 +66,9 @@ async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath)
56
66
  throw new Error(`Failed to read initial prompt file at ${initialPromptPath}: ${error.message}`);
57
67
  }
58
68
  }
59
- return await (0, chat_1.chatAgent)({
60
- selectedModel: modelInput ? MODEL_MAPPING[modelInput] : undefined,
61
- useDiskForChatState,
69
+ return await (0, chat_1.runChatAgentForCLI)({
70
+ selectedModel: specifiedModel || defaultModel,
71
+ useDiskForChatState: useDiskForChatState || false,
62
72
  initialPromptContent,
63
73
  });
64
74
  }
@@ -198,6 +208,7 @@ async function main() {
198
208
  .option("--file <test-file>", "File path of the test case (inside tests dir)")
199
209
  .option("--suites <suites>", "Comma separated list of describe blocks")
200
210
  .option("--use-chat", "Use chat agent (and not the workflow)")
211
+ .option("--chat-session-id <chat-session-id>", "Identifier for chat session (fetched from dash.empirical.run)")
201
212
  .option("--use-disk-for-chat-state", "Save and load chat state from disk")
202
213
  .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-03-25)")
203
214
  .option("--initial-prompt <path>", "Path to an initial prompt file (e.g. prompt.md)")
@@ -224,7 +235,12 @@ async function main() {
224
235
  // Download the build if repo has a download script
225
236
  await (0, test_build_1.downloadBuild)(testGenConfig.build || {});
226
237
  if (completedOptions.useChat) {
227
- await runChatAgent(completedOptions.chatModel, completedOptions.useDiskForChatState, completedOptions.initialPrompt);
238
+ await runChatAgent({
239
+ chatSessionId: completedOptions.chatSessionId,
240
+ modelInput: completedOptions.chatModel,
241
+ useDiskForChatState: completedOptions.useDiskForChatState,
242
+ initialPromptPath: completedOptions.initialPrompt,
243
+ });
228
244
  return;
229
245
  }
230
246
  let agentUsed;
@@ -7,6 +7,7 @@ export interface CliOptions {
7
7
  useChat?: boolean;
8
8
  useDiskForChatState?: boolean;
9
9
  initialPrompt?: string;
10
+ chatSessionId?: string;
10
11
  chatModel?: "claude-3-7" | "3-7" | "claude-3-5" | "3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
11
12
  }
12
13
  export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;CACpC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;CACpC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.53.4",
3
+ "version": "0.53.6",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -10,25 +10,13 @@
10
10
  },
11
11
  "main": "dist/index.js",
12
12
  "exports": {
13
- "./agent/infer-agent": {
14
- "types": "./dist/agent/infer-agent/index.d.ts",
15
- "default": "./dist/agent/infer-agent/index.js"
16
- },
17
13
  "./agent/master/run": {
18
14
  "types": "./dist/agent/master/run.d.ts",
19
15
  "default": "./dist/agent/master/run.js"
20
16
  },
21
- "./agent/master/planner": {
22
- "types": "./dist/agent/master/planner.d.ts",
23
- "default": "./dist/agent/master/planner.js"
24
- },
25
- "./agent/enrich-prompt": {
26
- "types": "./dist/agent/enrich-prompt/index.d.ts",
27
- "default": "./dist/agent/enrich-prompt/index.js"
28
- },
29
- "./types": {
30
- "types": "./dist/types/index.d.ts",
31
- "default": "./dist/types/index.js"
17
+ "./chat/state": {
18
+ "types": "./dist/agent/chat/state.d.ts",
19
+ "default": "./dist/agent/chat/state.js"
32
20
  },
33
21
  "./utils": {
34
22
  "types": "./dist/utils/index.d.ts",
@@ -68,7 +56,7 @@
68
56
  "tsx": "^4.16.2",
69
57
  "typescript": "^5.3.3",
70
58
  "zod": "^3.23.8",
71
- "@empiricalrun/llm": "^0.14.3",
59
+ "@empiricalrun/llm": "^0.14.5",
72
60
  "@empiricalrun/r2-uploader": "^0.3.8",
73
61
  "@empiricalrun/test-run": "^0.7.6"
74
62
  },