@empiricalrun/test-gen 0.53.5 → 0.53.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.53.7
4
+
5
+ ### Patch Changes
6
+
7
+ - 4f14f11: fix: handle git remote urls that have access tokens in them
8
+
9
+ ## 0.53.6
10
+
11
+ ### Patch Changes
12
+
13
+ - a3a1863: refactor: split chatagent into cli runner and agent loop
14
+ - a32c076: feat: enabled LLM tracing for chat agent
15
+ - eb89698: feat: used langfuse LLM tracing for claude and gemini usage
16
+ - 9cc17cc: fix: import for chat state for dashboard
17
+ - 17fcf83: feat: chat agent fetches and reports to the dashboard
18
+ - 1c1fd00: feat: expose chatagent methods, starting with createChatState
19
+ - c4c5a32: refactor: make chatmodels stateless and elevate state to chatagent
20
+ - 48702e0: feat: checkout chat session branch before running chat agent
21
+ - Updated dependencies [eb89698]
22
+ - Updated dependencies [c4c5a32]
23
+ - @empiricalrun/llm@0.14.5
24
+
3
25
  ## 0.53.5
4
26
 
5
27
  ### Patch Changes
@@ -0,0 +1,10 @@
1
+ import { TraceClient } from "@empiricalrun/llm";
2
+ import { IChatModel } from "@empiricalrun/llm/chat";
3
+ import { ReporterFunction, SupportedChatModels } from "./types";
4
+ export declare function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }: {
5
+ chatModel: IChatModel<any>;
6
+ selectedModel: SupportedChatModels;
7
+ reporter: ReporterFunction;
8
+ trace?: TraceClient;
9
+ }): Promise<void>;
10
+ //# sourceMappingURL=agent-loop.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAEL,UAAU,EAMX,MAAM,wBAAwB,CAAC;AAYhC,OAAO,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAyChE,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBA2CA"}
@@ -0,0 +1,91 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.chatAgentLoop = void 0;
4
+ const chat_1 = require("@empiricalrun/llm/chat");
5
+ const picocolors_1 = require("picocolors");
6
+ const web_1 = require("../../bin/utils/platform/web");
7
+ const commit_and_create_pr_1 = require("../../tools/commit-and-create-pr");
8
+ const diagnosis_fetcher_1 = require("../../tools/diagnosis-fetcher");
9
+ const grep_1 = require("../../tools/grep");
10
+ const test_gen_browser_1 = require("../../tools/test-gen-browser");
11
+ const test_run_1 = require("../../tools/test-run");
12
+ const test_run_fetcher_1 = require("../../tools/test-run-fetcher");
13
+ const prompt_1 = require("./prompt");
14
+ const state_1 = require("./state");
15
+ function getTools(selectedModel) {
16
+ let tools = [
17
+ grep_1.grepTool,
18
+ test_run_1.runTestTool,
19
+ test_run_fetcher_1.fetchTestRunReportTool,
20
+ diagnosis_fetcher_1.fetchDiagnosisReportTool,
21
+ test_gen_browser_1.generateTestWithBrowserAgent,
22
+ commit_and_create_pr_1.commitAndPushChangesTool,
23
+ ];
24
+ if (selectedModel.startsWith("gemini")) {
25
+ // Claude will have its own built-in text editor tools
26
+ chat_1.textEditorTools.forEach((tool) => {
27
+ const originalExecute = tool.execute;
28
+ tool.execute = (input) => originalExecute(input, web_1.validateTypescript);
29
+ });
30
+ tools.push(...chat_1.textEditorTools);
31
+ }
32
+ const toolExecutors = {
33
+ ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
34
+ };
35
+ if (selectedModel.startsWith("claude")) {
36
+ toolExecutors.str_replace_editor = (input) => (0, chat_1.strReplaceEditorExecutor)(input, web_1.validateTypescript);
37
+ }
38
+ return { tools, toolExecutors };
39
+ }
40
+ function getModelName(model) {
41
+ if (model.startsWith("claude"))
42
+ return "Claude";
43
+ if (model.startsWith("gemini"))
44
+ return "Gemini";
45
+ return "AI";
46
+ }
47
+ const log = (...args) => {
48
+ console.log((0, picocolors_1.gray)(args.join(" ")));
49
+ };
50
+ async function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }) {
51
+ const systemPrompt = await (0, prompt_1.buildSystemPrompt)();
52
+ const { tools, toolExecutors } = getTools(selectedModel);
53
+ while (!chatModel.askUserForInput) {
54
+ const toolCalls = chatModel.getPendingToolCalls();
55
+ if (toolCalls.length) {
56
+ const toolResults = [];
57
+ for (const call of toolCalls) {
58
+ const args = JSON.stringify(call.input);
59
+ log(`Executing tool ${call.name} with args: ${args}`);
60
+ const toolExecutor = toolExecutors[call.name];
61
+ if (!toolExecutor) {
62
+ throw new Error(`Tool ${call.name} not found`);
63
+ }
64
+ const callResponse = await toolExecutor(call.input);
65
+ if (callResponse.isError) {
66
+ log(`Tool ${call.name} failed: ${callResponse.result}`);
67
+ }
68
+ else {
69
+ log(`Tool ${call.name} completed`);
70
+ }
71
+ toolResults.push(callResponse);
72
+ }
73
+ chatModel.pushToolResultsMessage(toolCalls, toolResults);
74
+ }
75
+ log(`${getModelName(selectedModel)} is working...`);
76
+ const response = await chatModel.getLLMResponse({
77
+ systemPrompt,
78
+ tools: tools.map((tool) => (0, chat_1.zodToOpenAITool)(tool.schema)),
79
+ selectedModel,
80
+ trace,
81
+ });
82
+ if (!response) {
83
+ throw new Error("No response from LLM");
84
+ }
85
+ chatModel.pushMessage(response);
86
+ const latest = chatModel.getHumanReadableLatestMessage();
87
+ await reporter((0, state_1.chatStateFromModel)(chatModel), latest);
88
+ }
89
+ (0, chat_1.cleanupBackupFiles)(process.cwd());
90
+ }
91
+ exports.chatAgentLoop = chatAgentLoop;
@@ -1,6 +1,11 @@
1
- export declare function chatAgent({ selectedModel, useDiskForChatState, initialPromptContent, }: {
2
- selectedModel?: "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
3
- useDiskForChatState?: boolean;
4
- initialPromptContent?: string;
5
- }): Promise<string>;
1
+ import { SupportedChatModels } from "./types";
2
+ export declare function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }: {
3
+ selectedModel: SupportedChatModels;
4
+ useDiskForChatState: boolean;
5
+ initialPromptContent: string | undefined;
6
+ }): Promise<void>;
7
+ export declare function runChatAgentForDashboard({ chatSessionId, selectedModel, }: {
8
+ selectedModel: SupportedChatModels;
9
+ chatSessionId: number;
10
+ }): Promise<void>;
6
11
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AA2FA,wBAAsB,SAAS,CAAC,EAC9B,aAA4C,EAC5C,mBAA2B,EAC3B,oBAAoB,GACrB,EAAE;IACD,aAAa,CAAC,EACV,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;IACnC,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,oBAAoB,CAAC,EAAE,MAAM,CAAC;CAC/B,mBAyFA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AASA,OAAO,EAAoB,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAgBhE,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBAoFA;AA+BD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA8BA"}
@@ -1,69 +1,29 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.chatAgent = void 0;
4
- const chat_1 = require("@empiricalrun/llm/chat");
3
+ exports.runChatAgentForDashboard = exports.runChatAgentForCLI = void 0;
4
+ const llm_1 = require("@empiricalrun/llm");
5
+ const child_process_1 = require("child_process");
5
6
  const picocolors_1 = require("picocolors");
6
- const web_1 = require("../../bin/utils/platform/web");
7
7
  const human_in_the_loop_1 = require("../../human-in-the-loop");
8
- const commit_and_create_pr_1 = require("../../tools/commit-and-create-pr");
9
- const diagnosis_fetcher_1 = require("../../tools/diagnosis-fetcher");
10
- const grep_1 = require("../../tools/grep");
11
- const test_gen_browser_1 = require("../../tools/test-gen-browser");
12
- const test_run_1 = require("../../tools/test-run");
13
- const test_run_fetcher_1 = require("../../tools/test-run-fetcher");
14
- const prompt_1 = require("./prompt");
15
- function getTools(selectedModel) {
16
- let tools = [
17
- grep_1.grepTool,
18
- test_run_1.runTestTool,
19
- test_run_fetcher_1.fetchTestRunReportTool,
20
- diagnosis_fetcher_1.fetchDiagnosisReportTool,
21
- test_gen_browser_1.generateTestWithBrowserAgent,
22
- commit_and_create_pr_1.commitAndPushChangesTool,
23
- ];
24
- if (selectedModel.startsWith("gemini")) {
25
- // Claude will have its own built-in text editor tools
26
- chat_1.textEditorTools.forEach((tool) => {
27
- const originalExecute = tool.execute;
28
- tool.execute = (input) => originalExecute(input, web_1.validateTypescript);
29
- });
30
- tools.push(...chat_1.textEditorTools);
31
- }
32
- const toolExecutors = {
33
- // TODO: Add validateTypescript
34
- ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
35
- };
36
- if (selectedModel.startsWith("claude")) {
37
- toolExecutors.str_replace_editor = (input) => (0, chat_1.strReplaceEditorExecutor)(input, web_1.validateTypescript);
38
- }
39
- return { tools, toolExecutors };
40
- }
41
- function createChatModel(useDiskForChatState, selectedModel) {
42
- if (selectedModel.startsWith("claude")) {
43
- return new chat_1.ClaudeChatModel(useDiskForChatState);
44
- }
45
- if (selectedModel.startsWith("gemini")) {
46
- return new chat_1.GeminiChatModel(useDiskForChatState);
47
- }
48
- throw new Error(`Unsupported model: ${selectedModel}`);
49
- }
50
- function getModelName(model) {
51
- if (model.startsWith("claude"))
52
- return "Claude";
53
- if (model.startsWith("gemini"))
54
- return "Gemini";
55
- return "AI";
56
- }
57
- function concludeAgent(usageSummary) {
58
- console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
59
- (0, chat_1.cleanupBackupFiles)(process.cwd());
60
- }
8
+ const agent_loop_1 = require("./agent-loop");
9
+ const model_1 = require("./model");
10
+ const state_1 = require("./state");
61
11
  function stopCriteria(userPrompt) {
62
12
  return userPrompt?.toLowerCase() === "stop";
63
13
  }
64
- async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDiskForChatState = false, initialPromptContent, }) {
65
- let chatModel = createChatModel(useDiskForChatState, selectedModel);
66
- let userPrompt = undefined;
14
+ function concludeAgent(chatModel, useDiskForChatState) {
15
+ console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + chatModel.getUsageSummary())}`);
16
+ if (useDiskForChatState) {
17
+ (0, state_1.saveToDisk)(chatModel.messages);
18
+ }
19
+ }
20
+ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }) {
21
+ let chatState;
22
+ if (useDiskForChatState) {
23
+ chatState = (0, state_1.loadChatState)();
24
+ }
25
+ let messagesLoadedFromDisk = chatState?.messages || [];
26
+ let chatModel = (0, model_1.createChatModel)(messagesLoadedFromDisk, selectedModel);
67
27
  if (initialPromptContent && chatModel.messages.length === 0) {
68
28
  chatModel.pushUserMessage(initialPromptContent);
69
29
  chatModel.askUserForInput = false;
@@ -71,22 +31,37 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
71
31
  else if (initialPromptContent && chatModel.messages.length > 0) {
72
32
  console.warn(`Ignoring initial prompt because we have existing messages.`);
73
33
  }
34
+ if (chatModel.askUserForInput) {
35
+ // Show last message to the user for context when we loaded from disk
36
+ const latest = chatModel.getHumanReadableLatestMessage();
37
+ if (latest) {
38
+ console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
39
+ }
40
+ }
74
41
  const handleSigInt = () => {
75
- concludeAgent(chatModel.getUsageSummary());
42
+ concludeAgent(chatModel, useDiskForChatState);
76
43
  process.exit(0);
77
44
  };
78
45
  process.once("SIGINT", handleSigInt);
79
46
  process.once("SIGTERM", handleSigInt);
80
- const ora = (await import("ora")).default;
81
- if (chatModel.askUserForInput) {
82
- // Show last message to the user for context when we loaded from disk
83
- const latest = chatModel.getHumanReadableLatestMessage();
47
+ let userPrompt;
48
+ let reporterFunc = async (chatState, latest) => {
49
+ if (useDiskForChatState) {
50
+ (0, state_1.saveToDisk)(chatState.messages);
51
+ }
84
52
  if (latest) {
85
- console.log(`${latest.role}: ${latest.textMessage}`);
53
+ console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
86
54
  }
55
+ };
56
+ const trace = (0, llm_1.createLangfuseTrace)({
57
+ name: "chat_agent",
58
+ input: initialPromptContent || "",
59
+ tags: [selectedModel, "chat_agent"],
60
+ });
61
+ if (trace) {
62
+ const traceUrl = trace.getTraceUrl();
63
+ console.log(`Starting ${selectedModel}: ${traceUrl}`);
87
64
  }
88
- const systemPrompt = await (0, prompt_1.buildSystemPrompt)();
89
- const { tools, toolExecutors } = getTools(selectedModel);
90
65
  while (!stopCriteria(userPrompt)) {
91
66
  if (chatModel.askUserForInput) {
92
67
  try {
@@ -97,7 +72,7 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
97
72
  catch (e) {
98
73
  // https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
99
74
  if (e instanceof Error && e.name === "ExitPromptError") {
100
- concludeAgent(chatModel.getUsageSummary());
75
+ concludeAgent(chatModel, useDiskForChatState);
101
76
  process.exit(0);
102
77
  }
103
78
  throw e;
@@ -105,47 +80,73 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
105
80
  if (!stopCriteria(userPrompt)) {
106
81
  chatModel.pushUserMessage(userPrompt);
107
82
  }
108
- continue;
109
83
  }
110
- const toolCalls = chatModel.getPendingToolCalls();
111
- if (toolCalls.length) {
112
- const toolResults = [];
113
- for (const call of toolCalls) {
114
- const args = JSON.stringify(call.input);
115
- console.log(`Executing tool ${call.name} with args: ${args}`);
116
- const toolExecutor = toolExecutors[call.name];
117
- if (!toolExecutor) {
118
- throw new Error(`Tool ${call.name} not found`);
119
- }
120
- const callResponse = await toolExecutor(call.input);
121
- if (callResponse.isError) {
122
- ora(`Tool ${call.name} failed: ${callResponse.result}`).fail();
123
- }
124
- else {
125
- ora(`Tool ${call.name} completed`).succeed();
126
- }
127
- toolResults.push(callResponse);
128
- }
129
- chatModel.pushToolResultsMessage(toolCalls, toolResults);
130
- }
131
- const spinner = ora(`${getModelName(selectedModel)} is working...`).start();
132
- const response = await chatModel.getLLMResponse({
133
- systemPrompt,
134
- tools: tools.map((tool) => (0, chat_1.zodToOpenAITool)(tool.schema)),
135
- selectedModel,
136
- });
137
- spinner.stop();
138
- if (!response) {
139
- throw new Error("No response from LLM");
140
- }
141
- chatModel.pushMessage(response);
142
- const latest = chatModel.getHumanReadableLatestMessage();
143
- if (latest) {
144
- console.log(`${latest.role}: ${latest.textMessage}`);
84
+ else {
85
+ // TODO: Should we pass a loader function? That would allow us to show a spinner
86
+ await (0, agent_loop_1.chatAgentLoop)({
87
+ chatModel,
88
+ selectedModel,
89
+ reporter: reporterFunc,
90
+ trace,
91
+ });
145
92
  }
146
93
  }
94
+ trace?.update({
95
+ output: {
96
+ messages: chatModel.messages,
97
+ },
98
+ });
99
+ await llm_1.langfuseInstance?.flushAsync();
147
100
  const usageSummary = chatModel.getUsageSummary();
148
- concludeAgent(usageSummary);
149
- return usageSummary;
101
+ console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
102
+ }
103
+ exports.runChatAgentForCLI = runChatAgentForCLI;
104
+ const DASHBOARD_DOMAIN = process.env.DASHBOARD_DOMAIN || "https://dash.empirical.run";
105
+ async function getChatSessionFromDashboard(chatSessionId) {
106
+ const response = await fetch(`${DASHBOARD_DOMAIN}/api/chat-sessions/${chatSessionId}`, {
107
+ headers: {
108
+ "Content-Type": "application/json",
109
+ Authorization: `weQPMWKT`,
110
+ },
111
+ });
112
+ const data = await response.json();
113
+ return data.data.chat_session;
114
+ }
115
+ async function checkoutBranch(branchName) {
116
+ // TODO: This assumes repoDir is process.cwd()
117
+ try {
118
+ (0, child_process_1.execSync)(`git checkout ${branchName}`);
119
+ }
120
+ catch (e) {
121
+ // If branch doesn't exist, create it
122
+ (0, child_process_1.execSync)(`git checkout -b ${branchName}`);
123
+ }
124
+ }
125
+ async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
126
+ const chatSession = await getChatSessionFromDashboard(chatSessionId);
127
+ const chatState = chatSession.chat_state;
128
+ const branchName = chatSession.branch_name;
129
+ await checkoutBranch(branchName);
130
+ let chatModel = (0, model_1.createChatModel)(chatState.messages, selectedModel);
131
+ let reporterFunc = async (chatState, latest) => {
132
+ const response = await fetch(`${DASHBOARD_DOMAIN}/api/chat-sessions/${chatSessionId}`, {
133
+ method: "PATCH",
134
+ body: JSON.stringify({
135
+ chat_state: chatState,
136
+ last_assistant_message: latest?.textMessage,
137
+ }),
138
+ headers: {
139
+ "Content-Type": "application/json",
140
+ Authorization: `weQPMWKT`,
141
+ },
142
+ });
143
+ const data = await response.json();
144
+ console.log(`Patch request sent for chat session: ${JSON.stringify(data)}`);
145
+ };
146
+ await (0, agent_loop_1.chatAgentLoop)({
147
+ chatModel,
148
+ selectedModel,
149
+ reporter: reporterFunc,
150
+ });
150
151
  }
151
- exports.chatAgent = chatAgent;
152
+ exports.runChatAgentForDashboard = runChatAgentForDashboard;
@@ -0,0 +1,4 @@
1
+ import { IChatModel } from "@empiricalrun/llm/chat";
2
+ import { SupportedChatModels } from "./types";
3
+ export declare function createChatModel(messages: any[], selectedModel: SupportedChatModels): IChatModel<any>;
4
+ //# sourceMappingURL=model.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/model.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,UAAU,EACX,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE9C,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,GAAG,EAAE,EACf,aAAa,EAAE,mBAAmB,GACjC,UAAU,CAAC,GAAG,CAAC,CAQjB"}
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createChatModel = void 0;
4
+ const chat_1 = require("@empiricalrun/llm/chat");
5
+ function createChatModel(messages, selectedModel) {
6
+ if (selectedModel.startsWith("claude")) {
7
+ return new chat_1.ClaudeChatModel(messages);
8
+ }
9
+ if (selectedModel.startsWith("gemini")) {
10
+ return new chat_1.GeminiChatModel(messages);
11
+ }
12
+ throw new Error(`Unsupported model: ${selectedModel}`);
13
+ }
14
+ exports.createChatModel = createChatModel;
@@ -0,0 +1,14 @@
1
+ import { IChatModel } from "@empiricalrun/llm/chat";
2
+ import { SupportedChatModels } from "./types";
3
+ export declare const CURRENT_CHAT_STATE_VERSION = "20250327.1";
4
+ export declare const CHAT_STATE_PATH: string;
5
+ export type ChatStateOnDisk<T> = {
6
+ version: typeof CURRENT_CHAT_STATE_VERSION;
7
+ messages: T[];
8
+ };
9
+ export declare function createChatState(userPrompt: string, existingState: ChatStateOnDisk<any>, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
10
+ export declare function createChatStateForMessages<T>(messages: any): ChatStateOnDisk<T>;
11
+ export declare function chatStateFromModel<T>(chatModel: IChatModel<T>): ChatStateOnDisk<unknown>;
12
+ export declare function loadChatState<T>(): ChatStateOnDisk<T> | undefined;
13
+ export declare function saveToDisk<T>(messages: Array<T>): void;
14
+ //# sourceMappingURL=state.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAKpD,OAAO,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE9C,eAAO,MAAM,0BAA0B,eAAe,CAAC;AAEvD,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,MAAM,MAAM,eAAe,CAAC,CAAC,IAAI;IAC/B,OAAO,EAAE,OAAO,0BAA0B,CAAC;IAC3C,QAAQ,EAAE,CAAC,EAAE,CAAC;CACf,CAAC;AAEF,wBAAgB,eAAe,CAC7B,UAAU,EAAE,MAAM,EAClB,aAAa,EAAE,eAAe,CAAC,GAAG,CAAC,EACnC,aAAa,EAAE,mBAAmB,4BAMnC;AAED,wBAAgB,0BAA0B,CAAC,CAAC,EAC1C,QAAQ,EAAE,GAAG,GACZ,eAAe,CAAC,CAAC,CAAC,CAMpB;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,4BAE7D;AAED,wBAAgB,aAAa,CAAC,CAAC,KAAK,eAAe,CAAC,CAAC,CAAC,GAAG,SAAS,CAajE;AAED,wBAAgB,UAAU,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,QAmB/C"}
@@ -0,0 +1,63 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.saveToDisk = exports.loadChatState = exports.chatStateFromModel = exports.createChatStateForMessages = exports.createChatState = exports.CHAT_STATE_PATH = exports.CURRENT_CHAT_STATE_VERSION = void 0;
7
+ const fs_1 = __importDefault(require("fs"));
8
+ const path_1 = __importDefault(require("path"));
9
+ const model_1 = require("./model");
10
+ exports.CURRENT_CHAT_STATE_VERSION = "20250327.1";
11
+ exports.CHAT_STATE_PATH = path_1.default.join(process.cwd(), ".empiricalrun", "last-chat.json");
12
+ function createChatState(userPrompt, existingState, selectedModel) {
13
+ const messages = existingState.messages || [];
14
+ const chatModel = (0, model_1.createChatModel)(messages, selectedModel);
15
+ chatModel.pushUserMessage(userPrompt);
16
+ return createChatStateForMessages(chatModel.messages);
17
+ }
18
+ exports.createChatState = createChatState;
19
+ function createChatStateForMessages(messages) {
20
+ // TODO: Add better types for messages
21
+ return {
22
+ version: exports.CURRENT_CHAT_STATE_VERSION,
23
+ messages: messages,
24
+ };
25
+ }
26
+ exports.createChatStateForMessages = createChatStateForMessages;
27
+ function chatStateFromModel(chatModel) {
28
+ return createChatStateForMessages(chatModel.messages);
29
+ }
30
+ exports.chatStateFromModel = chatStateFromModel;
31
+ function loadChatState() {
32
+ if (!fs_1.default.existsSync(exports.CHAT_STATE_PATH)) {
33
+ return undefined;
34
+ }
35
+ const raw = fs_1.default.readFileSync(exports.CHAT_STATE_PATH, "utf8");
36
+ const state = JSON.parse(raw);
37
+ if (state.version !== exports.CURRENT_CHAT_STATE_VERSION) {
38
+ throw new Error(`Unsupported chat state v${state.version}. Expected v${exports.CURRENT_CHAT_STATE_VERSION}.`);
39
+ }
40
+ return state;
41
+ }
42
+ exports.loadChatState = loadChatState;
43
+ function saveToDisk(messages) {
44
+ const statePath = exports.CHAT_STATE_PATH;
45
+ let existingState = {
46
+ version: exports.CURRENT_CHAT_STATE_VERSION,
47
+ messages: [],
48
+ };
49
+ // Ensure directory exists before trying to read/write
50
+ const dirname = path_1.default.dirname(statePath);
51
+ if (!fs_1.default.existsSync(dirname)) {
52
+ fs_1.default.mkdirSync(dirname, { recursive: true });
53
+ }
54
+ if (fs_1.default.existsSync(statePath)) {
55
+ existingState = JSON.parse(fs_1.default.readFileSync(statePath, "utf8"));
56
+ }
57
+ const newState = {
58
+ ...existingState,
59
+ messages: messages,
60
+ };
61
+ fs_1.default.writeFileSync(statePath, JSON.stringify(newState, null, 2));
62
+ }
63
+ exports.saveToDisk = saveToDisk;
@@ -0,0 +1,9 @@
1
+ import { ChatStateOnDisk } from "./state";
2
+ export type SupportedChatModels = "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
3
+ type LatestMessage = {
4
+ role: string;
5
+ textMessage: string;
6
+ };
7
+ export type ReporterFunction = (state: ChatStateOnDisk<any>, latestHumanReadableMessage: LatestMessage | undefined) => Promise<void>;
8
+ export {};
9
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1C,MAAM,MAAM,mBAAmB,GAC3B,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;AAEnC,KAAK,aAAa,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAC7B,KAAK,EAAE,eAAe,CAAC,GAAG,CAAC,EAC3B,0BAA0B,EAAE,aAAa,GAAG,SAAS,KAClD,OAAO,CAAC,IAAI,CAAC,CAAC"}
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
package/dist/bin/index.js CHANGED
@@ -35,7 +35,7 @@ function setupProcessListeners(cleanup) {
35
35
  events.forEach((event) => process.removeListener(event, cleanup));
36
36
  };
37
37
  }
38
- async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath) {
38
+ async function runChatAgent({ modelInput, chatSessionId, useDiskForChatState, initialPromptPath, }) {
39
39
  const MODEL_MAPPING = {
40
40
  "claude-3-7": "claude-3-7-sonnet-20250219",
41
41
  "3-7": "claude-3-7-sonnet-20250219",
@@ -46,6 +46,16 @@ async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath)
46
46
  if (modelInput && !MODEL_MAPPING[modelInput]) {
47
47
  throw new Error(`Invalid chat model: ${modelInput}`);
48
48
  }
49
+ const defaultModel = "claude-3-7-sonnet-20250219";
50
+ const specifiedModel = modelInput && MODEL_MAPPING[modelInput];
51
+ if (chatSessionId) {
52
+ // If --chat-session-id is provided, we run the chat agent for the dashboard
53
+ // and not CLI (where user can input their own prompt)
54
+ return await (0, chat_1.runChatAgentForDashboard)({
55
+ chatSessionId: Number(chatSessionId),
56
+ selectedModel: specifiedModel || defaultModel,
57
+ });
58
+ }
49
59
  let initialPromptContent = undefined;
50
60
  if (initialPromptPath) {
51
61
  try {
@@ -56,9 +66,9 @@ async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath)
56
66
  throw new Error(`Failed to read initial prompt file at ${initialPromptPath}: ${error.message}`);
57
67
  }
58
68
  }
59
- return await (0, chat_1.chatAgent)({
60
- selectedModel: modelInput ? MODEL_MAPPING[modelInput] : undefined,
61
- useDiskForChatState,
69
+ return await (0, chat_1.runChatAgentForCLI)({
70
+ selectedModel: specifiedModel || defaultModel,
71
+ useDiskForChatState: useDiskForChatState || false,
62
72
  initialPromptContent,
63
73
  });
64
74
  }
@@ -198,6 +208,7 @@ async function main() {
198
208
  .option("--file <test-file>", "File path of the test case (inside tests dir)")
199
209
  .option("--suites <suites>", "Comma separated list of describe blocks")
200
210
  .option("--use-chat", "Use chat agent (and not the workflow)")
211
+ .option("--chat-session-id <chat-session-id>", "Identifier for chat session (fetched from dash.empirical.run)")
201
212
  .option("--use-disk-for-chat-state", "Save and load chat state from disk")
202
213
  .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-03-25)")
203
214
  .option("--initial-prompt <path>", "Path to an initial prompt file (e.g. prompt.md)")
@@ -224,7 +235,12 @@ async function main() {
224
235
  // Download the build if repo has a download script
225
236
  await (0, test_build_1.downloadBuild)(testGenConfig.build || {});
226
237
  if (completedOptions.useChat) {
227
- await runChatAgent(completedOptions.chatModel, completedOptions.useDiskForChatState, completedOptions.initialPrompt);
238
+ await runChatAgent({
239
+ chatSessionId: completedOptions.chatSessionId,
240
+ modelInput: completedOptions.chatModel,
241
+ useDiskForChatState: completedOptions.useDiskForChatState,
242
+ initialPromptPath: completedOptions.initialPrompt,
243
+ });
228
244
  return;
229
245
  }
230
246
  let agentUsed;
@@ -7,6 +7,7 @@ export interface CliOptions {
7
7
  useChat?: boolean;
8
8
  useDiskForChatState?: boolean;
9
9
  initialPrompt?: string;
10
+ chatSessionId?: string;
10
11
  chatModel?: "claude-3-7" | "3-7" | "claude-3-5" | "3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
11
12
  }
12
13
  export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;CACpC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;CACpC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
@@ -1,3 +1,9 @@
1
1
  import type { Tool } from "@empiricalrun/llm/chat";
2
+ interface GitHubRepoInfo {
3
+ owner: string;
4
+ repo: string;
5
+ }
6
+ export declare function parseGitHubUrl(url: string): GitHubRepoInfo;
2
7
  export declare const commitAndPushChangesTool: Tool;
8
+ export {};
3
9
  //# sourceMappingURL=commit-and-create-pr.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"commit-and-create-pr.d.ts","sourceRoot":"","sources":["../../src/tools/commit-and-create-pr.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAyBnD,eAAO,MAAM,wBAAwB,EAAE,IAwFtC,CAAC"}
1
+ {"version":3,"file":"commit-and-create-pr.d.ts","sourceRoot":"","sources":["../../src/tools/commit-and-create-pr.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAUnD,UAAU,cAAc;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,cAAc,CAiB1D;AAiBD,eAAO,MAAM,wBAAwB,EAAE,IAqFtC,CAAC"}
@@ -3,13 +3,28 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.commitAndPushChangesTool = void 0;
6
+ exports.commitAndPushChangesTool = exports.parseGitHubUrl = void 0;
7
7
  const child_process_1 = require("child_process");
8
8
  const crypto_1 = __importDefault(require("crypto"));
9
9
  const zod_1 = require("zod");
10
10
  const utils_1 = require("./utils");
11
11
  const GIT_USER_NAME = "empiricalrun[bot]";
12
12
  const GIT_USER_EMAIL = "180257021+empiricalrun[bot]@users.noreply.github.com";
13
+ function parseGitHubUrl(url) {
14
+ const githubIndex = url.indexOf("github.com");
15
+ if (githubIndex === -1) {
16
+ throw new Error("Invalid GitHub repository URL");
17
+ }
18
+ const [owner, repo] = url
19
+ .substring(githubIndex + "github.com/".length)
20
+ .replace(".git", "")
21
+ .split("/");
22
+ if (!owner || !repo) {
23
+ throw new Error("Invalid GitHub repository URL format - missing owner or repo");
24
+ }
25
+ return { owner, repo };
26
+ }
27
+ exports.parseGitHubUrl = parseGitHubUrl;
13
28
  const CommitAndPushChangesSchema = zod_1.z.object({
14
29
  commitMessage: zod_1.z
15
30
  .string()
@@ -57,10 +72,7 @@ Returns the URL of the created or updated pull request.`,
57
72
  const repoUrl = (0, child_process_1.execSync)("git config --get remote.origin.url")
58
73
  .toString()
59
74
  .trim();
60
- const [owner, repo] = repoUrl
61
- .replace("https://github.com/", "")
62
- .replace(".git", "")
63
- .split("/");
75
+ const { owner, repo } = parseGitHubUrl(repoUrl);
64
76
  const existingPRs = (await (0, utils_1.callGitHubProxy)({
65
77
  method: "GET",
66
78
  url: `https://api.github.com/repos/${owner}/${repo}/pulls`,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.53.5",
3
+ "version": "0.53.7",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -10,25 +10,13 @@
10
10
  },
11
11
  "main": "dist/index.js",
12
12
  "exports": {
13
- "./agent/infer-agent": {
14
- "types": "./dist/agent/infer-agent/index.d.ts",
15
- "default": "./dist/agent/infer-agent/index.js"
16
- },
17
13
  "./agent/master/run": {
18
14
  "types": "./dist/agent/master/run.d.ts",
19
15
  "default": "./dist/agent/master/run.js"
20
16
  },
21
- "./agent/master/planner": {
22
- "types": "./dist/agent/master/planner.d.ts",
23
- "default": "./dist/agent/master/planner.js"
24
- },
25
- "./agent/enrich-prompt": {
26
- "types": "./dist/agent/enrich-prompt/index.d.ts",
27
- "default": "./dist/agent/enrich-prompt/index.js"
28
- },
29
- "./types": {
30
- "types": "./dist/types/index.d.ts",
31
- "default": "./dist/types/index.js"
17
+ "./chat/state": {
18
+ "types": "./dist/agent/chat/state.d.ts",
19
+ "default": "./dist/agent/chat/state.js"
32
20
  },
33
21
  "./utils": {
34
22
  "types": "./dist/utils/index.d.ts",
@@ -68,7 +56,7 @@
68
56
  "tsx": "^4.16.2",
69
57
  "typescript": "^5.3.3",
70
58
  "zod": "^3.23.8",
71
- "@empiricalrun/llm": "^0.14.4",
59
+ "@empiricalrun/llm": "^0.14.5",
72
60
  "@empiricalrun/r2-uploader": "^0.3.8",
73
61
  "@empiricalrun/test-run": "^0.7.6"
74
62
  },