@empiricalrun/test-gen 0.53.5 → 0.53.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/agent/chat/agent-loop.d.ts +10 -0
- package/dist/agent/chat/agent-loop.d.ts.map +1 -0
- package/dist/agent/chat/agent-loop.js +91 -0
- package/dist/agent/chat/index.d.ts +10 -5
- package/dist/agent/chat/index.d.ts.map +1 -1
- package/dist/agent/chat/index.js +108 -107
- package/dist/agent/chat/model.d.ts +4 -0
- package/dist/agent/chat/model.d.ts.map +1 -0
- package/dist/agent/chat/model.js +14 -0
- package/dist/agent/chat/state.d.ts +14 -0
- package/dist/agent/chat/state.d.ts.map +1 -0
- package/dist/agent/chat/state.js +63 -0
- package/dist/agent/chat/types.d.ts +9 -0
- package/dist/agent/chat/types.d.ts.map +1 -0
- package/dist/agent/chat/types.js +2 -0
- package/dist/bin/index.js +21 -5
- package/dist/bin/utils/index.d.ts +1 -0
- package/dist/bin/utils/index.d.ts.map +1 -1
- package/package.json +5 -17
package/CHANGELOG.md
CHANGED

@@ -1,5 +1,21 @@
 # @empiricalrun/test-gen
 
+## 0.53.6
+
+### Patch Changes
+
+- a3a1863: refactor: split chatagent into cli runner and agent loop
+- a32c076: feat: enabled LLM tracing for chat agent
+- eb89698: feat: used langfuse LLM tracing for claude and gemini usage
+- 9cc17cc: fix: import for chat state for dashboard
+- 17fcf83: feat: chat agent fetches and reports to the dashboard
+- 1c1fd00: feat: expose chatagent methods, starting with createChatState
+- c4c5a32: refactor: make chatmodels stateless and elevate state to chatagent
+- 48702e0: feat: checkout chat session branch before running chat agent
+- Updated dependencies [eb89698]
+- Updated dependencies [c4c5a32]
+  - @empiricalrun/llm@0.14.5
+
 ## 0.53.5
 
 ### Patch Changes

package/dist/agent/chat/agent-loop.d.ts
ADDED

@@ -0,0 +1,10 @@
+import { TraceClient } from "@empiricalrun/llm";
+import { IChatModel } from "@empiricalrun/llm/chat";
+import { ReporterFunction, SupportedChatModels } from "./types";
+export declare function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }: {
+    chatModel: IChatModel<any>;
+    selectedModel: SupportedChatModels;
+    reporter: ReporterFunction;
+    trace?: TraceClient;
+}): Promise<void>;
+//# sourceMappingURL=agent-loop.d.ts.map
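
This declaration is the new seam between the CLI runner and the loop: `chatAgentLoop` drives one stretch of assistant turns and tool calls until the model asks for user input again, reporting state after each response. A minimal sketch of how it might be wired up, assuming in-repo access to the sibling modules (none of these paths are part of the public `exports` map shown later in this diff); `runOnce` is a hypothetical helper:

```ts
import { TraceClient } from "@empiricalrun/llm";
import { chatAgentLoop } from "./agent-loop";
import { createChatModel } from "./model";
import { saveToDisk } from "./state";
import { ReporterFunction } from "./types";

// Hypothetical wiring that mirrors what runChatAgentForCLI does internally.
async function runOnce(prompt: string, trace?: TraceClient) {
  const chatModel = createChatModel([], "claude-3-7-sonnet-20250219");
  chatModel.pushUserMessage(prompt);
  chatModel.askUserForInput = false;

  // Called after every assistant turn; here it just persists state and echoes the reply.
  const reporter: ReporterFunction = async (state, latest) => {
    saveToDisk(state.messages);
    if (latest) {
      console.log(`${latest.role}: ${latest.textMessage}`);
    }
  };

  await chatAgentLoop({
    chatModel,
    selectedModel: "claude-3-7-sonnet-20250219",
    reporter,
    trace,
  });
}
```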

package/dist/agent/chat/agent-loop.d.ts.map
ADDED

@@ -0,0 +1 @@
+{"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAEL,UAAU,EAMX,MAAM,wBAAwB,CAAC;AAYhC,OAAO,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAyChE,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBA2CA"}

package/dist/agent/chat/agent-loop.js
ADDED

@@ -0,0 +1,91 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.chatAgentLoop = void 0;
+const chat_1 = require("@empiricalrun/llm/chat");
+const picocolors_1 = require("picocolors");
+const web_1 = require("../../bin/utils/platform/web");
+const commit_and_create_pr_1 = require("../../tools/commit-and-create-pr");
+const diagnosis_fetcher_1 = require("../../tools/diagnosis-fetcher");
+const grep_1 = require("../../tools/grep");
+const test_gen_browser_1 = require("../../tools/test-gen-browser");
+const test_run_1 = require("../../tools/test-run");
+const test_run_fetcher_1 = require("../../tools/test-run-fetcher");
+const prompt_1 = require("./prompt");
+const state_1 = require("./state");
+function getTools(selectedModel) {
+    let tools = [
+        grep_1.grepTool,
+        test_run_1.runTestTool,
+        test_run_fetcher_1.fetchTestRunReportTool,
+        diagnosis_fetcher_1.fetchDiagnosisReportTool,
+        test_gen_browser_1.generateTestWithBrowserAgent,
+        commit_and_create_pr_1.commitAndPushChangesTool,
+    ];
+    if (selectedModel.startsWith("gemini")) {
+        // Claude will have its own built-in text editor tools
+        chat_1.textEditorTools.forEach((tool) => {
+            const originalExecute = tool.execute;
+            tool.execute = (input) => originalExecute(input, web_1.validateTypescript);
+        });
+        tools.push(...chat_1.textEditorTools);
+    }
+    const toolExecutors = {
+        ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
+    };
+    if (selectedModel.startsWith("claude")) {
+        toolExecutors.str_replace_editor = (input) => (0, chat_1.strReplaceEditorExecutor)(input, web_1.validateTypescript);
+    }
+    return { tools, toolExecutors };
+}
+function getModelName(model) {
+    if (model.startsWith("claude"))
+        return "Claude";
+    if (model.startsWith("gemini"))
+        return "Gemini";
+    return "AI";
+}
+const log = (...args) => {
+    console.log((0, picocolors_1.gray)(args.join(" ")));
+};
+async function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }) {
+    const systemPrompt = await (0, prompt_1.buildSystemPrompt)();
+    const { tools, toolExecutors } = getTools(selectedModel);
+    while (!chatModel.askUserForInput) {
+        const toolCalls = chatModel.getPendingToolCalls();
+        if (toolCalls.length) {
+            const toolResults = [];
+            for (const call of toolCalls) {
+                const args = JSON.stringify(call.input);
+                log(`Executing tool ${call.name} with args: ${args}`);
+                const toolExecutor = toolExecutors[call.name];
+                if (!toolExecutor) {
+                    throw new Error(`Tool ${call.name} not found`);
+                }
+                const callResponse = await toolExecutor(call.input);
+                if (callResponse.isError) {
+                    log(`Tool ${call.name} failed: ${callResponse.result}`);
+                }
+                else {
+                    log(`Tool ${call.name} completed`);
+                }
+                toolResults.push(callResponse);
+            }
+            chatModel.pushToolResultsMessage(toolCalls, toolResults);
+        }
+        log(`${getModelName(selectedModel)} is working...`);
+        const response = await chatModel.getLLMResponse({
+            systemPrompt,
+            tools: tools.map((tool) => (0, chat_1.zodToOpenAITool)(tool.schema)),
+            selectedModel,
+            trace,
+        });
+        if (!response) {
+            throw new Error("No response from LLM");
+        }
+        chatModel.pushMessage(response);
+        const latest = chatModel.getHumanReadableLatestMessage();
+        await reporter((0, state_1.chatStateFromModel)(chatModel), latest);
+    }
+    (0, chat_1.cleanupBackupFiles)(process.cwd());
+}
+exports.chatAgentLoop = chatAgentLoop;

package/dist/agent/chat/index.d.ts
CHANGED

@@ -1,6 +1,11 @@
-
-
-
-
-
+import { SupportedChatModels } from "./types";
+export declare function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }: {
+    selectedModel: SupportedChatModels;
+    useDiskForChatState: boolean;
+    initialPromptContent: string | undefined;
+}): Promise<void>;
+export declare function runChatAgentForDashboard({ chatSessionId, selectedModel, }: {
+    selectedModel: SupportedChatModels;
+    chatSessionId: number;
+}): Promise<void>;
 //# sourceMappingURL=index.d.ts.map
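
The old single chat-agent entry point is replaced by two runners: one for an interactive CLI session and one for a dashboard-driven session. A hedged sketch of calling them, mirroring in-repo usage (the public `exports` map below only adds `./chat/state`, and the relative import path and the session id are assumptions for illustration):

```ts
// Path assumed; dist/bin/index.js accesses these via its chat_1 import.
import { runChatAgentForCLI, runChatAgentForDashboard } from "../agent/chat";

// Interactive CLI session; chat state is persisted to .empiricalrun/last-chat.json.
await runChatAgentForCLI({
  selectedModel: "claude-3-7-sonnet-20250219",
  useDiskForChatState: true,
  initialPromptContent: undefined,
});

// Headless run for a dashboard chat session: fetches state, checks out the branch, reports back.
await runChatAgentForDashboard({
  selectedModel: "gemini-2.5-pro-preview-03-25",
  chatSessionId: 42, // placeholder id
});
```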

package/dist/agent/chat/index.d.ts.map
CHANGED

@@ -1 +1 @@
-{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AASA,OAAO,EAAoB,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAgBhE,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBAoFA;AA+BD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA8BA"}
package/dist/agent/chat/index.js
CHANGED

@@ -1,69 +1,29 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.
-const
+exports.runChatAgentForDashboard = exports.runChatAgentForCLI = void 0;
+const llm_1 = require("@empiricalrun/llm");
+const child_process_1 = require("child_process");
 const picocolors_1 = require("picocolors");
-const web_1 = require("../../bin/utils/platform/web");
 const human_in_the_loop_1 = require("../../human-in-the-loop");
-const
-const
-const
-const test_gen_browser_1 = require("../../tools/test-gen-browser");
-const test_run_1 = require("../../tools/test-run");
-const test_run_fetcher_1 = require("../../tools/test-run-fetcher");
-const prompt_1 = require("./prompt");
-function getTools(selectedModel) {
-    let tools = [
-        grep_1.grepTool,
-        test_run_1.runTestTool,
-        test_run_fetcher_1.fetchTestRunReportTool,
-        diagnosis_fetcher_1.fetchDiagnosisReportTool,
-        test_gen_browser_1.generateTestWithBrowserAgent,
-        commit_and_create_pr_1.commitAndPushChangesTool,
-    ];
-    if (selectedModel.startsWith("gemini")) {
-        // Claude will have its own built-in text editor tools
-        chat_1.textEditorTools.forEach((tool) => {
-            const originalExecute = tool.execute;
-            tool.execute = (input) => originalExecute(input, web_1.validateTypescript);
-        });
-        tools.push(...chat_1.textEditorTools);
-    }
-    const toolExecutors = {
-        // TODO: Add validateTypescript
-        ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
-    };
-    if (selectedModel.startsWith("claude")) {
-        toolExecutors.str_replace_editor = (input) => (0, chat_1.strReplaceEditorExecutor)(input, web_1.validateTypescript);
-    }
-    return { tools, toolExecutors };
-}
-function createChatModel(useDiskForChatState, selectedModel) {
-    if (selectedModel.startsWith("claude")) {
-        return new chat_1.ClaudeChatModel(useDiskForChatState);
-    }
-    if (selectedModel.startsWith("gemini")) {
-        return new chat_1.GeminiChatModel(useDiskForChatState);
-    }
-    throw new Error(`Unsupported model: ${selectedModel}`);
-}
-function getModelName(model) {
-    if (model.startsWith("claude"))
-        return "Claude";
-    if (model.startsWith("gemini"))
-        return "Gemini";
-    return "AI";
-}
-function concludeAgent(usageSummary) {
-    console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
-    (0, chat_1.cleanupBackupFiles)(process.cwd());
-}
+const agent_loop_1 = require("./agent-loop");
+const model_1 = require("./model");
+const state_1 = require("./state");
 function stopCriteria(userPrompt) {
     return userPrompt?.toLowerCase() === "stop";
 }
-
-
-
+function concludeAgent(chatModel, useDiskForChatState) {
+    console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + chatModel.getUsageSummary())}`);
+    if (useDiskForChatState) {
+        (0, state_1.saveToDisk)(chatModel.messages);
+    }
+}
+async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }) {
+    let chatState;
+    if (useDiskForChatState) {
+        chatState = (0, state_1.loadChatState)();
+    }
+    let messagesLoadedFromDisk = chatState?.messages || [];
+    let chatModel = (0, model_1.createChatModel)(messagesLoadedFromDisk, selectedModel);
     if (initialPromptContent && chatModel.messages.length === 0) {
         chatModel.pushUserMessage(initialPromptContent);
         chatModel.askUserForInput = false;
@@ -71,22 +31,37 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
     else if (initialPromptContent && chatModel.messages.length > 0) {
         console.warn(`Ignoring initial prompt because we have existing messages.`);
     }
+    if (chatModel.askUserForInput) {
+        // Show last message to the user for context when we loaded from disk
+        const latest = chatModel.getHumanReadableLatestMessage();
+        if (latest) {
+            console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
+        }
+    }
     const handleSigInt = () => {
-        concludeAgent(chatModel
+        concludeAgent(chatModel, useDiskForChatState);
         process.exit(0);
     };
     process.once("SIGINT", handleSigInt);
     process.once("SIGTERM", handleSigInt);
-
-
-
-
+    let userPrompt;
+    let reporterFunc = async (chatState, latest) => {
+        if (useDiskForChatState) {
+            (0, state_1.saveToDisk)(chatState.messages);
+        }
         if (latest) {
-            console.log(`${latest.role}: ${latest.textMessage}`);
+            console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
         }
+    };
+    const trace = (0, llm_1.createLangfuseTrace)({
+        name: "chat_agent",
+        input: initialPromptContent || "",
+        tags: [selectedModel, "chat_agent"],
+    });
+    if (trace) {
+        const traceUrl = trace.getTraceUrl();
+        console.log(`Starting ${selectedModel}: ${traceUrl}`);
     }
-    const systemPrompt = await (0, prompt_1.buildSystemPrompt)();
-    const { tools, toolExecutors } = getTools(selectedModel);
     while (!stopCriteria(userPrompt)) {
         if (chatModel.askUserForInput) {
             try {
@@ -97,7 +72,7 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
             catch (e) {
                 // https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
                 if (e instanceof Error && e.name === "ExitPromptError") {
-                    concludeAgent(chatModel
+                    concludeAgent(chatModel, useDiskForChatState);
                     process.exit(0);
                 }
                 throw e;
@@ -105,47 +80,73 @@ async function chatAgent({ selectedModel = "claude-3-7-sonnet-20250219", useDisk
             if (!stopCriteria(userPrompt)) {
                 chatModel.pushUserMessage(userPrompt);
             }
-            continue;
         }
-
-
-
-
-
-
-
-
-                    throw new Error(`Tool ${call.name} not found`);
-                }
-                const callResponse = await toolExecutor(call.input);
-                if (callResponse.isError) {
-                    ora(`Tool ${call.name} failed: ${callResponse.result}`).fail();
-                }
-                else {
-                    ora(`Tool ${call.name} completed`).succeed();
-                }
-                toolResults.push(callResponse);
-            }
-            chatModel.pushToolResultsMessage(toolCalls, toolResults);
-        }
-        const spinner = ora(`${getModelName(selectedModel)} is working...`).start();
-        const response = await chatModel.getLLMResponse({
-            systemPrompt,
-            tools: tools.map((tool) => (0, chat_1.zodToOpenAITool)(tool.schema)),
-            selectedModel,
-        });
-        spinner.stop();
-        if (!response) {
-            throw new Error("No response from LLM");
-        }
-        chatModel.pushMessage(response);
-        const latest = chatModel.getHumanReadableLatestMessage();
-        if (latest) {
-            console.log(`${latest.role}: ${latest.textMessage}`);
+        else {
+            // TODO: Should we pass a loader function? That would allow us to show a spinner
+            await (0, agent_loop_1.chatAgentLoop)({
+                chatModel,
+                selectedModel,
+                reporter: reporterFunc,
+                trace,
+            });
         }
     }
+    trace?.update({
+        output: {
+            messages: chatModel.messages,
+        },
+    });
+    await llm_1.langfuseInstance?.flushAsync();
     const usageSummary = chatModel.getUsageSummary();
-
-
+    console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
+}
+exports.runChatAgentForCLI = runChatAgentForCLI;
+const DASHBOARD_DOMAIN = process.env.DASHBOARD_DOMAIN || "https://dash.empirical.run";
+async function getChatSessionFromDashboard(chatSessionId) {
+    const response = await fetch(`${DASHBOARD_DOMAIN}/api/chat-sessions/${chatSessionId}`, {
+        headers: {
+            "Content-Type": "application/json",
+            Authorization: `weQPMWKT`,
+        },
+    });
+    const data = await response.json();
+    return data.data.chat_session;
+}
+async function checkoutBranch(branchName) {
+    // TODO: This assumes repoDir is process.cwd()
+    try {
+        (0, child_process_1.execSync)(`git checkout ${branchName}`);
+    }
+    catch (e) {
+        // If branch doesn't exist, create it
+        (0, child_process_1.execSync)(`git checkout -b ${branchName}`);
+    }
+}
+async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
+    const chatSession = await getChatSessionFromDashboard(chatSessionId);
+    const chatState = chatSession.chat_state;
+    const branchName = chatSession.branch_name;
+    await checkoutBranch(branchName);
+    let chatModel = (0, model_1.createChatModel)(chatState.messages, selectedModel);
+    let reporterFunc = async (chatState, latest) => {
+        const response = await fetch(`${DASHBOARD_DOMAIN}/api/chat-sessions/${chatSessionId}`, {
+            method: "PATCH",
+            body: JSON.stringify({
+                chat_state: chatState,
+                last_assistant_message: latest?.textMessage,
+            }),
+            headers: {
+                "Content-Type": "application/json",
+                Authorization: `weQPMWKT`,
+            },
+        });
+        const data = await response.json();
+        console.log(`Patch request sent for chat session: ${JSON.stringify(data)}`);
+    };
+    await (0, agent_loop_1.chatAgentLoop)({
+        chatModel,
+        selectedModel,
+        reporter: reporterFunc,
+    });
 }
-exports.
+exports.runChatAgentForDashboard = runChatAgentForDashboard;
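
The dashboard runner implies a small API contract with dash.empirical.run: a GET that returns the session wrapped in `data.chat_session`, and a PATCH sent by the reporter after every assistant turn. A type sketch inferred from the calls above; the field names are read off this code, not taken from a published API spec:

```ts
// Shape implied by getChatSessionFromDashboard (it reads data.data.chat_session).
type DashboardChatSession = {
  branch_name: string; // checked out (or created) before the loop starts
  chat_state: { version: string; messages: unknown[] };
};

type GetChatSessionResponse = {
  data: { chat_session: DashboardChatSession };
};

// Body the reporter PATCHes back after each assistant turn.
type PatchChatSessionBody = {
  chat_state: { version: string; messages: unknown[] };
  last_assistant_message: string | undefined;
};
```

Both the GET and the PATCH send the same hardcoded `Authorization` value, and the base URL can be overridden via the `DASHBOARD_DOMAIN` environment variable.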

package/dist/agent/chat/model.d.ts.map
ADDED

@@ -0,0 +1 @@
+{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/model.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,UAAU,EACX,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE9C,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,GAAG,EAAE,EACf,aAAa,EAAE,mBAAmB,GACjC,UAAU,CAAC,GAAG,CAAC,CAQjB"}

package/dist/agent/chat/model.js
ADDED

@@ -0,0 +1,14 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.createChatModel = void 0;
+const chat_1 = require("@empiricalrun/llm/chat");
+function createChatModel(messages, selectedModel) {
+    if (selectedModel.startsWith("claude")) {
+        return new chat_1.ClaudeChatModel(messages);
+    }
+    if (selectedModel.startsWith("gemini")) {
+        return new chat_1.GeminiChatModel(messages);
+    }
+    throw new Error(`Unsupported model: ${selectedModel}`);
+}
+exports.createChatModel = createChatModel;

package/dist/agent/chat/state.d.ts
ADDED

@@ -0,0 +1,14 @@
+import { IChatModel } from "@empiricalrun/llm/chat";
+import { SupportedChatModels } from "./types";
+export declare const CURRENT_CHAT_STATE_VERSION = "20250327.1";
+export declare const CHAT_STATE_PATH: string;
+export type ChatStateOnDisk<T> = {
+    version: typeof CURRENT_CHAT_STATE_VERSION;
+    messages: T[];
+};
+export declare function createChatState(userPrompt: string, existingState: ChatStateOnDisk<any>, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
+export declare function createChatStateForMessages<T>(messages: any): ChatStateOnDisk<T>;
+export declare function chatStateFromModel<T>(chatModel: IChatModel<T>): ChatStateOnDisk<unknown>;
+export declare function loadChatState<T>(): ChatStateOnDisk<T> | undefined;
+export declare function saveToDisk<T>(messages: Array<T>): void;
+//# sourceMappingURL=state.d.ts.map
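
This is the module that becomes a public subpath export (`./chat/state` in the package.json change below), so a consumer such as the dashboard can build or update chat state without running the agent, which matches the changelog's `createChatState` entry. A sketch of consuming it, assuming the new subpath resolves for the caller; the prompt string is illustrative:

```ts
import {
  createChatState,
  createChatStateForMessages,
  loadChatState,
  saveToDisk,
  type ChatStateOnDisk,
} from "@empiricalrun/test-gen/chat/state";

// Start a new session from a user prompt, with no prior messages.
const fresh: ChatStateOnDisk<unknown> = createChatState(
  "Add a regression test for the login flow", // illustrative prompt
  createChatStateForMessages([]),
  "claude-3-7-sonnet-20250219",
);

// Or continue from whatever the CLI last wrote to .empiricalrun/last-chat.json.
const previous = loadChatState();
saveToDisk(previous?.messages ?? fresh.messages);
```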

package/dist/agent/chat/state.d.ts.map
ADDED

@@ -0,0 +1 @@
+{"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAKpD,OAAO,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE9C,eAAO,MAAM,0BAA0B,eAAe,CAAC;AAEvD,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,MAAM,MAAM,eAAe,CAAC,CAAC,IAAI;IAC/B,OAAO,EAAE,OAAO,0BAA0B,CAAC;IAC3C,QAAQ,EAAE,CAAC,EAAE,CAAC;CACf,CAAC;AAEF,wBAAgB,eAAe,CAC7B,UAAU,EAAE,MAAM,EAClB,aAAa,EAAE,eAAe,CAAC,GAAG,CAAC,EACnC,aAAa,EAAE,mBAAmB,4BAMnC;AAED,wBAAgB,0BAA0B,CAAC,CAAC,EAC1C,QAAQ,EAAE,GAAG,GACZ,eAAe,CAAC,CAAC,CAAC,CAMpB;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,4BAE7D;AAED,wBAAgB,aAAa,CAAC,CAAC,KAAK,eAAe,CAAC,CAAC,CAAC,GAAG,SAAS,CAajE;AAED,wBAAgB,UAAU,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,QAmB/C"}

package/dist/agent/chat/state.js
ADDED

@@ -0,0 +1,63 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.saveToDisk = exports.loadChatState = exports.chatStateFromModel = exports.createChatStateForMessages = exports.createChatState = exports.CHAT_STATE_PATH = exports.CURRENT_CHAT_STATE_VERSION = void 0;
+const fs_1 = __importDefault(require("fs"));
+const path_1 = __importDefault(require("path"));
+const model_1 = require("./model");
+exports.CURRENT_CHAT_STATE_VERSION = "20250327.1";
+exports.CHAT_STATE_PATH = path_1.default.join(process.cwd(), ".empiricalrun", "last-chat.json");
+function createChatState(userPrompt, existingState, selectedModel) {
+    const messages = existingState.messages || [];
+    const chatModel = (0, model_1.createChatModel)(messages, selectedModel);
+    chatModel.pushUserMessage(userPrompt);
+    return createChatStateForMessages(chatModel.messages);
+}
+exports.createChatState = createChatState;
+function createChatStateForMessages(messages) {
+    // TODO: Add better types for messages
+    return {
+        version: exports.CURRENT_CHAT_STATE_VERSION,
+        messages: messages,
+    };
+}
+exports.createChatStateForMessages = createChatStateForMessages;
+function chatStateFromModel(chatModel) {
+    return createChatStateForMessages(chatModel.messages);
+}
+exports.chatStateFromModel = chatStateFromModel;
+function loadChatState() {
+    if (!fs_1.default.existsSync(exports.CHAT_STATE_PATH)) {
+        return undefined;
+    }
+    const raw = fs_1.default.readFileSync(exports.CHAT_STATE_PATH, "utf8");
+    const state = JSON.parse(raw);
+    if (state.version !== exports.CURRENT_CHAT_STATE_VERSION) {
+        throw new Error(`Unsupported chat state v${state.version}. Expected v${exports.CURRENT_CHAT_STATE_VERSION}.`);
+    }
+    return state;
+}
+exports.loadChatState = loadChatState;
+function saveToDisk(messages) {
+    const statePath = exports.CHAT_STATE_PATH;
+    let existingState = {
+        version: exports.CURRENT_CHAT_STATE_VERSION,
+        messages: [],
+    };
+    // Ensure directory exists before trying to read/write
+    const dirname = path_1.default.dirname(statePath);
+    if (!fs_1.default.existsSync(dirname)) {
+        fs_1.default.mkdirSync(dirname, { recursive: true });
+    }
+    if (fs_1.default.existsSync(statePath)) {
+        existingState = JSON.parse(fs_1.default.readFileSync(statePath, "utf8"));
+    }
+    const newState = {
+        ...existingState,
+        messages: messages,
+    };
+    fs_1.default.writeFileSync(statePath, JSON.stringify(newState, null, 2));
+}
+exports.saveToDisk = saveToDisk;

package/dist/agent/chat/types.d.ts
ADDED

@@ -0,0 +1,9 @@
+import { ChatStateOnDisk } from "./state";
+export type SupportedChatModels = "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
+type LatestMessage = {
+    role: string;
+    textMessage: string;
+};
+export type ReporterFunction = (state: ChatStateOnDisk<any>, latestHumanReadableMessage: LatestMessage | undefined) => Promise<void>;
+export {};
+//# sourceMappingURL=types.d.ts.map

package/dist/agent/chat/types.d.ts.map
ADDED

@@ -0,0 +1 @@
+{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1C,MAAM,MAAM,mBAAmB,GAC3B,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;AAEnC,KAAK,aAAa,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAC7B,KAAK,EAAE,eAAe,CAAC,GAAG,CAAC,EAC3B,0BAA0B,EAAE,aAAa,GAAG,SAAS,KAClD,OAAO,CAAC,IAAI,CAAC,CAAC"}
package/dist/bin/index.js
CHANGED

@@ -35,7 +35,7 @@ function setupProcessListeners(cleanup) {
         events.forEach((event) => process.removeListener(event, cleanup));
     };
 }
-async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath) {
+async function runChatAgent({ modelInput, chatSessionId, useDiskForChatState, initialPromptPath, }) {
     const MODEL_MAPPING = {
         "claude-3-7": "claude-3-7-sonnet-20250219",
         "3-7": "claude-3-7-sonnet-20250219",
@@ -46,6 +46,16 @@ async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath)
     if (modelInput && !MODEL_MAPPING[modelInput]) {
        throw new Error(`Invalid chat model: ${modelInput}`);
    }
+    const defaultModel = "claude-3-7-sonnet-20250219";
+    const specifiedModel = modelInput && MODEL_MAPPING[modelInput];
+    if (chatSessionId) {
+        // If --chat-session-id is provided, we run the chat agent for the dashboard
+        // and not CLI (where user can input their own prompt)
+        return await (0, chat_1.runChatAgentForDashboard)({
+            chatSessionId: Number(chatSessionId),
+            selectedModel: specifiedModel || defaultModel,
+        });
+    }
     let initialPromptContent = undefined;
     if (initialPromptPath) {
         try {
@@ -56,9 +66,9 @@ async function runChatAgent(modelInput, useDiskForChatState, initialPromptPath)
            throw new Error(`Failed to read initial prompt file at ${initialPromptPath}: ${error.message}`);
        }
    }
-    return await (0, chat_1.
-        selectedModel:
-        useDiskForChatState,
+    return await (0, chat_1.runChatAgentForCLI)({
+        selectedModel: specifiedModel || defaultModel,
+        useDiskForChatState: useDiskForChatState || false,
         initialPromptContent,
     });
 }
@@ -198,6 +208,7 @@ async function main() {
        .option("--file <test-file>", "File path of the test case (inside tests dir)")
        .option("--suites <suites>", "Comma separated list of describe blocks")
        .option("--use-chat", "Use chat agent (and not the workflow)")
+        .option("--chat-session-id <chat-session-id>", "Identifier for chat session (fetched from dash.empirical.run)")
        .option("--use-disk-for-chat-state", "Save and load chat state from disk")
        .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-03-25)")
        .option("--initial-prompt <path>", "Path to an initial prompt file (e.g. prompt.md)")
@@ -224,7 +235,12 @@ async function main() {
     // Download the build if repo has a download script
     await (0, test_build_1.downloadBuild)(testGenConfig.build || {});
     if (completedOptions.useChat) {
-        await runChatAgent(
+        await runChatAgent({
+            chatSessionId: completedOptions.chatSessionId,
+            modelInput: completedOptions.chatModel,
+            useDiskForChatState: completedOptions.useDiskForChatState,
+            initialPromptPath: completedOptions.initialPrompt,
+        });
         return;
     }
     let agentUsed;

package/dist/bin/utils/index.d.ts
CHANGED

@@ -7,6 +7,7 @@ export interface CliOptions {
     useChat?: boolean;
     useDiskForChatState?: boolean;
     initialPrompt?: string;
+    chatSessionId?: string;
     chatModel?: "claude-3-7" | "3-7" | "claude-3-5" | "3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25";
 }
 export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;

package/dist/bin/utils/index.d.ts.map
CHANGED

@@ -1 +1 @@
-{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;CACpC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;CACpC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.53.
+  "version": "0.53.6",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"
@@ -10,25 +10,13 @@
   },
   "main": "dist/index.js",
   "exports": {
-    "./agent/infer-agent": {
-      "types": "./dist/agent/infer-agent/index.d.ts",
-      "default": "./dist/agent/infer-agent/index.js"
-    },
     "./agent/master/run": {
       "types": "./dist/agent/master/run.d.ts",
       "default": "./dist/agent/master/run.js"
     },
-    "./
-      "types": "./dist/agent/
-      "default": "./dist/agent/
-    },
-    "./agent/enrich-prompt": {
-      "types": "./dist/agent/enrich-prompt/index.d.ts",
-      "default": "./dist/agent/enrich-prompt/index.js"
-    },
-    "./types": {
-      "types": "./dist/types/index.d.ts",
-      "default": "./dist/types/index.js"
+    "./chat/state": {
+      "types": "./dist/agent/chat/state.d.ts",
+      "default": "./dist/agent/chat/state.js"
     },
     "./utils": {
       "types": "./dist/utils/index.d.ts",
@@ -68,7 +56,7 @@
     "tsx": "^4.16.2",
     "typescript": "^5.3.3",
     "zod": "^3.23.8",
-    "@empiricalrun/llm": "^0.14.
+    "@empiricalrun/llm": "^0.14.5",
     "@empiricalrun/r2-uploader": "^0.3.8",
     "@empiricalrun/test-run": "^0.7.6"
   },