@empiricalrun/test-gen 0.58.0 → 0.60.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +73 -0
- package/dist/agent/browsing/run.d.ts +9 -2
- package/dist/agent/browsing/run.d.ts.map +1 -1
- package/dist/agent/browsing/run.js +30 -30
- package/dist/agent/browsing/utils.d.ts +1 -14
- package/dist/agent/browsing/utils.d.ts.map +1 -1
- package/dist/agent/browsing/utils.js +1 -58
- package/dist/agent/chat/agent-loop.d.ts +2 -1
- package/dist/agent/chat/agent-loop.d.ts.map +1 -1
- package/dist/agent/chat/agent-loop.js +42 -34
- package/dist/agent/chat/exports.d.ts +5 -6
- package/dist/agent/chat/exports.d.ts.map +1 -1
- package/dist/agent/chat/exports.js +13 -42
- package/dist/agent/chat/index.d.ts +2 -1
- package/dist/agent/chat/index.d.ts.map +1 -1
- package/dist/agent/chat/index.js +23 -8
- package/dist/agent/chat/models.d.ts +6 -0
- package/dist/agent/chat/models.d.ts.map +1 -0
- package/dist/agent/chat/models.js +37 -0
- package/dist/agent/chat/prompt.d.ts.map +1 -1
- package/dist/agent/chat/prompt.js +37 -8
- package/dist/agent/chat/state.d.ts +31 -10
- package/dist/agent/chat/state.d.ts.map +1 -1
- package/dist/agent/chat/state.js +132 -27
- package/dist/agent/chat/types.d.ts +2 -3
- package/dist/agent/chat/types.d.ts.map +1 -1
- package/dist/agent/chat/utils.d.ts +14 -0
- package/dist/agent/chat/utils.d.ts.map +1 -0
- package/dist/agent/chat/utils.js +50 -0
- package/dist/agent/master/browser-tests/index.spec.js +6 -6
- package/dist/bin/index.js +12 -2
- package/dist/bin/utils/index.d.ts +1 -0
- package/dist/bin/utils/index.d.ts.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -0
- package/dist/test-build/index.js +1 -1
- package/dist/tool-call-service/index.d.ts +2 -1
- package/dist/tool-call-service/index.d.ts.map +1 -1
- package/dist/tool-call-service/index.js +51 -71
- package/dist/tool-call-service/utils.d.ts +10 -0
- package/dist/tool-call-service/utils.d.ts.map +1 -0
- package/dist/tool-call-service/utils.js +23 -0
- package/dist/tools/download-build.d.ts +9 -0
- package/dist/tools/download-build.d.ts.map +1 -1
- package/dist/tools/download-build.js +5 -4
- package/dist/tools/str_replace_editor.d.ts.map +1 -1
- package/dist/tools/str_replace_editor.js +24 -7
- package/dist/tools/test-gen-browser.d.ts.map +1 -1
- package/dist/tools/test-gen-browser.js +26 -19
- package/dist/tools/test-run.d.ts.map +1 -1
- package/dist/tools/test-run.js +8 -13
- package/dist/utils/checkpoint.d.ts.map +1 -1
- package/dist/utils/checkpoint.js +3 -1
- package/dist/utils/exec.d.ts +2 -2
- package/dist/utils/exec.d.ts.map +1 -1
- package/dist/utils/exec.js +5 -4
- package/package.json +5 -4
- package/tsconfig.tsbuildinfo +1 -1
package/dist/agent/chat/index.js
CHANGED
|
@@ -11,16 +11,17 @@ const file_tree_1 = require("../../utils/file-tree");
|
|
|
11
11
|
const git_1 = require("../../utils/git");
|
|
12
12
|
const agent_loop_1 = require("./agent-loop");
|
|
13
13
|
const state_1 = require("./state");
|
|
14
|
+
const DASHBOARD_DOMAIN = process.env.DASHBOARD_DOMAIN || "https://dash.empirical.run";
|
|
14
15
|
function stopCriteria(userPrompt) {
|
|
15
16
|
return userPrompt?.toLowerCase() === "stop";
|
|
16
17
|
}
|
|
17
|
-
function concludeAgent(chatModel, useDiskForChatState, selectedModel) {
|
|
18
|
+
function concludeAgent(chatModel, useDiskForChatState, selectedModel, error) {
|
|
18
19
|
console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + chatModel.getUsageSummary())}`);
|
|
19
20
|
if (useDiskForChatState) {
|
|
20
|
-
(0, state_1.saveToDisk)(chatModel.messages, selectedModel);
|
|
21
|
+
(0, state_1.saveToDisk)(chatModel.messages, selectedModel, chatModel.askUserForInput, error);
|
|
21
22
|
}
|
|
22
23
|
}
|
|
23
|
-
async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }) {
|
|
24
|
+
async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, withRetry, }) {
|
|
24
25
|
let chatState;
|
|
25
26
|
if (useDiskForChatState) {
|
|
26
27
|
chatState = (0, state_1.loadChatState)();
|
|
@@ -45,8 +46,12 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
45
46
|
console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
|
|
46
47
|
}
|
|
47
48
|
}
|
|
49
|
+
// When retrying, clear any error stored on the loaded chat state
|
|
50
|
+
if (withRetry && chatState) {
|
|
51
|
+
chatState.error = null;
|
|
52
|
+
}
|
|
48
53
|
const handleSigInt = () => {
|
|
49
|
-
concludeAgent(chatModel, useDiskForChatState, selectedModel);
|
|
54
|
+
concludeAgent(chatModel, useDiskForChatState, selectedModel, null);
|
|
50
55
|
process.exit(0);
|
|
51
56
|
};
|
|
52
57
|
process.once("SIGINT", handleSigInt);
|
|
@@ -54,7 +59,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
54
59
|
let userPrompt;
|
|
55
60
|
let reporterFunc = async (chatState, latest) => {
|
|
56
61
|
if (useDiskForChatState) {
|
|
57
|
-
(0, state_1.saveToDisk)(chatState.messages, selectedModel);
|
|
62
|
+
(0, state_1.saveToDisk)(chatState.messages, selectedModel, chatState.askUserForInput, chatState.error);
|
|
58
63
|
}
|
|
59
64
|
if (latest) {
|
|
60
65
|
console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
|
|
@@ -79,9 +84,14 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
79
84
|
catch (e) {
|
|
80
85
|
// https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
|
|
81
86
|
if (e instanceof Error && e.name === "ExitPromptError") {
|
|
82
|
-
concludeAgent(chatModel, useDiskForChatState, selectedModel);
|
|
87
|
+
concludeAgent(chatModel, useDiskForChatState, selectedModel, null);
|
|
83
88
|
process.exit(0);
|
|
84
89
|
}
|
|
90
|
+
concludeAgent(chatModel, useDiskForChatState, selectedModel, {
|
|
91
|
+
message: e.message,
|
|
92
|
+
stack: e.stack || "Stack trace not available",
|
|
93
|
+
timestamp: new Date().toISOString(),
|
|
94
|
+
});
|
|
85
95
|
throw e;
|
|
86
96
|
}
|
|
87
97
|
if (!stopCriteria(userPrompt)) {
|
|
@@ -99,6 +109,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
99
109
|
trace,
|
|
100
110
|
toolCallService,
|
|
101
111
|
fileInfo,
|
|
112
|
+
isToolExecutionRemote: false,
|
|
102
113
|
});
|
|
103
114
|
}
|
|
104
115
|
}
|
|
@@ -111,7 +122,6 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
111
122
|
const usageSummary = chatModel.getUsageSummary();
|
|
112
123
|
console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
|
|
113
124
|
}
|
|
114
|
-
const DASHBOARD_DOMAIN = process.env.DASHBOARD_DOMAIN || "https://dash.empirical.run";
|
|
115
125
|
async function getChatSessionFromDashboard(chatSessionId) {
|
|
116
126
|
const response = await fetch(`${DASHBOARD_DOMAIN}/api/chat-sessions/${chatSessionId}`, {
|
|
117
127
|
headers: {
|
|
@@ -127,7 +137,11 @@ async function getChatSessionFromDashboard(chatSessionId) {
|
|
|
127
137
|
}
|
|
128
138
|
async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
|
|
129
139
|
const chatSession = await getChatSessionFromDashboard(chatSessionId);
|
|
130
|
-
|
|
140
|
+
let chatState = chatSession.chat_state;
|
|
141
|
+
// If not already canonical, migrate to canonical format
|
|
142
|
+
if (!chatState.version || chatState.version !== state_1.LATEST_CHAT_STATE_VERSION) {
|
|
143
|
+
chatState = (0, state_1.migrateChatState)(chatState);
|
|
144
|
+
}
|
|
131
145
|
const branchName = chatSession.branch_name;
|
|
132
146
|
const trace = llm_1.langfuseInstance?.trace({
|
|
133
147
|
id: chatSession.langfuse_trace_id,
|
|
@@ -164,5 +178,6 @@ async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
|
|
|
164
178
|
trace,
|
|
165
179
|
toolCallService,
|
|
166
180
|
fileInfo,
|
|
181
|
+
isToolExecutionRemote: false,
|
|
167
182
|
});
|
|
168
183
|
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { ModelInfo } from "@empiricalrun/shared-types";
|
|
2
|
+
export declare const SUPPORTED_CHAT_MODELS: readonly ModelInfo[];
|
|
3
|
+
export type SupportedChatModels = (typeof SUPPORTED_CHAT_MODELS)[number]["id"];
|
|
4
|
+
export declare const defaultModel: SupportedChatModels;
|
|
5
|
+
export declare const modelLabels: Record<SupportedChatModels, string>;
|
|
6
|
+
//# sourceMappingURL=models.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,4BAA4B,CAAC;AAE5D,eAAO,MAAM,qBAAqB,EAAE,SAAS,SAAS,EAqB5C,CAAC;AAEX,MAAM,MAAM,mBAAmB,GAAG,CAAC,OAAO,qBAAqB,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC;AAW/E,eAAO,MAAM,YAAY,EAAE,mBAA6C,CAAC;AAEzE,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAOzD,CAAC"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.modelLabels = exports.defaultModel = exports.SUPPORTED_CHAT_MODELS = void 0;
|
|
4
|
+
exports.SUPPORTED_CHAT_MODELS = [
|
|
5
|
+
{
|
|
6
|
+
id: "gemini-2.5-pro-preview-03-25",
|
|
7
|
+
label: "Gemini 2.5 Pro",
|
|
8
|
+
provider: "google",
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
id: "o4-mini-2025-04-16",
|
|
12
|
+
label: "OpenAI O4 Mini",
|
|
13
|
+
provider: "openai",
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
id: "claude-3-7-sonnet-20250219",
|
|
17
|
+
label: "Claude 3.7 Sonnet",
|
|
18
|
+
provider: "claude",
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
id: "claude-3-5-sonnet-20241022",
|
|
22
|
+
label: "Claude 3.5 Sonnet",
|
|
23
|
+
provider: "claude",
|
|
24
|
+
},
|
|
25
|
+
];
|
|
26
|
+
const DEFAULT_CHAT_MODEL_ID = "gemini-2.5-pro-preview-03-25";
|
|
27
|
+
function getDefaultChatModelId() {
|
|
28
|
+
if (!exports.SUPPORTED_CHAT_MODELS.some((m) => m.id === DEFAULT_CHAT_MODEL_ID)) {
|
|
29
|
+
throw new Error("Default chat model is not in SUPPORTED_CHAT_MODELS");
|
|
30
|
+
}
|
|
31
|
+
return DEFAULT_CHAT_MODEL_ID;
|
|
32
|
+
}
|
|
33
|
+
exports.defaultModel = getDefaultChatModelId();
|
|
34
|
+
exports.modelLabels = exports.SUPPORTED_CHAT_MODELS.reduce((acc, model) => ({
|
|
35
|
+
...acc,
|
|
36
|
+
[model.id]: model.label,
|
|
37
|
+
}), {});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,
|
|
1
|
+
{"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,mBAiGzD"}
|
|
@@ -52,20 +52,49 @@ The position of the comment is important: the browser agent will look for this c
|
|
|
52
52
|
the actual code to click on the login button. If you are fixing a failing test, your comment should be
|
|
53
53
|
around the failing line of code, so that it can be replaced/modified.
|
|
54
54
|
|
|
55
|
-
#
|
|
55
|
+
# Proactiveness
|
|
56
|
+
|
|
57
|
+
1. You are allowed to be proactive, but ONLY for read-only tool calls: like searching for content, reading files, fetching data from tools, and
|
|
58
|
+
running Playwright tests.
|
|
59
|
+
2. For any read-write tool calls (e.g. modifying any file), you should share your plan and get the user's approval before proceeding.
|
|
60
|
+
|
|
61
|
+
# Rules to follow
|
|
56
62
|
|
|
57
63
|
You must follow these rules while adding new tests or modifying existing tests. There can be exceptions to these rules, but
|
|
58
64
|
ONLY when explicitly asked for by the user.
|
|
59
65
|
|
|
60
|
-
1.
|
|
61
|
-
2.
|
|
62
|
-
3.
|
|
63
|
-
4.
|
|
66
|
+
1. You can't delete some steps from the test to make it pass. The test needs to accomplish its objective (which is to validate a particular user scenario)
|
|
67
|
+
2. Do not add any conditional logic or try catch blocks in a test. A good test deterministically tests a user scenario
|
|
68
|
+
3. Trust Playwright's ability to auto-wait while taking actions on elements. For example, do not add checks on locator.isVisible() before clicking on it: Playwright already does this
|
|
69
|
+
4. Do not add waitForTimeout or waitForLoadState in a test. Playwright will automatically wait for the page to load.
|
|
70
|
+
5. Try/catch blocks are a code smell for tests: you should not use them.
|
|
71
|
+
6. Do not use then() or catch() syntax in a test. Use async/await only
|
|
64
72
|
|
|
65
|
-
|
|
73
|
+
There are few exceptions to these rules. BEFORE applying any of the following exceptions, you MUST share your plan with the user and get their approval.
|
|
74
|
+
|
|
75
|
+
## Exceptions for conditional logic
|
|
76
|
+
|
|
77
|
+
There are few exceptions where you can add conditional logic to a test. If the application UI reveals some UI elements on certain conditions, we can add conditional logic.
|
|
66
78
|
|
|
67
|
-
|
|
68
|
-
|
|
79
|
+
For example, a form view shows a "Save" button only when the form is dirty. In this case, we will have to check if the "Save" button is visible before clicking on it. To do this,
|
|
80
|
+
follow this pattern:
|
|
81
|
+
|
|
82
|
+
\`\`\`
|
|
83
|
+
const saveButton = page.getByRole('button', { name: 'Save' });
|
|
84
|
+
if (await saveButton.isVisible()) {
|
|
85
|
+
await saveButton.click();
|
|
86
|
+
}
|
|
87
|
+
\`\`\`
|
|
88
|
+
|
|
89
|
+
Note that locator.isVisible() DOES NOT wait for the element to be visible. If the element in question shows up after a delay, we have no option but to add a waitForTimeout.
|
|
90
|
+
|
|
91
|
+
\`\`\`
|
|
92
|
+
const saveButton = page.getByRole('button', { name: 'Save' });
|
|
93
|
+
await page.waitForTimeout(100); // Wait for the element to be visible -- only if necessary.
|
|
94
|
+
if (await saveButton.isVisible()) {
|
|
95
|
+
await saveButton.click();
|
|
96
|
+
}
|
|
97
|
+
\`\`\`
|
|
69
98
|
|
|
70
99
|
# Repo context
|
|
71
100
|
${repoContext}
|
|
@@ -1,14 +1,35 @@
|
|
|
1
1
|
import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
|
|
2
|
-
|
|
2
|
+
import { CanonicalMessage, ChatState, ChatStateError } from "@empiricalrun/shared-types";
|
|
3
|
+
export declare const CHAT_STATE_VERSIONS_MIGRATIONS_MAP: Record<string, (state: any) => any>;
|
|
4
|
+
export declare const LATEST_CHAT_STATE_VERSION = "0.1";
|
|
3
5
|
export declare const CHAT_STATE_PATH: string;
|
|
4
|
-
export
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
export declare function createChatState({ userPrompt, existingState, selectedModel, error, }: {
|
|
7
|
+
userPrompt: string | undefined;
|
|
8
|
+
existingState: ChatState | undefined;
|
|
9
|
+
selectedModel: SupportedChatModels;
|
|
10
|
+
error: ChatStateError | null;
|
|
11
|
+
}): ChatState;
|
|
12
|
+
export declare function createChatStateForMessages({ messages, selectedModel, askUserForInput, error, }: {
|
|
13
|
+
messages: any;
|
|
14
|
+
selectedModel: SupportedChatModels;
|
|
15
|
+
askUserForInput: boolean;
|
|
16
|
+
error: ChatStateError | null;
|
|
17
|
+
}): ChatState;
|
|
18
|
+
export declare function chatStateFromModel<T>({ chatModel, selectedModel, error, }: {
|
|
19
|
+
chatModel: IChatModel<T>;
|
|
20
|
+
selectedModel: SupportedChatModels;
|
|
21
|
+
error: ChatStateError | null;
|
|
22
|
+
}): ChatState;
|
|
23
|
+
export declare function loadChatState(): ChatState | undefined;
|
|
24
|
+
/**
|
|
25
|
+
* Migrates a chat state object from an old version to the latest version.
|
|
26
|
+
* Add migration logic for each version as needed.
|
|
27
|
+
*/
|
|
28
|
+
export declare function migrateChatState(oldState: any): ChatState;
|
|
29
|
+
export declare function saveToDisk<T>(messages: Array<T>, selectedModel: SupportedChatModels, askUserForInput: boolean, error: ChatStateError | null): void;
|
|
30
|
+
export declare function getLatestDownloadBuildUrl(messages: CanonicalMessage[]): string | null;
|
|
31
|
+
export declare function fetchToolCallAvailability(toolRequestId: String, messages: CanonicalMessage[]): {
|
|
32
|
+
hasToolRequest: boolean;
|
|
33
|
+
hasToolResponse: boolean;
|
|
8
34
|
};
|
|
9
|
-
export declare function createChatState(userPrompt: string, existingState: ChatStateOnDisk<any> | undefined, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
|
|
10
|
-
export declare function createChatStateForMessages<T>(messages: any, selectedModel: SupportedChatModels): ChatStateOnDisk<T>;
|
|
11
|
-
export declare function chatStateFromModel<T>(chatModel: IChatModel<T>, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
|
|
12
|
-
export declare function loadChatState<T>(): ChatStateOnDisk<T> | undefined;
|
|
13
|
-
export declare function saveToDisk<T>(messages: Array<T>, selectedModel: SupportedChatModels): void;
|
|
14
35
|
//# sourceMappingURL=state.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,gBAAgB,EAChB,SAAS,EACT,cAAc,EACf,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,GAAG,CAAC;IACd,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,GAAG,SAAS,CASZ;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,EACpC,SAAS,EACT,aAAa,EACb,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAOA;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,GAAG,GAAG,SAAS,CAqBzD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,EACxB,KAAK,EAAE,cAAc,GAAG,IAAI,QAgB7B;AA2BD,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,yBAAyB,CACvC,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,gBAAgB,EAAE;;;EAe7B"}
|
package/dist/agent/chat/state.js
CHANGED
|
@@ -3,64 +3,169 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.CHAT_STATE_PATH = exports.
|
|
6
|
+
exports.CHAT_STATE_PATH = exports.LATEST_CHAT_STATE_VERSION = exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP = void 0;
|
|
7
7
|
exports.createChatState = createChatState;
|
|
8
8
|
exports.createChatStateForMessages = createChatStateForMessages;
|
|
9
9
|
exports.chatStateFromModel = chatStateFromModel;
|
|
10
10
|
exports.loadChatState = loadChatState;
|
|
11
|
+
exports.migrateChatState = migrateChatState;
|
|
11
12
|
exports.saveToDisk = saveToDisk;
|
|
13
|
+
exports.getLatestDownloadBuildUrl = getLatestDownloadBuildUrl;
|
|
14
|
+
exports.fetchToolCallAvailability = fetchToolCallAvailability;
|
|
12
15
|
const chat_1 = require("@empiricalrun/llm/chat");
|
|
13
16
|
const fs_1 = __importDefault(require("fs"));
|
|
14
17
|
const path_1 = __importDefault(require("path"));
|
|
15
|
-
|
|
18
|
+
// Migration wrapper for v20250327.1 -> v0.1 chat state versions
|
|
19
|
+
// v20250327.1 was model-specific, but v0.1 is canonical
|
|
20
|
+
function migrateToV01(oldState) {
|
|
21
|
+
if (oldState.model &&
|
|
22
|
+
typeof oldState.model === "string" &&
|
|
23
|
+
Array.isArray(oldState.messages)) {
|
|
24
|
+
const provider = (0, chat_1.getProviderForModel)(oldState.model);
|
|
25
|
+
if (provider === "google") {
|
|
26
|
+
return {
|
|
27
|
+
...oldState,
|
|
28
|
+
version: "0.1",
|
|
29
|
+
messages: oldState.messages.map(chat_1.geminiToCanonical),
|
|
30
|
+
};
|
|
31
|
+
}
|
|
32
|
+
else {
|
|
33
|
+
throw new Error(`Unsupported state for migration with model: ${oldState.model} and version: ${oldState.version}`);
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
// State does not have the expected shape (string `model` plus `messages` array): return it unchanged
|
|
37
|
+
return oldState;
|
|
38
|
+
}
|
|
39
|
+
exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP = {
|
|
40
|
+
"20250327.1": migrateToV01,
|
|
41
|
+
"0.1": (state) => state,
|
|
42
|
+
};
|
|
43
|
+
exports.LATEST_CHAT_STATE_VERSION = "0.1";
|
|
16
44
|
exports.CHAT_STATE_PATH = path_1.default.join(process.cwd(), ".empiricalrun", "last-chat.json");
|
|
17
|
-
function createChatState(userPrompt, existingState, selectedModel) {
|
|
45
|
+
function createChatState({ userPrompt, existingState, selectedModel, error, }) {
|
|
18
46
|
const messages = existingState?.messages || [];
|
|
19
47
|
const chatModel = (0, chat_1.createChatModel)(messages, selectedModel);
|
|
20
|
-
|
|
21
|
-
|
|
48
|
+
if (userPrompt) {
|
|
49
|
+
chatModel.pushUserMessage(userPrompt);
|
|
50
|
+
}
|
|
51
|
+
return createChatStateForMessages({
|
|
52
|
+
messages: chatModel.messages,
|
|
53
|
+
selectedModel,
|
|
54
|
+
askUserForInput: chatModel.askUserForInput,
|
|
55
|
+
error,
|
|
56
|
+
});
|
|
22
57
|
}
|
|
23
|
-
function createChatStateForMessages(messages, selectedModel) {
|
|
58
|
+
function createChatStateForMessages({ messages, selectedModel, askUserForInput, error, }) {
|
|
24
59
|
// TODO: Add better types for messages
|
|
25
60
|
return {
|
|
26
|
-
version: exports.
|
|
61
|
+
version: exports.LATEST_CHAT_STATE_VERSION,
|
|
27
62
|
model: selectedModel,
|
|
28
63
|
messages: messages,
|
|
64
|
+
askUserForInput: askUserForInput,
|
|
65
|
+
error: error,
|
|
29
66
|
};
|
|
30
67
|
}
|
|
31
|
-
function chatStateFromModel(chatModel, selectedModel) {
|
|
32
|
-
return createChatStateForMessages(
|
|
68
|
+
function chatStateFromModel({ chatModel, selectedModel, error, }) {
|
|
69
|
+
return createChatStateForMessages({
|
|
70
|
+
messages: chatModel.messages,
|
|
71
|
+
selectedModel,
|
|
72
|
+
askUserForInput: chatModel.askUserForInput,
|
|
73
|
+
error,
|
|
74
|
+
});
|
|
33
75
|
}
|
|
34
76
|
function loadChatState() {
|
|
35
77
|
if (!fs_1.default.existsSync(exports.CHAT_STATE_PATH)) {
|
|
36
78
|
return undefined;
|
|
37
79
|
}
|
|
38
80
|
const raw = fs_1.default.readFileSync(exports.CHAT_STATE_PATH, "utf8");
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
81
|
+
let state = JSON.parse(raw);
|
|
82
|
+
// Always migrate to the latest version after loading
|
|
83
|
+
const migratedState = migrateChatState(state);
|
|
84
|
+
// Only save if migration actually changed the state
|
|
85
|
+
if (JSON.stringify(state) !== JSON.stringify(migratedState)) {
|
|
86
|
+
fs_1.default.writeFileSync(exports.CHAT_STATE_PATH, JSON.stringify(migratedState, null, 2));
|
|
42
87
|
}
|
|
43
|
-
return
|
|
88
|
+
return migratedState;
|
|
44
89
|
}
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
90
|
+
/**
 * Migrates a chat state object from an old version to the latest version.
 * Add migration logic for each version as needed.
 *
 * Falsy states and states without any keys are returned untouched, and a
 * state already at LATEST_CHAT_STATE_VERSION is returned as the same object.
 *
 * @param {object} oldState - Chat state as loaded from disk or the dashboard.
 * @returns {object} The state after running the migration registered for its version.
 * @throws {Error} If the state has no `version`, or if no migration function
 *   is registered for that version.
 */
function migrateChatState(oldState) {
    if (!oldState || Object.keys(oldState).length === 0) {
        return oldState;
    }
    if (!oldState.version) {
        throw new Error("No version found in chat state");
    }
    const migrations = exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP;
    // Look the version up as an OWN property only. A plain `migrations[version]`
    // read also resolves names inherited from Object.prototype ("toString",
    // "constructor", ...), which would then be invoked as if they were migrations.
    const migrateFn = Object.prototype.hasOwnProperty.call(migrations, oldState.version)
        ? migrations[oldState.version]
        : undefined;
    if (!migrateFn) {
        throw new Error(`No migration function found for version: ${oldState.version}`);
    }
    if (oldState.version === exports.LATEST_CHAT_STATE_VERSION) {
        return oldState;
    }
    const migrated = migrateFn(oldState);
    // NOTE(review): `migrated` is spread last, so a `version` key on the migrated
    // object overrides the one set here. A migration that returns its input
    // unchanged therefore keeps the old version label - confirm this is intended.
    return {
        version: exports.LATEST_CHAT_STATE_VERSION,
        ...migrated,
    };
}
|
|
114
|
+
function saveToDisk(messages, selectedModel, askUserForInput, error) {
|
|
115
|
+
const statePath = exports.CHAT_STATE_PATH;
|
|
52
116
|
// Ensure directory exists before trying to read/write
|
|
53
117
|
const dirname = path_1.default.dirname(statePath);
|
|
54
118
|
if (!fs_1.default.existsSync(dirname)) {
|
|
55
119
|
fs_1.default.mkdirSync(dirname, { recursive: true });
|
|
56
120
|
}
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
};
|
|
121
|
+
// Use the helper to build the new state
|
|
122
|
+
const newState = createChatStateForMessages({
|
|
123
|
+
messages,
|
|
124
|
+
selectedModel,
|
|
125
|
+
askUserForInput,
|
|
126
|
+
error,
|
|
127
|
+
});
|
|
65
128
|
fs_1.default.writeFileSync(statePath, JSON.stringify(newState, null, 2));
|
|
66
129
|
}
|
|
130
|
+
/**
 * Finds the most recent message that contains a call to the given tool.
 *
 * Messages are ordered by `timestamp`, newest first, and the first message
 * having a part whose `toolCall.name` equals `toolName` is returned.
 *
 * @param {Array<object>} messages - Messages, each with `timestamp` and `parts`.
 * @param {string} toolName - Name of the tool call to look for.
 * @returns {object | undefined} The matching message, or `undefined` if none matches.
 */
function findLatestToolCall(messages, toolName) {
    // Sort a copy: Array.prototype.sort reorders the array it is called on, and
    // a lookup must not rearrange the caller's message history.
    return [...messages]
        .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime())
        .find((m) => m.parts.some((p) => "toolCall" in p && p.toolCall?.name === toolName));
}
|
|
135
|
+
function findSuccessfulToolResponse(messages, toolCallId) {
|
|
136
|
+
return messages.find((m) => m.parts.some((p) => {
|
|
137
|
+
if ("toolResult" in p) {
|
|
138
|
+
return p.toolCallId === toolCallId && !p.toolResult.isError;
|
|
139
|
+
}
|
|
140
|
+
return false;
|
|
141
|
+
}));
|
|
142
|
+
}
|
|
143
|
+
/**
 * Returns the `buildUrl` input of the latest "downloadBuild" tool call, but
 * only when a non-error tool result exists for that call.
 *
 * @param {Array<object>} messages - Chat messages to search.
 * @returns {string | null} The build URL, or `null` when there is no such
 *   call, no successful result for it, or the call carries no `buildUrl`.
 */
function getLatestDownloadBuildUrl(messages) {
    const toolCallMessage = findLatestToolCall(messages, "downloadBuild");
    if (!toolCallMessage) {
        return null;
    }
    const toolCallPart = toolCallMessage.parts.find((p) => "toolCall" in p && p.toolCall?.name === "downloadBuild");
    if (!toolCallPart || !("toolCall" in toolCallPart)) {
        return null;
    }
    // Only report the URL if the call has a result that is not flagged as an error.
    const toolResponseMessage = findSuccessfulToolResponse(messages, toolCallPart.toolCall.id);
    if (!toolResponseMessage) {
        return null;
    }
    // A call without `input` or without `buildUrl` yields null rather than
    // throwing or leaking `undefined` past the declared `string | null` result.
    return toolCallPart.toolCall.input?.buildUrl ?? null;
}
|
|
157
|
+
function fetchToolCallAvailability(toolRequestId, messages) {
|
|
158
|
+
let hasToolRequest = false;
|
|
159
|
+
let hasToolResponse = false;
|
|
160
|
+
messages.forEach((message) => {
|
|
161
|
+
message.parts.forEach((part) => {
|
|
162
|
+
if ("toolCall" in part && part.toolCall?.id === toolRequestId) {
|
|
163
|
+
hasToolRequest = true;
|
|
164
|
+
}
|
|
165
|
+
if ("toolResult" in part && part.toolCallId === toolRequestId) {
|
|
166
|
+
hasToolResponse = true;
|
|
167
|
+
}
|
|
168
|
+
});
|
|
169
|
+
});
|
|
170
|
+
return { hasToolRequest, hasToolResponse };
|
|
171
|
+
}
|
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
import {
|
|
2
|
-
export type SupportedChatModels = "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25" | "o4-mini-2025-04-16";
|
|
1
|
+
import { ChatState } from "@empiricalrun/shared-types";
|
|
3
2
|
type LatestMessage = {
|
|
4
3
|
role: string;
|
|
5
4
|
textMessage: string;
|
|
6
5
|
};
|
|
7
|
-
export type ReporterFunction = (state:
|
|
6
|
+
export type ReporterFunction = (state: ChatState, latestHumanReadableMessage: LatestMessage | undefined) => Promise<void>;
|
|
8
7
|
export {};
|
|
9
8
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,4BAA4B,CAAC;AAEvD,KAAK,aAAa,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAC7B,KAAK,EAAE,SAAS,EAChB,0BAA0B,EAAE,aAAa,GAAG,SAAS,KAClD,OAAO,CAAC,IAAI,CAAC,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
+
import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
|
|
3
|
+
import { ReporterFunction } from "./types";
|
|
4
|
+
export declare const log: (...args: any[]) => void;
|
|
5
|
+
export declare function getModelName(model: string): string;
|
|
6
|
+
export declare function handleAgentError({ context, error, chatModel, selectedModel, reporter, trace, }: {
|
|
7
|
+
context: string;
|
|
8
|
+
error: unknown;
|
|
9
|
+
chatModel: IChatModel<any>;
|
|
10
|
+
selectedModel: SupportedChatModels;
|
|
11
|
+
reporter: ReporterFunction;
|
|
12
|
+
trace?: TraceClient;
|
|
13
|
+
}): Promise<void>;
|
|
14
|
+
//# sourceMappingURL=utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAEL,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAKhC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,GAAG,EAAE,SAEjC,CAAC;AAcF,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,OAAO,EACP,KAAK,EACL,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAsBA"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.log = void 0;
|
|
4
|
+
exports.getModelName = getModelName;
|
|
5
|
+
exports.handleAgentError = handleAgentError;
|
|
6
|
+
const chat_1 = require("@empiricalrun/llm/chat");
|
|
7
|
+
const picocolors_1 = require("picocolors");
|
|
8
|
+
const state_1 = require("./state");
|
|
9
|
+
const log = (...args) => {
|
|
10
|
+
console.log((0, picocolors_1.gray)(args.join(" ")));
|
|
11
|
+
};
|
|
12
|
+
exports.log = log;
|
|
13
|
+
function logError(context, error, trace) {
|
|
14
|
+
console.error((0, picocolors_1.gray)(`[Error in ${context}]:`), error instanceof Error ? error.stack || error.message : error);
|
|
15
|
+
trace?.update({
|
|
16
|
+
output: {
|
|
17
|
+
error: error instanceof Error ? error.message : String(error),
|
|
18
|
+
},
|
|
19
|
+
});
|
|
20
|
+
}
|
|
21
|
+
function getModelName(model) {
|
|
22
|
+
const provider = (0, chat_1.getProviderForModel)(model);
|
|
23
|
+
if (provider === "claude")
|
|
24
|
+
return "Claude";
|
|
25
|
+
if (provider === "google")
|
|
26
|
+
return "Gemini";
|
|
27
|
+
if (provider === "openai")
|
|
28
|
+
return "o4";
|
|
29
|
+
return "AI";
|
|
30
|
+
}
|
|
31
|
+
/**
 * Records an agent failure: builds an error record, passes a chat state
 * carrying that record to `reporter`, writes the record to the trace output,
 * and logs the error when a `context` label is given.
 *
 * @param {object} args
 * @param {string} args.context - Label used when logging; logging is skipped when empty.
 * @param {unknown} args.error - The thrown value; not necessarily an Error.
 * @param {object} args.chatModel - Chat model whose messages and latest message are reported.
 * @param {string} args.selectedModel - Model id stored in the reported chat state.
 * @param {Function} args.reporter - Async callback receiving the chat state and latest message.
 * @param {object} [args.trace] - Optional trace client to update.
 * @returns {Promise<void>}
 */
async function handleAgentError({ context, error, chatModel, selectedModel, reporter, trace, }) {
    // Anything can be thrown, so `error` may be a string, null or undefined.
    // Read its fields defensively so that reporting a failure cannot itself
    // throw, and so that non-Error values still produce a usable message.
    const errorObject = {
        message: error?.message ?? String(error),
        stack: error?.stack || "Stack trace not available",
        timestamp: new Date().toISOString(),
    };
    await reporter((0, state_1.chatStateFromModel)({
        chatModel,
        selectedModel,
        error: errorObject,
    }), chatModel.getHumanReadableLatestMessage());
    trace?.update({
        output: {
            error: errorObject,
        },
    });
    if (context) {
        // NOTE: logError issues its own trace update after the one above.
        logError(context, error, trace);
    }
}
|
|
@@ -9,7 +9,7 @@ const utils_1 = require("../../browsing/utils");
|
|
|
9
9
|
const element_annotation_1 = require("../element-annotation");
|
|
10
10
|
const run_1 = require("../run");
|
|
11
11
|
const fixtures_1 = require("./fixtures");
|
|
12
|
-
|
|
12
|
+
fixtures_1.test.skip("able to scroll and interact with elements", async ({ page, server, }) => {
|
|
13
13
|
await page.goto(`${server.baseURL}/blog-page.html`);
|
|
14
14
|
const response = await (0, run_1.createTestUsingMasterAgent)({
|
|
15
15
|
task: `fill test@test.com into the email field and click the submit`,
|
|
@@ -21,7 +21,7 @@ const fixtures_1 = require("./fixtures");
|
|
|
21
21
|
(0, fixtures_1.expect)(response.code).toContain("await page.getByPlaceholder('Enter your email').fill(\"test@test.com\")");
|
|
22
22
|
(0, fixtures_1.expect)(response.code).toContain("await page.getByRole('button', { name: 'Subscribe' }).click()");
|
|
23
23
|
});
|
|
24
|
-
|
|
24
|
+
fixtures_1.test.skip("scroll when element does not exist", async ({ page, server }) => {
|
|
25
25
|
await page.goto(`${server.baseURL}/blog-page.html`);
|
|
26
26
|
const response = await (0, run_1.createTestUsingMasterAgent)({
|
|
27
27
|
task: `click search button`,
|
|
@@ -32,7 +32,7 @@ const fixtures_1 = require("./fixtures");
|
|
|
32
32
|
(0, fixtures_1.expect)(response.importPaths.length).toBe(0);
|
|
33
33
|
(0, fixtures_1.expect)(response.code.length).toBe(0);
|
|
34
34
|
});
|
|
35
|
-
|
|
35
|
+
fixtures_1.test.skip("scroll and click inside div elements", async ({ page, server }) => {
|
|
36
36
|
await page.goto(`${server.baseURL}/dropdown-scrolls.html`);
|
|
37
37
|
const response = await (0, run_1.createTestUsingMasterAgent)({
|
|
38
38
|
task: `click on x-3 inside bmw dropdown, and then,
|
|
@@ -49,7 +49,7 @@ click on maverick inside ford dropdown`,
|
|
|
49
49
|
(0, fixtures_1.expect)(lines.find((l) => l.match(/^await page.+Ford.+.click/))).toBeTruthy();
|
|
50
50
|
(0, fixtures_1.expect)(lines.find((l) => l.match(/^await page.+Maverick.+.click/))).toBeTruthy();
|
|
51
51
|
});
|
|
52
|
-
|
|
52
|
+
fixtures_1.test.skip("master agent can click icons accurately", async ({ page, server, }) => {
|
|
53
53
|
await page.goto(`${server.baseURL}/icons-navbar.html`);
|
|
54
54
|
await (0, fixtures_1.expect)(page.getByText("select an icon")).toBeVisible();
|
|
55
55
|
const response = await (0, run_1.createTestUsingMasterAgent)({
|
|
@@ -86,7 +86,7 @@ click on maverick inside ford dropdown`,
|
|
|
86
86
|
(0, fixtures_1.expect)(response.code).toContain("page.locator");
|
|
87
87
|
(0, fixtures_1.expect)(response.code).toContain("click()");
|
|
88
88
|
});
|
|
89
|
-
|
|
89
|
+
fixtures_1.test.skip("annotate and enrich annotations correctly", async ({ page, server, }) => {
|
|
90
90
|
await (0, utils_1.injectPwLocatorGenerator)(page);
|
|
91
91
|
await page.goto(`${server.baseURL}/iframe-elements.html`);
|
|
92
92
|
const { annotationKeys: keys } = await (0, element_annotation_1.getAnnotationKeys)({
|
|
@@ -105,7 +105,7 @@ click on maverick inside ford dropdown`,
|
|
|
105
105
|
// 2 clickable divs: 1 in main frame, 1 in iframe
|
|
106
106
|
(0, fixtures_1.expect)(keys.filter((k) => k.text.includes("Lorem Ipsum")).length).toBe(2);
|
|
107
107
|
});
|
|
108
|
-
|
|
108
|
+
fixtures_1.test.skip("fill action with multiple pages", async ({ context }) => {
|
|
109
109
|
const page1 = await context.newPage();
|
|
110
110
|
const page2 = await context.newPage();
|
|
111
111
|
const response = await (0, run_1.createTestUsingMasterAgent)({
|