@empiricalrun/test-gen 0.58.0 → 0.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/CHANGELOG.md +73 -0
  2. package/dist/agent/browsing/run.d.ts +9 -2
  3. package/dist/agent/browsing/run.d.ts.map +1 -1
  4. package/dist/agent/browsing/run.js +30 -30
  5. package/dist/agent/browsing/utils.d.ts +1 -14
  6. package/dist/agent/browsing/utils.d.ts.map +1 -1
  7. package/dist/agent/browsing/utils.js +1 -58
  8. package/dist/agent/chat/agent-loop.d.ts +2 -1
  9. package/dist/agent/chat/agent-loop.d.ts.map +1 -1
  10. package/dist/agent/chat/agent-loop.js +42 -34
  11. package/dist/agent/chat/exports.d.ts +5 -6
  12. package/dist/agent/chat/exports.d.ts.map +1 -1
  13. package/dist/agent/chat/exports.js +13 -42
  14. package/dist/agent/chat/index.d.ts +2 -1
  15. package/dist/agent/chat/index.d.ts.map +1 -1
  16. package/dist/agent/chat/index.js +23 -8
  17. package/dist/agent/chat/models.d.ts +6 -0
  18. package/dist/agent/chat/models.d.ts.map +1 -0
  19. package/dist/agent/chat/models.js +37 -0
  20. package/dist/agent/chat/prompt.d.ts.map +1 -1
  21. package/dist/agent/chat/prompt.js +37 -8
  22. package/dist/agent/chat/state.d.ts +31 -10
  23. package/dist/agent/chat/state.d.ts.map +1 -1
  24. package/dist/agent/chat/state.js +132 -27
  25. package/dist/agent/chat/types.d.ts +2 -3
  26. package/dist/agent/chat/types.d.ts.map +1 -1
  27. package/dist/agent/chat/utils.d.ts +14 -0
  28. package/dist/agent/chat/utils.d.ts.map +1 -0
  29. package/dist/agent/chat/utils.js +50 -0
  30. package/dist/agent/master/browser-tests/index.spec.js +6 -6
  31. package/dist/bin/index.js +12 -2
  32. package/dist/bin/utils/index.d.ts +1 -0
  33. package/dist/bin/utils/index.d.ts.map +1 -1
  34. package/dist/index.d.ts +1 -0
  35. package/dist/index.d.ts.map +1 -1
  36. package/dist/index.js +3 -0
  37. package/dist/test-build/index.js +1 -1
  38. package/dist/tool-call-service/index.d.ts +2 -1
  39. package/dist/tool-call-service/index.d.ts.map +1 -1
  40. package/dist/tool-call-service/index.js +51 -71
  41. package/dist/tool-call-service/utils.d.ts +10 -0
  42. package/dist/tool-call-service/utils.d.ts.map +1 -0
  43. package/dist/tool-call-service/utils.js +23 -0
  44. package/dist/tools/download-build.d.ts +9 -0
  45. package/dist/tools/download-build.d.ts.map +1 -1
  46. package/dist/tools/download-build.js +5 -4
  47. package/dist/tools/str_replace_editor.d.ts.map +1 -1
  48. package/dist/tools/str_replace_editor.js +24 -7
  49. package/dist/tools/test-gen-browser.d.ts.map +1 -1
  50. package/dist/tools/test-gen-browser.js +26 -19
  51. package/dist/tools/test-run.d.ts.map +1 -1
  52. package/dist/tools/test-run.js +8 -13
  53. package/dist/utils/checkpoint.d.ts.map +1 -1
  54. package/dist/utils/checkpoint.js +3 -1
  55. package/dist/utils/exec.d.ts +2 -2
  56. package/dist/utils/exec.d.ts.map +1 -1
  57. package/dist/utils/exec.js +5 -4
  58. package/package.json +5 -4
  59. package/tsconfig.tsbuildinfo +1 -1
@@ -11,16 +11,17 @@ const file_tree_1 = require("../../utils/file-tree");
11
11
  const git_1 = require("../../utils/git");
12
12
  const agent_loop_1 = require("./agent-loop");
13
13
  const state_1 = require("./state");
14
+ const DASHBOARD_DOMAIN = process.env.DASHBOARD_DOMAIN || "https://dash.empirical.run";
14
15
  function stopCriteria(userPrompt) {
15
16
  return userPrompt?.toLowerCase() === "stop";
16
17
  }
17
- function concludeAgent(chatModel, useDiskForChatState, selectedModel) {
18
+ function concludeAgent(chatModel, useDiskForChatState, selectedModel, error) {
18
19
  console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + chatModel.getUsageSummary())}`);
19
20
  if (useDiskForChatState) {
20
- (0, state_1.saveToDisk)(chatModel.messages, selectedModel);
21
+ (0, state_1.saveToDisk)(chatModel.messages, selectedModel, chatModel.askUserForInput, error);
21
22
  }
22
23
  }
23
- async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }) {
24
+ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, withRetry, }) {
24
25
  let chatState;
25
26
  if (useDiskForChatState) {
26
27
  chatState = (0, state_1.loadChatState)();
@@ -45,8 +46,12 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
45
46
  console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
46
47
  }
47
48
  }
49
+ // If withRetry is set, clear any error stored on the loaded chat state before resuming
50
+ if (withRetry && chatState) {
51
+ chatState.error = null;
52
+ }
48
53
  const handleSigInt = () => {
49
- concludeAgent(chatModel, useDiskForChatState, selectedModel);
54
+ concludeAgent(chatModel, useDiskForChatState, selectedModel, null);
50
55
  process.exit(0);
51
56
  };
52
57
  process.once("SIGINT", handleSigInt);
@@ -54,7 +59,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
54
59
  let userPrompt;
55
60
  let reporterFunc = async (chatState, latest) => {
56
61
  if (useDiskForChatState) {
57
- (0, state_1.saveToDisk)(chatState.messages, selectedModel);
62
+ (0, state_1.saveToDisk)(chatState.messages, selectedModel, chatState.askUserForInput, chatState.error);
58
63
  }
59
64
  if (latest) {
60
65
  console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
@@ -79,9 +84,14 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
79
84
  catch (e) {
80
85
  // https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
81
86
  if (e instanceof Error && e.name === "ExitPromptError") {
82
- concludeAgent(chatModel, useDiskForChatState, selectedModel);
87
+ concludeAgent(chatModel, useDiskForChatState, selectedModel, null);
83
88
  process.exit(0);
84
89
  }
90
+ concludeAgent(chatModel, useDiskForChatState, selectedModel, {
91
+ message: e.message,
92
+ stack: e.stack || "Stack trace not available",
93
+ timestamp: new Date().toISOString(),
94
+ });
85
95
  throw e;
86
96
  }
87
97
  if (!stopCriteria(userPrompt)) {
@@ -99,6 +109,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
99
109
  trace,
100
110
  toolCallService,
101
111
  fileInfo,
112
+ isToolExecutionRemote: false,
102
113
  });
103
114
  }
104
115
  }
@@ -111,7 +122,6 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
111
122
  const usageSummary = chatModel.getUsageSummary();
112
123
  console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
113
124
  }
114
- const DASHBOARD_DOMAIN = process.env.DASHBOARD_DOMAIN || "https://dash.empirical.run";
115
125
  async function getChatSessionFromDashboard(chatSessionId) {
116
126
  const response = await fetch(`${DASHBOARD_DOMAIN}/api/chat-sessions/${chatSessionId}`, {
117
127
  headers: {
@@ -127,7 +137,11 @@ async function getChatSessionFromDashboard(chatSessionId) {
127
137
  }
128
138
  async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
129
139
  const chatSession = await getChatSessionFromDashboard(chatSessionId);
130
- const chatState = chatSession.chat_state;
140
+ let chatState = chatSession.chat_state;
141
+ // If not already canonical, migrate to canonical format
142
+ if (!chatState.version || chatState.version !== state_1.LATEST_CHAT_STATE_VERSION) {
143
+ chatState = (0, state_1.migrateChatState)(chatState);
144
+ }
131
145
  const branchName = chatSession.branch_name;
132
146
  const trace = llm_1.langfuseInstance?.trace({
133
147
  id: chatSession.langfuse_trace_id,
@@ -164,5 +178,6 @@ async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
164
178
  trace,
165
179
  toolCallService,
166
180
  fileInfo,
181
+ isToolExecutionRemote: false,
167
182
  });
168
183
  }
@@ -0,0 +1,6 @@
1
+ import type { ModelInfo } from "@empiricalrun/shared-types";
2
+ export declare const SUPPORTED_CHAT_MODELS: readonly ModelInfo[];
3
+ export type SupportedChatModels = (typeof SUPPORTED_CHAT_MODELS)[number]["id"];
4
+ export declare const defaultModel: SupportedChatModels;
5
+ export declare const modelLabels: Record<SupportedChatModels, string>;
6
+ //# sourceMappingURL=models.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,4BAA4B,CAAC;AAE5D,eAAO,MAAM,qBAAqB,EAAE,SAAS,SAAS,EAqB5C,CAAC;AAEX,MAAM,MAAM,mBAAmB,GAAG,CAAC,OAAO,qBAAqB,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC;AAW/E,eAAO,MAAM,YAAY,EAAE,mBAA6C,CAAC;AAEzE,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAOzD,CAAC"}
@@ -0,0 +1,37 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.modelLabels = exports.defaultModel = exports.SUPPORTED_CHAT_MODELS = void 0;
4
+ exports.SUPPORTED_CHAT_MODELS = [
5
+ {
6
+ id: "gemini-2.5-pro-preview-03-25",
7
+ label: "Gemini 2.5 Pro",
8
+ provider: "google",
9
+ },
10
+ {
11
+ id: "o4-mini-2025-04-16",
12
+ label: "OpenAI O4 Mini",
13
+ provider: "openai",
14
+ },
15
+ {
16
+ id: "claude-3-7-sonnet-20250219",
17
+ label: "Claude 3.7 Sonnet",
18
+ provider: "claude",
19
+ },
20
+ {
21
+ id: "claude-3-5-sonnet-20241022",
22
+ label: "Claude 3.5 Sonnet",
23
+ provider: "claude",
24
+ },
25
+ ];
26
+ const DEFAULT_CHAT_MODEL_ID = "gemini-2.5-pro-preview-03-25";
27
+ function getDefaultChatModelId() {
28
+ if (!exports.SUPPORTED_CHAT_MODELS.some((m) => m.id === DEFAULT_CHAT_MODEL_ID)) {
29
+ throw new Error("Default chat model is not in SUPPORTED_CHAT_MODELS");
30
+ }
31
+ return DEFAULT_CHAT_MODEL_ID;
32
+ }
33
+ exports.defaultModel = getDefaultChatModelId();
34
/**
 * Lookup table from model id to its human-readable label, derived from
 * SUPPORTED_CHAT_MODELS so the two can never drift apart.
 *
 * Built in a single pass with Object.fromEntries rather than re-spreading the
 * accumulator on every reduce step.
 */
exports.modelLabels = Object.fromEntries(exports.SUPPORTED_CHAT_MODELS.map((model) => [model.id, model.label]));
@@ -1 +1 @@
1
- {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,mBAoEzD"}
1
+ {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,mBAiGzD"}
@@ -52,20 +52,49 @@ The position of the comment is important: the browser agent will look for this c
52
52
  the actual code to click on the login button. If you are fixing a failing test, your comment should be
53
53
  around the failing line of code, so that it can be replaced/modified.
54
54
 
55
- # Rules for fixing Playwright tests
55
+ # Proactiveness
56
+
57
+ 1. You are allowed to be proactive, but ONLY for read-only tool calls: like searching for content, reading files, fetching data from tools, and
58
+ running Playwright tests.
59
+ 2. For any read-write tool calls (e.g. modifying any file), you should share your plan and get the user's approval before proceeding.
60
+
61
+ # Rules to follow
56
62
 
57
63
  You must follow these rules while adding new tests or modifying existing tests. There can be exceptions to these rules, but
58
64
  ONLY when explicitly asked for by the user.
59
65
 
60
- 1. Do not add any conditional logic or try catch blocks in a test. A good test deterministically tests a user scenario
61
- 2. Trust Playwright's ability to auto-wait while taking actions on elements. For example, do not add checks on locator.isVisible() before clicking on it: Playwright already does this
62
- 3. Do not add waitForTimeout or waitForLoadState in a test. Playwright will automatically wait for the page to load.
63
- 4. You can't delete some steps from the test to make it pass. The test needs to accomplish its objective (which is to validate a particular user scenario)
66
+ 1. You can't delete some steps from the test to make it pass. The test needs to accomplish its objective (which is to validate a particular user scenario)
67
+ 2. Do not add any conditional logic or try catch blocks in a test. A good test deterministically tests a user scenario
68
+ 3. Trust Playwright's ability to auto-wait while taking actions on elements. For example, do not add checks on locator.isVisible() before clicking on it: Playwright already does this
69
+ 4. Do not add waitForTimeout or waitForLoadState in a test. Playwright will automatically wait for the page to load.
70
+ 5. Try/catch blocks are a code smell for tests: you should not use them.
71
+ 6. Do not use then() or catch() syntax in a test. Use async/await only
64
72
 
65
- # Proactiveness
73
+ There are few exceptions to these rules. BEFORE applying any of the following exceptions, you MUST share your plan with the user and get their approval.
74
+
75
+ ## Exceptions for conditional logic
76
+
77
+ There are few exceptions where you can add conditional logic to a test. If the application UI reveals some UI elements on certain conditions, we can add conditional logic.
66
78
 
67
- You are allowed to be proactive, but ONLY for read-only actions, like searching for content, reading files, fetching data from tools, and
68
- running Playwright tests. For any read-write actions (e.g. modifying any file), you should share your plan and get the user's approval before proceeding.
79
+ For example, a form view shows a "Save" button only when the form is dirty. In this case, we will have to check if the "Save" button is visible before clicking on it. To do this,
80
+ follow this pattern:
81
+
82
+ \`\`\`
83
+ const saveButton = page.getByRole('button', { name: 'Save' });
84
+ if (await saveButton.isVisible()) {
85
+ await saveButton.click();
86
+ }
87
+ \`\`\`
88
+
89
+ Note that locator.isVisible() DOES NOT wait for the element to be visible. If the element in question shows up after a delay, we have no option but to add a waitForTimeout.
90
+
91
+ \`\`\`
92
+ const saveButton = page.getByRole('button', { name: 'Save' });
93
+ await page.waitForTimeout(100); // Wait for the element to be visible -- only if necessary.
94
+ if (await saveButton.isVisible()) {
95
+ await saveButton.click();
96
+ }
97
+ \`\`\`
69
98
 
70
99
  # Repo context
71
100
  ${repoContext}
@@ -1,14 +1,35 @@
1
1
  import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
2
- export declare const CURRENT_CHAT_STATE_VERSION = "20250327.1";
2
+ import { CanonicalMessage, ChatState, ChatStateError } from "@empiricalrun/shared-types";
3
+ export declare const CHAT_STATE_VERSIONS_MIGRATIONS_MAP: Record<string, (state: any) => any>;
4
+ export declare const LATEST_CHAT_STATE_VERSION = "0.1";
3
5
  export declare const CHAT_STATE_PATH: string;
4
- export type ChatStateOnDisk<T> = {
5
- version: string;
6
- model: SupportedChatModels;
7
- messages: T[];
6
+ export declare function createChatState({ userPrompt, existingState, selectedModel, error, }: {
7
+ userPrompt: string | undefined;
8
+ existingState: ChatState | undefined;
9
+ selectedModel: SupportedChatModels;
10
+ error: ChatStateError | null;
11
+ }): ChatState;
12
+ export declare function createChatStateForMessages({ messages, selectedModel, askUserForInput, error, }: {
13
+ messages: any;
14
+ selectedModel: SupportedChatModels;
15
+ askUserForInput: boolean;
16
+ error: ChatStateError | null;
17
+ }): ChatState;
18
+ export declare function chatStateFromModel<T>({ chatModel, selectedModel, error, }: {
19
+ chatModel: IChatModel<T>;
20
+ selectedModel: SupportedChatModels;
21
+ error: ChatStateError | null;
22
+ }): ChatState;
23
+ export declare function loadChatState(): ChatState | undefined;
24
+ /**
25
+ * Migrates a chat state object from an old version to the latest version.
26
+ * Add migration logic for each version as needed.
27
+ */
28
+ export declare function migrateChatState(oldState: any): ChatState;
29
+ export declare function saveToDisk<T>(messages: Array<T>, selectedModel: SupportedChatModels, askUserForInput: boolean, error: ChatStateError | null): void;
30
+ export declare function getLatestDownloadBuildUrl(messages: CanonicalMessage[]): string | null;
31
+ export declare function fetchToolCallAvailability(toolRequestId: String, messages: CanonicalMessage[]): {
32
+ hasToolRequest: boolean;
33
+ hasToolResponse: boolean;
8
34
  };
9
- export declare function createChatState(userPrompt: string, existingState: ChatStateOnDisk<any> | undefined, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
10
- export declare function createChatStateForMessages<T>(messages: any, selectedModel: SupportedChatModels): ChatStateOnDisk<T>;
11
- export declare function chatStateFromModel<T>(chatModel: IChatModel<T>, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
12
- export declare function loadChatState<T>(): ChatStateOnDisk<T> | undefined;
13
- export declare function saveToDisk<T>(messages: Array<T>, selectedModel: SupportedChatModels): void;
14
35
  //# sourceMappingURL=state.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAIhC,eAAO,MAAM,0BAA0B,eAAe,CAAC;AAEvD,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,MAAM,MAAM,eAAe,CAAC,CAAC,IAAI;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,mBAAmB,CAAC;IAC3B,QAAQ,EAAE,CAAC,EAAE,CAAC;CACf,CAAC;AAEF,wBAAgB,eAAe,CAC7B,UAAU,EAAE,MAAM,EAClB,aAAa,EAAE,eAAe,CAAC,GAAG,CAAC,GAAG,SAAS,EAC/C,aAAa,EAAE,mBAAmB,4BAMnC;AAED,wBAAgB,0BAA0B,CAAC,CAAC,EAC1C,QAAQ,EAAE,GAAG,EACb,aAAa,EAAE,mBAAmB,GACjC,eAAe,CAAC,CAAC,CAAC,CAOpB;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAClC,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,EACxB,aAAa,EAAE,mBAAmB,4BAGnC;AAED,wBAAgB,aAAa,CAAC,CAAC,KAAK,eAAe,CAAC,CAAC,CAAC,GAAG,SAAS,CAajE;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,QAsBnC"}
1
+ {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,gBAAgB,EAChB,SAAS,EACT,cAAc,EACf,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,GAAG,CAAC;IACd,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,GAAG,SAAS,CASZ;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,EACpC,SAAS,EACT,aAAa,EACb,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAOA;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,GAAG,GAAG,SAAS,CAqBzD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,EACxB,KAAK,EAAE,cAAc,GAAG,IAAI,QAgB7B;AA2BD,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,yBAAyB,CACvC,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,gBAAgB,EAAE;;;EAe7B"}
@@ -3,64 +3,169 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.CHAT_STATE_PATH = exports.CURRENT_CHAT_STATE_VERSION = void 0;
6
+ exports.CHAT_STATE_PATH = exports.LATEST_CHAT_STATE_VERSION = exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP = void 0;
7
7
  exports.createChatState = createChatState;
8
8
  exports.createChatStateForMessages = createChatStateForMessages;
9
9
  exports.chatStateFromModel = chatStateFromModel;
10
10
  exports.loadChatState = loadChatState;
11
+ exports.migrateChatState = migrateChatState;
11
12
  exports.saveToDisk = saveToDisk;
13
+ exports.getLatestDownloadBuildUrl = getLatestDownloadBuildUrl;
14
+ exports.fetchToolCallAvailability = fetchToolCallAvailability;
12
15
  const chat_1 = require("@empiricalrun/llm/chat");
13
16
  const fs_1 = __importDefault(require("fs"));
14
17
  const path_1 = __importDefault(require("path"));
15
- exports.CURRENT_CHAT_STATE_VERSION = "20250327.1";
18
+ // Migration wrapper for v20250327.1 -> v0.1 chat state versions
19
+ // v20250327.1 was model-specific, but v0.1 is canonical
20
+ function migrateToV01(oldState) {
21
+ if (oldState.model &&
22
+ typeof oldState.model === "string" &&
23
+ Array.isArray(oldState.messages)) {
24
+ const provider = (0, chat_1.getProviderForModel)(oldState.model);
25
+ if (provider === "google") {
26
+ return {
27
+ ...oldState,
28
+ version: "0.1",
29
+ messages: oldState.messages.map(chat_1.geminiToCanonical),
30
+ };
31
+ }
32
+ else {
33
+ throw new Error(`Unsupported state for migration with model: ${oldState.model} and version: ${oldState.version}`);
34
+ }
35
+ }
36
+ // If not Gemini or not matching, return the old state
37
+ return oldState;
38
+ }
39
+ exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP = {
40
+ "20250327.1": migrateToV01,
41
+ "0.1": (state) => state,
42
+ };
43
+ exports.LATEST_CHAT_STATE_VERSION = "0.1";
16
44
  exports.CHAT_STATE_PATH = path_1.default.join(process.cwd(), ".empiricalrun", "last-chat.json");
17
- function createChatState(userPrompt, existingState, selectedModel) {
45
+ function createChatState({ userPrompt, existingState, selectedModel, error, }) {
18
46
  const messages = existingState?.messages || [];
19
47
  const chatModel = (0, chat_1.createChatModel)(messages, selectedModel);
20
- chatModel.pushUserMessage(userPrompt);
21
- return createChatStateForMessages(chatModel.messages, selectedModel);
48
+ if (userPrompt) {
49
+ chatModel.pushUserMessage(userPrompt);
50
+ }
51
+ return createChatStateForMessages({
52
+ messages: chatModel.messages,
53
+ selectedModel,
54
+ askUserForInput: chatModel.askUserForInput,
55
+ error,
56
+ });
22
57
  }
23
- function createChatStateForMessages(messages, selectedModel) {
58
+ function createChatStateForMessages({ messages, selectedModel, askUserForInput, error, }) {
24
59
  // TODO: Add better types for messages
25
60
  return {
26
- version: exports.CURRENT_CHAT_STATE_VERSION,
61
+ version: exports.LATEST_CHAT_STATE_VERSION,
27
62
  model: selectedModel,
28
63
  messages: messages,
64
+ askUserForInput: askUserForInput,
65
+ error: error,
29
66
  };
30
67
  }
31
- function chatStateFromModel(chatModel, selectedModel) {
32
- return createChatStateForMessages(chatModel.messages, selectedModel);
68
+ function chatStateFromModel({ chatModel, selectedModel, error, }) {
69
+ return createChatStateForMessages({
70
+ messages: chatModel.messages,
71
+ selectedModel,
72
+ askUserForInput: chatModel.askUserForInput,
73
+ error,
74
+ });
33
75
  }
34
76
  function loadChatState() {
35
77
  if (!fs_1.default.existsSync(exports.CHAT_STATE_PATH)) {
36
78
  return undefined;
37
79
  }
38
80
  const raw = fs_1.default.readFileSync(exports.CHAT_STATE_PATH, "utf8");
39
- const state = JSON.parse(raw);
40
- if (state.version !== exports.CURRENT_CHAT_STATE_VERSION) {
41
- throw new Error(`Unsupported chat state v${state.version}. Expected v${exports.CURRENT_CHAT_STATE_VERSION}.`);
81
+ let state = JSON.parse(raw);
82
+ // Always migrate to the latest version after loading
83
+ const migratedState = migrateChatState(state);
84
+ // Only save if migration actually changed the state
85
+ if (JSON.stringify(state) !== JSON.stringify(migratedState)) {
86
+ fs_1.default.writeFileSync(exports.CHAT_STATE_PATH, JSON.stringify(migratedState, null, 2));
42
87
  }
43
- return state;
88
+ return migratedState;
44
89
  }
45
- function saveToDisk(messages, selectedModel) {
46
- const statePath = exports.CHAT_STATE_PATH;
47
- let existingState = {
48
- version: exports.CURRENT_CHAT_STATE_VERSION,
49
- model: selectedModel,
50
- messages: [],
90
+ /**
91
+ * Migrates a chat state object from an old version to the latest version.
92
+ * Add migration logic for each version as needed.
93
+ */
94
+ function migrateChatState(oldState) {
95
+ if (!oldState || Object.keys(oldState).length === 0) {
96
+ return oldState;
97
+ }
98
+ if (!oldState.version) {
99
+ throw new Error("No version found in chat state");
100
+ }
101
+ if (!exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP[oldState.version]) {
102
+ throw new Error(`No migration function found for version: ${oldState.version}`);
103
+ }
104
+ if (oldState.version === exports.LATEST_CHAT_STATE_VERSION) {
105
+ return oldState;
106
+ }
107
+ const migrateFn = exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP[oldState.version];
108
+ const migrated = migrateFn(oldState);
109
+ return {
110
+ version: exports.LATEST_CHAT_STATE_VERSION,
111
+ ...migrated,
51
112
  };
113
+ }
114
+ function saveToDisk(messages, selectedModel, askUserForInput, error) {
115
+ const statePath = exports.CHAT_STATE_PATH;
52
116
  // Ensure directory exists before trying to read/write
53
117
  const dirname = path_1.default.dirname(statePath);
54
118
  if (!fs_1.default.existsSync(dirname)) {
55
119
  fs_1.default.mkdirSync(dirname, { recursive: true });
56
120
  }
57
- if (fs_1.default.existsSync(statePath)) {
58
- existingState = JSON.parse(fs_1.default.readFileSync(statePath, "utf8"));
59
- }
60
- const newState = {
61
- ...existingState,
62
- messages: messages,
63
- model: selectedModel,
64
- };
121
+ // Use the helper to build the new state
122
+ const newState = createChatStateForMessages({
123
+ messages,
124
+ selectedModel,
125
+ askUserForInput,
126
+ error,
127
+ });
65
128
  fs_1.default.writeFileSync(statePath, JSON.stringify(newState, null, 2));
66
129
  }
130
/**
 * Finds the most recent message that contains a tool call with the given name.
 *
 * Messages are ranked by their `timestamp`, newest first. The ranking is done
 * on a copy: Array.prototype.sort works in place, and sorting the caller's
 * array would silently reorder the chat history it belongs to.
 *
 * @param {Array<object>} messages - Messages exposing `timestamp` and `parts`.
 * @param {string} toolName - Name of the tool call to look for.
 * @returns {object | undefined} The newest matching message, or undefined when
 *   no message contains such a tool call.
 */
function findLatestToolCall(messages, toolName) {
    const newestFirst = [...messages].sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime());
    return newestFirst.find((m) => m.parts.some((p) => "toolCall" in p && p.toolCall?.name === toolName));
}
135
/**
 * Returns the first message holding a non-error tool result for `toolCallId`.
 *
 * A part qualifies when it carries a `toolResult`, its `toolCallId` equals the
 * requested id, and the result is not flagged with `isError`.
 *
 * @param {Array<object>} messages - Messages exposing a `parts` array.
 * @param {string} toolCallId - Id of the tool call whose result is wanted.
 * @returns {object | undefined} The first matching message, or undefined.
 */
function findSuccessfulToolResponse(messages, toolCallId) {
    const isSuccessfulResult = (part) => "toolResult" in part &&
        part.toolCallId === toolCallId &&
        !part.toolResult.isError;
    return messages.find((message) => message.parts.some(isSuccessfulResult));
}
143
/**
 * Returns the `buildUrl` passed to the most recent "downloadBuild" tool call,
 * provided that call has a successful (non-error) tool result.
 *
 * @param {Array<object>} messages - Chat messages to search.
 * @returns {string | null} The build URL, or null when there is no
 *   "downloadBuild" call, the latest one has no successful result, or its
 *   input carries no `buildUrl`.
 */
function getLatestDownloadBuildUrl(messages) {
    const toolCallMessage = findLatestToolCall(messages, "downloadBuild");
    if (!toolCallMessage)
        return null;
    const toolCallPart = toolCallMessage.parts.find((p) => "toolCall" in p && p.toolCall?.name === "downloadBuild");
    if (!toolCallPart)
        return null;
    // Only report the URL if the download was acknowledged without an error.
    const toolResponseMessage = findSuccessfulToolResponse(messages, toolCallPart.toolCall.id);
    if (!toolResponseMessage)
        return null;
    // Normalise a missing input / buildUrl to null so the result is always
    // `string | null`, never undefined and never a TypeError.
    return toolCallPart.toolCall.input?.buildUrl ?? null;
}
157
/**
 * Reports whether the given tool request id appears in the messages as a tool
 * call and/or as a tool result.
 *
 * @param {string} toolRequestId - Id to look for.
 * @param {Array<object>} messages - Messages exposing a `parts` array.
 * @returns {{ hasToolRequest: boolean, hasToolResponse: boolean }}
 *   `hasToolRequest` is true when some part's `toolCall.id` matches;
 *   `hasToolResponse` is true when some `toolResult` part's `toolCallId` matches.
 */
function fetchToolCallAvailability(toolRequestId, messages) {
    let hasToolRequest = false;
    let hasToolResponse = false;
    for (const message of messages) {
        for (const part of message.parts) {
            const isRequest = "toolCall" in part && part.toolCall?.id === toolRequestId;
            if (isRequest) {
                hasToolRequest = true;
            }
            const isResponse = "toolResult" in part && part.toolCallId === toolRequestId;
            if (isResponse) {
                hasToolResponse = true;
            }
        }
    }
    return { hasToolRequest, hasToolResponse };
}
@@ -1,9 +1,8 @@
1
- import { ChatStateOnDisk } from "./state";
2
- export type SupportedChatModels = "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro-preview-03-25" | "o4-mini-2025-04-16";
1
+ import { ChatState } from "@empiricalrun/shared-types";
3
2
  type LatestMessage = {
4
3
  role: string;
5
4
  textMessage: string;
6
5
  };
7
- export type ReporterFunction = (state: ChatStateOnDisk<any>, latestHumanReadableMessage: LatestMessage | undefined) => Promise<void>;
6
+ export type ReporterFunction = (state: ChatState, latestHumanReadableMessage: LatestMessage | undefined) => Promise<void>;
8
7
  export {};
9
8
  //# sourceMappingURL=types.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1C,MAAM,MAAM,mBAAmB,GAC3B,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,GAC9B,oBAAoB,CAAC;AAEzB,KAAK,aAAa,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAC7B,KAAK,EAAE,eAAe,CAAC,GAAG,CAAC,EAC3B,0BAA0B,EAAE,aAAa,GAAG,SAAS,KAClD,OAAO,CAAC,IAAI,CAAC,CAAC"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,4BAA4B,CAAC;AAEvD,KAAK,aAAa,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAC7B,KAAK,EAAE,SAAS,EAChB,0BAA0B,EAAE,aAAa,GAAG,SAAS,KAClD,OAAO,CAAC,IAAI,CAAC,CAAC"}
@@ -0,0 +1,14 @@
1
+ import { TraceClient } from "@empiricalrun/llm";
2
+ import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
3
+ import { ReporterFunction } from "./types";
4
+ export declare const log: (...args: any[]) => void;
5
+ export declare function getModelName(model: string): string;
6
+ export declare function handleAgentError({ context, error, chatModel, selectedModel, reporter, trace, }: {
7
+ context: string;
8
+ error: unknown;
9
+ chatModel: IChatModel<any>;
10
+ selectedModel: SupportedChatModels;
11
+ reporter: ReporterFunction;
12
+ trace?: TraceClient;
13
+ }): Promise<void>;
14
+ //# sourceMappingURL=utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAEL,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAKhC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,GAAG,EAAE,SAEjC,CAAC;AAcF,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,OAAO,EACP,KAAK,EACL,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAsBA"}
@@ -0,0 +1,50 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.log = void 0;
4
+ exports.getModelName = getModelName;
5
+ exports.handleAgentError = handleAgentError;
6
+ const chat_1 = require("@empiricalrun/llm/chat");
7
+ const picocolors_1 = require("picocolors");
8
+ const state_1 = require("./state");
9
+ const log = (...args) => {
10
+ console.log((0, picocolors_1.gray)(args.join(" ")));
11
+ };
12
+ exports.log = log;
13
+ function logError(context, error, trace) {
14
+ console.error((0, picocolors_1.gray)(`[Error in ${context}]:`), error instanceof Error ? error.stack || error.message : error);
15
+ trace?.update({
16
+ output: {
17
+ error: error instanceof Error ? error.message : String(error),
18
+ },
19
+ });
20
+ }
21
+ function getModelName(model) {
22
+ const provider = (0, chat_1.getProviderForModel)(model);
23
+ if (provider === "claude")
24
+ return "Claude";
25
+ if (provider === "google")
26
+ return "Gemini";
27
+ if (provider === "openai")
28
+ return "o4";
29
+ return "AI";
30
+ }
31
/**
 * Records an agent failure: reports a chat state carrying the error through
 * `reporter`, attaches the error to the trace (when one is given) and, when a
 * `context` label is supplied, logs it via logError.
 *
 * `error` can be any thrown value, not only an Error instance, so it is
 * normalised before use: a thrown string, null or undefined no longer produces
 * an undefined message or a TypeError while the failure is being handled.
 *
 * @param {object} args
 * @param {string} args.context - Label for where the error happened; logging is skipped when empty.
 * @param {unknown} args.error - The thrown value.
 * @param {object} args.chatModel - Chat model whose messages are reported.
 * @param {string} args.selectedModel - Model id stored in the reported state.
 * @param {Function} args.reporter - Async callback receiving the chat state and latest message.
 * @param {object} [args.trace] - Optional trace client to update with the error.
 * @returns {Promise<void>}
 */
async function handleAgentError({ context, error, chatModel, selectedModel, reporter, trace, }) {
    const errorObject = {
        // Keep using `.message` whenever the thrown value has one; otherwise
        // fall back to its string form.
        message: error?.message ?? String(error),
        stack: error?.stack || "Stack trace not available",
        timestamp: new Date().toISOString(),
    };
    await reporter((0, state_1.chatStateFromModel)({
        chatModel,
        selectedModel,
        error: errorObject,
    }), chatModel.getHumanReadableLatestMessage());
    trace?.update({
        output: {
            error: errorObject,
        },
    });
    if (context) {
        logError(context, error, trace);
    }
}
@@ -9,7 +9,7 @@ const utils_1 = require("../../browsing/utils");
9
9
  const element_annotation_1 = require("../element-annotation");
10
10
  const run_1 = require("../run");
11
11
  const fixtures_1 = require("./fixtures");
12
- (0, fixtures_1.test)("able to scroll and interact with elements", async ({ page, server }) => {
12
+ fixtures_1.test.skip("able to scroll and interact with elements", async ({ page, server, }) => {
13
13
  await page.goto(`${server.baseURL}/blog-page.html`);
14
14
  const response = await (0, run_1.createTestUsingMasterAgent)({
15
15
  task: `fill test@test.com into the email field and click the submit`,
@@ -21,7 +21,7 @@ const fixtures_1 = require("./fixtures");
21
21
  (0, fixtures_1.expect)(response.code).toContain("await page.getByPlaceholder('Enter your email').fill(\"test@test.com\")");
22
22
  (0, fixtures_1.expect)(response.code).toContain("await page.getByRole('button', { name: 'Subscribe' }).click()");
23
23
  });
24
- (0, fixtures_1.test)("scroll when element does not exist", async ({ page, server }) => {
24
+ fixtures_1.test.skip("scroll when element does not exist", async ({ page, server }) => {
25
25
  await page.goto(`${server.baseURL}/blog-page.html`);
26
26
  const response = await (0, run_1.createTestUsingMasterAgent)({
27
27
  task: `click search button`,
@@ -32,7 +32,7 @@ const fixtures_1 = require("./fixtures");
32
32
  (0, fixtures_1.expect)(response.importPaths.length).toBe(0);
33
33
  (0, fixtures_1.expect)(response.code.length).toBe(0);
34
34
  });
35
- (0, fixtures_1.test)("scroll and click inside div elements", async ({ page, server }) => {
35
+ fixtures_1.test.skip("scroll and click inside div elements", async ({ page, server }) => {
36
36
  await page.goto(`${server.baseURL}/dropdown-scrolls.html`);
37
37
  const response = await (0, run_1.createTestUsingMasterAgent)({
38
38
  task: `click on x-3 inside bmw dropdown, and then,
@@ -49,7 +49,7 @@ click on maverick inside ford dropdown`,
49
49
  (0, fixtures_1.expect)(lines.find((l) => l.match(/^await page.+Ford.+.click/))).toBeTruthy();
50
50
  (0, fixtures_1.expect)(lines.find((l) => l.match(/^await page.+Maverick.+.click/))).toBeTruthy();
51
51
  });
52
- (0, fixtures_1.test)("master agent can click icons accurately", async ({ page, server }) => {
52
+ fixtures_1.test.skip("master agent can click icons accurately", async ({ page, server, }) => {
53
53
  await page.goto(`${server.baseURL}/icons-navbar.html`);
54
54
  await (0, fixtures_1.expect)(page.getByText("select an icon")).toBeVisible();
55
55
  const response = await (0, run_1.createTestUsingMasterAgent)({
@@ -86,7 +86,7 @@ click on maverick inside ford dropdown`,
86
86
  (0, fixtures_1.expect)(response.code).toContain("page.locator");
87
87
  (0, fixtures_1.expect)(response.code).toContain("click()");
88
88
  });
89
- (0, fixtures_1.test)("annotate and enrich annotations correctly", async ({ page, server }) => {
89
+ fixtures_1.test.skip("annotate and enrich annotations correctly", async ({ page, server, }) => {
90
90
  await (0, utils_1.injectPwLocatorGenerator)(page);
91
91
  await page.goto(`${server.baseURL}/iframe-elements.html`);
92
92
  const { annotationKeys: keys } = await (0, element_annotation_1.getAnnotationKeys)({
@@ -105,7 +105,7 @@ click on maverick inside ford dropdown`,
105
105
  // 2 clickable divs: 1 in main frame, 1 in iframe
106
106
  (0, fixtures_1.expect)(keys.filter((k) => k.text.includes("Lorem Ipsum")).length).toBe(2);
107
107
  });
108
- (0, fixtures_1.test)("fill action with multiple pages", async ({ context }) => {
108
+ fixtures_1.test.skip("fill action with multiple pages", async ({ context }) => {
109
109
  const page1 = await context.newPage();
110
110
  const page2 = await context.newPage();
111
111
  const response = await (0, run_1.createTestUsingMasterAgent)({