@empiricalrun/test-gen 0.58.0 → 0.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/CHANGELOG.md +73 -0
  2. package/dist/agent/browsing/run.d.ts +9 -2
  3. package/dist/agent/browsing/run.d.ts.map +1 -1
  4. package/dist/agent/browsing/run.js +30 -30
  5. package/dist/agent/browsing/utils.d.ts +1 -14
  6. package/dist/agent/browsing/utils.d.ts.map +1 -1
  7. package/dist/agent/browsing/utils.js +1 -58
  8. package/dist/agent/chat/agent-loop.d.ts +2 -1
  9. package/dist/agent/chat/agent-loop.d.ts.map +1 -1
  10. package/dist/agent/chat/agent-loop.js +42 -34
  11. package/dist/agent/chat/exports.d.ts +5 -6
  12. package/dist/agent/chat/exports.d.ts.map +1 -1
  13. package/dist/agent/chat/exports.js +13 -42
  14. package/dist/agent/chat/index.d.ts +2 -1
  15. package/dist/agent/chat/index.d.ts.map +1 -1
  16. package/dist/agent/chat/index.js +23 -8
  17. package/dist/agent/chat/models.d.ts +6 -0
  18. package/dist/agent/chat/models.d.ts.map +1 -0
  19. package/dist/agent/chat/models.js +37 -0
  20. package/dist/agent/chat/prompt.d.ts.map +1 -1
  21. package/dist/agent/chat/prompt.js +37 -8
  22. package/dist/agent/chat/state.d.ts +31 -10
  23. package/dist/agent/chat/state.d.ts.map +1 -1
  24. package/dist/agent/chat/state.js +132 -27
  25. package/dist/agent/chat/types.d.ts +2 -3
  26. package/dist/agent/chat/types.d.ts.map +1 -1
  27. package/dist/agent/chat/utils.d.ts +14 -0
  28. package/dist/agent/chat/utils.d.ts.map +1 -0
  29. package/dist/agent/chat/utils.js +50 -0
  30. package/dist/agent/master/browser-tests/index.spec.js +6 -6
  31. package/dist/bin/index.js +12 -2
  32. package/dist/bin/utils/index.d.ts +1 -0
  33. package/dist/bin/utils/index.d.ts.map +1 -1
  34. package/dist/index.d.ts +1 -0
  35. package/dist/index.d.ts.map +1 -1
  36. package/dist/index.js +3 -0
  37. package/dist/test-build/index.js +1 -1
  38. package/dist/tool-call-service/index.d.ts +2 -1
  39. package/dist/tool-call-service/index.d.ts.map +1 -1
  40. package/dist/tool-call-service/index.js +51 -71
  41. package/dist/tool-call-service/utils.d.ts +10 -0
  42. package/dist/tool-call-service/utils.d.ts.map +1 -0
  43. package/dist/tool-call-service/utils.js +23 -0
  44. package/dist/tools/download-build.d.ts +9 -0
  45. package/dist/tools/download-build.d.ts.map +1 -1
  46. package/dist/tools/download-build.js +5 -4
  47. package/dist/tools/str_replace_editor.d.ts.map +1 -1
  48. package/dist/tools/str_replace_editor.js +24 -7
  49. package/dist/tools/test-gen-browser.d.ts.map +1 -1
  50. package/dist/tools/test-gen-browser.js +26 -19
  51. package/dist/tools/test-run.d.ts.map +1 -1
  52. package/dist/tools/test-run.js +8 -13
  53. package/dist/utils/checkpoint.d.ts.map +1 -1
  54. package/dist/utils/checkpoint.js +3 -1
  55. package/dist/utils/exec.d.ts +2 -2
  56. package/dist/utils/exec.d.ts.map +1 -1
  57. package/dist/utils/exec.js +5 -4
  58. package/package.json +5 -4
  59. package/tsconfig.tsbuildinfo +1 -1
package/dist/bin/index.js CHANGED
@@ -35,7 +35,7 @@ function setupProcessListeners(cleanup) {
35
35
  events.forEach((event) => process.removeListener(event, cleanup));
36
36
  };
37
37
  }
38
- async function runChatAgent({ modelInput, chatSessionId, useDiskForChatState, initialPromptPath, }) {
38
+ async function runChatAgent({ modelInput, chatSessionId, useDiskForChatState, initialPromptPath, withRetry, }) {
39
39
  const MODEL_MAPPING = {
40
40
  "claude-3-7": "claude-3-7-sonnet-20250219",
41
41
  "claude-3-5": "claude-3-5-sonnet-20241022",
@@ -71,6 +71,7 @@ async function runChatAgent({ modelInput, chatSessionId, useDiskForChatState, in
71
71
  selectedModel: specifiedModel || defaultModel,
72
72
  useDiskForChatState: useDiskForChatState || false,
73
73
  initialPromptContent,
74
+ withRetry,
74
75
  });
75
76
  }
76
77
  async function runAgentsWorkflow(testGenConfig, testGenToken) {
@@ -184,10 +185,17 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
184
185
  void (0, session_1.updateSessionStatus)(testGenConfig.options?.metadata.testSessionId, {
185
186
  status: "agent_live_session_started",
186
187
  });
188
+ const projectName = await (0, run_1.convertProjectsFilterToProject)({
189
+ pwProjectsFilter: testGenConfig.environment?.playwrightProjects,
190
+ repoDir: process.cwd(),
191
+ testFilePath: specPath,
192
+ });
187
193
  const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
194
+ testCaseName: testCase.name,
195
+ testCaseSuites: testCase.suites,
188
196
  testFilePath: specPath,
189
197
  filePathToUpdate,
190
- pwProjectsFilter: testGenConfig.environment?.playwrightProjects,
198
+ projectName,
191
199
  testGenToken,
192
200
  repoDir: process.cwd(),
193
201
  editFileWithGeneratedCode: true,
@@ -213,6 +221,7 @@ async function main() {
213
221
  .option("--use-disk-for-chat-state", "Save and load chat state from disk")
214
222
  .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-03-25)")
215
223
  .option("--initial-prompt <path>", "Path to an initial prompt file (e.g. prompt.md)")
224
+ .option("--with-retry", "Use the retry strategy")
216
225
  .parse(process.argv);
217
226
  const options = program.opts();
218
227
  const completedOptions = await (0, utils_2.validateAndCompleteCliOptions)(options);
@@ -243,6 +252,7 @@ async function main() {
243
252
  modelInput: completedOptions.chatModel,
244
253
  useDiskForChatState: completedOptions.useDiskForChatState,
245
254
  initialPromptPath: completedOptions.initialPrompt,
255
+ withRetry: completedOptions.withRetry,
246
256
  });
247
257
  return;
248
258
  }
@@ -9,6 +9,7 @@ export interface CliOptions {
9
9
  initialPrompt?: string;
10
10
  chatSessionId?: string;
11
11
  chatModel?: "claude-3-7" | "claude-3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022" | "gemini-2.5-pro" | "gemini-2.5-pro-preview-03-25" | "o4-mini" | "o4-mini-2025-04-16";
12
+ withRetry?: boolean;
12
13
  }
13
14
  export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;
14
15
  export declare function printBanner(): void;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,YAAY,GACZ,4BAA4B,GAC5B,4BAA4B,GAC5B,gBAAgB,GAChB,8BAA8B,GAC9B,SAAS,GACT,oBAAoB,CAAC;CAC1B;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EACN,YAAY,GACZ,YAAY,GACZ,4BAA4B,GAC5B,4BAA4B,GAC5B,gBAAgB,GAChB,8BAA8B,GAC9B,SAAS,GACT,oBAAoB,CAAC;IACzB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
package/dist/index.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import { FrameLocator, Page } from "playwright";
2
2
  import { ScopeVars } from "./types";
3
+ export { downloadBuild } from "./test-build";
3
4
  export declare function createTest(task: string, pageRef: Page | FrameLocator, scope?: ScopeVars): Promise<void>;
4
5
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQhD,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAoBpC,wBAAsB,UAAU,CAC9B,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,IAAI,GAAG,YAAY,EAC5B,KAAK,CAAC,EAAE,SAAS,iBA0ElB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQhD,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAoB7C,wBAAsB,UAAU,CAC9B,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,IAAI,GAAG,YAAY,EAC5B,KAAK,CAAC,EAAE,SAAS,iBA0ElB"}
package/dist/index.js CHANGED
@@ -3,6 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.downloadBuild = void 0;
6
7
  exports.createTest = createTest;
7
8
  const llm_1 = require("@empiricalrun/llm");
8
9
  const cua_1 = require("./agent/cua");
@@ -11,6 +12,8 @@ const scenarios_1 = require("./bin/utils/scenarios");
11
12
  const client_1 = __importDefault(require("./file/client"));
12
13
  const reporter_1 = require("./reporter");
13
14
  const session_1 = require("./session");
15
+ var test_build_1 = require("./test-build");
16
+ Object.defineProperty(exports, "downloadBuild", { enumerable: true, get: function () { return test_build_1.downloadBuild; } });
14
17
  const flushEvents = async () => {
15
18
  await (0, llm_1.flushAllTraces)();
16
19
  };
@@ -23,7 +23,7 @@ async function downloadBuild(buildUrl) {
23
23
  const buildDownloadScript = packageJSON.scripts["download"];
24
24
  if (buildDownloadScript && buildUrl) {
25
25
  logger.log(`Downloading build from ${buildUrl}`);
26
- await (0, exec_1.cmd)(`npm run download ${buildUrl}`.split(" "), {
26
+ await (0, exec_1.cmd)(`npm`, ["run", "download", buildUrl], {
27
27
  env: { ...Object(process.env) },
28
28
  });
29
29
  }
@@ -14,6 +14,7 @@ export declare class ToolCallService {
14
14
  getTools(): Promise<{
15
15
  tools: Tool[];
16
16
  }>;
17
- execute(toolCalls: PendingToolCall[], isRemote: boolean, trace?: TraceClient): Promise<ToolResult[]>;
17
+ sendToQueue(toolCalls: PendingToolCall[]): Promise<void>;
18
+ execute(toolCalls: PendingToolCall[], trace?: TraceClient): Promise<ToolResult[]>;
18
19
  }
19
20
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tool-call-service/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EACL,eAAe,EACf,mBAAmB,EACnB,IAAI,EACJ,UAAU,EACX,MAAM,wBAAwB,CAAC;AAgBhC,YAAY,EAAE,mBAAmB,EAAE,CAAC;AAEpC,KAAK,aAAa,GAAG;IACnB,CAAC,GAAG,EAAE,MAAM,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CACzE,CAAC;AA6BF,qBAAa,eAAe;IAC1B,KAAK,EAAE,IAAI,EAAE,CAAM;IACnB,aAAa,EAAE,aAAa,CAAM;IAClC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,aAAa,EAAE,mBAAmB,CAAC;IACnC,UAAU,EAAE,MAAM,CAAC;gBAEjB,aAAa,EAAE,MAAM,GAAG,IAAI,EAC5B,aAAa,EAAE,mBAAmB,EAClC,UAAU,EAAE,MAAM;IAiBd,QAAQ;;;IAaR,OAAO,CACX,SAAS,EAAE,eAAe,EAAE,EAC5B,QAAQ,EAAE,OAAO,EACjB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,UAAU,EAAE,CAAC;CAyDzB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tool-call-service/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAEL,eAAe,EACf,mBAAmB,EACnB,IAAI,EACJ,UAAU,EACX,MAAM,wBAAwB,CAAC;AAiBhC,YAAY,EAAE,mBAAmB,EAAE,CAAC;AAEpC,KAAK,aAAa,GAAG;IACnB,CAAC,GAAG,EAAE,MAAM,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CACzE,CAAC;AAEF,qBAAa,eAAe;IAC1B,KAAK,EAAE,IAAI,EAAE,CAAM;IACnB,aAAa,EAAE,aAAa,CAAM;IAClC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,aAAa,EAAE,mBAAmB,CAAC;IACnC,UAAU,EAAE,MAAM,CAAC;gBAEjB,aAAa,EAAE,MAAM,GAAG,IAAI,EAC5B,aAAa,EAAE,mBAAmB,EAClC,UAAU,EAAE,MAAM;IAiBd,QAAQ;;;IAaR,WAAW,CAAC,SAAS,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAcxD,OAAO,CACX,SAAS,EAAE,eAAe,EAAE,EAC5B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,UAAU,EAAE,CAAC;CA2CzB"}
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.ToolCallService = void 0;
4
- const client_sqs_1 = require("@aws-sdk/client-sqs");
4
+ const chat_1 = require("@empiricalrun/llm/chat");
5
5
  const commit_and_create_pr_1 = require("../tools/commit-and-create-pr");
6
6
  const diagnosis_fetcher_1 = require("../tools/diagnosis-fetcher");
7
7
  const download_build_1 = require("../tools/download-build");
@@ -12,25 +12,7 @@ const test_gen_browser_1 = require("../tools/test-gen-browser");
12
12
  const test_run_1 = require("../tools/test-run");
13
13
  const test_run_fetcher_1 = require("../tools/test-run-fetcher");
14
14
  const checkpoint_1 = require("../utils/checkpoint");
15
- async function sendToolRequestToRemoteQueue(payload) {
16
- const sqs = new client_sqs_1.SQSClient({
17
- region: process.env.AWS_REGION,
18
- credentials: {
19
- accessKeyId: process.env.AWS_ACCESS_KEY_ID,
20
- secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
21
- },
22
- });
23
- const queueUrl = process.env.TOOL_EXECUTION_SQS_URL;
24
- if (!queueUrl) {
25
- throw new Error("TOOL_EXECUTION_SQS_URL is required for remote execution.");
26
- }
27
- await sqs.send(new client_sqs_1.SendMessageCommand({
28
- QueueUrl: queueUrl,
29
- MessageBody: JSON.stringify(payload),
30
- MessageGroupId: payload.requestId,
31
- MessageDeduplicationId: payload.requestId, // unique id for the tool request
32
- }));
33
- }
15
+ const utils_1 = require("./utils");
34
16
  class ToolCallService {
35
17
  tools = [];
36
18
  toolExecutors = {};
@@ -53,70 +35,68 @@ class ToolCallService {
53
35
  ];
54
36
  }
55
37
  async getTools() {
56
- if (!this.selectedModel.startsWith("claude")) {
38
+ if ((0, chat_1.getProviderForModel)(this.selectedModel) !== "claude") {
57
39
  this.tools.push(...str_replace_editor_1.textEditorTools);
58
40
  }
59
41
  this.tools.forEach((tool) => {
60
42
  this.toolExecutors[tool.schema.name] = tool.execute;
61
43
  });
62
- if (this.selectedModel.startsWith("claude")) {
44
+ if ((0, chat_1.getProviderForModel)(this.selectedModel) === "claude") {
63
45
  this.toolExecutors["str_replace_editor"] = str_replace_editor_1.strReplaceEditorExecutor;
64
46
  }
65
47
  return { tools: this.tools };
66
48
  }
67
- async execute(toolCalls, isRemote, trace) {
68
- if (isRemote && this.chatSessionId) {
69
- await sendToolRequestToRemoteQueue({
70
- toolCalls,
71
- requestId: crypto.randomUUID(),
72
- chatSessionId: this.chatSessionId,
73
- selectedModel: this.selectedModel,
74
- branchName: this.branchName,
75
- });
76
- return toolCalls.map(() => ({
77
- isError: false,
78
- result: `Tool request sent to remote queue to execute.`,
79
- }));
49
+ async sendToQueue(toolCalls) {
50
+ const requestId = toolCalls[0]?.id;
51
+ if (!requestId) {
52
+ throw new Error("Could not find an id for the tool call.");
80
53
  }
81
- else {
82
- const executeSpan = trace?.span({
83
- name: "execute_tools",
84
- input: { toolCalls: toolCalls.map((tc) => ({ name: tc.name })) },
54
+ await (0, utils_1.sendToolRequestToRemoteQueue)({
55
+ toolCalls,
56
+ requestId,
57
+ chatSessionId: this.chatSessionId,
58
+ selectedModel: this.selectedModel,
59
+ branchName: this.branchName,
60
+ });
61
+ }
62
+ async execute(toolCalls, trace) {
63
+ const executeSpan = trace?.span({
64
+ name: "execute_tools",
65
+ input: { toolCalls: toolCalls.map((tc) => ({ name: tc.name })) },
66
+ });
67
+ const toolResults = [];
68
+ for (const toolCall of toolCalls) {
69
+ const span = executeSpan?.span({
70
+ name: `tool: ${toolCall.name}`,
71
+ input: toolCall.input,
85
72
  });
86
- const toolResults = [];
87
- for (const toolCall of toolCalls) {
88
- const span = executeSpan?.span({
89
- name: `tool: ${toolCall.name}`,
90
- input: toolCall.input,
91
- });
92
- const toolExecutor = this.toolExecutors[toolCall.name];
93
- if (!toolExecutor) {
94
- const errorResult = {
95
- isError: true,
96
- result: `Invalid function/tool call: invalid_tool_call not found`,
97
- };
98
- toolResults.push(errorResult);
99
- span?.end({ output: errorResult });
100
- continue;
101
- }
102
- try {
103
- const result = await toolExecutor(toolCall.input, trace);
104
- toolResults.push(result);
105
- span?.end({ output: result });
106
- }
107
- catch (error) {
108
- const errorResult = {
109
- isError: true,
110
- result: error instanceof Error ? error.message : String(error),
111
- };
112
- toolResults.push(errorResult);
113
- span?.end({ output: errorResult });
114
- }
73
+ const toolExecutor = this.toolExecutors[toolCall.name];
74
+ if (!toolExecutor) {
75
+ const errorResult = {
76
+ isError: true,
77
+ result: `Invalid function/tool call: ${toolCall.name} not found`,
78
+ };
79
+ toolResults.push(errorResult);
80
+ span?.end({ output: errorResult });
81
+ continue;
82
+ }
83
+ try {
84
+ const result = await toolExecutor(toolCall.input, trace);
85
+ toolResults.push(result);
86
+ span?.end({ output: result });
87
+ }
88
+ catch (error) {
89
+ const errorResult = {
90
+ isError: true,
91
+ result: error instanceof Error ? error.message : String(error),
92
+ };
93
+ toolResults.push(errorResult);
94
+ span?.end({ output: errorResult });
115
95
  }
116
- await (0, checkpoint_1.createCheckpoint)(toolCalls, this.branchName);
117
- executeSpan?.end({ output: { toolResults } });
118
- return toolResults;
119
96
  }
97
+ await (0, checkpoint_1.createCheckpoint)(toolCalls, this.branchName);
98
+ executeSpan?.end({ output: { toolResults } });
99
+ return toolResults;
120
100
  }
121
101
  }
122
102
  exports.ToolCallService = ToolCallService;
@@ -0,0 +1,10 @@
1
+ import { SupportedChatModels } from "@empiricalrun/llm/chat";
2
+ import { PendingToolCall } from "@empiricalrun/shared-types";
3
+ export declare function sendToolRequestToRemoteQueue(payload: {
4
+ toolCalls: PendingToolCall[];
5
+ requestId: string;
6
+ chatSessionId: number;
7
+ selectedModel: SupportedChatModels;
8
+ branchName: string;
9
+ }): Promise<void>;
10
+ //# sourceMappingURL=utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/tool-call-service/utils.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAE7D,wBAAsB,4BAA4B,CAAC,OAAO,EAAE;IAC1D,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,mBAAmB,CAAC;IACnC,UAAU,EAAE,MAAM,CAAC;CACpB,iBAoBA"}
@@ -0,0 +1,23 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.sendToolRequestToRemoteQueue = sendToolRequestToRemoteQueue;
4
+ const client_sqs_1 = require("@aws-sdk/client-sqs");
5
+ async function sendToolRequestToRemoteQueue(payload) {
6
+ const sqs = new client_sqs_1.SQSClient({
7
+ region: process.env.AWS_REGION,
8
+ credentials: {
9
+ accessKeyId: process.env.AWS_ACCESS_KEY_ID,
10
+ secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
11
+ },
12
+ });
13
+ const queueUrl = process.env.TOOL_EXECUTION_SQS_URL;
14
+ if (!queueUrl) {
15
+ throw new Error("TOOL_EXECUTION_SQS_URL is required for remote execution.");
16
+ }
17
+ await sqs.send(new client_sqs_1.SendMessageCommand({
18
+ QueueUrl: queueUrl,
19
+ MessageBody: JSON.stringify(payload),
20
+ MessageGroupId: payload.requestId,
21
+ MessageDeduplicationId: payload.requestId, // unique id for the tool request
22
+ }));
23
+ }
@@ -1,3 +1,12 @@
1
1
  import type { Tool } from "@empiricalrun/llm/chat";
2
+ import { z } from "zod";
3
+ export declare const downloadBuildToolSchema: z.ZodObject<{
4
+ buildUrl: z.ZodString;
5
+ }, "strip", z.ZodTypeAny, {
6
+ buildUrl: string;
7
+ }, {
8
+ buildUrl: string;
9
+ }>;
10
+ export type DownloadBuildToolInput = z.infer<typeof downloadBuildToolSchema>;
2
11
  export declare const downloadBuildTool: Tool;
3
12
  //# sourceMappingURL=download-build.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"download-build.d.ts","sourceRoot":"","sources":["../../src/tools/download-build.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAKnD,eAAO,MAAM,iBAAiB,EAAE,IAkC/B,CAAC"}
1
+ {"version":3,"file":"download-build.d.ts","sourceRoot":"","sources":["../../src/tools/download-build.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AACnD,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,eAAO,MAAM,uBAAuB;;;;;;EAElC,CAAC;AAEH,MAAM,MAAM,sBAAsB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,uBAAuB,CAAC,CAAC;AAE7E,eAAO,MAAM,iBAAiB,EAAE,IA8B/B,CAAC"}
@@ -1,17 +1,18 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.downloadBuildTool = void 0;
3
+ exports.downloadBuildTool = exports.downloadBuildToolSchema = void 0;
4
4
  const zod_1 = require("zod");
5
5
  const test_build_1 = require("../test-build");
6
+ exports.downloadBuildToolSchema = zod_1.z.object({
7
+ buildUrl: zod_1.z.string().describe("The URL of the build to download"),
8
+ });
6
9
  exports.downloadBuildTool = {
7
10
  schema: {
8
11
  name: "downloadBuild",
9
12
  description: `Download a build from a build URL. If you do not have
10
13
  have a build URL, you can try getting the environment details with the getEnvironment tool.
11
14
  Environment details will include the build URL.`,
12
- parameters: zod_1.z.object({
13
- buildUrl: zod_1.z.string().describe("The URL of the build to download"),
14
- }),
15
+ parameters: exports.downloadBuildToolSchema,
15
16
  },
16
17
  execute: async (input) => {
17
18
  if (!(await (0, test_build_1.hasDownloadScript)())) {
@@ -1 +1 @@
1
- {"version":3,"file":"str_replace_editor.d.ts","sourceRoot":"","sources":["../../src/tools/str_replace_editor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AA2B1D,UAAU,eAAe;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAqED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAwC1D;AAMD;;;GAGG;AACH,wBAAsB,wBAAwB,CAC5C,KAAK,EAAE,eAAe,GACrB,OAAO,CAAC,UAAU,CAAC,CA0KrB;AA+FD,eAAO,MAAM,eAAe,EAAE,IAAI,EAKjC,CAAC"}
1
+ {"version":3,"file":"str_replace_editor.d.ts","sourceRoot":"","sources":["../../src/tools/str_replace_editor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AA2B1D,UAAU,eAAe;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAqED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAwC1D;AAMD;;;GAGG;AACH,wBAAsB,wBAAwB,CAC5C,KAAK,EAAE,eAAe,GACrB,OAAO,CAAC,UAAU,CAAC,CA6LrB;AAiGD,eAAO,MAAM,eAAe,EAAE,IAAI,EAKjC,CAAC"}
@@ -148,7 +148,6 @@ async function strReplaceEditorExecutor(input) {
148
148
  let typeCheckErrors;
149
149
  switch (input.command) {
150
150
  case "view":
151
- // TODO: This assumes repoDir is process.cwd()
152
151
  if (!fs_1.default.existsSync(filePath)) {
153
152
  return {
154
153
  result: "Error: File not found",
@@ -163,6 +162,17 @@ async function strReplaceEditorExecutor(input) {
163
162
  isError: false,
164
163
  };
165
164
  }
165
+ else {
166
+ // Check if file is binary, which is not supported
167
+ const { isBinary } = await import("istextorbinary");
168
+ const binary = isBinary(filePath);
169
+ if (binary) {
170
+ return {
171
+ result: "Error: File is binary, which is not supported",
172
+ isError: true,
173
+ };
174
+ }
175
+ }
166
176
  // Handle file view
167
177
  content = fs_1.default.readFileSync(filePath, "utf8");
168
178
  lines = content.split("\n");
@@ -254,7 +264,13 @@ async function strReplaceEditorExecutor(input) {
254
264
  createBackup(filePath);
255
265
  content = fs_1.default.readFileSync(filePath, "utf8");
256
266
  lines = content.split("\n");
257
- lines.splice(input.insert_line, 0, input.new_str);
267
+ if (input.insert_line < 1) {
268
+ throw new Error("insert_line must be greater than or equal to 1 (line numbers are 1-indexed).");
269
+ }
270
+ if (input.insert_line > lines.length + 1) {
271
+ throw new Error(`The file at ${filePath} has only ${lines.length} lines, so insert_line must be less than or equal to ${lines.length + 1}. At the maximum value of ${lines.length + 1}, you can insert at the end of the file.`);
272
+ }
273
+ lines.splice(input.insert_line - 1, 0, input.new_str);
258
274
  fs_1.default.writeFileSync(filePath, lines.join("\n"));
259
275
  typeCheckErrors = (0, web_1.validateTypescript)(filePath);
260
276
  if (typeCheckErrors.length > 0) {
@@ -297,8 +313,9 @@ const fileViewTool = {
297
313
  name: "fileViewTool",
298
314
  description: `A tool to view the content of a file or directory. If the path points
299
315
  to a directory, the tool will return a list of files in the directory, separated by line breaks.
300
- If the path points to a file, the tool will return the content of the file. File contents
301
- are returned with line numbers, starting from 1.
316
+ If the path points to a file, the tool will return the content of the file.
317
+
318
+ File contents are returned with line numbers, starting from 1.
302
319
 
303
320
  1: line 1
304
321
  2: line 2
@@ -355,14 +372,14 @@ in the file. If old_str is not unique, the tool will return an error.`,
355
372
  const stringInsertTool = {
356
373
  schema: {
357
374
  name: "stringInsertTool",
358
- description: "A tool to insert a string at a specific line in a file.",
375
+ description: "A tool to insert a string at a specific line in a file. Line numbers are 1-indexed, just like the file view tool.",
359
376
  parameters: zod_1.z.object({
360
377
  path: zod_1.z.string().describe("The path to the file."),
361
378
  insert_line: zod_1.z
362
379
  .number()
363
380
  .int()
364
- .min(0)
365
- .describe("The line number after which to insert the text (0 for beginning of file)."),
381
+ .min(1)
382
+ .describe("The line number after which to insert the text (1 for beginning of file)."),
366
383
  new_str: zod_1.z.string().describe("The string to insert."),
367
384
  }),
368
385
  },
@@ -1 +1 @@
1
- {"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAyFnD,eAAO,MAAM,4BAA4B,EAAE,IA8E1C,CAAC"}
1
+ {"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAsFnD,eAAO,MAAM,4BAA4B,EAAE,IAqF1C,CAAC"}
@@ -4,6 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.generateTestWithBrowserAgent = void 0;
7
+ const test_run_1 = require("@empiricalrun/test-run");
7
8
  const promises_1 = __importDefault(require("fs/promises"));
8
9
  const zod_1 = require("zod");
9
10
  const run_1 = require("../agent/browsing/run");
@@ -87,13 +88,23 @@ exports.generateTestWithBrowserAgent = {
87
88
  parameters: BrowserAgentSchema,
88
89
  },
89
90
  execute: async (input, trace) => {
91
+ const repoDir = process.cwd();
90
92
  const { testName, testSuites, fileName, changeToMake, project } = input;
91
- const playwrightConfig = await (0, utils_1.readPlaywrightConfig)(process.cwd());
92
- const validProjectNames = await (0, utils_1.getValidProjectNames)(playwrightConfig);
93
- if (!validProjectNames.includes(project)) {
93
+ try {
94
+ const { projects } = await (0, test_run_1.getAllPlaywrightProjects)(repoDir);
95
+ // TODO: Check that file path is valid for this project
96
+ if (!projects.includes(project)) {
97
+ return {
98
+ isError: true,
99
+ result: `Invalid project name: ${project}. Valid project names are: ${projects.join(", ")}`,
100
+ };
101
+ }
102
+ }
103
+ catch (error) {
104
+ console.error("Error reading playwright config:", error);
94
105
  return {
95
106
  isError: true,
96
- result: `Invalid project name: ${project}. Valid project names are: ${validProjectNames.join(", ")}`,
107
+ result: `Error reading playwright config: ${error}`,
97
108
  };
98
109
  }
99
110
  if (!(0, web_1.hasTestBlock)({ testName, testSuites, filePath: fileName })) {
@@ -104,34 +115,30 @@ exports.generateTestWithBrowserAgent = {
104
115
  }
105
116
  const fileBackup = await promises_1.default.readFile(fileName, "utf-8");
106
117
  try {
107
- await (0, utils_1.replaceTodoWithCreateTest)({
108
- testCaseName: testName,
109
- testCaseSuites: testSuites,
110
- testFilePath: fileName,
111
- });
118
+ await (0, utils_1.replaceTodoWithCreateTest)(fileName);
112
119
  }
113
120
  catch (error) {
121
+ // Undo the TODO -> createTest and test.only changes
122
+ await promises_1.default.writeFile(fileName, fileBackup, "utf-8");
114
123
  return {
115
124
  isError: true,
116
125
  result: `Error running tool: ${error}`,
117
126
  };
118
127
  }
128
+ const testGenToken = (0, scenarios_1.buildTokenFromOptions)({ name: testName, file: fileName, prompt: changeToMake }, { useComputerUseAgent: true });
129
+ console.log("[generateTestWithBrowserAgent] Validations passed, starting agent");
119
130
  const toolResult = await (0, run_1.generateTestsUsingMasterAgent)({
131
+ testCaseName: testName,
132
+ testCaseSuites: testSuites,
120
133
  testFilePath: fileName,
121
134
  filePathToUpdate: fileName,
122
- pwProjectsFilter: [project],
135
+ projectName: project,
123
136
  traceId: trace?.id,
124
- testGenToken: (0, scenarios_1.buildTokenFromOptions)({
125
- name: testName,
126
- file: fileName,
127
- prompt: changeToMake,
128
- }, {
129
- useComputerUseAgent: true,
130
- }),
131
- repoDir: process.cwd(),
137
+ testGenToken,
138
+ repoDir,
132
139
  editFileWithGeneratedCode: false,
133
140
  });
134
- // Undo the TODO -> createTest change
141
+ // Undo the TODO -> createTest changes
135
142
  await promises_1.default.writeFile(fileName, fileBackup, "utf-8");
136
143
  const { isError, error, actionsSummary } = toolResult;
137
144
  if (!isError) {
@@ -1 +1 @@
1
- {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAgDnD,eAAO,MAAM,WAAW,EAAE,IAuDzB,CAAC"}
1
+ {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAgDnD,eAAO,MAAM,WAAW,EAAE,IAiDzB,CAAC"}
@@ -39,32 +39,27 @@ exports.runTestTool = {
39
39
  },
40
40
  execute: async (input) => {
41
41
  let reportUrl = undefined;
42
- let projectName = undefined;
43
- let testRunId = undefined;
42
+ let envOverrides = undefined;
44
43
  if (hasCloudflareCredentials()) {
45
- projectName = "test-gen-chat-agent";
46
- testRunId = Date.now().toString();
44
+ const projectName = "test-gen-chat-agent";
45
+ const testRunId = Date.now().toString();
47
46
  reportUrl = buildReportUrl(projectName, testRunId);
47
+ envOverrides = {
48
+ PROJECT_NAME: projectName,
49
+ TEST_RUN_GITHUB_ACTION_ID: testRunId,
50
+ };
48
51
  }
49
52
  else {
50
53
  console.warn("R2 credentials not found: report artifacts will not be uploaded");
51
54
  }
52
55
  const { testName, suites, fileName, project } = input;
53
56
  try {
54
- // {"project":"chromium","suites":[],"fileName":"tests/quizizz-for-work/group.spec.ts","testName":"Create a group"}
55
- // This runs all tests - TODO: Debug this, should only run the testName
56
57
  const result = await (0, test_run_1.runSingleTest)({
57
58
  testName,
58
59
  suites,
59
60
  fileName,
60
61
  projects: [project],
61
- // Adding these to enforce report artifacts are uploaded
62
- envOverrides: projectName && testRunId
63
- ? {
64
- PROJECT_NAME: projectName,
65
- TEST_RUN_GITHUB_ACTION_ID: testRunId,
66
- }
67
- : undefined,
62
+ envOverrides,
68
63
  });
69
64
  return {
70
65
  result: buildResult({
@@ -1 +1 @@
1
- {"version":3,"file":"checkpoint.d.ts","sourceRoot":"","sources":["../../src/utils/checkpoint.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAIzD,wBAAsB,gBAAgB,CACpC,SAAS,EAAE,eAAe,EAAE,EAC5B,UAAU,EAAE,MAAM,iBAcnB"}
1
+ {"version":3,"file":"checkpoint.d.ts","sourceRoot":"","sources":["../../src/utils/checkpoint.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAIzD,wBAAsB,gBAAgB,CACpC,SAAS,EAAE,eAAe,EAAE,EAC5B,UAAU,EAAE,MAAM,iBAqBnB"}
@@ -5,7 +5,9 @@ const git_1 = require("./git");
5
5
  async function createCheckpoint(toolCalls, branchName) {
6
6
  const filesChanged = await (0, git_1.getFilesChanged)();
7
7
  const toolsWithUpdatedFiles = toolCalls
8
- .filter((tc) => tc.input.path && filesChanged.includes(tc.input.path))
8
+ .filter((tc) => "path" in tc.input &&
9
+ tc.input.path &&
10
+ filesChanged.includes(tc.input.path))
9
11
  .map((toolCall) => ({
10
12
  name: toolCall.name,
11
13
  path: toolCall.input.path,
@@ -1,12 +1,12 @@
1
1
  export declare class ProcessManager {
2
2
  private childProcess;
3
- execute(command: string[], options: {
3
+ execute(command: string, args: string[], options: {
4
4
  env?: Record<string, string>;
5
5
  }): Promise<number>;
6
6
  terminate(): void;
7
7
  isRunning(): boolean;
8
8
  }
9
- export declare function cmd(command: string[], options: {
9
+ export declare function cmd(command: string, args: string[], options: {
10
10
  env?: Record<string, string>;
11
11
  }): Promise<number>;
12
12
  //# sourceMappingURL=exec.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"exec.d.ts","sourceRoot":"","sources":["../../src/utils/exec.ts"],"names":[],"mappings":"AAmBA,qBAAa,cAAc;IACzB,OAAO,CAAC,YAAY,CAA6B;IAE3C,OAAO,CACX,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,EAAE;QAAE,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;KAAE,GACxC,OAAO,CAAC,MAAM,CAAC;IAkDlB,SAAS,IAAI,IAAI;IASjB,SAAS,IAAI,OAAO;CAGrB;AAED,wBAAsB,GAAG,CACvB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,GACxC,OAAO,CAAC,MAAM,CAAC,CAGjB"}
1
+ {"version":3,"file":"exec.d.ts","sourceRoot":"","sources":["../../src/utils/exec.ts"],"names":[],"mappings":"AAmBA,qBAAa,cAAc;IACzB,OAAO,CAAC,YAAY,CAA6B;IAE3C,OAAO,CACX,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,EAAE;QAAE,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;KAAE,GACxC,OAAO,CAAC,MAAM,CAAC;IAmDlB,SAAS,IAAI,IAAI;IASjB,SAAS,IAAI,OAAO;CAGrB;AAED,wBAAsB,GAAG,CACvB,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,GACxC,OAAO,CAAC,MAAM,CAAC,CAGjB"}