@empiricalrun/test-gen 0.55.0 → 0.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.56.0
4
+
5
+ ### Minor Changes
6
+
7
+ - b073084: feat: new APIs and refactor to introduce tool execute service
8
+
9
+ ### Patch Changes
10
+
11
+ - 93d7a0b: feat: add tool call for downloading builds
12
+ - a58ac3f: feat: add dashboard agent integration and optimize tsx handling
13
+ - Updated dependencies [87af227]
14
+ - Updated dependencies [3831109]
15
+ - Updated dependencies [a58ac3f]
16
+ - Updated dependencies [f77e33d]
17
+ - @empiricalrun/llm@0.15.1
18
+ - @empiricalrun/test-run@0.8.1
19
+
3
20
  ## 0.55.0
4
21
 
5
22
  ### Minor Changes
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,iBA2BA;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD,wBAAsB,oBAAoB,CACxC,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,EAAE,CAAC,CAQnB;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAoBb,SAAS;CAKjB"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAwBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,iBA2BA;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CA0B/B;AAWD,wBAAsB,oBAAoB,CACxC,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,EAAE,CAAC,CAQnB;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAoBb,SAAS;CAKjB"}
@@ -8,7 +8,8 @@ const fs_extra_1 = __importDefault(require("fs-extra"));
8
8
  const minimatch_1 = require("minimatch");
9
9
  const path_1 = __importDefault(require("path"));
10
10
  const ts_morph_1 = require("ts-morph");
11
- const api_1 = __importDefault(require("tsx/cjs/api"));
11
+ // For TypeScript type safety
12
+ let tsxImport = null;
12
13
  const logger_1 = require("../../bin/logger");
13
14
  const context_1 = require("../../bin/utils/context");
14
15
  const fs_1 = require("../../bin/utils/fs");
@@ -295,8 +296,25 @@ exports.injectPwLocatorGenerator = injectPwLocatorGenerator;
295
296
  * @return {*} {Promise<PlaywrightTestConfig>}
296
297
  */
297
298
  async function readPlaywrightConfig(repoDir) {
299
+ if (typeof window !== "undefined") {
300
+ throw new Error("readPlaywrightConfig cannot be used in browser environments");
301
+ }
302
+ else {
303
+ // Only initialize on server side
304
+ // This will only execute on the server
305
+ await import("tsx/cjs/api")
306
+ .then((module) => {
307
+ tsxImport = module;
308
+ })
309
+ .catch((err) => {
310
+ console.error("Failed to import tsx:", err);
311
+ });
312
+ }
313
+ if (!tsxImport) {
314
+ throw new Error("tsx module not available");
315
+ }
298
316
  const [lastDir] = repoDir.split("/").reverse();
299
- const playwrightConfig = (await api_1.default.require("./playwright.config.ts", `${repoDir}/${lastDir}`)).default;
317
+ const playwrightConfig = (await tsxImport.require("./playwright.config.ts", `${repoDir}/${lastDir}`)).default;
300
318
  return playwrightConfig;
301
319
  }
302
320
  exports.readPlaywrightConfig = readPlaywrightConfig;
@@ -1,9 +1,11 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
2
  import { IChatModel } from "@empiricalrun/llm/chat";
3
+ import { ToolCallService } from "../../tool-call-service";
3
4
  import { ReporterFunction, SupportedChatModels } from "./types";
4
- export declare function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }: {
5
+ export declare function chatAgentLoop({ chatModel, selectedModel, reporter, trace, toolCallService, }: {
5
6
  chatModel: IChatModel<any>;
6
7
  selectedModel: SupportedChatModels;
8
+ toolCallService: ToolCallService;
7
9
  reporter: ReporterFunction;
8
10
  trace?: TraceClient;
9
11
  }): Promise<void>;
@@ -1 +1 @@
1
- {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EACL,UAAU,EAIX,MAAM,wBAAwB,CAAC;AAOhC,OAAO,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAgBhE,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAiDA"}
1
+ {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EACL,UAAU,EAIX,MAAM,wBAAwB,CAAC;AAGhC,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAG1D,OAAO,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAgBhE,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,EACL,eAAe,GAChB,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,eAAe,CAAC;IACjC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAoCA"}
@@ -3,8 +3,6 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.chatAgentLoop = void 0;
4
4
  const chat_1 = require("@empiricalrun/llm/chat");
5
5
  const picocolors_1 = require("picocolors");
6
- const tool_call_service_1 = require("../../tool-call-service");
7
- const str_replace_editor_1 = require("../../tools/str_replace_editor");
8
6
  const prompt_1 = require("./prompt");
9
7
  const state_1 = require("./state");
10
8
  function getModelName(model) {
@@ -20,31 +18,18 @@ const log = (...args) => {
20
18
  console.log((0, picocolors_1.gray)(args.join(" ")));
21
19
  };
22
20
  const isRemote = process.env.TOOL_EXECUTION_IS_REMOTE === "true" || false;
23
- async function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }) {
21
+ async function chatAgentLoop({ chatModel, selectedModel, reporter, trace, toolCallService, }) {
24
22
  const systemPrompt = await (0, prompt_1.buildSystemPrompt)();
25
23
  trace?.update({ input: { systemPrompt } });
26
- const toolCallService = new tool_call_service_1.ToolCallService();
27
- const { tools } = await toolCallService.getTools(selectedModel);
24
+ const { tools } = await toolCallService.getTools();
28
25
  while (!chatModel.askUserForInput) {
29
26
  const toolCalls = chatModel.getPendingToolCalls();
27
+ console.log("toolCalls", toolCalls);
30
28
  if (toolCalls.length) {
31
- const toolResults = [];
32
- for (const call of toolCalls) {
33
- const args = JSON.stringify(call.input);
34
- log(`Executing tool ${call.name} with args: ${args}`);
35
- let callResponse = await toolCallService.execute({
36
- tool: {
37
- name: call.name,
38
- input: call.input,
39
- },
40
- }, isRemote);
41
- if (callResponse.isError) {
42
- log(`Tool ${call.name} failed: ${callResponse.result}`);
43
- }
44
- else {
45
- log(`Tool ${call.name} completed`);
46
- }
47
- toolResults.push(callResponse);
29
+ const toolResults = await toolCallService.execute(toolCalls, isRemote);
30
+ if (isRemote) {
31
+ log(`Tool call remote execution in progress`);
32
+ break;
48
33
  }
49
34
  chatModel.pushToolResultsMessage(toolCalls, toolResults);
50
35
  }
@@ -62,6 +47,5 @@ async function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }) {
62
47
  const latest = chatModel.getHumanReadableLatestMessage();
63
48
  await reporter((0, state_1.chatStateFromModel)(chatModel, selectedModel), latest);
64
49
  }
65
- (0, str_replace_editor_1.cleanupBackupFiles)(process.cwd());
66
50
  }
67
51
  exports.chatAgentLoop = chatAgentLoop;
@@ -0,0 +1,8 @@
1
+ import { IChatModel } from "@empiricalrun/llm/chat";
2
+ import { chatAgentLoop } from "./agent-loop";
3
+ import { createChatModel } from "./model";
4
+ import { ChatStateOnDisk } from "./state";
5
+ import { ReporterFunction, SupportedChatModels } from "./types";
6
+ export { chatAgentLoop, createChatModel };
7
+ export type { ChatStateOnDisk, IChatModel, ReporterFunction, SupportedChatModels, };
8
+ //# sourceMappingURL=exports.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEpD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAC1C,OAAO,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAEhE,OAAO,EAAE,aAAa,EAAE,eAAe,EAAE,CAAC;AAE1C,YAAY,EACV,eAAe,EACf,UAAU,EACV,gBAAgB,EAChB,mBAAmB,GACpB,CAAC"}
@@ -0,0 +1,7 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createChatModel = exports.chatAgentLoop = void 0;
4
+ const agent_loop_1 = require("./agent-loop");
5
+ Object.defineProperty(exports, "chatAgentLoop", { enumerable: true, get: function () { return agent_loop_1.chatAgentLoop; } });
6
+ const model_1 = require("./model");
7
+ Object.defineProperty(exports, "createChatModel", { enumerable: true, get: function () { return model_1.createChatModel; } });
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAYA,OAAO,EAAoB,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAiBhE,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBAoFA;AAuBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBAyCA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAaA,OAAO,EAAoB,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAiBhE,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBAsFA;AAuBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA2CA"}
@@ -4,6 +4,7 @@ exports.runChatAgentForDashboard = exports.runChatAgentForCLI = void 0;
4
4
  const llm_1 = require("@empiricalrun/llm");
5
5
  const picocolors_1 = require("picocolors");
6
6
  const human_in_the_loop_1 = require("../../human-in-the-loop");
7
+ const tool_call_service_1 = require("../../tool-call-service");
7
8
  const git_1 = require("../../utils/git");
8
9
  const agent_loop_1 = require("./agent-loop");
9
10
  const model_1 = require("./model");
@@ -83,11 +84,13 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
83
84
  }
84
85
  else {
85
86
  // TODO: Should we pass a loader function? That would allow us to show a spinner
87
+ const toolCallService = new tool_call_service_1.ToolCallService(null, selectedModel);
86
88
  await (0, agent_loop_1.chatAgentLoop)({
87
89
  chatModel,
88
90
  selectedModel,
89
91
  reporter: reporterFunc,
90
92
  trace,
93
+ toolCallService,
91
94
  });
92
95
  }
93
96
  }
@@ -125,6 +128,7 @@ async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
125
128
  chatSessionId,
126
129
  },
127
130
  });
131
+ const toolCallService = new tool_call_service_1.ToolCallService(chatSessionId, selectedModel);
128
132
  await (0, git_1.checkoutBranch)(branchName);
129
133
  let chatModel = (0, model_1.createChatModel)(chatState.messages, selectedModel);
130
134
  let reporterFunc = async (chatState, latest) => {
@@ -147,6 +151,7 @@ async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
147
151
  selectedModel,
148
152
  reporter: reporterFunc,
149
153
  trace,
154
+ toolCallService,
150
155
  });
151
156
  await (0, git_1.commitLocalAndPushBranchToRemote)(branchName);
152
157
  }
package/dist/bin/index.js CHANGED
@@ -233,8 +233,10 @@ async function main() {
233
233
  generationId: testGenConfig.options?.metadata.generationId,
234
234
  projectRepoName: testGenConfig.options?.metadata.projectRepoName,
235
235
  });
236
- // Download the build if repo has a download script
237
- await (0, test_build_1.downloadBuild)(testGenConfig.build || {});
236
+ if (testGenConfig.build?.url) {
237
+ // Download the build if repo has a download script
238
+ await (0, test_build_1.downloadBuild)(testGenConfig.build.url);
239
+ }
238
240
  if (completedOptions.useChat) {
239
241
  await runChatAgent({
240
242
  chatSessionId: completedOptions.chatSessionId,
@@ -1,10 +1,3 @@
1
- import { Build } from "@empiricalrun/shared-types";
2
- /**
3
- * method to download the build from the URL provided in the build object
4
- * this is only used in cases like chrome extension as of yet.
5
- * @export
6
- * @param {Build} build
7
- * @return {*} {Promise<void>}
8
- */
9
- export declare function downloadBuild(build: Build): Promise<void>;
1
+ export declare function hasDownloadScript(): Promise<boolean>;
2
+ export declare function downloadBuild(buildUrl: string): Promise<void>;
10
3
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/test-build/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAUnD;;;;;;GAMG;AACH,wBAAsB,aAAa,CAAC,KAAK,EAAE,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAY/D"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/test-build/index.ts"],"names":[],"mappings":"AAeA,wBAAsB,iBAAiB,IAAI,OAAO,CAAC,OAAO,CAAC,CAG1D;AAED,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAUnE"}
@@ -3,26 +3,27 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.downloadBuild = void 0;
6
+ exports.downloadBuild = exports.hasDownloadScript = void 0;
7
7
  const fs_extra_1 = __importDefault(require("fs-extra"));
8
8
  const logger_1 = require("../bin/logger");
9
9
  const exec_1 = require("../utils/exec");
10
- /**
11
- * method to download the build from the URL provided in the build object
12
- * this is only used in cases like chrome extension as of yet.
13
- * @export
14
- * @param {Build} build
15
- * @return {*} {Promise<void>}
16
- */
17
- async function downloadBuild(build) {
18
- const logger = new logger_1.CustomLogger({ useReporter: false });
10
+ async function getPackageJSON() {
19
11
  const packageJSONPath = "package.json";
20
12
  const packageJsonStr = await fs_extra_1.default.readFile(packageJSONPath, "utf-8");
21
- const packageJSONData = JSON.parse(packageJsonStr);
22
- const buildDownloadScript = packageJSONData.scripts["download"];
23
- if (buildDownloadScript && build?.url) {
24
- logger.log(`Downloading build from ${build.url}`);
25
- await (0, exec_1.cmd)(`npm run download ${build.url}`.split(" "), {
13
+ return JSON.parse(packageJsonStr);
14
+ }
15
+ async function hasDownloadScript() {
16
+ const packageJSON = await getPackageJSON();
17
+ return !!packageJSON.scripts["download"];
18
+ }
19
+ exports.hasDownloadScript = hasDownloadScript;
20
+ async function downloadBuild(buildUrl) {
21
+ const logger = new logger_1.CustomLogger({ useReporter: false });
22
+ const packageJSON = await getPackageJSON();
23
+ const buildDownloadScript = packageJSON.scripts["download"];
24
+ if (buildDownloadScript && buildUrl) {
25
+ logger.log(`Downloading build from ${buildUrl}`);
26
+ await (0, exec_1.cmd)(`npm run download ${buildUrl}`.split(" "), {
26
27
  env: { ...Object(process.env) },
27
28
  });
28
29
  }
@@ -1,4 +1,4 @@
1
- import { Tool, ToolResult } from "@empiricalrun/llm/chat";
1
+ import { PendingToolCall, Tool, ToolResult } from "@empiricalrun/llm/chat";
2
2
  import { SupportedChatModels } from "../agent/chat/types";
3
3
  export type { SupportedChatModels };
4
4
  type ToolExecutors = {
@@ -7,15 +7,12 @@ type ToolExecutors = {
7
7
  export declare class ToolCallService {
8
8
  tools: Tool[];
9
9
  toolExecutors: ToolExecutors;
10
- constructor();
11
- getTools(selectedModel: SupportedChatModels): Promise<{
10
+ chatSessionId: number | null;
11
+ selectedModel: SupportedChatModels;
12
+ constructor(chatSessionId: number | null, selectedModel: SupportedChatModels);
13
+ getTools(): Promise<{
12
14
  tools: Tool[];
13
15
  }>;
14
- execute(payload: {
15
- tool: {
16
- name: string;
17
- input: any;
18
- };
19
- }, isRemote: boolean): Promise<ToolResult>;
16
+ execute(toolCalls: PendingToolCall[], isRemote: boolean): Promise<ToolResult[]>;
20
17
  }
21
18
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tool-call-service/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAE1D,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAa1D,YAAY,EAAE,mBAAmB,EAAE,CAAC;AAEpC,KAAK,aAAa,GAAG;IACnB,CAAC,GAAG,EAAE,MAAM,GAAG,CAAC,KAAK,EAAE,GAAG,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CACpD,CAAC;AAqBF,qBAAa,eAAe;IAC1B,KAAK,EAAE,IAAI,EAAE,CAAM;IACnB,aAAa,EAAE,aAAa,CAAM;;IAa5B,QAAQ,CAAC,aAAa,EAAE,mBAAmB;;;IAa3C,OAAO,CACX,OAAO,EAAE;QACP,IAAI,EAAE;YACJ,IAAI,EAAE,MAAM,CAAC;YACb,KAAK,EAAE,GAAG,CAAC;SACZ,CAAC;KACH,EACD,QAAQ,EAAE,OAAO,GAChB,OAAO,CAAC,UAAU,CAAC;CA8BvB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tool-call-service/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAE3E,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAa1D,YAAY,EAAE,mBAAmB,EAAE,CAAC;AAEpC,KAAK,aAAa,GAAG;IACnB,CAAC,GAAG,EAAE,MAAM,GAAG,CAAC,KAAK,EAAE,GAAG,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CACpD,CAAC;AAyBF,qBAAa,eAAe;IAC1B,KAAK,EAAE,IAAI,EAAE,CAAM;IACnB,aAAa,EAAE,aAAa,CAAM;IAClC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,aAAa,EAAE,mBAAmB,CAAC;gBAGjC,aAAa,EAAE,MAAM,GAAG,IAAI,EAC5B,aAAa,EAAE,mBAAmB;IAgB9B,QAAQ;;;IAaR,OAAO,CACX,SAAS,EAAE,eAAe,EAAE,EAC5B,QAAQ,EAAE,OAAO,GAChB,OAAO,CAAC,UAAU,EAAE,CAAC;CAqCzB"}
@@ -4,13 +4,14 @@ exports.ToolCallService = void 0;
4
4
  const client_sqs_1 = require("@aws-sdk/client-sqs");
5
5
  const commit_and_create_pr_1 = require("../tools/commit-and-create-pr");
6
6
  const diagnosis_fetcher_1 = require("../tools/diagnosis-fetcher");
7
+ const download_build_1 = require("../tools/download-build");
7
8
  const environment_crud_1 = require("../tools/environment-crud");
8
9
  const grep_1 = require("../tools/grep");
9
10
  const str_replace_editor_1 = require("../tools/str_replace_editor");
10
11
  const test_gen_browser_1 = require("../tools/test-gen-browser");
11
12
  const test_run_1 = require("../tools/test-run");
12
13
  const test_run_fetcher_1 = require("../tools/test-run-fetcher");
13
- async function sendToolRequestToRemoteQueue(toolName, input) {
14
+ async function sendToolRequestToRemoteQueue(payload) {
14
15
  const sqs = new client_sqs_1.SQSClient({
15
16
  region: process.env.AWS_REGION,
16
17
  credentials: {
@@ -18,18 +19,22 @@ async function sendToolRequestToRemoteQueue(toolName, input) {
18
19
  secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
19
20
  },
20
21
  });
21
- const queueUrl = "https://sqs.us-east-1.amazonaws.com/381492172454/toolRequests.fifo";
22
+ const queueUrl = process.env.TOOL_REQUEST_QUEUE_URL;
22
23
  await sqs.send(new client_sqs_1.SendMessageCommand({
23
24
  QueueUrl: queueUrl,
24
- MessageBody: JSON.stringify({ toolName, input }),
25
- MessageGroupId: toolName, // tool request id
26
- MessageDeduplicationId: toolName, // tool request id
25
+ MessageBody: JSON.stringify(payload),
26
+ MessageGroupId: payload.requestId,
27
+ MessageDeduplicationId: payload.requestId, // unique id for the tool request
27
28
  }));
28
29
  }
29
30
  class ToolCallService {
30
31
  tools = [];
31
32
  toolExecutors = {};
32
- constructor() {
33
+ chatSessionId;
34
+ selectedModel;
35
+ constructor(chatSessionId, selectedModel) {
36
+ this.chatSessionId = chatSessionId;
37
+ this.selectedModel = selectedModel;
33
38
  this.tools = [
34
39
  grep_1.grepTool,
35
40
  test_run_1.runTestTool,
@@ -38,49 +43,58 @@ class ToolCallService {
38
43
  test_gen_browser_1.generateTestWithBrowserAgent,
39
44
  commit_and_create_pr_1.commitAndPushChangesTool,
40
45
  environment_crud_1.getEnvironmentTool,
46
+ download_build_1.downloadBuildTool,
41
47
  ];
42
48
  }
43
- async getTools(selectedModel) {
44
- if (!selectedModel.startsWith("claude")) {
49
+ async getTools() {
50
+ if (!this.selectedModel.startsWith("claude")) {
45
51
  this.tools.push(...str_replace_editor_1.textEditorTools);
46
52
  }
47
53
  this.tools.forEach((tool) => {
48
54
  this.toolExecutors[tool.schema.name] = tool.execute;
49
55
  });
50
- if (selectedModel.startsWith("claude")) {
56
+ if (this.selectedModel.startsWith("claude")) {
51
57
  this.toolExecutors["str_replace_editor"] = str_replace_editor_1.strReplaceEditorExecutor;
52
58
  }
53
59
  return { tools: this.tools };
54
60
  }
55
- async execute(payload, isRemote) {
56
- const { tool } = payload;
57
- const toolExecutor = this.toolExecutors[tool.name];
58
- if (!toolExecutor) {
59
- return {
60
- isError: true,
61
- result: `Invalid function/tool call: invalid_tool_call not found`,
62
- };
61
+ async execute(toolCalls, isRemote) {
62
+ if (isRemote && this.chatSessionId) {
63
+ console.log("Executing tool remotely", toolCalls);
64
+ await sendToolRequestToRemoteQueue({
65
+ toolCalls,
66
+ requestId: crypto.randomUUID(),
67
+ chatSessionId: this.chatSessionId,
68
+ selectedModel: this.selectedModel,
69
+ });
70
+ return toolCalls.map(() => ({
71
+ isError: false,
72
+ result: `Tool request sent to remote queue to execute.`,
73
+ }));
63
74
  }
64
- try {
65
- if (isRemote) {
66
- console.log("Executing tool remotely", tool.name, tool.input);
67
- // push to sqs
68
- await sendToolRequestToRemoteQueue(tool.name, tool.input);
69
- // TODO: Need to stop the agent loop here
70
- return {
71
- isError: false,
72
- result: `Tool request sent to remote queue to execute ${tool.name}.`,
73
- };
75
+ else {
76
+ const toolResults = [];
77
+ for (const toolCall of toolCalls) {
78
+ const toolExecutor = this.toolExecutors[toolCall.name];
79
+ if (!toolExecutor) {
80
+ toolResults.push({
81
+ isError: true,
82
+ result: `Invalid function/tool call: invalid_tool_call not found`,
83
+ });
84
+ continue;
85
+ }
86
+ try {
87
+ const result = await toolExecutor(toolCall.input);
88
+ toolResults.push(result);
89
+ }
90
+ catch (error) {
91
+ toolResults.push({
92
+ isError: true,
93
+ result: error instanceof Error ? error.message : String(error),
94
+ });
95
+ }
74
96
  }
75
- else {
76
- return await toolExecutor(tool.input);
77
- }
78
- }
79
- catch (error) {
80
- return {
81
- isError: true,
82
- result: error instanceof Error ? error.message : String(error),
83
- };
97
+ return toolResults;
84
98
  }
85
99
  }
86
100
  }
@@ -0,0 +1,3 @@
1
+ import type { Tool } from "@empiricalrun/llm/chat";
2
+ export declare const downloadBuildTool: Tool;
3
+ //# sourceMappingURL=download-build.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"download-build.d.ts","sourceRoot":"","sources":["../../src/tools/download-build.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAKnD,eAAO,MAAM,iBAAiB,EAAE,IAkC/B,CAAC"}
@@ -0,0 +1,39 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.downloadBuildTool = void 0;
4
+ const zod_1 = require("zod");
5
+ const test_build_1 = require("../test-build");
6
+ exports.downloadBuildTool = {
7
+ schema: {
8
+ name: "downloadBuild",
9
+ description: `Download a build from a build URL. If you do not have
10
+ have a build URL, you can try getting the environment details with the getEnvironment tool.
11
+ Environment details will include the build URL.`,
12
+ parameters: zod_1.z.object({
13
+ buildUrl: zod_1.z.string().describe("The URL of the build to download"),
14
+ }),
15
+ },
16
+ execute: async (input) => {
17
+ if (!(await (0, test_build_1.hasDownloadScript)())) {
18
+ return {
19
+ isError: true,
20
+ result: `This repo does not have a download script in package.json.
21
+ You probably don't need to worry about this, since it means this repo does not have a build to download.`,
22
+ };
23
+ }
24
+ const { buildUrl } = input;
25
+ try {
26
+ await (0, test_build_1.downloadBuild)(buildUrl);
27
+ return {
28
+ isError: false,
29
+ result: "Build downloaded successfully",
30
+ };
31
+ }
32
+ catch (error) {
33
+ return {
34
+ isError: true,
35
+ result: `Failed to download build: ${error}`,
36
+ };
37
+ }
38
+ },
39
+ };
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/index.ts"],"names":[],"mappings":"AAAA,wBAAsB,oBAAoB,CAAC,CAAC,EAAE,EAC5C,IAAI,EACJ,MAAc,EACd,IAAI,GACL,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,GAAG,CAAC;CACZ,GAAG,OAAO,CAAC,CAAC,CAAC,CAwBb;AAED,wBAAsB,eAAe,CAAC,EACpC,MAAM,EACN,GAAG,EACH,IAAI,GACL,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,GAAG,CAAC;CACZ,oBAWA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/index.ts"],"names":[],"mappings":"AAAA,wBAAsB,oBAAoB,CAAC,CAAC,EAAE,EAC5C,IAAI,EACJ,MAAc,EACd,IAAI,GACL,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,GAAG,CAAC;CACZ,GAAG,OAAO,CAAC,CAAC,CAAC,CAuBb;AAED,wBAAsB,eAAe,CAAC,EACpC,MAAM,EACN,GAAG,EACH,IAAI,GACL,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,GAAG,CAAC;CACZ,oBAWA"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.55.0",
3
+ "version": "0.56.0",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -18,6 +18,10 @@
18
18
  "types": "./dist/agent/chat/state.d.ts",
19
19
  "default": "./dist/agent/chat/state.js"
20
20
  },
21
+ "./chat": {
22
+ "types": "./dist/agent/chat/exports.d.ts",
23
+ "default": "./dist/agent/chat/exports.js"
24
+ },
21
25
  "./utils": {
22
26
  "types": "./dist/utils/index.d.ts",
23
27
  "default": "./dist/utils/index.js"
@@ -61,9 +65,9 @@
61
65
  "tsx": "^4.16.2",
62
66
  "typescript": "^5.3.3",
63
67
  "zod": "^3.23.8",
64
- "@empiricalrun/llm": "^0.15.0",
68
+ "@empiricalrun/llm": "^0.15.1",
65
69
  "@empiricalrun/r2-uploader": "^0.3.8",
66
- "@empiricalrun/test-run": "^0.8.0"
70
+ "@empiricalrun/test-run": "^0.8.1"
67
71
  },
68
72
  "devDependencies": {
69
73
  "@playwright/test": "1.47.1",