@empiricalrun/test-gen 0.51.0 → 0.51.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.51.2
4
+
5
+ ### Patch Changes
6
+
7
+ - ecd3c30: fix: show loader when claude is working
8
+ - ad6c96f: fix: prune test-run tool call response for input tokens
9
+
10
+ ## 0.51.1
11
+
12
+ ### Patch Changes
13
+
14
+ - d04190f: fix: remove process.on listeners to avoid leaks
15
+ - 75c7921: fix: show chat usage summary on ctrl+C
16
+ - Updated dependencies [d04190f]
17
+ - @empiricalrun/test-run@0.7.6
18
+ - @empiricalrun/llm@0.11.1
19
+
3
20
  ## 0.51.0
4
21
 
5
22
  ### Minor Changes
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,GACR,EAAE,iBAAiB;;;GA8EnB"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,GACR,EAAE,iBAAiB;;;GAgFnB"}
@@ -32,13 +32,14 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
32
32
  const testsDirectory = `${repoDir}/tests`;
33
33
  const isTestRunTriggeredForTeardown = teardownFileRegex.test(testFilePath);
34
34
  const teardowns = new utils_1.TeardownManager(testsDirectory);
35
- if (!isTestRunTriggeredForTeardown) {
36
- await teardowns.skipAll();
37
- }
35
+ let removeListeners;
38
36
  const command = `npx playwright test ${testFilePath} --retries 0 --project ${project} --timeout 0 --headed`;
39
37
  let isError = false;
40
38
  let error = "";
41
39
  try {
40
+ if (!isTestRunTriggeredForTeardown) {
41
+ removeListeners = await teardowns.skipAll();
42
+ }
42
43
  await (0, exec_1.cmd)(command.split(" "), {
43
44
  env: {
44
45
  APP_PORT: port.toString(),
@@ -55,7 +56,9 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
55
56
  console.error(error);
56
57
  isError = true;
57
58
  }
58
- if (!isTestRunTriggeredForTeardown) {
59
+ finally {
60
+ // Remove process listeners before unskipping files
61
+ removeListeners?.();
59
62
  teardowns.unskipAll();
60
63
  }
61
64
  // clean up the file if there is any error
@@ -39,7 +39,7 @@ export declare class TeardownManager {
39
39
  private teardownFiles;
40
40
  private getAllTeardownFiles;
41
41
  private skipTeardownFile;
42
- skipAll(): Promise<void>;
42
+ skipAll(): Promise<() => void>;
43
43
  unskipAll(): void;
44
44
  }
45
45
  //# sourceMappingURL=utils.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,iBAyBA;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD,wBAAsB,oBAAoB,CACxC,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,EAAE,CAAC,CAQnB;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,iBAyBA;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD,wBAAsB,oBAAoB,CACxC,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,EAAE,CAAC,CAQnB;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAoBb,SAAS;CAKjB"}
@@ -400,18 +400,14 @@ class TeardownManager {
400
400
  async skipAll() {
401
401
  this.teardownFiles = await this.getAllTeardownFiles();
402
402
  await Promise.all(this.teardownFiles.map(async ({ filePath }) => await this.skipTeardownFile(filePath)));
403
- process.on("beforeExit", () => {
404
- this.unskipAll();
405
- });
406
- process.on("exit", () => {
407
- this.unskipAll();
408
- });
409
- process.on("SIGINT", () => {
410
- this.unskipAll();
411
- });
412
- process.on("SIGTERM", () => {
413
- this.unskipAll();
414
- });
403
+ const setupProcessListeners = (cleanup) => {
404
+ const events = ["beforeExit", "exit", "SIGINT", "SIGTERM"];
405
+ events.forEach((event) => process.on(event, cleanup));
406
+ return () => {
407
+ events.forEach((event) => process.removeListener(event, cleanup));
408
+ };
409
+ };
410
+ return setupProcessListeners(this.unskipAll.bind(this));
415
411
  }
416
412
  unskipAll() {
417
413
  this.teardownFiles.forEach(({ filePath, content }) => {
@@ -1 +1 @@
1
- {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AA8EA,wBAAsB,SAAS,CAAC,EAC9B,SAAwC,EACxC,mBAAmB,GACpB,EAAE;IACD,SAAS,CAAC,EAAE,4BAA4B,GAAG,4BAA4B,CAAC;IACxE,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B;;;;GAoFA"}
1
+ {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AA8EA,wBAAsB,SAAS,CAAC,EAC9B,SAAwC,EACxC,mBAAmB,GACpB,EAAE;IACD,SAAS,CAAC,EAAE,4BAA4B,GAAG,4BAA4B,CAAC;IACxE,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B;;;;GA0GA"}
@@ -72,8 +72,15 @@ const toolExecutors = {
72
72
  str_replace_editor: claude_1.strReplaceEditorTool,
73
73
  };
74
74
  async function chatAgent({ chatModel = "claude-3-7-sonnet-20250219", useDiskForChatState, }) {
75
+ const ora = (await import("ora")).default;
75
76
  let userPrompt = undefined;
76
77
  let chatState = useDiskForChatState ? claude_1.ChatState.load() : new claude_1.ChatState(false);
78
+ const handleSigInt = () => {
79
+ console.log("\nExiting. Usage summary:", chatState.getUsageSummary());
80
+ process.exit(0);
81
+ };
82
+ process.once("SIGINT", handleSigInt);
83
+ process.once("SIGTERM", handleSigInt);
77
84
  if (chatState.askUserForInput) {
78
85
  // Show last message to the user for context when we loaded from disk
79
86
  const messages = chatState.messages;
@@ -89,9 +96,19 @@ async function chatAgent({ chatModel = "claude-3-7-sonnet-20250219", useDiskForC
89
96
  while (!userPrompt?.toLowerCase().includes("stop")) {
90
97
  chatState.saveToDisk();
91
98
  if (chatState.askUserForInput) {
92
- userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
93
- message: "User:",
94
- });
99
+ try {
100
+ userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
101
+ message: "User:",
102
+ });
103
+ }
104
+ catch (e) {
105
+ // https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
106
+ if (e instanceof Error && e.name === "ExitPromptError") {
107
+ console.log("Exiting. Usage summary:", chatState.getUsageSummary());
108
+ process.exit(0);
109
+ }
110
+ throw e;
111
+ }
95
112
  chatState.pushMessage({
96
113
  role: "user",
97
114
  content: [
@@ -105,12 +122,13 @@ async function chatAgent({ chatModel = "claude-3-7-sonnet-20250219", useDiskForC
105
122
  }
106
123
  const toolUse = chatState.getPendingToolCall();
107
124
  if (toolUse) {
108
- console.log("Executing tool:", toolUse.name, "with args:", toolUse.input);
125
+ const spinner = ora(`Executing tool ${toolUse.name} with args: ${JSON.stringify(toolUse.input)}`).start();
109
126
  const toolExecutor = toolExecutors[toolUse.name];
110
127
  if (!toolExecutor) {
111
128
  throw new Error(`Tool ${toolUse.name} not found`);
112
129
  }
113
130
  const toolResult = await toolExecutor(toolUse.input);
131
+ spinner.succeed(`Tool ${toolUse.name} completed`);
114
132
  chatState.pushMessage({
115
133
  role: "user",
116
134
  content: [
@@ -124,6 +142,7 @@ async function chatAgent({ chatModel = "claude-3-7-sonnet-20250219", useDiskForC
124
142
  });
125
143
  continue;
126
144
  }
145
+ const spinner = ora("Claude is working...").start();
127
146
  const response = await (0, claude_1.createClaudeMessage)({
128
147
  systemPrompt,
129
148
  messages: chatState.getMessagesForCreateCompletion(),
@@ -131,6 +150,7 @@ async function chatAgent({ chatModel = "claude-3-7-sonnet-20250219", useDiskForC
131
150
  model: chatModel,
132
151
  withStrReplaceEditor: true,
133
152
  });
153
+ spinner.stop();
134
154
  if (!response) {
135
155
  throw new Error("No response from LLM");
136
156
  }
package/dist/bin/index.js CHANGED
@@ -28,10 +28,13 @@ dotenv_1.default.config({
28
28
  const flushEvents = async () => {
29
29
  await (0, llm_1.flushAllTraces)();
30
30
  };
31
- process.on("beforeExit", async () => await flushEvents());
32
- process.on("exit", async () => await flushEvents());
33
- process.on("SIGINT", async () => await flushEvents());
34
- process.on("SIGTERM", async () => await flushEvents());
31
+ function setupProcessListeners(cleanup) {
32
+ const events = ["beforeExit", "exit", "SIGINT", "SIGTERM"];
33
+ events.forEach((event) => process.on(event, cleanup));
34
+ return () => {
35
+ events.forEach((event) => process.removeListener(event, cleanup));
36
+ };
37
+ }
35
38
  async function runChatAgent(modelInput, useDiskForChatState) {
36
39
  const MODEL_MAPPING = {
37
40
  "claude-3-7": "claude-3-7-sonnet-20250219",
@@ -172,6 +175,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
172
175
  return agent;
173
176
  }
174
177
  (async function main() {
178
+ const removeListeners = setupProcessListeners(flushEvents);
175
179
  console.log(`Running test-gen v${require("../../package.json").version} from ${__dirname}`);
176
180
  const program = new commander_1.Command();
177
181
  program
@@ -228,7 +232,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
228
232
  testName: testGenConfig.testCase.name,
229
233
  });
230
234
  }
231
- // TODO: move these reporters to a better lifecycle
235
+ removeListeners();
232
236
  await (0, llm_1.flushAllTraces)();
233
237
  await (0, logger_1.waitForLogsToFlush)();
234
238
  await (0, session_1.endSession)();
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAYpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAiC3E"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAepC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAyC3E"}
package/dist/index.js CHANGED
@@ -14,42 +14,54 @@ const pw_test_1 = require("./utils/pw-test");
14
14
  const flushEvents = async () => {
15
15
  await (0, llm_1.flushAllTraces)();
16
16
  };
17
- process.on("beforeExit", async () => await flushEvents());
18
- process.on("exit", async () => await flushEvents());
19
- process.on("SIGINT", async () => await flushEvents());
20
- process.on("SIGTERM", async () => await flushEvents());
17
+ function setupProcessListeners(cleanup) {
18
+ const events = ["beforeExit", "exit", "SIGINT", "SIGTERM"];
19
+ events.forEach((event) => process.on(event, cleanup));
20
+ return () => {
21
+ events.forEach((event) => process.removeListener(event, cleanup));
22
+ };
23
+ }
21
24
  async function createTest(task, page, scope) {
22
- const testConfigArg = process.env.TEST_GEN_TOKEN;
23
- const testGenConfig = (0, scenarios_1.loadTestConfigs)(testConfigArg);
24
- (0, reporter_1.setReporterConfig)({
25
- projectRepoName: testGenConfig.options?.metadata.projectRepoName,
26
- testSessionId: testGenConfig.options?.metadata.testSessionId,
27
- generationId: testGenConfig.options?.metadata.generationId,
28
- });
29
- (0, session_1.setSessionDetails)({
30
- sessionId: testGenConfig.options?.metadata.testSessionId,
31
- generationId: testGenConfig.options?.metadata.generationId,
32
- testCaseId: testGenConfig.testCase.id,
33
- projectRepoName: testGenConfig.options?.metadata.projectRepoName,
34
- });
35
- const fileService = new client_1.default();
36
- const { testCase, specPath } = testGenConfig;
37
- const { code, importPaths } = await (0, run_1.createTestUsingMasterAgent)({
38
- testCase,
39
- specPath,
40
- page,
41
- task,
42
- options: {
43
- ...testGenConfig.options,
44
- },
45
- scopeVars: scope,
46
- });
47
- await fileService.updateTest({
48
- task,
49
- generatedCode: code,
50
- importPaths,
51
- });
52
- // skip the rest of the test once generation is over
53
- await (0, pw_test_1.skipTest)();
25
+ const removeListeners = setupProcessListeners(flushEvents);
26
+ try {
27
+ const testConfigArg = process.env.TEST_GEN_TOKEN;
28
+ const testGenConfig = (0, scenarios_1.loadTestConfigs)(testConfigArg);
29
+ (0, reporter_1.setReporterConfig)({
30
+ projectRepoName: testGenConfig.options?.metadata.projectRepoName,
31
+ testSessionId: testGenConfig.options?.metadata.testSessionId,
32
+ generationId: testGenConfig.options?.metadata.generationId,
33
+ });
34
+ (0, session_1.setSessionDetails)({
35
+ sessionId: testGenConfig.options?.metadata.testSessionId,
36
+ generationId: testGenConfig.options?.metadata.generationId,
37
+ testCaseId: testGenConfig.testCase.id,
38
+ projectRepoName: testGenConfig.options?.metadata.projectRepoName,
39
+ });
40
+ const fileService = new client_1.default();
41
+ const { testCase, specPath } = testGenConfig;
42
+ const { code, importPaths } = await (0, run_1.createTestUsingMasterAgent)({
43
+ testCase,
44
+ specPath,
45
+ page,
46
+ task,
47
+ options: {
48
+ ...testGenConfig.options,
49
+ },
50
+ scopeVars: scope,
51
+ });
52
+ await fileService.updateTest({
53
+ task,
54
+ generatedCode: code,
55
+ importPaths,
56
+ });
57
+ // skip the rest of the test once generation is over
58
+ await (0, pw_test_1.skipTest)();
59
+ }
60
+ finally {
61
+ // Ensure listeners are removed even if an error occurs
62
+ removeListeners();
63
+ // Flush events one final time, now that the listeners have been removed
64
+ await flushEvents();
65
+ }
54
66
  }
55
67
  exports.createTest = createTest;
@@ -0,0 +1,4 @@
1
+ import type { Tool } from "../types";
2
+ export declare function extractPathAfterSourceRepo(fullPath: string): string;
3
+ export declare const testRunTool: Tool;
4
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/test-run-fetcher/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,UAAU,CAAC;AAWrC,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAOnE;AAED,eAAO,MAAM,WAAW,EAAE,IAwHzB,CAAC"}
@@ -0,0 +1,127 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.testRunTool = exports.extractPathAfterSourceRepo = void 0;
4
+ const zod_1 = require("zod");
5
+ const TestRunSchema = zod_1.z.object({
6
+ testRunUrl: zod_1.z
7
+ .string()
8
+ .describe("The full URL of the test run (e.g. https://dash.empirical.run/sortment-tests/test-runs/20269 or with query params like ?status=failed)"),
9
+ });
10
+ function extractPathAfterSourceRepo(fullPath) {
11
+ const parts = fullPath.split("source-repo");
12
+ if (parts.length <= 1) {
13
+ return fullPath;
14
+ }
15
+ else {
16
+ return parts[1].replace(/^\/|\/$/g, "");
17
+ }
18
+ }
19
+ exports.extractPathAfterSourceRepo = extractPathAfterSourceRepo;
20
+ exports.testRunTool = {
21
+ schema: {
22
+ name: "fetchTestRunDetails",
23
+ description: "Fetch details about a test run using its URL",
24
+ parameters: TestRunSchema,
25
+ },
26
+ execute: async (input) => {
27
+ const { testRunUrl } = input;
28
+ // Remove query parameters if they exist
29
+ const urlWithoutParams = testRunUrl.split("?")[0] || testRunUrl;
30
+ // Extract the run ID and repo name from the URL
31
+ const urlParts = urlWithoutParams.split("/");
32
+ const runId = urlParts.pop(); // Last part is the run ID
33
+ const repoName = urlParts[urlParts.length - 2]; // Second to last part is the repo name
34
+ if (!runId || !repoName) {
35
+ throw new Error("Invalid test run URL - could not extract run ID or repo name");
36
+ }
37
+ // Make the API call to fetch test run details
38
+ const response = await fetch(`https://dash.empirical.run/api/test-runs/${runId}?repo_name=${repoName}`, {
39
+ method: "GET",
40
+ headers: {
41
+ Authorization: "weQPMWKT",
42
+ },
43
+ });
44
+ if (!response.ok) {
45
+ return {
46
+ result: `Failed to fetch test run details: ${response.statusText}`,
47
+ isError: true,
48
+ };
49
+ }
50
+ const data = (await response.json());
51
+ // To efficiently use input_tokens, we
52
+ // 1. Truncate stack trace to last 300 characters
53
+ // 2. Remove request/response headers from network metadata
54
+ // 3. Focus on failed tests only
55
+ if (data.data?.test_run?.flattenedSummaryDetails) {
56
+ data.data.test_run.flattenedSummaryDetails =
57
+ data.data.test_run.flattenedSummaryDetails.map((detail) => {
58
+ // Truncate stack trace to last 300 characters
59
+ if (detail.failed_run_metadata?.stack) {
60
+ const stack = detail.failed_run_metadata.stack;
61
+ // TODO: change to last 10 lines
62
+ detail.failed_run_metadata.stack =
63
+ stack.length > 300 ? `...${stack.slice(-300)}` : stack;
64
+ }
65
+ // Remove headers from network metadata
66
+ if (detail.network_metadata?.failed_calls) {
67
+ detail.network_metadata.failed_calls =
68
+ detail.network_metadata.failed_calls.map((call) => ({
69
+ ...call,
70
+ request: { ...call.request, headers: [] },
71
+ response: { ...call.response, headers: [] },
72
+ }));
73
+ }
74
+ if (detail.network_metadata?.failed_calls_within_time_range) {
75
+ detail.network_metadata.failed_calls_within_time_range =
76
+ detail.network_metadata.failed_calls_within_time_range.map((call) => ({
77
+ ...call,
78
+ request: { ...call.request, headers: [] },
79
+ response: { ...call.response, headers: [] },
80
+ }));
81
+ }
82
+ return detail;
83
+ });
84
+ }
85
+ const failedTests = data.data?.test_run?.flattenedSummaryDetails.filter((detail) => detail.status === "failed");
86
+ const failedTestInMarkdown = failedTests
87
+ ?.map((detail) => {
88
+ const { nesting } = detail.failed_run_metadata;
89
+ return {
90
+ testName: nesting.slice(1).join(" > "),
91
+ fileName: extractPathAfterSourceRepo(detail.failed_run_metadata.location.file),
92
+ detail,
93
+ };
94
+ })
95
+ .map((test) => {
96
+ return `
97
+ ### Test name: ${test.testName}
98
+ File: ${test.fileName}
99
+
100
+ Failure data below
101
+
102
+ \`\`\`json
103
+ ${JSON.stringify(test.detail, null, 2)}
104
+ \`\`\`
105
+ `;
106
+ })
107
+ .join("\n\n");
108
+ const markdownResponse = `
109
+ # Test run details
110
+
111
+ ## Run info
112
+ - Run ID: ${runId}
113
+ - Repository: ${repoName}
114
+ - Started at: ${data.data?.test_run?.testRun.run_started_at}
115
+ - Ended at: ${data.data?.test_run?.testRun.run_ended_at}
116
+ - Duration: ${data.data?.test_run?.testRun.duration} seconds
117
+ - Environment name: ${data.data?.test_run?.testRun.environment_name}
118
+
119
+ ## Failed tests
120
+ ${failedTestInMarkdown}
121
+ `;
122
+ return {
123
+ result: markdownResponse,
124
+ isError: false,
125
+ };
126
+ },
127
+ };
@@ -0,0 +1,120 @@
1
+ type Project = {
2
+ id: number;
3
+ org_id: number;
4
+ name: string;
5
+ repo_name: string;
6
+ };
7
+ type TestRun = {
8
+ id: number;
9
+ run_id: number;
10
+ project_id: number;
11
+ total_count: number;
12
+ unique_count: number;
13
+ success_count: number;
14
+ failed_count: number;
15
+ skipped_count: number;
16
+ flaky_count: number;
17
+ run_ended_at: string;
18
+ run_started_at: string;
19
+ created_at: string;
20
+ duration: number;
21
+ test_run_branch: string;
22
+ summary_url: string;
23
+ trigger_type: string;
24
+ state: string;
25
+ build_id: number;
26
+ environment_id: number;
27
+ metadata: Record<string, any>;
28
+ test_run_head_sha: string;
29
+ };
30
+ type TestRunEnriched = TestRun & {
31
+ environment_name: string | null;
32
+ environment_slug: string | null;
33
+ commit: string | null;
34
+ commit_url: string | null;
35
+ build_branch: string | null;
36
+ build_url: string | null;
37
+ };
38
+ type FailedRunMetadata = {
39
+ stack: string;
40
+ trace: string;
41
+ videos: string[];
42
+ nesting: string[];
43
+ location: {
44
+ file: string;
45
+ line: number;
46
+ column: number;
47
+ };
48
+ start_time: string;
49
+ pw_test_id: string;
50
+ varying_retries: boolean;
51
+ };
52
+ type CapturedNetworkFailure = {
53
+ endpoint: string;
54
+ status: number;
55
+ timestamp: string;
56
+ distanceFromFailureInMilliseconds: number;
57
+ method: string;
58
+ request: {
59
+ headers: {
60
+ [any: string]: string;
61
+ }[];
62
+ };
63
+ response: {
64
+ headers: {
65
+ [any: string]: string;
66
+ }[];
67
+ };
68
+ };
69
+ type TestCaseSummaryDetail = {
70
+ id: number;
71
+ test_case_id: number;
72
+ test_run_id: number;
73
+ status: string;
74
+ test_project: string;
75
+ failure_type: string;
76
+ failure_type_description: string;
77
+ last_commit_before_run_started_at: string;
78
+ merged_summary?: {
79
+ content: string;
80
+ } | null;
81
+ successful_run_metadata: {
82
+ videos: string[];
83
+ test_id?: string;
84
+ test_run_id?: string;
85
+ } | null;
86
+ failed_run_metadata: FailedRunMetadata;
87
+ visual_diff_summary: {
88
+ failedStep: string;
89
+ reason: string;
90
+ summary: string;
91
+ videos_picked_for_comparison: {
92
+ failure: string;
93
+ success: string;
94
+ };
95
+ } | null;
96
+ error_stack_summary?: {
97
+ content: string;
98
+ } | null;
99
+ network_metadata: {
100
+ failed_calls: CapturedNetworkFailure[];
101
+ failed_calls_within_time_range: CapturedNetworkFailure[];
102
+ } | null;
103
+ created_at: string;
104
+ slug: string;
105
+ failing_line: string;
106
+ };
107
+ export type GetTestRunApiResponse = {
108
+ data: {
109
+ test_run: {
110
+ project: Project;
111
+ testRun: TestRunEnriched;
112
+ flattenedSummaryDetails: TestCaseSummaryDetail[];
113
+ };
114
+ } | null;
115
+ error?: {
116
+ message: string;
117
+ };
118
+ };
119
+ export {};
120
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/tools/test-run-fetcher/types.ts"],"names":[],"mappings":"AACA,KAAK,OAAO,GAAG;IACb,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,KAAK,OAAO,GAAG;IACb,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,EAAE,MAAM,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC9B,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAAC;AAEF,KAAK,eAAe,GAAG,OAAO,GAAG;IAC/B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B,CAAC;AAEF,KAAK,iBAAiB,GAAG;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,EAAE;QACR,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;IACF,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,OAAO,CAAC;CAC1B,CAAC;AAEF,KAAK,sBAAsB,GAAG;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,iCAAiC,EAAE,MAAM,CAAC;IAC1C,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE;QACP,OAAO,EAAE;YAAE,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAAA;SAAE,EAAE,CAAC;KACtC,CAAC;IACF,QAAQ,EAAE;QACR,OAAO,EAAE;YAAE,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAAA;SAAE,EAAE,CAAC;KACtC,CAAC;CACH,CAAC;AAEF,KAAK,qBAAqB,GAAG;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,wBAAwB,EAAE,MA
AM,CAAC;IACjC,iCAAiC,EAAE,MAAM,CAAC;IAC1C,cAAc,CAAC,EAAE;QACf,OAAO,EAAE,MAAM,CAAC;KACjB,GAAG,IAAI,CAAC;IACT,uBAAuB,EAAE;QACvB,MAAM,EAAE,MAAM,EAAE,CAAC;QACjB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,GAAG,IAAI,CAAC;IACT,mBAAmB,EAAE,iBAAiB,CAAC;IACvC,mBAAmB,EAAE;QACnB,UAAU,EAAE,MAAM,CAAC;QACnB,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,CAAC;QAChB,4BAA4B,EAAE;YAC5B,OAAO,EAAE,MAAM,CAAC;YAChB,OAAO,EAAE,MAAM,CAAC;SACjB,CAAC;KACH,GAAG,IAAI,CAAC;IACT,mBAAmB,CAAC,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;KACjB,GAAG,IAAI,CAAC;IACT,gBAAgB,EAAE;QAChB,YAAY,EAAE,sBAAsB,EAAE,CAAC;QACvC,8BAA8B,EAAE,sBAAsB,EAAE,CAAC;KAC1D,GAAG,IAAI,CAAC;IACT,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;CACtB,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE;QACJ,QAAQ,EAAE;YACR,OAAO,EAAE,OAAO,CAAC;YACjB,OAAO,EAAE,eAAe,CAAC;YACzB,uBAAuB,EAAE,qBAAqB,EAAE,CAAC;SAClD,CAAC;KACH,GAAG,IAAI,CAAC;IACT,KAAK,CAAC,EAAE;QACN,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CACH,CAAC"}
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.51.0",
3
+ "version": "0.51.2",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -64,6 +64,7 @@
64
64
  "minimatch": "^10.0.1",
65
65
  "nanoid": "^5.0.7",
66
66
  "openai": "4.87.3",
67
+ "ora": "^8.1.0",
67
68
  "picocolors": "^1.0.1",
68
69
  "prettier": "^3.2.5",
69
70
  "remove-markdown": "^0.5.5",
@@ -73,10 +74,10 @@
73
74
  "tsx": "^4.16.2",
74
75
  "typescript": "^5.3.3",
75
76
  "zod": "^3.23.8",
76
- "@empiricalrun/llm": "^0.11.0",
77
+ "@empiricalrun/llm": "^0.11.1",
77
78
  "@empiricalrun/r2-uploader": "^0.3.8",
78
79
  "@empiricalrun/reporter": "^0.23.2",
79
- "@empiricalrun/test-run": "^0.7.5"
80
+ "@empiricalrun/test-run": "^0.7.6"
80
81
  },
81
82
  "devDependencies": {
82
83
  "@playwright/test": "1.47.1",
@@ -1,3 +0,0 @@
1
- import type { Tool } from "./types";
2
- export declare const testRunTool: Tool;
3
- //# sourceMappingURL=test-run-fetcher.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"test-run-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/test-run-fetcher.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAUpC,eAAO,MAAM,WAAW,EAAE,IA0DzB,CAAC"}
@@ -1,59 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.testRunTool = void 0;
4
- const zod_1 = require("zod");
5
- const TestRunSchema = zod_1.z.object({
6
- testRunUrl: zod_1.z
7
- .string()
8
- .describe("The full URL of the test run (e.g. https://dash.empirical.run/sortment-tests/test-runs/20269 or with query params like ?status=failed)"),
9
- });
10
- exports.testRunTool = {
11
- schema: {
12
- name: "fetchTestRunDetails",
13
- description: "Fetch details about a test run using its URL",
14
- parameters: TestRunSchema,
15
- },
16
- execute: async (input) => {
17
- const { testRunUrl } = input;
18
- // Remove query parameters if they exist
19
- const urlWithoutParams = testRunUrl.split("?")[0] || testRunUrl;
20
- // Extract the run ID and repo name from the URL
21
- const urlParts = urlWithoutParams.split("/");
22
- const runId = urlParts.pop(); // Last part is the run ID
23
- const repoName = urlParts[urlParts.length - 2]; // Second to last part is the repo name
24
- if (!runId || !repoName) {
25
- throw new Error("Invalid test run URL - could not extract run ID or repo name");
26
- }
27
- // Make the API call to fetch test run details
28
- const response = await fetch(`https://dash.empirical.run/api/test-runs/${runId}?repo_name=${repoName}`, {
29
- method: "GET",
30
- headers: {
31
- Authorization: "weQPMWKT",
32
- },
33
- });
34
- if (!response.ok) {
35
- return {
36
- result: `Failed to fetch test run details: ${response.statusText}`,
37
- isError: true,
38
- };
39
- }
40
- const data = await response.json();
41
- // Format the response as markdown
42
- const markdownResponse = `
43
- # Test Run Details
44
-
45
- ## Run Information
46
- - **Run ID**: ${runId}
47
- - **Repository**: ${repoName}
48
-
49
- ## Test Run Data
50
- \`\`\`json
51
- ${JSON.stringify(data, null, 2)}
52
- \`\`\`
53
- `;
54
- return {
55
- result: markdownResponse,
56
- isError: false,
57
- };
58
- },
59
- };