@empiricalrun/test-gen 0.50.4 → 0.51.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,29 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.51.1
4
+
5
+ ### Patch Changes
6
+
7
+ - d04190f: fix: remove process.on listeners to avoid leaks
8
+ - 75c7921: fix: show chat usage summary on ctrl+C
9
+ - Updated dependencies [d04190f]
10
+ - @empiricalrun/test-run@0.7.6
11
+ - @empiricalrun/llm@0.11.1
12
+
13
+ ## 0.51.0
14
+
15
+ ### Minor Changes
16
+
17
+ - ac754ae: feat: enable disk persistence for chat state
18
+ - 561aa8e: feat: add usage summary (tokens, cost) for chat agent
19
+
20
+ ### Patch Changes
21
+
22
+ - 3e3d937: fix: add some validations for browser agent tool call
23
+ - Updated dependencies [ac754ae]
24
+ - Updated dependencies [561aa8e]
25
+ - @empiricalrun/llm@0.11.0
26
+
3
27
  ## 0.50.4
4
28
 
5
29
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,GACR,EAAE,iBAAiB;;;GA8EnB"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,GACR,EAAE,iBAAiB;;;GAgFnB"}
@@ -32,13 +32,14 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
32
32
  const testsDirectory = `${repoDir}/tests`;
33
33
  const isTestRunTriggeredForTeardown = teardownFileRegex.test(testFilePath);
34
34
  const teardowns = new utils_1.TeardownManager(testsDirectory);
35
- if (!isTestRunTriggeredForTeardown) {
36
- await teardowns.skipAll();
37
- }
35
+ let removeListeners;
38
36
  const command = `npx playwright test ${testFilePath} --retries 0 --project ${project} --timeout 0 --headed`;
39
37
  let isError = false;
40
38
  let error = "";
41
39
  try {
40
+ if (!isTestRunTriggeredForTeardown) {
41
+ removeListeners = await teardowns.skipAll();
42
+ }
42
43
  await (0, exec_1.cmd)(command.split(" "), {
43
44
  env: {
44
45
  APP_PORT: port.toString(),
@@ -55,7 +56,9 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
55
56
  console.error(error);
56
57
  isError = true;
57
58
  }
58
- if (!isTestRunTriggeredForTeardown) {
59
+ finally {
60
+ // Remove process listeners before unskipping files
61
+ removeListeners?.();
59
62
  teardowns.unskipAll();
60
63
  }
61
64
  // clean up the file if there is any error
@@ -25,6 +25,7 @@ export declare function injectPwLocatorGenerator(page: Page): Promise<void>;
25
25
  * @return {*} {Promise<PlaywrightTestConfig>}
26
26
  */
27
27
  export declare function readPlaywrightConfig(repoDir: string): Promise<PlaywrightTestConfig>;
28
+ export declare function getValidProjectNames(playwrightConfig: PlaywrightTestConfig): Promise<string[]>;
28
29
  /**
29
30
  * detect the project name for the given file in playwright test repo
30
31
  * if project and test file path for running test don't match, then playwright throws error
@@ -38,7 +39,7 @@ export declare class TeardownManager {
38
39
  private teardownFiles;
39
40
  private getAllTeardownFiles;
40
41
  private skipTeardownFile;
41
- skipAll(): Promise<void>;
42
+ skipAll(): Promise<() => void>;
42
43
  unskipAll(): void;
43
44
  }
44
45
  //# sourceMappingURL=utils.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,iBAkBA;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,iBAyBA;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD,wBAAsB,oBAAoB,CACxC,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,EAAE,CAAC,CAQnB;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAoBb,SAAS;CAKjB"}
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.TeardownManager = exports.detectProjectName = exports.readPlaywrightConfig = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.markTestAsOnly = exports.replaceTodoWithCreateTest = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
6
+ exports.TeardownManager = exports.detectProjectName = exports.getValidProjectNames = exports.readPlaywrightConfig = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.markTestAsOnly = exports.replaceTodoWithCreateTest = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
7
7
  const fs_extra_1 = __importDefault(require("fs-extra"));
8
8
  const minimatch_1 = require("minimatch");
9
9
  const path_1 = __importDefault(require("path"));
@@ -92,7 +92,12 @@ async function replaceTodoWithCreateTest({ testFilePath, testCaseName, testCaseS
92
92
  // This method is an alternative to prepareFileForUpdateScenario
93
93
  // TODO: Does not support multiple pages, scoped variables, updates in POM files
94
94
  const fileContent = await fs_extra_1.default.readFile(testFilePath, "utf-8");
95
- await fs_extra_1.default.writeFile(testFilePath, fileContent.replace(/\/\/ TODO\(agent\): (.*)/, (_, todoText) => `await createTest("${todoText.replace(/"/g, '\\"')}", page);`));
95
+ const todoRegex = /\/\/ TODO\(agent\): (.*)/;
96
+ const todoMatch = fileContent.match(todoRegex);
97
+ if (!todoMatch) {
98
+ throw new Error(`No "// TODO(agent):" comment found in file: ${testFilePath}`);
99
+ }
100
+ await fs_extra_1.default.writeFile(testFilePath, fileContent.replace(todoRegex, (_, todoText) => `await createTest("${todoText.replace(/"/g, '\\"')}", page);`));
96
101
  await addImportForCreateTest(testFilePath);
97
102
  await markTestAsOnly({
98
103
  testCaseName,
@@ -302,6 +307,16 @@ function matchAgainstPattern(pattern, filePathToTest) {
302
307
  return (0, minimatch_1.minimatch)(filePathToTest, pattern);
303
308
  }
304
309
  }
310
+ async function getValidProjectNames(playwrightConfig) {
311
+ if (!playwrightConfig.projects) {
312
+ return [];
313
+ }
314
+ const filteredProjectNames = playwrightConfig.projects
315
+ .map((p) => p.name)
316
+ .filter((p) => !!p);
317
+ return filteredProjectNames;
318
+ }
319
+ exports.getValidProjectNames = getValidProjectNames;
305
320
  /**
306
321
  * detect the project name for the given file in playwright test repo
307
322
  * if project and test file path for running test don't match, then playwright throws error
@@ -385,18 +400,14 @@ class TeardownManager {
385
400
  async skipAll() {
386
401
  this.teardownFiles = await this.getAllTeardownFiles();
387
402
  await Promise.all(this.teardownFiles.map(async ({ filePath }) => await this.skipTeardownFile(filePath)));
388
- process.on("beforeExit", () => {
389
- this.unskipAll();
390
- });
391
- process.on("exit", () => {
392
- this.unskipAll();
393
- });
394
- process.on("SIGINT", () => {
395
- this.unskipAll();
396
- });
397
- process.on("SIGTERM", () => {
398
- this.unskipAll();
399
- });
403
+ const setupProcessListeners = (cleanup) => {
404
+ const events = ["beforeExit", "exit", "SIGINT", "SIGTERM"];
405
+ events.forEach((event) => process.on(event, cleanup));
406
+ return () => {
407
+ events.forEach((event) => process.removeListener(event, cleanup));
408
+ };
409
+ };
410
+ return setupProcessListeners(this.unskipAll.bind(this));
400
411
  }
401
412
  unskipAll() {
402
413
  this.teardownFiles.forEach(({ filePath, content }) => {
@@ -1,8 +1,9 @@
1
- import { TraceClient } from "@empiricalrun/llm";
2
- import type { Anthropic } from "@empiricalrun/llm/claude";
3
- export declare function chatAgent({ prompt, chatModel, }: {
4
- prompt: string;
5
- trace?: TraceClient;
1
+ export declare function chatAgent({ chatModel, useDiskForChatState, }: {
6
2
  chatModel?: "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022";
7
- }): Promise<Anthropic.Messages.MessageParam[]>;
3
+ useDiskForChatState?: boolean;
4
+ }): Promise<{
5
+ input: number;
6
+ output: number;
7
+ cost: number;
8
+ }>;
8
9
  //# sourceMappingURL=chat.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AA8E1D,wBAAsB,SAAS,CAAC,EAC9B,MAAM,EACN,SAAwC,GACzC,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,4BAA4B,GAAG,4BAA4B,CAAC;CACzE,8CAiEA"}
1
+ {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AA8EA,wBAAsB,SAAS,CAAC,EAC9B,SAAwC,EACxC,mBAAmB,GACpB,EAAE;IACD,SAAS,CAAC,EAAE,4BAA4B,GAAG,4BAA4B,CAAC;IACxE,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B;;;;GA6FA"}
@@ -71,12 +71,48 @@ const toolExecutors = {
71
71
  ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
72
72
  str_replace_editor: claude_1.strReplaceEditorTool,
73
73
  };
74
- async function chatAgent({ prompt, chatModel = "claude-3-7-sonnet-20250219", }) {
75
- let userPrompt = prompt;
76
- let chatState = new claude_1.ChatState();
77
- chatState.pushTextMessage({ message: { role: "user", content: userPrompt } });
78
- let shouldAskUserForInput = false;
79
- while (!userPrompt.toLowerCase().includes("stop")) {
74
+ async function chatAgent({ chatModel = "claude-3-7-sonnet-20250219", useDiskForChatState, }) {
75
+ let userPrompt = undefined;
76
+ let chatState = useDiskForChatState ? claude_1.ChatState.load() : new claude_1.ChatState(false);
77
+ if (chatState.askUserForInput) {
78
+ // Show last message to the user for context when we loaded from disk
79
+ const messages = chatState.messages;
80
+ const lastMessage = messages[messages.length - 1];
81
+ if (lastMessage && Array.isArray(lastMessage.content)) {
82
+ const textContent = lastMessage.content.find((b) => b.type === "text");
83
+ if (textContent) {
84
+ const role = lastMessage.role.charAt(0).toUpperCase() + lastMessage.role.slice(1);
85
+ console.log(`${role}: ${textContent.text}`);
86
+ }
87
+ }
88
+ }
89
+ while (!userPrompt?.toLowerCase().includes("stop")) {
90
+ chatState.saveToDisk();
91
+ if (chatState.askUserForInput) {
92
+ try {
93
+ userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
94
+ message: "User:",
95
+ });
96
+ }
97
+ catch (e) {
98
+ // https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
99
+ if (e instanceof Error && e.name === "ExitPromptError") {
100
+ console.log("Exiting. Usage summary:", chatState.getUsageSummary());
101
+ process.exit(0);
102
+ }
103
+ throw e;
104
+ }
105
+ chatState.pushMessage({
106
+ role: "user",
107
+ content: [
108
+ {
109
+ type: "text",
110
+ text: userPrompt,
111
+ },
112
+ ],
113
+ });
114
+ continue;
115
+ }
80
116
  const toolUse = chatState.getPendingToolCall();
81
117
  if (toolUse) {
82
118
  console.log("Executing tool:", toolUse.name, "with args:", toolUse.input);
@@ -85,26 +121,22 @@ async function chatAgent({ prompt, chatModel = "claude-3-7-sonnet-20250219", })
85
121
  throw new Error(`Tool ${toolUse.name} not found`);
86
122
  }
87
123
  const toolResult = await toolExecutor(toolUse.input);
88
- chatState.pushToolResultToMessages({
89
- toolCall: toolUse,
90
- isError: toolResult.isError,
91
- result: toolResult.result,
124
+ chatState.pushMessage({
125
+ role: "user",
126
+ content: [
127
+ {
128
+ type: "tool_result",
129
+ tool_use_id: toolUse.id,
130
+ content: toolResult.result,
131
+ is_error: toolResult.isError,
132
+ },
133
+ ],
92
134
  });
93
135
  continue;
94
136
  }
95
- if (shouldAskUserForInput) {
96
- userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
97
- message: "Your response?",
98
- });
99
- chatState.pushTextMessage({
100
- message: { role: "user", content: userPrompt },
101
- });
102
- shouldAskUserForInput = false;
103
- continue;
104
- }
105
- const response = await (0, claude_1.createChatCompletion)({
137
+ const response = await (0, claude_1.createClaudeMessage)({
106
138
  systemPrompt,
107
- messages: chatState.getMessages(),
139
+ messages: chatState.getMessagesForCreateCompletion(),
108
140
  tools: tools.map((tool) => (0, claude_1.convertOpenAISchemaToAnthropic)((0, zod_schema_1.zodToOpenAITool)(tool.schema))),
109
141
  model: chatModel,
110
142
  withStrReplaceEditor: true,
@@ -112,19 +144,14 @@ async function chatAgent({ prompt, chatModel = "claude-3-7-sonnet-20250219", })
112
144
  if (!response) {
113
145
  throw new Error("No response from LLM");
114
146
  }
115
- chatState.pushTextMessage({
116
- message: { role: "assistant", content: response.content },
117
- });
147
+ chatState.pushMessage(response);
118
148
  const textBlock = response.content.find((b) => b.type === "text");
119
- const toolUseBlock = response.content.find((b) => b.type === "tool_use");
120
- console.log("Assistant response:", textBlock?.text);
121
- if (toolUseBlock) {
122
- chatState.addPendingToolCall({ toolCall: toolUseBlock });
123
- }
124
- else {
125
- shouldAskUserForInput = true;
149
+ if (textBlock) {
150
+ console.log("Assistant:", textBlock.text);
126
151
  }
127
152
  }
128
- return chatState.getMessages();
153
+ const usageSummary = chatState.getUsageSummary();
154
+ console.log("Usage summary:", usageSummary);
155
+ return usageSummary;
129
156
  }
130
157
  exports.chatAgent = chatAgent;
package/dist/bin/index.js CHANGED
@@ -28,11 +28,14 @@ dotenv_1.default.config({
28
28
  const flushEvents = async () => {
29
29
  await (0, llm_1.flushAllTraces)();
30
30
  };
31
- process.on("beforeExit", async () => await flushEvents());
32
- process.on("exit", async () => await flushEvents());
33
- process.on("SIGINT", async () => await flushEvents());
34
- process.on("SIGTERM", async () => await flushEvents());
35
- async function runChatAgent(prompt, modelInput) {
31
+ function setupProcessListeners(cleanup) {
32
+ const events = ["beforeExit", "exit", "SIGINT", "SIGTERM"];
33
+ events.forEach((event) => process.on(event, cleanup));
34
+ return () => {
35
+ events.forEach((event) => process.removeListener(event, cleanup));
36
+ };
37
+ }
38
+ async function runChatAgent(modelInput, useDiskForChatState) {
36
39
  const MODEL_MAPPING = {
37
40
  "claude-3-7": "claude-3-7-sonnet-20250219",
38
41
  "3-7": "claude-3-7-sonnet-20250219",
@@ -43,8 +46,8 @@ async function runChatAgent(prompt, modelInput) {
43
46
  throw new Error(`Invalid chat model: ${modelInput}`);
44
47
  }
45
48
  return await (0, chat_1.chatAgent)({
46
- prompt,
47
49
  chatModel: modelInput ? MODEL_MAPPING[modelInput] : undefined,
50
+ useDiskForChatState,
48
51
  });
49
52
  }
50
53
  async function runAgentsWorkflow(testGenConfig, testGenToken) {
@@ -172,6 +175,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
172
175
  return agent;
173
176
  }
174
177
  (async function main() {
178
+ const removeListeners = setupProcessListeners(flushEvents);
175
179
  console.log(`Running test-gen v${require("../../package.json").version} from ${__dirname}`);
176
180
  const program = new commander_1.Command();
177
181
  program
@@ -181,6 +185,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
181
185
  .option("--file <test-file>", "File path of the test case (inside tests dir)")
182
186
  .option("--suites <suites>", "Comma separated list of describe blocks")
183
187
  .option("--use-chat", "Use chat agent (and not the workflow)")
188
+ .option("--use-disk-for-chat-state", "Save and load chat state from disk")
184
189
  .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022)")
185
190
  .parse(process.argv);
186
191
  const options = program.opts();
@@ -202,18 +207,16 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
202
207
  generationId: testGenConfig.options?.metadata.generationId,
203
208
  projectRepoName: testGenConfig.options?.metadata.projectRepoName,
204
209
  });
205
- let testGenFailed = false;
206
- let agentUsed;
207
210
  // Download the build if repo has a download script
208
211
  await (0, test_build_1.downloadBuild)(testGenConfig.build || {});
212
+ if (completedOptions.useChat) {
213
+ await runChatAgent(completedOptions.chatModel, completedOptions.useDiskForChatState);
214
+ return;
215
+ }
216
+ let agentUsed;
217
+ let testGenFailed = false;
209
218
  try {
210
- if (completedOptions.useChat) {
211
- await runChatAgent(completedOptions.prompt, completedOptions.chatModel);
212
- return;
213
- }
214
- else {
215
- agentUsed = await runAgentsWorkflow(testGenConfig, testGenToken);
216
- }
219
+ agentUsed = await runAgentsWorkflow(testGenConfig, testGenToken);
217
220
  }
218
221
  catch (e) {
219
222
  testGenFailed = true;
@@ -229,7 +232,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
229
232
  testName: testGenConfig.testCase.name,
230
233
  });
231
234
  }
232
- // TODO: move these reporters to a better lifecycle
235
+ removeListeners();
233
236
  await (0, llm_1.flushAllTraces)();
234
237
  await (0, logger_1.waitForLogsToFlush)();
235
238
  await (0, session_1.endSession)();
@@ -5,6 +5,7 @@ export interface CliOptions {
5
5
  prompt?: string;
6
6
  suites?: string;
7
7
  useChat?: boolean;
8
+ useDiskForChatState?: boolean;
8
9
  chatModel?: "claude-3-7" | "3-7" | "claude-3-5" | "3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022";
9
10
  }
10
11
  export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,CAAC;CAClC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,CAAC;CAClC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}
@@ -12,9 +12,9 @@ async function validateAndCompleteCliOptions(options) {
12
12
  return options;
13
13
  }
14
14
  let requiredFields = ["name", "file", "prompt"];
15
- // For new chat flow in local CLI usage, only prompt is required
16
15
  if (options.useChat) {
17
- requiredFields = ["prompt"];
16
+ // Chat agent can prompt the user directly, nothing is required in CLI args
17
+ requiredFields = [];
18
18
  }
19
19
  const questions = [];
20
20
  if (!options.name && requiredFields.includes("name")) {
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAYpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAiC3E"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAepC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAyC3E"}
package/dist/index.js CHANGED
@@ -14,42 +14,54 @@ const pw_test_1 = require("./utils/pw-test");
14
14
  const flushEvents = async () => {
15
15
  await (0, llm_1.flushAllTraces)();
16
16
  };
17
- process.on("beforeExit", async () => await flushEvents());
18
- process.on("exit", async () => await flushEvents());
19
- process.on("SIGINT", async () => await flushEvents());
20
- process.on("SIGTERM", async () => await flushEvents());
17
+ function setupProcessListeners(cleanup) {
18
+ const events = ["beforeExit", "exit", "SIGINT", "SIGTERM"];
19
+ events.forEach((event) => process.on(event, cleanup));
20
+ return () => {
21
+ events.forEach((event) => process.removeListener(event, cleanup));
22
+ };
23
+ }
21
24
  async function createTest(task, page, scope) {
22
- const testConfigArg = process.env.TEST_GEN_TOKEN;
23
- const testGenConfig = (0, scenarios_1.loadTestConfigs)(testConfigArg);
24
- (0, reporter_1.setReporterConfig)({
25
- projectRepoName: testGenConfig.options?.metadata.projectRepoName,
26
- testSessionId: testGenConfig.options?.metadata.testSessionId,
27
- generationId: testGenConfig.options?.metadata.generationId,
28
- });
29
- (0, session_1.setSessionDetails)({
30
- sessionId: testGenConfig.options?.metadata.testSessionId,
31
- generationId: testGenConfig.options?.metadata.generationId,
32
- testCaseId: testGenConfig.testCase.id,
33
- projectRepoName: testGenConfig.options?.metadata.projectRepoName,
34
- });
35
- const fileService = new client_1.default();
36
- const { testCase, specPath } = testGenConfig;
37
- const { code, importPaths } = await (0, run_1.createTestUsingMasterAgent)({
38
- testCase,
39
- specPath,
40
- page,
41
- task,
42
- options: {
43
- ...testGenConfig.options,
44
- },
45
- scopeVars: scope,
46
- });
47
- await fileService.updateTest({
48
- task,
49
- generatedCode: code,
50
- importPaths,
51
- });
52
- // skip the rest of the test once generation is over
53
- await (0, pw_test_1.skipTest)();
25
+ const removeListeners = setupProcessListeners(flushEvents);
26
+ try {
27
+ const testConfigArg = process.env.TEST_GEN_TOKEN;
28
+ const testGenConfig = (0, scenarios_1.loadTestConfigs)(testConfigArg);
29
+ (0, reporter_1.setReporterConfig)({
30
+ projectRepoName: testGenConfig.options?.metadata.projectRepoName,
31
+ testSessionId: testGenConfig.options?.metadata.testSessionId,
32
+ generationId: testGenConfig.options?.metadata.generationId,
33
+ });
34
+ (0, session_1.setSessionDetails)({
35
+ sessionId: testGenConfig.options?.metadata.testSessionId,
36
+ generationId: testGenConfig.options?.metadata.generationId,
37
+ testCaseId: testGenConfig.testCase.id,
38
+ projectRepoName: testGenConfig.options?.metadata.projectRepoName,
39
+ });
40
+ const fileService = new client_1.default();
41
+ const { testCase, specPath } = testGenConfig;
42
+ const { code, importPaths } = await (0, run_1.createTestUsingMasterAgent)({
43
+ testCase,
44
+ specPath,
45
+ page,
46
+ task,
47
+ options: {
48
+ ...testGenConfig.options,
49
+ },
50
+ scopeVars: scope,
51
+ });
52
+ await fileService.updateTest({
53
+ task,
54
+ generatedCode: code,
55
+ importPaths,
56
+ });
57
+ // skip the rest of the test once generation is over
58
+ await (0, pw_test_1.skipTest)();
59
+ }
60
+ finally {
61
+ // Ensure listeners are removed even if an error occurs
62
+ removeListeners();
63
+ // Flush events one final time now that the listeners have been removed
64
+ await flushEvents();
65
+ }
54
66
  }
55
67
  exports.createTest = createTest;
@@ -1 +1 @@
1
- {"version":3,"file":"browser-agent.d.ts","sourceRoot":"","sources":["../../src/tools/browser-agent.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAmDpC,eAAO,MAAM,gBAAgB,EAAE,IA6C9B,CAAC"}
1
+ {"version":3,"file":"browser-agent.d.ts","sourceRoot":"","sources":["../../src/tools/browser-agent.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAmDpC,eAAO,MAAM,gBAAgB,EAAE,IA4D9B,CAAC"}
@@ -57,11 +57,27 @@ exports.browserAgentTool = {
57
57
  },
58
58
  execute: async (input) => {
59
59
  const { testName, testSuites, fileName, changeToMake, project } = input;
60
- await (0, utils_1.replaceTodoWithCreateTest)({
61
- testCaseName: testName,
62
- testCaseSuites: testSuites,
63
- testFilePath: fileName,
64
- });
60
+ try {
61
+ await (0, utils_1.replaceTodoWithCreateTest)({
62
+ testCaseName: testName,
63
+ testCaseSuites: testSuites,
64
+ testFilePath: fileName,
65
+ });
66
+ }
67
+ catch (error) {
68
+ return {
69
+ isError: true,
70
+ result: `Error running tool: ${error}`,
71
+ };
72
+ }
73
+ const playwrightConfig = await (0, utils_1.readPlaywrightConfig)(process.cwd());
74
+ const validProjectNames = await (0, utils_1.getValidProjectNames)(playwrightConfig);
75
+ if (!validProjectNames.includes(project)) {
76
+ return {
77
+ isError: true,
78
+ result: `Invalid project name: ${project}. Valid project names are: ${validProjectNames.join(", ")}`,
79
+ };
80
+ }
65
81
  const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
66
82
  testFilePath: fileName,
67
83
  filePathToUpdate: fileName,
@@ -1 +1 @@
1
- {"version":3,"file":"repo-tree.d.ts","sourceRoot":"","sources":["../../src/utils/repo-tree.ts"],"names":[],"mappings":"AAGA,eAAO,MAAM,eAAe,qBAO3B,CAAC;AAEF,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,KAAK,UAsE9D"}
1
+ {"version":3,"file":"repo-tree.d.ts","sourceRoot":"","sources":["../../src/utils/repo-tree.ts"],"names":[],"mappings":"AAGA,eAAO,MAAM,eAAe,qBAQ3B,CAAC;AAEF,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,KAAK,UAsE9D"}
@@ -13,6 +13,7 @@ exports.DEFAULT_EXCLUDE = [
13
13
  /\.git/,
14
14
  ".DS_Store",
15
15
  "playwright-report",
16
+ ".empiricalrun",
16
17
  ];
17
18
  function generateAsciiTree(dirPath, options = {}) {
18
19
  const defaultOptions = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.50.4",
3
+ "version": "0.51.1",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -73,10 +73,10 @@
73
73
  "tsx": "^4.16.2",
74
74
  "typescript": "^5.3.3",
75
75
  "zod": "^3.23.8",
76
- "@empiricalrun/llm": "^0.10.3",
76
+ "@empiricalrun/llm": "^0.11.1",
77
77
  "@empiricalrun/r2-uploader": "^0.3.8",
78
78
  "@empiricalrun/reporter": "^0.23.2",
79
- "@empiricalrun/test-run": "^0.7.5"
79
+ "@empiricalrun/test-run": "^0.7.6"
80
80
  },
81
81
  "devDependencies": {
82
82
  "@playwright/test": "1.47.1",