@empiricalrun/test-gen 0.50.4 → 0.51.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/dist/agent/browsing/run.d.ts.map +1 -1
- package/dist/agent/browsing/run.js +7 -4
- package/dist/agent/browsing/utils.d.ts +2 -1
- package/dist/agent/browsing/utils.d.ts.map +1 -1
- package/dist/agent/browsing/utils.js +25 -14
- package/dist/agent/chat.d.ts +7 -6
- package/dist/agent/chat.d.ts.map +1 -1
- package/dist/agent/chat.js +60 -33
- package/dist/bin/index.js +19 -16
- package/dist/bin/utils/index.d.ts +1 -0
- package/dist/bin/utils/index.d.ts.map +1 -1
- package/dist/bin/utils/index.js +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +48 -36
- package/dist/tools/browser-agent.d.ts.map +1 -1
- package/dist/tools/browser-agent.js +21 -5
- package/dist/utils/repo-tree.d.ts.map +1 -1
- package/dist/utils/repo-tree.js +1 -0
- package/package.json +3 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,29 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.51.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- d04190f: fix: remove process.on listeners to avoid leaks
|
|
8
|
+
- 75c7921: fix: show chat usage summary on ctrl+C
|
|
9
|
+
- Updated dependencies [d04190f]
|
|
10
|
+
- @empiricalrun/test-run@0.7.6
|
|
11
|
+
- @empiricalrun/llm@0.11.1
|
|
12
|
+
|
|
13
|
+
## 0.51.0
|
|
14
|
+
|
|
15
|
+
### Minor Changes
|
|
16
|
+
|
|
17
|
+
- ac754ae: feat: enable disk persistence for chat state
|
|
18
|
+
- 561aa8e: feat: add usage summary (tokens, cost) for chat agent
|
|
19
|
+
|
|
20
|
+
### Patch Changes
|
|
21
|
+
|
|
22
|
+
- 3e3d937: fix: add some validations for browser agent tool call
|
|
23
|
+
- Updated dependencies [ac754ae]
|
|
24
|
+
- Updated dependencies [561aa8e]
|
|
25
|
+
- @empiricalrun/llm@0.11.0
|
|
26
|
+
|
|
3
27
|
## 0.50.4
|
|
4
28
|
|
|
5
29
|
### Patch Changes
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,GACR,EAAE,iBAAiB;;;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,GACR,EAAE,iBAAiB;;;GAgFnB"}
|
|
@@ -32,13 +32,14 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
|
|
|
32
32
|
const testsDirectory = `${repoDir}/tests`;
|
|
33
33
|
const isTestRunTriggeredForTeardown = teardownFileRegex.test(testFilePath);
|
|
34
34
|
const teardowns = new utils_1.TeardownManager(testsDirectory);
|
|
35
|
-
|
|
36
|
-
await teardowns.skipAll();
|
|
37
|
-
}
|
|
35
|
+
let removeListeners;
|
|
38
36
|
const command = `npx playwright test ${testFilePath} --retries 0 --project ${project} --timeout 0 --headed`;
|
|
39
37
|
let isError = false;
|
|
40
38
|
let error = "";
|
|
41
39
|
try {
|
|
40
|
+
if (!isTestRunTriggeredForTeardown) {
|
|
41
|
+
removeListeners = await teardowns.skipAll();
|
|
42
|
+
}
|
|
42
43
|
await (0, exec_1.cmd)(command.split(" "), {
|
|
43
44
|
env: {
|
|
44
45
|
APP_PORT: port.toString(),
|
|
@@ -55,7 +56,9 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
|
|
|
55
56
|
console.error(error);
|
|
56
57
|
isError = true;
|
|
57
58
|
}
|
|
58
|
-
|
|
59
|
+
finally {
|
|
60
|
+
// Remove process listeners before unskipping files
|
|
61
|
+
removeListeners?.();
|
|
59
62
|
teardowns.unskipAll();
|
|
60
63
|
}
|
|
61
64
|
// clean up the file if there is any error
|
|
@@ -25,6 +25,7 @@ export declare function injectPwLocatorGenerator(page: Page): Promise<void>;
|
|
|
25
25
|
* @return {*} {Promise<PlaywrightTestConfig>}
|
|
26
26
|
*/
|
|
27
27
|
export declare function readPlaywrightConfig(repoDir: string): Promise<PlaywrightTestConfig>;
|
|
28
|
+
export declare function getValidProjectNames(playwrightConfig: PlaywrightTestConfig): Promise<string[]>;
|
|
28
29
|
/**
|
|
29
30
|
* detect the project name for the given file in playwright test repo
|
|
30
31
|
* if project and test file path for running test don't match, then playwright throws error
|
|
@@ -38,7 +39,7 @@ export declare class TeardownManager {
|
|
|
38
39
|
private teardownFiles;
|
|
39
40
|
private getAllTeardownFiles;
|
|
40
41
|
private skipTeardownFile;
|
|
41
|
-
skipAll(): Promise<void>;
|
|
42
|
+
skipAll(): Promise<() => void>;
|
|
42
43
|
unskipAll(): void;
|
|
43
44
|
}
|
|
44
45
|
//# sourceMappingURL=utils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,iBAyBA;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD,wBAAsB,oBAAoB,CACxC,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,EAAE,CAAC,CAQnB;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAoBb,SAAS;CAKjB"}
|
|
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.TeardownManager = exports.detectProjectName = exports.readPlaywrightConfig = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.markTestAsOnly = exports.replaceTodoWithCreateTest = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
|
|
6
|
+
exports.TeardownManager = exports.detectProjectName = exports.getValidProjectNames = exports.readPlaywrightConfig = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.markTestAsOnly = exports.replaceTodoWithCreateTest = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
|
|
7
7
|
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
8
8
|
const minimatch_1 = require("minimatch");
|
|
9
9
|
const path_1 = __importDefault(require("path"));
|
|
@@ -92,7 +92,12 @@ async function replaceTodoWithCreateTest({ testFilePath, testCaseName, testCaseS
|
|
|
92
92
|
// This method is an alternative to prepareFileForUpdateScenario
|
|
93
93
|
// TODO: Does not support multiple pages, scoped variables, updates in POM files
|
|
94
94
|
const fileContent = await fs_extra_1.default.readFile(testFilePath, "utf-8");
|
|
95
|
-
|
|
95
|
+
const todoRegex = /\/\/ TODO\(agent\): (.*)/;
|
|
96
|
+
const todoMatch = fileContent.match(todoRegex);
|
|
97
|
+
if (!todoMatch) {
|
|
98
|
+
throw new Error(`No "// TODO(agent):" comment found in file: ${testFilePath}`);
|
|
99
|
+
}
|
|
100
|
+
await fs_extra_1.default.writeFile(testFilePath, fileContent.replace(todoRegex, (_, todoText) => `await createTest("${todoText.replace(/"/g, '\\"')}", page);`));
|
|
96
101
|
await addImportForCreateTest(testFilePath);
|
|
97
102
|
await markTestAsOnly({
|
|
98
103
|
testCaseName,
|
|
@@ -302,6 +307,16 @@ function matchAgainstPattern(pattern, filePathToTest) {
|
|
|
302
307
|
return (0, minimatch_1.minimatch)(filePathToTest, pattern);
|
|
303
308
|
}
|
|
304
309
|
}
|
|
310
|
+
async function getValidProjectNames(playwrightConfig) {
|
|
311
|
+
if (!playwrightConfig.projects) {
|
|
312
|
+
return [];
|
|
313
|
+
}
|
|
314
|
+
const filteredProjectNames = playwrightConfig.projects
|
|
315
|
+
.map((p) => p.name)
|
|
316
|
+
.filter((p) => !!p);
|
|
317
|
+
return filteredProjectNames;
|
|
318
|
+
}
|
|
319
|
+
exports.getValidProjectNames = getValidProjectNames;
|
|
305
320
|
/**
|
|
306
321
|
* detect the project name for the given file in playwright test repo
|
|
307
322
|
* if project and test file path for running test don't match, then playwright throws error
|
|
@@ -385,18 +400,14 @@ class TeardownManager {
|
|
|
385
400
|
async skipAll() {
|
|
386
401
|
this.teardownFiles = await this.getAllTeardownFiles();
|
|
387
402
|
await Promise.all(this.teardownFiles.map(async ({ filePath }) => await this.skipTeardownFile(filePath)));
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
});
|
|
397
|
-
process.on("SIGTERM", () => {
|
|
398
|
-
this.unskipAll();
|
|
399
|
-
});
|
|
403
|
+
const setupProcessListeners = (cleanup) => {
|
|
404
|
+
const events = ["beforeExit", "exit", "SIGINT", "SIGTERM"];
|
|
405
|
+
events.forEach((event) => process.on(event, cleanup));
|
|
406
|
+
return () => {
|
|
407
|
+
events.forEach((event) => process.removeListener(event, cleanup));
|
|
408
|
+
};
|
|
409
|
+
};
|
|
410
|
+
return setupProcessListeners(this.unskipAll.bind(this));
|
|
400
411
|
}
|
|
401
412
|
unskipAll() {
|
|
402
413
|
this.teardownFiles.forEach(({ filePath, content }) => {
|
package/dist/agent/chat.d.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
import type { Anthropic } from "@empiricalrun/llm/claude";
|
|
3
|
-
export declare function chatAgent({ prompt, chatModel, }: {
|
|
4
|
-
prompt: string;
|
|
5
|
-
trace?: TraceClient;
|
|
1
|
+
export declare function chatAgent({ chatModel, useDiskForChatState, }: {
|
|
6
2
|
chatModel?: "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022";
|
|
7
|
-
|
|
3
|
+
useDiskForChatState?: boolean;
|
|
4
|
+
}): Promise<{
|
|
5
|
+
input: number;
|
|
6
|
+
output: number;
|
|
7
|
+
cost: number;
|
|
8
|
+
}>;
|
|
8
9
|
//# sourceMappingURL=chat.d.ts.map
|
package/dist/agent/chat.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AA8EA,wBAAsB,SAAS,CAAC,EAC9B,SAAwC,EACxC,mBAAmB,GACpB,EAAE;IACD,SAAS,CAAC,EAAE,4BAA4B,GAAG,4BAA4B,CAAC;IACxE,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B;;;;GA6FA"}
|
package/dist/agent/chat.js
CHANGED
|
@@ -71,12 +71,48 @@ const toolExecutors = {
|
|
|
71
71
|
...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
|
|
72
72
|
str_replace_editor: claude_1.strReplaceEditorTool,
|
|
73
73
|
};
|
|
74
|
-
async function chatAgent({
|
|
75
|
-
let userPrompt =
|
|
76
|
-
let chatState = new claude_1.ChatState();
|
|
77
|
-
chatState.
|
|
78
|
-
|
|
79
|
-
|
|
74
|
+
async function chatAgent({ chatModel = "claude-3-7-sonnet-20250219", useDiskForChatState, }) {
|
|
75
|
+
let userPrompt = undefined;
|
|
76
|
+
let chatState = useDiskForChatState ? claude_1.ChatState.load() : new claude_1.ChatState(false);
|
|
77
|
+
if (chatState.askUserForInput) {
|
|
78
|
+
// Show last message to the user for context when we loaded from disk
|
|
79
|
+
const messages = chatState.messages;
|
|
80
|
+
const lastMessage = messages[messages.length - 1];
|
|
81
|
+
if (lastMessage && Array.isArray(lastMessage.content)) {
|
|
82
|
+
const textContent = lastMessage.content.find((b) => b.type === "text");
|
|
83
|
+
if (textContent) {
|
|
84
|
+
const role = lastMessage.role.charAt(0).toUpperCase() + lastMessage.role.slice(1);
|
|
85
|
+
console.log(`${role}: ${textContent.text}`);
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
while (!userPrompt?.toLowerCase().includes("stop")) {
|
|
90
|
+
chatState.saveToDisk();
|
|
91
|
+
if (chatState.askUserForInput) {
|
|
92
|
+
try {
|
|
93
|
+
userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
|
|
94
|
+
message: "User:",
|
|
95
|
+
});
|
|
96
|
+
}
|
|
97
|
+
catch (e) {
|
|
98
|
+
// https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
|
|
99
|
+
if (e instanceof Error && e.name === "ExitPromptError") {
|
|
100
|
+
console.log("Exiting. Usage summary:", chatState.getUsageSummary());
|
|
101
|
+
process.exit(0);
|
|
102
|
+
}
|
|
103
|
+
throw e;
|
|
104
|
+
}
|
|
105
|
+
chatState.pushMessage({
|
|
106
|
+
role: "user",
|
|
107
|
+
content: [
|
|
108
|
+
{
|
|
109
|
+
type: "text",
|
|
110
|
+
text: userPrompt,
|
|
111
|
+
},
|
|
112
|
+
],
|
|
113
|
+
});
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
80
116
|
const toolUse = chatState.getPendingToolCall();
|
|
81
117
|
if (toolUse) {
|
|
82
118
|
console.log("Executing tool:", toolUse.name, "with args:", toolUse.input);
|
|
@@ -85,26 +121,22 @@ async function chatAgent({ prompt, chatModel = "claude-3-7-sonnet-20250219", })
|
|
|
85
121
|
throw new Error(`Tool ${toolUse.name} not found`);
|
|
86
122
|
}
|
|
87
123
|
const toolResult = await toolExecutor(toolUse.input);
|
|
88
|
-
chatState.
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
124
|
+
chatState.pushMessage({
|
|
125
|
+
role: "user",
|
|
126
|
+
content: [
|
|
127
|
+
{
|
|
128
|
+
type: "tool_result",
|
|
129
|
+
tool_use_id: toolUse.id,
|
|
130
|
+
content: toolResult.result,
|
|
131
|
+
is_error: toolResult.isError,
|
|
132
|
+
},
|
|
133
|
+
],
|
|
92
134
|
});
|
|
93
135
|
continue;
|
|
94
136
|
}
|
|
95
|
-
|
|
96
|
-
userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
|
|
97
|
-
message: "Your response?",
|
|
98
|
-
});
|
|
99
|
-
chatState.pushTextMessage({
|
|
100
|
-
message: { role: "user", content: userPrompt },
|
|
101
|
-
});
|
|
102
|
-
shouldAskUserForInput = false;
|
|
103
|
-
continue;
|
|
104
|
-
}
|
|
105
|
-
const response = await (0, claude_1.createChatCompletion)({
|
|
137
|
+
const response = await (0, claude_1.createClaudeMessage)({
|
|
106
138
|
systemPrompt,
|
|
107
|
-
messages: chatState.
|
|
139
|
+
messages: chatState.getMessagesForCreateCompletion(),
|
|
108
140
|
tools: tools.map((tool) => (0, claude_1.convertOpenAISchemaToAnthropic)((0, zod_schema_1.zodToOpenAITool)(tool.schema))),
|
|
109
141
|
model: chatModel,
|
|
110
142
|
withStrReplaceEditor: true,
|
|
@@ -112,19 +144,14 @@ async function chatAgent({ prompt, chatModel = "claude-3-7-sonnet-20250219", })
|
|
|
112
144
|
if (!response) {
|
|
113
145
|
throw new Error("No response from LLM");
|
|
114
146
|
}
|
|
115
|
-
chatState.
|
|
116
|
-
message: { role: "assistant", content: response.content },
|
|
117
|
-
});
|
|
147
|
+
chatState.pushMessage(response);
|
|
118
148
|
const textBlock = response.content.find((b) => b.type === "text");
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
if (toolUseBlock) {
|
|
122
|
-
chatState.addPendingToolCall({ toolCall: toolUseBlock });
|
|
123
|
-
}
|
|
124
|
-
else {
|
|
125
|
-
shouldAskUserForInput = true;
|
|
149
|
+
if (textBlock) {
|
|
150
|
+
console.log("Assistant:", textBlock.text);
|
|
126
151
|
}
|
|
127
152
|
}
|
|
128
|
-
|
|
153
|
+
const usageSummary = chatState.getUsageSummary();
|
|
154
|
+
console.log("Usage summary:", usageSummary);
|
|
155
|
+
return usageSummary;
|
|
129
156
|
}
|
|
130
157
|
exports.chatAgent = chatAgent;
|
package/dist/bin/index.js
CHANGED
|
@@ -28,11 +28,14 @@ dotenv_1.default.config({
|
|
|
28
28
|
const flushEvents = async () => {
|
|
29
29
|
await (0, llm_1.flushAllTraces)();
|
|
30
30
|
};
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
31
|
+
function setupProcessListeners(cleanup) {
|
|
32
|
+
const events = ["beforeExit", "exit", "SIGINT", "SIGTERM"];
|
|
33
|
+
events.forEach((event) => process.on(event, cleanup));
|
|
34
|
+
return () => {
|
|
35
|
+
events.forEach((event) => process.removeListener(event, cleanup));
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
async function runChatAgent(modelInput, useDiskForChatState) {
|
|
36
39
|
const MODEL_MAPPING = {
|
|
37
40
|
"claude-3-7": "claude-3-7-sonnet-20250219",
|
|
38
41
|
"3-7": "claude-3-7-sonnet-20250219",
|
|
@@ -43,8 +46,8 @@ async function runChatAgent(prompt, modelInput) {
|
|
|
43
46
|
throw new Error(`Invalid chat model: ${modelInput}`);
|
|
44
47
|
}
|
|
45
48
|
return await (0, chat_1.chatAgent)({
|
|
46
|
-
prompt,
|
|
47
49
|
chatModel: modelInput ? MODEL_MAPPING[modelInput] : undefined,
|
|
50
|
+
useDiskForChatState,
|
|
48
51
|
});
|
|
49
52
|
}
|
|
50
53
|
async function runAgentsWorkflow(testGenConfig, testGenToken) {
|
|
@@ -172,6 +175,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
|
|
|
172
175
|
return agent;
|
|
173
176
|
}
|
|
174
177
|
(async function main() {
|
|
178
|
+
const removeListeners = setupProcessListeners(flushEvents);
|
|
175
179
|
console.log(`Running test-gen v${require("../../package.json").version} from ${__dirname}`);
|
|
176
180
|
const program = new commander_1.Command();
|
|
177
181
|
program
|
|
@@ -181,6 +185,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
|
|
|
181
185
|
.option("--file <test-file>", "File path of the test case (inside tests dir)")
|
|
182
186
|
.option("--suites <suites>", "Comma separated list of describe blocks")
|
|
183
187
|
.option("--use-chat", "Use chat agent (and not the workflow)")
|
|
188
|
+
.option("--use-disk-for-chat-state", "Save and load chat state from disk")
|
|
184
189
|
.option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022)")
|
|
185
190
|
.parse(process.argv);
|
|
186
191
|
const options = program.opts();
|
|
@@ -202,18 +207,16 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
|
|
|
202
207
|
generationId: testGenConfig.options?.metadata.generationId,
|
|
203
208
|
projectRepoName: testGenConfig.options?.metadata.projectRepoName,
|
|
204
209
|
});
|
|
205
|
-
let testGenFailed = false;
|
|
206
|
-
let agentUsed;
|
|
207
210
|
// Download the build if repo has a download script
|
|
208
211
|
await (0, test_build_1.downloadBuild)(testGenConfig.build || {});
|
|
212
|
+
if (completedOptions.useChat) {
|
|
213
|
+
await runChatAgent(completedOptions.chatModel, completedOptions.useDiskForChatState);
|
|
214
|
+
return;
|
|
215
|
+
}
|
|
216
|
+
let agentUsed;
|
|
217
|
+
let testGenFailed = false;
|
|
209
218
|
try {
|
|
210
|
-
|
|
211
|
-
await runChatAgent(completedOptions.prompt, completedOptions.chatModel);
|
|
212
|
-
return;
|
|
213
|
-
}
|
|
214
|
-
else {
|
|
215
|
-
agentUsed = await runAgentsWorkflow(testGenConfig, testGenToken);
|
|
216
|
-
}
|
|
219
|
+
agentUsed = await runAgentsWorkflow(testGenConfig, testGenToken);
|
|
217
220
|
}
|
|
218
221
|
catch (e) {
|
|
219
222
|
testGenFailed = true;
|
|
@@ -229,7 +232,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
|
|
|
229
232
|
testName: testGenConfig.testCase.name,
|
|
230
233
|
});
|
|
231
234
|
}
|
|
232
|
-
|
|
235
|
+
removeListeners();
|
|
233
236
|
await (0, llm_1.flushAllTraces)();
|
|
234
237
|
await (0, logger_1.waitForLogsToFlush)();
|
|
235
238
|
await (0, session_1.endSession)();
|
|
@@ -5,6 +5,7 @@ export interface CliOptions {
|
|
|
5
5
|
prompt?: string;
|
|
6
6
|
suites?: string;
|
|
7
7
|
useChat?: boolean;
|
|
8
|
+
useDiskForChatState?: boolean;
|
|
8
9
|
chatModel?: "claude-3-7" | "3-7" | "claude-3-5" | "3-5" | "claude-3-7-sonnet-20250219" | "claude-3-5-sonnet-20241022";
|
|
9
10
|
}
|
|
10
11
|
export declare function validateAndCompleteCliOptions(options: CliOptions): Promise<CliOptions>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,CAAC;CAClC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,SAAS,CAAC,EACN,YAAY,GACZ,KAAK,GACL,YAAY,GACZ,KAAK,GACL,4BAA4B,GAC5B,4BAA4B,CAAC;CAClC;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB"}
|
package/dist/bin/utils/index.js
CHANGED
|
@@ -12,9 +12,9 @@ async function validateAndCompleteCliOptions(options) {
|
|
|
12
12
|
return options;
|
|
13
13
|
}
|
|
14
14
|
let requiredFields = ["name", "file", "prompt"];
|
|
15
|
-
// For new chat flow in local CLI usage, only prompt is required
|
|
16
15
|
if (options.useChat) {
|
|
17
|
-
|
|
16
|
+
// Chat agent can prompt the user directly, nothing is required in CLI args
|
|
17
|
+
requiredFields = [];
|
|
18
18
|
}
|
|
19
19
|
const questions = [];
|
|
20
20
|
if (!options.name && requiredFields.includes("name")) {
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAepC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAyC3E"}
|
package/dist/index.js
CHANGED
|
@@ -14,42 +14,54 @@ const pw_test_1 = require("./utils/pw-test");
|
|
|
14
14
|
const flushEvents = async () => {
|
|
15
15
|
await (0, llm_1.flushAllTraces)();
|
|
16
16
|
};
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
function setupProcessListeners(cleanup) {
|
|
18
|
+
const events = ["beforeExit", "exit", "SIGINT", "SIGTERM"];
|
|
19
|
+
events.forEach((event) => process.on(event, cleanup));
|
|
20
|
+
return () => {
|
|
21
|
+
events.forEach((event) => process.removeListener(event, cleanup));
|
|
22
|
+
};
|
|
23
|
+
}
|
|
21
24
|
async function createTest(task, page, scope) {
|
|
22
|
-
const
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
testCase,
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
25
|
+
const removeListeners = setupProcessListeners(flushEvents);
|
|
26
|
+
try {
|
|
27
|
+
const testConfigArg = process.env.TEST_GEN_TOKEN;
|
|
28
|
+
const testGenConfig = (0, scenarios_1.loadTestConfigs)(testConfigArg);
|
|
29
|
+
(0, reporter_1.setReporterConfig)({
|
|
30
|
+
projectRepoName: testGenConfig.options?.metadata.projectRepoName,
|
|
31
|
+
testSessionId: testGenConfig.options?.metadata.testSessionId,
|
|
32
|
+
generationId: testGenConfig.options?.metadata.generationId,
|
|
33
|
+
});
|
|
34
|
+
(0, session_1.setSessionDetails)({
|
|
35
|
+
sessionId: testGenConfig.options?.metadata.testSessionId,
|
|
36
|
+
generationId: testGenConfig.options?.metadata.generationId,
|
|
37
|
+
testCaseId: testGenConfig.testCase.id,
|
|
38
|
+
projectRepoName: testGenConfig.options?.metadata.projectRepoName,
|
|
39
|
+
});
|
|
40
|
+
const fileService = new client_1.default();
|
|
41
|
+
const { testCase, specPath } = testGenConfig;
|
|
42
|
+
const { code, importPaths } = await (0, run_1.createTestUsingMasterAgent)({
|
|
43
|
+
testCase,
|
|
44
|
+
specPath,
|
|
45
|
+
page,
|
|
46
|
+
task,
|
|
47
|
+
options: {
|
|
48
|
+
...testGenConfig.options,
|
|
49
|
+
},
|
|
50
|
+
scopeVars: scope,
|
|
51
|
+
});
|
|
52
|
+
await fileService.updateTest({
|
|
53
|
+
task,
|
|
54
|
+
generatedCode: code,
|
|
55
|
+
importPaths,
|
|
56
|
+
});
|
|
57
|
+
// skip the rest of the test once generation is over
|
|
58
|
+
await (0, pw_test_1.skipTest)();
|
|
59
|
+
}
|
|
60
|
+
finally {
|
|
61
|
+
// Ensure listeners are removed even if an error occurs
|
|
62
|
+
removeListeners();
|
|
63
|
+
// Flush events one final time before removing listeners
|
|
64
|
+
await flushEvents();
|
|
65
|
+
}
|
|
54
66
|
}
|
|
55
67
|
exports.createTest = createTest;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"browser-agent.d.ts","sourceRoot":"","sources":["../../src/tools/browser-agent.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"browser-agent.d.ts","sourceRoot":"","sources":["../../src/tools/browser-agent.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAmDpC,eAAO,MAAM,gBAAgB,EAAE,IA4D9B,CAAC"}
|
|
@@ -57,11 +57,27 @@ exports.browserAgentTool = {
|
|
|
57
57
|
},
|
|
58
58
|
execute: async (input) => {
|
|
59
59
|
const { testName, testSuites, fileName, changeToMake, project } = input;
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
60
|
+
try {
|
|
61
|
+
await (0, utils_1.replaceTodoWithCreateTest)({
|
|
62
|
+
testCaseName: testName,
|
|
63
|
+
testCaseSuites: testSuites,
|
|
64
|
+
testFilePath: fileName,
|
|
65
|
+
});
|
|
66
|
+
}
|
|
67
|
+
catch (error) {
|
|
68
|
+
return {
|
|
69
|
+
isError: true,
|
|
70
|
+
result: `Error running tool: ${error}`,
|
|
71
|
+
};
|
|
72
|
+
}
|
|
73
|
+
const playwrightConfig = await (0, utils_1.readPlaywrightConfig)(process.cwd());
|
|
74
|
+
const validProjectNames = await (0, utils_1.getValidProjectNames)(playwrightConfig);
|
|
75
|
+
if (!validProjectNames.includes(project)) {
|
|
76
|
+
return {
|
|
77
|
+
isError: true,
|
|
78
|
+
result: `Invalid project name: ${project}. Valid project names are: ${validProjectNames.join(", ")}`,
|
|
79
|
+
};
|
|
80
|
+
}
|
|
65
81
|
const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
|
|
66
82
|
testFilePath: fileName,
|
|
67
83
|
filePathToUpdate: fileName,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"repo-tree.d.ts","sourceRoot":"","sources":["../../src/utils/repo-tree.ts"],"names":[],"mappings":"AAGA,eAAO,MAAM,eAAe,
|
|
1
|
+
{"version":3,"file":"repo-tree.d.ts","sourceRoot":"","sources":["../../src/utils/repo-tree.ts"],"names":[],"mappings":"AAGA,eAAO,MAAM,eAAe,qBAQ3B,CAAC;AAEF,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,KAAK,UAsE9D"}
|
package/dist/utils/repo-tree.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.51.1",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -73,10 +73,10 @@
|
|
|
73
73
|
"tsx": "^4.16.2",
|
|
74
74
|
"typescript": "^5.3.3",
|
|
75
75
|
"zod": "^3.23.8",
|
|
76
|
-
"@empiricalrun/llm": "^0.
|
|
76
|
+
"@empiricalrun/llm": "^0.11.1",
|
|
77
77
|
"@empiricalrun/r2-uploader": "^0.3.8",
|
|
78
78
|
"@empiricalrun/reporter": "^0.23.2",
|
|
79
|
-
"@empiricalrun/test-run": "^0.7.
|
|
79
|
+
"@empiricalrun/test-run": "^0.7.6"
|
|
80
80
|
},
|
|
81
81
|
"devDependencies": {
|
|
82
82
|
"@playwright/test": "1.47.1",
|