@empiricalrun/test-gen 0.75.0 → 0.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/CHANGELOG.md +15 -0
  2. package/dist/agent/base/index.d.ts +26 -19
  3. package/dist/agent/base/index.d.ts.map +1 -1
  4. package/dist/agent/base/index.js +88 -56
  5. package/dist/agent/chat/agent-loop.d.ts +4 -3
  6. package/dist/agent/chat/agent-loop.d.ts.map +1 -1
  7. package/dist/agent/chat/agent-loop.js +4 -10
  8. package/dist/agent/chat/exports.d.ts +4 -2
  9. package/dist/agent/chat/exports.d.ts.map +1 -1
  10. package/dist/agent/chat/exports.js +8 -7
  11. package/dist/agent/chat/index.d.ts +6 -10
  12. package/dist/agent/chat/index.d.ts.map +1 -1
  13. package/dist/agent/chat/index.js +129 -196
  14. package/dist/agent/chat/prompt/index.d.ts +5 -4
  15. package/dist/agent/chat/prompt/index.d.ts.map +1 -1
  16. package/dist/agent/chat/prompt/index.js +79 -68
  17. package/dist/agent/chat/state.d.ts +1 -2
  18. package/dist/agent/chat/state.d.ts.map +1 -1
  19. package/dist/agent/chat/state.js +2 -2
  20. package/dist/agent/chat/utils.d.ts +2 -3
  21. package/dist/agent/chat/utils.d.ts.map +1 -1
  22. package/dist/agent/chat/utils.js +1 -2
  23. package/dist/agent/cli.d.ts +11 -0
  24. package/dist/agent/cli.d.ts.map +1 -0
  25. package/dist/agent/cli.js +209 -0
  26. package/dist/agent/code-review/index.d.ts +7 -0
  27. package/dist/agent/code-review/index.d.ts.map +1 -0
  28. package/dist/agent/code-review/index.js +65 -0
  29. package/dist/agent/code-review/prompt.d.ts +1 -1
  30. package/dist/agent/code-review/prompt.d.ts.map +1 -1
  31. package/dist/agent/code-review/prompt.js +52 -16
  32. package/dist/agent/index.d.ts +10 -0
  33. package/dist/agent/index.d.ts.map +1 -0
  34. package/dist/agent/index.js +19 -0
  35. package/dist/agent/triage/index.d.ts +7 -0
  36. package/dist/agent/triage/index.d.ts.map +1 -0
  37. package/dist/agent/triage/index.js +102 -0
  38. package/dist/agent/video-analysis/index.d.ts +7 -0
  39. package/dist/agent/video-analysis/index.d.ts.map +1 -0
  40. package/dist/agent/video-analysis/index.js +35 -0
  41. package/dist/bin/index.js +6 -6
  42. package/dist/file-info/adapters/github/index.d.ts.map +1 -1
  43. package/dist/file-info/adapters/github/index.js +1 -2
  44. package/dist/file-info/adapters/github/reader.d.ts +4 -9
  45. package/dist/file-info/adapters/github/reader.d.ts.map +1 -1
  46. package/dist/file-info/adapters/github/reader.js +163 -134
  47. package/dist/tools/create-pull-request/index.d.ts.map +1 -0
  48. package/dist/tools/{definitions/commit-and-create-pr.js → create-pull-request/index.js} +30 -1
  49. package/dist/tools/create-pull-request/utils.d.ts +21 -0
  50. package/dist/tools/create-pull-request/utils.d.ts.map +1 -0
  51. package/dist/tools/create-pull-request/utils.js +83 -0
  52. package/dist/tools/definitions/extract-frames-from-video.d.ts +39 -0
  53. package/dist/tools/definitions/extract-frames-from-video.d.ts.map +1 -0
  54. package/dist/tools/definitions/extract-frames-from-video.js +60 -0
  55. package/dist/tools/definitions/fetch-video-analysis.d.ts +4 -4
  56. package/dist/tools/executor/index.d.ts +1 -1
  57. package/dist/tools/executor/index.d.ts.map +1 -1
  58. package/dist/tools/executor/index.js +18 -4
  59. package/dist/tools/extract-frames-from-video/index.d.ts +7 -0
  60. package/dist/tools/extract-frames-from-video/index.d.ts.map +1 -0
  61. package/dist/tools/extract-frames-from-video/index.js +145 -0
  62. package/dist/tools/fetch-session-diff/index.d.ts +3 -0
  63. package/dist/tools/fetch-session-diff/index.d.ts.map +1 -0
  64. package/dist/tools/fetch-session-diff/index.js +46 -0
  65. package/dist/tools/fetch-video-analysis/index.d.ts.map +1 -1
  66. package/dist/tools/fetch-video-analysis/index.js +18 -7
  67. package/dist/tools/fetch-video-analysis/utils.d.ts +5 -2
  68. package/dist/tools/fetch-video-analysis/utils.d.ts.map +1 -1
  69. package/dist/tools/fetch-video-analysis/utils.js +34 -11
  70. package/dist/tools/fetch-video-analysis/video-analysis.d.ts +2 -2
  71. package/dist/tools/fetch-video-analysis/video-analysis.d.ts.map +1 -1
  72. package/dist/tools/fetch-video-analysis/video-analysis.js +24 -8
  73. package/dist/tools/index.d.ts +28 -2
  74. package/dist/tools/index.d.ts.map +1 -1
  75. package/dist/tools/index.js +46 -28
  76. package/dist/tools/review-pull-request/index.d.ts +3 -0
  77. package/dist/tools/review-pull-request/index.d.ts.map +1 -0
  78. package/dist/tools/review-pull-request/index.js +103 -0
  79. package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
  80. package/dist/tools/test-run-fetcher/index.js +4 -14
  81. package/dist/tools/utils/urls.d.ts +5 -0
  82. package/dist/tools/utils/urls.d.ts.map +1 -0
  83. package/dist/tools/utils/urls.js +19 -0
  84. package/dist/tools/view-failed-test-run-report/index.d.ts.map +1 -1
  85. package/dist/tools/view-failed-test-run-report/index.js +3 -15
  86. package/dist/utils/file.d.ts +1 -0
  87. package/dist/utils/file.d.ts.map +1 -1
  88. package/dist/utils/file.js +45 -1
  89. package/dist/{tools/fetch-video-analysis → utils}/local-ffmpeg-client.d.ts +4 -0
  90. package/dist/utils/local-ffmpeg-client.d.ts.map +1 -0
  91. package/dist/{tools/fetch-video-analysis → utils}/local-ffmpeg-client.js +63 -11
  92. package/package.json +2 -2
  93. package/tsconfig.tsbuildinfo +1 -1
  94. package/dist/agent/chat/utils/tool-calls.d.ts +0 -21
  95. package/dist/agent/chat/utils/tool-calls.d.ts.map +0 -1
  96. package/dist/agent/chat/utils/tool-calls.js +0 -64
  97. package/dist/tools/commit-and-create-pr/index.d.ts.map +0 -1
  98. package/dist/tools/commit-and-create-pr/index.js +0 -83
  99. package/dist/tools/definitions/commit-and-create-pr.d.ts +0 -3
  100. package/dist/tools/definitions/commit-and-create-pr.d.ts.map +0 -1
  101. package/dist/tools/fetch-video-analysis/local-ffmpeg-client.d.ts.map +0 -1
  102. /package/dist/tools/{commit-and-create-pr → create-pull-request}/index.d.ts +0 -0
@@ -0,0 +1,209 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.fetchEnvironmentVariables = fetchEnvironmentVariables;
4
+ exports.runChatAgentForCLI = runChatAgentForCLI;
5
+ const llm_1 = require("@empiricalrun/llm");
6
+ const chat_1 = require("@empiricalrun/llm/chat");
7
+ const picocolors_1 = require("picocolors");
8
+ const client_1 = require("../dashboard/client");
9
+ const reader_1 = require("../file-info/adapters/file-system/reader");
10
+ const human_in_the_loop_1 = require("../human-in-the-loop");
11
+ const validation_1 = require("../recorder/validation");
12
+ const executor_1 = require("../tools/executor");
13
+ const git_1 = require("../tools/executor/utils/git");
14
+ const filesystem_cache_1 = require("./chat/filesystem-cache");
15
+ const state_1 = require("./chat/state");
16
+ const utils_1 = require("./chat/utils");
17
+ const index_1 = require("./index");
18
/**
 * Decides whether the interactive loop should end.
 *
 * @param {string | null | undefined} userPrompt - Latest text typed by the user, if any.
 * @returns {boolean} `true` only when the prompt, lower-cased, is exactly "stop".
 */
function stopCriteria(userPrompt) {
    // No prompt yet (first iteration) can never be a stop request.
    if (userPrompt === null || userPrompt === undefined) {
        return false;
    }
    const normalized = userPrompt.toLowerCase();
    return normalized === "stop";
}
21
/**
 * Wraps up a session: prints the usage summary and, when disk persistence is
 * enabled, saves the conversation through `saveToDisk`.
 *
 * @param {object} chatModel - Chat model whose `messages` and `askUserForInput` are persisted.
 * @param {boolean} useDiskForChatState - When falsy, nothing is written to disk.
 * @param {string} selectedModel - Model identifier stored alongside the messages.
 * @param {object | null} error - Error details to persist, or `null` for a clean exit.
 */
function concludeAgent(chatModel, useDiskForChatState, selectedModel, error) {
    const usageLine = "Usage summary -> " + (0, state_1.getUsageSummary)(chatModel);
    console.log(`\n${(0, picocolors_1.gray)(usageLine)}`);
    if (!useDiskForChatState) {
        return;
    }
    const { messages, askUserForInput } = chatModel;
    (0, state_1.saveToDisk)(messages, selectedModel, askUserForInput, error);
}
27
/**
 * Fetches the project's environment variables from the dashboard API and
 * returns them as a plain `{ NAME: value }` object.
 *
 * When `EMPIRICALRUN_API_KEY` is set the request is authenticated with that
 * project key. Otherwise a user access token is used and the value returned by
 * `validatePackageJson` for the current working directory is sent as the
 * `project_repo_name` query parameter.
 *
 * @returns {Promise<Record<string, string>>} Map of variable name to value.
 * @throws {Error} When the response carries no `data.environment_variables`.
 */
async function fetchEnvironmentVariables() {
    const projectApiKey = process.env.EMPIRICALRUN_API_KEY;
    const params = {};
    let apiClient;
    if (!projectApiKey) {
        apiClient = new client_1.DashboardAPIClient({
            authType: "user-access-token",
        });
        // Without a project key the API needs the repo name to locate the project.
        params.project_repo_name = await (0, validation_1.validatePackageJson)(process.cwd());
    }
    else {
        apiClient = new client_1.DashboardAPIClient({
            authType: "project-api-key",
            projectApiKey,
        });
    }
    const response = await apiClient.request("/api/environment-variables", { method: "GET", params });
    const variables = response.data?.environment_variables;
    if (!variables) {
        console.error("Failed to fetch environment variables:", response);
        throw new Error("Failed to fetch environment variables");
    }
    const envVars = {};
    variables.forEach(({ name, value }) => {
        envVars[name] = value;
    });
    return envVars;
}
55
/**
 * Runs an agent session in the terminal until the user types "stop".
 *
 * Each loop iteration is either a user turn (prompting via `humanLoop`) or an
 * agent turn (`agent.runLoop`, with tool calls executed by a `ToolExecutor`).
 *
 * @param {object} options
 * @param {boolean} options.useDiskForChatState - Load prior chat state from disk and save it after each report.
 * @param {string} options.selectedModel - Model identifier passed to the chat model and the agent.
 * @param {string} [options.initialPromptContent] - First user message; ignored when messages were restored.
 * @param {string} options.agentMode - Key into `MODE_TO_AGENT_MAP` selecting the agent class.
 * @param {boolean} [options.resetChat] - Clear any saved chat state before starting.
 * @param {boolean} [options.useFSCache] - Use the filesystem LLM cache; this also turns streaming output off.
 * @returns {Promise<void>}
 */
async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, agentMode, resetChat, useFSCache, }) {
    const enableStreaming = !useFSCache;
    const cache = useFSCache ? new filesystem_cache_1.FilesystemLLMCache() : undefined;
    if (resetChat) {
        (0, state_1.clearChatState)();
    }
    const chatState = useDiskForChatState ? (0, state_1.loadChatState)() : undefined;
    // TODO: Store branch name in chat state so that we don't recreate it every time
    const branchName = `branch-${crypto.randomUUID().substring(0, 8)}`;
    await (0, git_1.checkoutBranch)(branchName, process.cwd());
    const restoredMessages = chatState?.messages || [];
    let chatModel = (0, chat_1.createChatModel)(restoredMessages, selectedModel, undefined, cache);
    chatModel.validateEnvVarsForAuth();
    // Prints a complete message as "<role>: <text>".
    const printMessage = (message) => {
        console.log(`${(0, picocolors_1.blue)(message.role)}: ${message.textMessage}`);
    };
    if (initialPromptContent) {
        if (chatModel.messages.length === 0) {
            chatModel.pushUserMessage(initialPromptContent, []);
        }
        else if (chatModel.messages.length > 0) {
            console.warn(`Ignoring initial prompt because we have existing messages.`);
        }
    }
    if (chatModel.askUserForInput) {
        // Show last message to the user for context when we loaded from disk
        const lastMessage = chatModel.getHumanReadableLatestMessage();
        if (lastMessage) {
            printMessage(lastMessage);
        }
    }
    if (chatState?.error) {
        // Reset error state as we are attempting a retry
        chatState.error = null;
    }
    // On SIGINT / SIGTERM: print usage, persist state if enabled, exit cleanly.
    const shutdown = () => {
        concludeAgent(chatModel, useDiskForChatState, selectedModel, null);
        process.exit(0);
    };
    for (const signal of ["SIGINT", "SIGTERM"]) {
        process.once(signal, shutdown);
    }
    // Called by the agent after each step: persist state, then echo the latest
    // message (or just end the streamed line when streaming already printed it).
    const reporterFunc = async (state, latest) => {
        if (useDiskForChatState) {
            (0, state_1.saveToDisk)(state.messages, selectedModel, state.askUserForInput, state.error);
        }
        if (!latest) {
            return;
        }
        if (enableStreaming) {
            process.stdout.write(`\n`);
        }
        else {
            printMessage(latest);
        }
    };
    // Builds a fresh streaming callback (with its own role-tracking state) for
    // one agent turn; returns undefined when streaming is disabled.
    const createStreamingReporter = () => {
        if (!enableStreaming) {
            return undefined;
        }
        let hasStarted = false;
        let startedRole = undefined;
        return async (delta, snapshot, thinking) => {
            if (!delta) {
                return;
            }
            const role = thinking ? "Thinking" : "Assistant";
            if (!hasStarted) {
                process.stdout.write(`${(0, picocolors_1.blue)(role)}: `);
                hasStarted = true;
                startedRole = role;
            }
            else if (role !== startedRole) {
                // Changing from thinking -> text block
                process.stdout.write("\n");
                process.stdout.write(`${(0, picocolors_1.blue)(role)}: `);
                startedRole = role;
            }
            process.stdout.write(delta);
        };
    };
    const trace = (0, llm_1.createLangfuseTrace)({
        name: "chat_agent",
        input: initialPromptContent || "",
        tags: [selectedModel, "chat_agent"],
    });
    if (trace) {
        console.log(`Starting ${selectedModel}: ${trace.getTraceUrl()}`);
    }
    const authType = process.env.EMPIRICALRUN_API_KEY
        ? "project-api-key"
        : "user-access-token";
    const apiClient = new client_1.DashboardAPIClient({
        authType,
    });
    const fileInfoBuilder = () => (0, reader_1.getFileInfoFromFS)(process.cwd());
    const agent = index_1.MODE_TO_AGENT_MAP[agentMode]({ selectedModel });
    let userPrompt;
    while (!stopCriteria(userPrompt)) {
        if (!chatModel.askUserForInput) {
            // Agent turn: run the agent with a tool executor bound to this branch.
            const repoPath = process.cwd();
            const environmentOverrides = await fetchEnvironmentVariables();
            const toolExecutor = new executor_1.ToolExecutor({
                chatSession: null,
                branchName,
                repoPath,
                apiClient,
                trace,
                featureFlags: [],
                environmentOverrides,
            });
            await agent.runLoop({
                messages: chatModel.messages,
                reporter: reporterFunc,
                streamingMessageReporter: createStreamingReporter(),
                trace,
                repoInfoBuilder: fileInfoBuilder,
                onPendingToolCall: async (toolCalls) => {
                    const toolResults = await toolExecutor.execute(toolCalls);
                    chatModel.pushToolResultsMessage(toolCalls, toolResults);
                },
            });
            // Update the chatModel with the agent's final state for next iteration
            if (agent.messages) {
                chatModel = (0, chat_1.createChatModel)(agent.messages, selectedModel, undefined, cache);
            }
            continue;
        }
        // User turn: ask for input on the terminal.
        try {
            userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
                message: "User:",
            });
        }
        catch (e) {
            // https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
            const promptWasClosed = e instanceof Error && e.name === "ExitPromptError";
            if (promptWasClosed) {
                concludeAgent(chatModel, useDiskForChatState, selectedModel, null);
                process.exit(0);
            }
            const errorDetails = {
                message: e.message,
                stack: e.stack || "Stack trace not available",
                timestamp: new Date().toISOString(),
            };
            concludeAgent(chatModel, useDiskForChatState, selectedModel, errorDetails);
            throw e;
        }
        if (stopCriteria(userPrompt)) {
            // The loop condition sees the stop request and ends the session.
            continue;
        }
        const { text, attachments } = (0, utils_1.extractAttachments)(userPrompt);
        chatModel.pushUserMessage(text, attachments);
    }
    trace?.update({ output: { messages: chatModel.messages } });
    await llm_1.langfuseInstance?.flushAsync();
    const usageSummary = (0, state_1.getUsageSummary)(chatModel);
    console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
}
@@ -0,0 +1,7 @@
1
+ import type { ToolDefinition } from "@empiricalrun/shared-types";
2
+ import { BaseAgent } from "../base";
3
+ export declare class CodeReviewAgent extends BaseAgent {
4
+ protected getTools(): ToolDefinition[];
5
+ protected buildSystemPrompt(): Promise<string>;
6
+ }
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAGjE,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,qBAAa,eAAgB,SAAQ,SAAS;IAC5C,SAAS,CAAC,QAAQ,IAAI,cAAc,EAAE;cAItB,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC;CAsDrD"}
@@ -0,0 +1,65 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CodeReviewAgent = void 0;
4
+ const tools_1 = require("../../tools");
5
+ const base_1 = require("../base");
6
/**
 * Agent that reviews Playwright test changes. It is given a single tool for
 * fetching the session diff and a system prompt describing what to look for.
 */
class CodeReviewAgent extends base_1.BaseAgent {
    /**
     * @returns {Array} The tool definitions available to this agent.
     */
    getTools() {
        return [tools_1.fetchSessionDiffTool];
    }
    /**
     * @returns {Promise<string>} The code-review system prompt.
     */
    async buildSystemPrompt() {
        // The review instructions are already exported as SYSTEM_PROMPT by the
        // sibling ./prompt module; reuse that constant instead of keeping a second
        // inline copy of the same text that could drift out of sync.
        const { SYSTEM_PROMPT } = require("./prompt");
        return SYSTEM_PROMPT;
    }
}
65
+ exports.CodeReviewAgent = CodeReviewAgent;
@@ -1,2 +1,2 @@
1
- export declare const PROMPT = "\nIdentify code smells in tests\n- try-catch\n- conditionals added without a comment\n\nPlaywright gotchas\n- isVisible(), count() do not auto-wait\n\nPlaywright code smells\n- waitForLoadState is not required\n - Especially with \"networkidle\", because modern webapps keep doing network activity\n\nIdentify test assumptions\n- What data does the test require?\n- Clean up test entities\n";
1
+ export declare const SYSTEM_PROMPT = "\nYou are an expert code reviewer that specializes in reviewing Playwright test code. You are\nprovided with tools to fetch diff for a code review, where a test has been added, test modified,\nor some configuration has changed.\n\n# Your goals\n- Identify code smells in test code - see below\n- Call out test data assumptions or lack of clean up\n\n# Output format\n- You are expected to return two sections in your response: describe_code_change and code_review_comments\n- describe_code_change: A brief summary of what the code change is doing. This should be 4-6 sentences in a bullet list.\n- code_review_comments: A bulleted list of code review comments that catch for any of the specific bits below or other\n red flags you might see in the code. Each comment should be 1-2 sentences.\n\nReturn these as XML tags with markdown inside them\n\n<describe_code_change>\n- ...\n</describe_code_change>\n\n<code_review_comments>\n- ...\n</code_review_comments>\n\n# Specific bits to catch in the code review\n\n## Code smells to look for\n- Any form of try-catch or exception handling is a code smell in test code. If there's an\n exception, the test should fail\n- Any conditionals (if, switch, ternary) in test code is a code smell. Tests are expected to be\n deterministic. If you see conditionals, check if there's a comment explaining why it's needed.\n Critically review the comment -- if it's not convincing, call it out as a code smell.\n\n## Ensure Playwright best practices\n- Use locators instead of selectors: waitForSelector, $, $$ are bad - use locators instead (e.g. locator.waitFor)\n- If the test relies on some Playwright APIs that do not auto-wait (e.g. isVisible(), count()), we need to ensure \n they are used AFTER some action that ensures the page has loaded. 
If nothing, at least it should have a waitForTimeout\n- Don't use waitForLoadState or networkidle - these are not required since Playwright auto-waits after navigations\n\n## Call out test data assumptions\n- If new test data is created (e.g. creating a new entity in the app, doing some actions on it) - it should be cleaned up\n at the end of the test. If not, call it out.\n- If the test data cannot be cleaned up, are we using some random names to ensure no conflicts in future test runs?\n- If the test assumes some data exists (e.g. a user with a specific email) - call it out. It might fail across other\n environments.\n- No hard coded URLs - use relative URLs instead - that can work across environments.\n- Dependency on static data that can change across environments (e.g. number of rows in a table) should be avoided.\n\n## Remove debug artifacts\n- If there are console.logs or page.screenshot usage, call it out. They should be removed before merging.\n";
2
2
  //# sourceMappingURL=prompt.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/prompt.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,MAAM,8YAelB,CAAC"}
1
+ {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/prompt.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,aAAa,4sFAmDzB,CAAC"}
@@ -1,19 +1,55 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.PROMPT = void 0;
4
- exports.PROMPT = `
5
- Identify code smells in tests
6
- - try-catch
7
- - conditionals added without a comment
8
-
9
- Playwright gotchas
10
- - isVisible(), count() do not auto-wait
11
-
12
- Playwright code smells
13
- - waitForLoadState is not required
14
- - Especially with "networkidle", because modern webapps keep doing network activity
15
-
16
- Identify test assumptions
17
- - What data does the test require?
18
- - Clean up test entities
3
+ exports.SYSTEM_PROMPT = void 0;
4
+ exports.SYSTEM_PROMPT = `
5
+ You are an expert code reviewer that specializes in reviewing Playwright test code. You are
6
+ provided with tools to fetch diff for a code review, where a test has been added, test modified,
7
+ or some configuration has changed.
8
+
9
+ # Your goals
10
+ - Identify code smells in test code - see below
11
+ - Call out test data assumptions or lack of clean up
12
+
13
+ # Output format
14
+ - You are expected to return two sections in your response: describe_code_change and code_review_comments
15
+ - describe_code_change: A brief summary of what the code change is doing. This should be 4-6 sentences in a bullet list.
16
+ - code_review_comments: A bulleted list of code review comments that catch for any of the specific bits below or other
17
+ red flags you might see in the code. Each comment should be 1-2 sentences.
18
+
19
+ Return these as XML tags with markdown inside them
20
+
21
+ <describe_code_change>
22
+ - ...
23
+ </describe_code_change>
24
+
25
+ <code_review_comments>
26
+ - ...
27
+ </code_review_comments>
28
+
29
+ # Specific bits to catch in the code review
30
+
31
+ ## Code smells to look for
32
+ - Any form of try-catch or exception handling is a code smell in test code. If there's an
33
+ exception, the test should fail
34
+ - Any conditionals (if, switch, ternary) in test code is a code smell. Tests are expected to be
35
+ deterministic. If you see conditionals, check if there's a comment explaining why it's needed.
36
+ Critically review the comment -- if it's not convincing, call it out as a code smell.
37
+
38
+ ## Ensure Playwright best practices
39
+ - Use locators instead of selectors: waitForSelector, $, $$ are bad - use locators instead (e.g. locator.waitFor)
40
+ - If the test relies on some Playwright APIs that do not auto-wait (e.g. isVisible(), count()), we need to ensure
41
+ they are used AFTER some action that ensures the page has loaded. If nothing, at least it should have a waitForTimeout
42
+ - Don't use waitForLoadState or networkidle - these are not required since Playwright auto-waits after navigations
43
+
44
+ ## Call out test data assumptions
45
+ - If new test data is created (e.g. creating a new entity in the app, doing some actions on it) - it should be cleaned up
46
+ at the end of the test. If not, call it out.
47
+ - If the test data cannot be cleaned up, are we using some random names to ensure no conflicts in future test runs?
48
+ - If the test assumes some data exists (e.g. a user with a specific email) - call it out. It might fail across other
49
+ environments.
50
+ - No hard coded URLs - use relative URLs instead - that can work across environments.
51
+ - Dependency on static data that can change across environments (e.g. number of rows in a table) should be avoided.
52
+
53
+ ## Remove debug artifacts
54
+ - If there are console.logs or page.screenshot usage, call it out. They should be removed before merging.
19
55
  `;
@@ -0,0 +1,10 @@
1
+ import type { AgentModeEnum } from "@empiricalrun/shared-types";
2
+ import { type AgentParams, BaseAgent } from "./base";
3
+ import { ChatAgent } from "./chat";
4
+ import { CodeReviewAgent } from "./code-review";
5
+ import { TriageAgent } from "./triage";
6
+ import { VideoAnalysisAgent } from "./video-analysis";
7
+ export declare const MODE_TO_AGENT_MAP: Record<AgentModeEnum, (params: AgentParams) => BaseAgent>;
8
+ export { BaseAgent, ChatAgent, CodeReviewAgent, TriageAgent, VideoAnalysisAgent, };
9
+ export type { AgentParams };
10
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAEhE,OAAO,EAAE,KAAK,WAAW,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACrD,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEtD,eAAO,MAAM,iBAAiB,EAAE,MAAM,CACpC,aAAa,EACb,CAAC,MAAM,EAAE,WAAW,KAAK,SAAS,CAMnC,CAAC;AAEF,OAAO,EACL,SAAS,EACT,SAAS,EACT,eAAe,EACf,WAAW,EACX,kBAAkB,GACnB,CAAC;AACF,YAAY,EAAE,WAAW,EAAE,CAAC"}
@@ -0,0 +1,19 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.VideoAnalysisAgent = exports.TriageAgent = exports.CodeReviewAgent = exports.ChatAgent = exports.BaseAgent = exports.MODE_TO_AGENT_MAP = void 0;
4
+ const base_1 = require("./base");
5
+ Object.defineProperty(exports, "BaseAgent", { enumerable: true, get: function () { return base_1.BaseAgent; } });
6
+ const chat_1 = require("./chat");
7
+ Object.defineProperty(exports, "ChatAgent", { enumerable: true, get: function () { return chat_1.ChatAgent; } });
8
+ const code_review_1 = require("./code-review");
9
+ Object.defineProperty(exports, "CodeReviewAgent", { enumerable: true, get: function () { return code_review_1.CodeReviewAgent; } });
10
+ const triage_1 = require("./triage");
11
+ Object.defineProperty(exports, "TriageAgent", { enumerable: true, get: function () { return triage_1.TriageAgent; } });
12
+ const video_analysis_1 = require("./video-analysis");
13
+ Object.defineProperty(exports, "VideoAnalysisAgent", { enumerable: true, get: function () { return video_analysis_1.VideoAnalysisAgent; } });
14
+ exports.MODE_TO_AGENT_MAP = {
15
+ triage: (params) => new triage_1.TriageAgent(params),
16
+ chat: (params) => new chat_1.ChatAgent(params),
17
+ video: (params) => new video_analysis_1.VideoAnalysisAgent(params),
18
+ "code-review": (params) => new code_review_1.CodeReviewAgent(params),
19
+ };
@@ -0,0 +1,7 @@
1
+ import type { ToolDefinition } from "@empiricalrun/shared-types";
2
+ import { BaseAgent } from "../base";
3
+ export declare class TriageAgent extends BaseAgent {
4
+ protected getTools(): ToolDefinition[];
5
+ protected buildSystemPrompt(repoContext?: string): Promise<string>;
6
+ }
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/triage/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAsBjE,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,qBAAa,WAAY,SAAQ,SAAS;IACxC,SAAS,CAAC,QAAQ,IAAI,cAAc,EAAE;cAiCtB,iBAAiB,CAAC,WAAW,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAkEzE"}
@@ -0,0 +1,102 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.TriageAgent = void 0;
4
+ const tools_1 = require("../../tools");
5
+ const base_1 = require("../base");
6
/**
 * Agent that triages failures in a Playwright test run report and records
 * them as issues.
 */
class TriageAgent extends base_1.BaseAgent {
    /**
     * Builds the tool list for this agent: common tools, triage-specific tools,
     * and the text-editor tools for the selected model.
     *
     * @returns {Array} Tool definitions, with the video analysis tool present exactly once.
     */
    getTools() {
        const tools = [
            // Common tools
            tools_1.runTestTool,
            tools_1.grepTool,
            tools_1.fetchDiagnosisReportTool,
            tools_1.listEnvironmentsTool,
            tools_1.downloadBuildTool,
            tools_1.fetchFileTool,
            tools_1.traceDotZipTool,
            // Triage specific tools
            tools_1.listIssuesTool,
            tools_1.createIssueTool,
            tools_1.updateIssueTool,
            tools_1.viewFailedTestRunReportTool,
            tools_1.fetchVideoAnalysis,
            tools_1.fetchLastSuccessfulTestRunTool,
            tools_1.sendTriageSummaryTool,
            // Model-specific tools
            ...(0, tools_1.textEditorToolsForModel)(this.selectedModel),
        ];
        // Feature flag: video analysis (if not already included).
        // Compare by identity rather than by a hard-coded schema name, so the
        // tool can never be added a second time if its registered name differs
        // from the string that used to be checked here.
        if (this.featureFlags?.includes("useVideoAnalysis") &&
            !tools.includes(tools_1.fetchVideoAnalysis)) {
            tools.push(tools_1.fetchVideoAnalysis);
        }
        return tools;
    }
    /**
     * @param {string} [repoContext] - Repository description embedded in the prompt.
     * @returns {Promise<string>} The triage system prompt, including today's date.
     * @throws {Error} When `repoContext` is missing or empty.
     */
    async buildSystemPrompt(repoContext) {
        if (!repoContext) {
            throw new Error(`Triage agent needs repo context`);
        }
        return `
You are a helpful assistant that help with analysis of Playwright test reports. Your goal is to help the user analyse a test report and identify the root cause of the test failures, and log the unique failuers as issues so that the user can keep a track and fix them.

You are working on a test code repository that contains Playwright tests and other related files. Your working directory has been checked out on a git branch.

# Your capabilities

When provided with a test report URL, you can use these capabilities to triage the test failures in the report:

## Fetch and view the test report

- Use viewFailedTestRunReportTool tool to get more information about all tests that failed in the run

## Analyze each test case

You are provided with multiple tools to help you understand each failing test case better. Understanding each test case allows you to identify the root cause and create more accurate issues. These tools can also be called in parallel.

- Each test case generates artifacts: images, videos, playwright trace zip file. With your tools, you can fetch image, analyze the video frames and trace.zip to find out failing network requests and console logs
- Each video represents one browser tab of the test case (so multiple videos implies the test had multiple tabs or browser windows)
- Read the error stack and test file to understand what the test is doing
- Fetch the last successful run of the test case to understand the earlier flow. This report will contain image and video URLs that can also be analyzed with your available tools.
- If you think the issue is explained by a timing or intermittent issue, you can also re-run the test case

## Listing, updating and creating issues

- Test failures will become issues that can be assigned to developers to fix the app or update the test. Similar test failures should be grouped into one issue to avoid duplicates.
- Before you create a new issue, you MUST list existing issues that have been created for this repo, to avoid creating duplicate issues.
- If you find duplicates, use the update issue tool to update the existing issue with new information from the test report
- When you are creating a new issue, use the description and title to clearly call out the error reason (share error stack, error message, relevant lines of code, etc.) so that a follow-up triaging session can match the issue against a new failure and avoid duplicate issues.
- What makes a good issue: accurate classification between app or test issue, accurate grouping, and a good auto-fix prompt - see more about this below

## Classify tests as app or test issues
- An app issue is an issue in the application that is being tested. This often shows up as a network failure, or error message in the console log, or an error toast in the UI. Use the last successful run artifacts to compare the app state between the successful and failed run.
- A test issue is an issue in the test code. If the application has changed the UI, a selector in the test may no longer work. Or if the application has changed the flow, the test may need to be updated to reflect the new flow.

## Grouping test failures
- Before you create issues, group the failures together so that we create useful issues
- What makes a good group: failures that have the same root cause - because of similar error stacks - and can be fixed with the same change to the app or test
- Both "type of failure" and "proposed fix" are important to determine if two failures belong to the same group
- Example: if two tests fail with strict mode violations, but for 2 different selectors, they are different groups because the proposed fixes are different
- What does not make a good group: the location of the test or the name of the test. Two tests that are located in the same file or have similar names should ONLY be grouped together if the root cause of failures is same

## Crafting a good auto-fix prompt for test issues
- When you create issues with type "test", you are expected to share a test_issue_prompt which is your proposed change to the test to adapt to the new app state.
- This prompt is handed over to another agent to update the test code, and your prompt is the ONLY context that the agent has to update the test.
- Therefore, your prompt must contain:
- Which test cases to be updated - with test and describe block names, file name
- What failed in the test - error message, error stack, relevant lines of code, or bits to locate the failure
- Your suggested change to the test
- Emphasis to re-run the test after making the change, to ensure that the change works

## Conclusion
- After you are done with triaging and creating issues, summarize the work done with a list of created issues for the user to review. Don't be too verbose - a bullet list of issues created or updated, with a small description is enough.
- It is important to show proof that you have gone through all of the failures in the test run report, so use numbers to call out 1. total failures, and 2. failures associated with each issue.

# Repo context
${repoContext}

# Reference
Today's date is ${new Date().toDateString()}
`;
    }
}
102
+ exports.TriageAgent = TriageAgent;
@@ -0,0 +1,7 @@
1
+ import type { ToolDefinition } from "@empiricalrun/shared-types";
2
+ import { BaseAgent } from "../base";
3
/** Type declaration for the video-analysis agent, a subclass of BaseAgent. */
+ export declare class VideoAnalysisAgent extends BaseAgent {
4
/** Returns the tool definitions this agent makes available. */
+ protected getTools(): ToolDefinition[];
5
/** Resolves to the system prompt string used by this agent. */
+ protected buildSystemPrompt(): Promise<string>;
6
+ }
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/video-analysis/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAGjE,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,qBAAa,kBAAmB,SAAQ,SAAS;IAC/C,SAAS,CAAC,QAAQ,IAAI,cAAc,EAAE;cAItB,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC;CAwBrD"}
@@ -0,0 +1,35 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.VideoAnalysisAgent = void 0;
4
+ const tools_1 = require("../../tools");
5
+ const base_1 = require("../base");
6
/**
 * Agent for analysing screen recordings. Extends BaseAgent and defines the
 * tool list and the system prompt for this agent.
 */
+ class VideoAnalysisAgent extends base_1.BaseAgent {
7
// Returns a single-element tool list: the frame-extraction tool from ../../tools.
+ getTools() {
8
+ return [tools_1.extractFramesFromVideo];
9
+ }
10
// Returns a fixed prompt (no interpolation); being async, callers receive a Promise.
// NOTE(review): the prompt lists fetch_video_analysis under "Available Tools",
// but getTools() above does not return it — confirm whether BaseAgent supplies
// that tool or whether the prompt line is stale.
+ async buildSystemPrompt() {
11
+ return `
12
+ You are a video analysis agent specialized in analyzing screen recordings and user interface interactions.
13
+
14
+ Available Tools:
15
+ - extract_frames: Extract frames from videos for detailed visual analysis
16
+ - fetch_video_analysis: Get comprehensive video analysis summary (legacy)
17
+
18
+ When analyzing videos:
19
+ 1. Use extract_frames to get individual frames for detailed analysis
20
+ 2. Analyze each frame for UI elements, user actions, and state changes
21
+ 3. Provide specific observations about what's happening in each frame
22
+ 4. The Summary should be in a bullet point format
23
+ 5. Reference frame IDs when discussing specific moments: "In frame_abc123_001, I can see..."
24
+
25
+ Your analysis should be:
26
+ - Detailed and specific about UI elements and interactions
27
+ - Sequential, following the flow of actions in the video
28
+
29
+ After the final summary you need to include the key frame IDs that best represent the important moments in the video.
30
+
31
+ Example Frame Id Reference: <frame_abc123_001>
32
+ `;
33
+ }
34
+ }
35
+ exports.VideoAnalysisAgent = VideoAnalysisAgent;
package/dist/bin/index.js CHANGED
@@ -7,8 +7,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
7
7
  const commander_1 = require("commander");
8
8
  const dotenv_1 = __importDefault(require("dotenv"));
9
9
  const fs_1 = __importDefault(require("fs"));
10
- const chat_1 = require("../agent/chat");
11
10
  const models_1 = require("../agent/chat/models");
11
+ const cli_1 = require("../agent/cli");
12
12
  const auth_1 = require("../auth");
13
13
  const client_1 = require("../dashboard/client");
14
14
  const recorder_1 = require("../recorder");
@@ -20,13 +20,13 @@ const utils_1 = require("./utils");
20
20
  dotenv_1.default.config({
21
21
  path: [".env.local", ".env"],
22
22
  });
23
- async function runChatAgent({ modelInput, useDiskForChatState, prompt: initialPromptContent, useTriage, resetChat, useFSCache, }) {
23
+ async function runChatAgent({ modelInput, useDiskForChatState, prompt: initialPromptContent, agentMode = "chat", resetChat, useFSCache, }) {
24
24
  const resolvedModel = (0, models_1.resolveChatModelBasedOnInput)(modelInput);
25
- return await (0, chat_1.runChatAgentForCLI)({
25
+ return await (0, cli_1.runChatAgentForCLI)({
26
26
  selectedModel: resolvedModel,
27
27
  useDiskForChatState: useDiskForChatState || false,
28
28
  initialPromptContent,
29
- useTriage: useTriage || false,
29
+ agentMode,
30
30
  resetChat: resetChat || false,
31
31
  useFSCache: useFSCache || false,
32
32
  });
@@ -178,7 +178,7 @@ async function main() {
178
178
  .option("--model <model>", "LLM to use (gpt-5, claude-4 or gemini-2.5)")
179
179
  .option("--use-disk", "Save and load chat state from disk")
180
180
  .option("--prompt <string>", "String to pass as user prompt")
181
- .option("--use-triage", "run the model in triage mode, different set of tools")
181
+ .option("--agent-mode <mode>", "Mode of the agent: 'chat' or 'triage' or 'video' or 'code-review' (Defaults to 'chat')")
182
182
  .option("--use-cache", "Use filesystem cache for LLM responses (Claude-only, and will disable streaming)")
183
183
  .option("--reset-chat", "Clear any saved chat state (last-chat.json) before starting")
184
184
  .action(async (options) => {
@@ -186,7 +186,7 @@ async function main() {
186
186
  modelInput: options.model,
187
187
  useDiskForChatState: options.useDisk,
188
188
  prompt: options.prompt,
189
- useTriage: options.useTriage,
189
+ agentMode: options.agentMode,
190
190
  resetChat: options.resetChat,
191
191
  useFSCache: options.useCache,
192
192
  });
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/file-info/adapters/github/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AAE7E,OAAO,EAEL,KAAK,qBAAqB,EAC3B,MAAM,+CAA+C,CAAC;AAEvD,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAIjD,wBAAsB,mBAAmB,CAAC,EACxC,KAAK,EACL,QAAQ,EACR,SAAS,EACT,UAAU,EACV,UAAU,GACX,EAAE;IACD,KAAK,EAAE,qBAAqB,CAAC;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,mBAAmB,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB,GAAG,OAAO,CAAC,UAAU,CAAC,CA2BtB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/file-info/adapters/github/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AAE7E,OAAO,EAEL,KAAK,qBAAqB,EAC3B,MAAM,+CAA+C,CAAC;AAEvD,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAEjD,wBAAsB,mBAAmB,CAAC,EACxC,KAAK,EACL,QAAQ,EACR,SAAS,EACT,UAAU,EACV,UAAU,GACX,EAAE;IACD,KAAK,EAAE,qBAAqB,CAAC;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,mBAAmB,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB,GAAG,OAAO,CAAC,UAAU,CAAC,CA2BtB"}
@@ -6,10 +6,9 @@ const helpers_1 = require("../../../tools/file-operations/shared/helpers");
6
6
  const reader_1 = require("./reader");
7
7
  var reader_2 = require("./reader");
8
8
  Object.defineProperty(exports, "getFileInfoFromGitHub", { enumerable: true, get: function () { return reader_2.getFileInfoFromGitHub; } });
9
- const REPO_OWNER = "empirical-run";
10
9
  async function viewFileUsingGitHub({ input, repoName, apiClient, branchName, baseBranch, }) {
11
10
  const filePath = input.path;
12
- const githubReader = new reader_1.GitHubFileReader(repoName, apiClient, REPO_OWNER);
11
+ const githubReader = new reader_1.GitHubFileReader(repoName, apiClient);
13
12
  const fileData = await githubReader.readFile(filePath, branchName, baseBranch);
14
13
  if (!fileData) {
15
14
  return {