@empiricalrun/test-gen 0.75.0 → 0.77.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. package/CHANGELOG.md +48 -0
  2. package/dist/agent/base/index.d.ts +32 -21
  3. package/dist/agent/base/index.d.ts.map +1 -1
  4. package/dist/agent/base/index.js +100 -57
  5. package/dist/agent/browsing/run.d.ts +1 -2
  6. package/dist/agent/browsing/run.d.ts.map +1 -1
  7. package/dist/agent/browsing/run.js +3 -9
  8. package/dist/agent/browsing/utils.d.ts +2 -9
  9. package/dist/agent/browsing/utils.d.ts.map +1 -1
  10. package/dist/agent/browsing/utils.js +5 -109
  11. package/dist/agent/chat/agent-loop.d.ts +8 -7
  12. package/dist/agent/chat/agent-loop.d.ts.map +1 -1
  13. package/dist/agent/chat/agent-loop.js +7 -18
  14. package/dist/agent/chat/exports.d.ts +9 -6
  15. package/dist/agent/chat/exports.d.ts.map +1 -1
  16. package/dist/agent/chat/exports.js +11 -13
  17. package/dist/agent/chat/index.d.ts +6 -10
  18. package/dist/agent/chat/index.d.ts.map +1 -1
  19. package/dist/agent/chat/index.js +117 -196
  20. package/dist/agent/chat/models.d.ts +0 -2
  21. package/dist/agent/chat/models.d.ts.map +1 -1
  22. package/dist/agent/chat/models.js +12 -26
  23. package/dist/agent/chat/prompt/pw-utils-docs.d.ts +1 -1
  24. package/dist/agent/chat/prompt/pw-utils-docs.d.ts.map +1 -1
  25. package/dist/agent/chat/prompt/pw-utils-docs.js +52 -0
  26. package/dist/agent/chat/prompt/repo.d.ts.map +1 -1
  27. package/dist/agent/chat/prompt/repo.js +11 -22
  28. package/dist/agent/chat/prompt/test-case-def.d.ts +2 -0
  29. package/dist/agent/chat/prompt/test-case-def.d.ts.map +1 -0
  30. package/dist/agent/chat/prompt/test-case-def.js +44 -0
  31. package/dist/agent/chat/state.d.ts +8 -8
  32. package/dist/agent/chat/state.d.ts.map +1 -1
  33. package/dist/agent/chat/state.js +17 -47
  34. package/dist/agent/chat/utils.d.ts +4 -5
  35. package/dist/agent/chat/utils.d.ts.map +1 -1
  36. package/dist/agent/chat/utils.js +15 -9
  37. package/dist/agent/cli.d.ts +11 -0
  38. package/dist/agent/cli.d.ts.map +1 -0
  39. package/dist/agent/cli.js +213 -0
  40. package/dist/agent/code-review/executor/index.d.ts +5 -0
  41. package/dist/agent/code-review/executor/index.d.ts.map +1 -0
  42. package/dist/agent/code-review/executor/index.js +13 -0
  43. package/dist/agent/code-review/index.d.ts +12 -0
  44. package/dist/agent/code-review/index.d.ts.map +1 -0
  45. package/dist/agent/code-review/index.js +159 -0
  46. package/dist/agent/code-review/parser.d.ts +5 -0
  47. package/dist/agent/code-review/parser.d.ts.map +1 -0
  48. package/dist/agent/code-review/parser.js +70 -0
  49. package/dist/agent/code-review/types.d.ts +36 -0
  50. package/dist/agent/code-review/types.d.ts.map +1 -0
  51. package/dist/agent/code-review/types.js +13 -0
  52. package/dist/agent/cua/index.d.ts.map +1 -1
  53. package/dist/agent/cua/index.js +18 -2
  54. package/dist/agent/cua/model.d.ts.map +1 -1
  55. package/dist/agent/cua/model.js +4 -1
  56. package/dist/agent/cua/pw-codegen/pw-pause/index.d.ts.map +1 -1
  57. package/dist/agent/index.d.ts +10 -0
  58. package/dist/agent/index.d.ts.map +1 -0
  59. package/dist/agent/index.js +19 -0
  60. package/dist/agent/triage/index.d.ts +7 -0
  61. package/dist/agent/triage/index.d.ts.map +1 -0
  62. package/dist/agent/triage/index.js +103 -0
  63. package/dist/agent/video-analysis/executor/index.d.ts +5 -0
  64. package/dist/agent/video-analysis/executor/index.d.ts.map +1 -0
  65. package/dist/agent/video-analysis/executor/index.js +10 -0
  66. package/dist/agent/video-analysis/index.d.ts +7 -0
  67. package/dist/agent/video-analysis/index.d.ts.map +1 -0
  68. package/dist/agent/video-analysis/index.js +60 -0
  69. package/dist/artifacts/index.d.ts +1 -1
  70. package/dist/artifacts/index.d.ts.map +1 -1
  71. package/dist/artifacts/index.js +3 -1
  72. package/dist/artifacts/utils.d.ts.map +1 -1
  73. package/dist/bin/index.js +68 -23
  74. package/dist/constants/index.d.ts +14 -0
  75. package/dist/constants/index.d.ts.map +1 -1
  76. package/dist/constants/index.js +33 -1
  77. package/dist/file/server.d.ts +1 -3
  78. package/dist/file/server.d.ts.map +1 -1
  79. package/dist/file/server.js +0 -13
  80. package/dist/file-info/adapters/file-system/index.d.ts.map +1 -1
  81. package/dist/file-info/adapters/file-system/reader.d.ts.map +1 -1
  82. package/dist/file-info/adapters/file-system/reader.js +8 -1
  83. package/dist/file-info/adapters/github/index.d.ts.map +1 -1
  84. package/dist/file-info/adapters/github/index.js +1 -2
  85. package/dist/file-info/adapters/github/reader.d.ts +4 -9
  86. package/dist/file-info/adapters/github/reader.d.ts.map +1 -1
  87. package/dist/file-info/adapters/github/reader.js +166 -134
  88. package/dist/index.d.ts.map +1 -1
  89. package/dist/tools/analyse-video/index.d.ts +5 -0
  90. package/dist/tools/analyse-video/index.d.ts.map +1 -0
  91. package/dist/tools/analyse-video/index.js +50 -0
  92. package/dist/tools/create-pull-request/index.d.ts.map +1 -0
  93. package/dist/tools/{definitions/commit-and-create-pr.js → create-pull-request/index.js} +28 -1
  94. package/dist/tools/create-pull-request/utils.d.ts +21 -0
  95. package/dist/tools/create-pull-request/utils.d.ts.map +1 -0
  96. package/dist/tools/create-pull-request/utils.js +83 -0
  97. package/dist/tools/definitions/{fetch-video-analysis.d.ts → analyse-video.d.ts} +17 -12
  98. package/dist/tools/definitions/analyse-video.d.ts.map +1 -0
  99. package/dist/tools/definitions/analyse-video.js +60 -0
  100. package/dist/tools/definitions/review-pull-request.d.ts +3 -0
  101. package/dist/tools/definitions/review-pull-request.d.ts.map +1 -0
  102. package/dist/tools/definitions/review-pull-request.js +16 -0
  103. package/dist/tools/definitions/str_replace_editor.d.ts +1 -0
  104. package/dist/tools/definitions/str_replace_editor.d.ts.map +1 -1
  105. package/dist/tools/definitions/str_replace_editor.js +4 -1
  106. package/dist/tools/definitions/test-gen-browser.d.ts +0 -3
  107. package/dist/tools/definitions/test-gen-browser.d.ts.map +1 -1
  108. package/dist/tools/definitions/test-gen-browser.js +33 -8
  109. package/dist/tools/delete-file/index.d.ts.map +1 -1
  110. package/dist/tools/delete-file/index.js +1 -19
  111. package/dist/tools/executor/base.d.ts +32 -0
  112. package/dist/tools/executor/base.d.ts.map +1 -0
  113. package/dist/tools/executor/base.js +114 -0
  114. package/dist/tools/executor/index.d.ts +3 -22
  115. package/dist/tools/executor/index.d.ts.map +1 -1
  116. package/dist/tools/executor/index.js +13 -92
  117. package/dist/tools/executor/utils/checkpoint.d.ts +1 -1
  118. package/dist/tools/executor/utils/checkpoint.d.ts.map +1 -1
  119. package/dist/tools/executor/utils/checkpoint.js +6 -2
  120. package/dist/tools/executor/utils/git.d.ts +2 -2
  121. package/dist/tools/executor/utils/git.d.ts.map +1 -1
  122. package/dist/tools/executor/utils/git.js +7 -3
  123. package/dist/tools/executor/utils/index.d.ts.map +1 -1
  124. package/dist/tools/executor/utils/index.js +1 -1
  125. package/dist/tools/fetch-session-diff/index.d.ts +3 -0
  126. package/dist/tools/fetch-session-diff/index.d.ts.map +1 -0
  127. package/dist/tools/fetch-session-diff/index.js +46 -0
  128. package/dist/tools/file-operations/create.d.ts.map +1 -1
  129. package/dist/tools/file-operations/create.js +1 -4
  130. package/dist/tools/file-operations/index.d.ts +2 -1
  131. package/dist/tools/file-operations/index.d.ts.map +1 -1
  132. package/dist/tools/file-operations/index.js +4 -1
  133. package/dist/tools/file-operations/insert.d.ts +1 -2
  134. package/dist/tools/file-operations/insert.d.ts.map +1 -1
  135. package/dist/tools/file-operations/insert.js +1 -4
  136. package/dist/tools/file-operations/replace.d.ts.map +1 -1
  137. package/dist/tools/file-operations/replace.js +1 -4
  138. package/dist/tools/grep/index.d.ts.map +1 -1
  139. package/dist/tools/grep/index.js +18 -11
  140. package/dist/tools/index.d.ts +28 -2
  141. package/dist/tools/index.d.ts.map +1 -1
  142. package/dist/tools/index.js +52 -33
  143. package/dist/tools/merge-conflicts/index.d.ts.map +1 -1
  144. package/dist/tools/merge-conflicts/index.js +1 -1
  145. package/dist/tools/rename-file/index.js +1 -1
  146. package/dist/tools/review-pull-request/index.d.ts +3 -0
  147. package/dist/tools/review-pull-request/index.d.ts.map +1 -0
  148. package/dist/tools/review-pull-request/index.js +89 -0
  149. package/dist/tools/run-test.d.ts.map +1 -1
  150. package/dist/tools/run-test.js +25 -3
  151. package/dist/tools/test-gen-browser.d.ts.map +1 -1
  152. package/dist/tools/test-gen-browser.js +51 -47
  153. package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
  154. package/dist/tools/test-run-fetcher/index.js +4 -14
  155. package/dist/tools/utils/urls.d.ts +5 -0
  156. package/dist/tools/utils/urls.d.ts.map +1 -0
  157. package/dist/tools/utils/urls.js +19 -0
  158. package/dist/tools/view-failed-test-run-report/index.d.ts.map +1 -1
  159. package/dist/tools/view-failed-test-run-report/index.js +3 -15
  160. package/dist/utils/artifact-paths.d.ts +20 -0
  161. package/dist/utils/artifact-paths.d.ts.map +1 -0
  162. package/dist/utils/artifact-paths.js +16 -0
  163. package/dist/utils/dedup-image-fs.d.ts +2 -16
  164. package/dist/utils/dedup-image-fs.d.ts.map +1 -1
  165. package/dist/utils/dedup-image-fs.js +12 -16
  166. package/dist/utils/dedup-image.d.ts +1 -14
  167. package/dist/utils/dedup-image.d.ts.map +1 -1
  168. package/dist/utils/dedup-image.js +7 -62
  169. package/dist/{tools/fetch-video-analysis/local-ffmpeg-client.d.ts → utils/ffmpeg/index.d.ts} +9 -6
  170. package/dist/utils/ffmpeg/index.d.ts.map +1 -0
  171. package/dist/utils/ffmpeg/index.js +415 -0
  172. package/dist/utils/file.d.ts +1 -0
  173. package/dist/utils/file.d.ts.map +1 -1
  174. package/dist/utils/file.js +45 -1
  175. package/dist/utils/find-threshold.d.ts +8 -0
  176. package/dist/utils/find-threshold.d.ts.map +1 -0
  177. package/dist/utils/find-threshold.js +55 -0
  178. package/dist/utils/hash.d.ts +2 -0
  179. package/dist/utils/hash.d.ts.map +1 -0
  180. package/dist/utils/hash.js +24 -0
  181. package/dist/utils/model.d.ts +1 -1
  182. package/dist/utils/model.d.ts.map +1 -1
  183. package/dist/utils/model.js +7 -5
  184. package/dist/utils/repo-tree.d.ts +0 -1
  185. package/dist/utils/repo-tree.d.ts.map +1 -1
  186. package/dist/utils/repo-tree.js +2 -14
  187. package/dist/utils/slug.js +1 -1
  188. package/dist/video-core/agent-orchestrator.d.ts +14 -0
  189. package/dist/video-core/agent-orchestrator.d.ts.map +1 -0
  190. package/dist/video-core/agent-orchestrator.js +78 -0
  191. package/dist/video-core/analysis-server.d.ts +24 -0
  192. package/dist/video-core/analysis-server.d.ts.map +1 -0
  193. package/dist/video-core/analysis-server.js +398 -0
  194. package/dist/video-core/analysis-viewer.html +1374 -0
  195. package/dist/video-core/index.d.ts +44 -0
  196. package/dist/video-core/index.d.ts.map +1 -0
  197. package/dist/video-core/index.js +204 -0
  198. package/dist/video-core/model-limits.d.ts +4 -0
  199. package/dist/video-core/model-limits.d.ts.map +1 -0
  200. package/dist/video-core/model-limits.js +67 -0
  201. package/dist/video-core/storage-manager.d.ts +5 -0
  202. package/dist/video-core/storage-manager.d.ts.map +1 -0
  203. package/dist/video-core/storage-manager.js +55 -0
  204. package/dist/video-core/types.d.ts +13 -0
  205. package/dist/video-core/types.d.ts.map +1 -0
  206. package/dist/video-core/types.js +2 -0
  207. package/dist/video-core/utils.d.ts +25 -0
  208. package/dist/video-core/utils.d.ts.map +1 -0
  209. package/dist/video-core/utils.js +211 -0
  210. package/dist/video-core/xml-parser.d.ts +3 -0
  211. package/dist/video-core/xml-parser.d.ts.map +1 -0
  212. package/dist/video-core/xml-parser.js +27 -0
  213. package/package.json +5 -6
  214. package/tsconfig.tsbuildinfo +1 -1
  215. package/dist/agent/chat/prompt/index.d.ts +0 -5
  216. package/dist/agent/chat/prompt/index.d.ts.map +0 -1
  217. package/dist/agent/chat/prompt/index.js +0 -189
  218. package/dist/agent/chat/utils/tool-calls.d.ts +0 -21
  219. package/dist/agent/chat/utils/tool-calls.d.ts.map +0 -1
  220. package/dist/agent/chat/utils/tool-calls.js +0 -64
  221. package/dist/agent/code-review/prompt.d.ts +0 -2
  222. package/dist/agent/code-review/prompt.d.ts.map +0 -1
  223. package/dist/agent/code-review/prompt.js +0 -19
  224. package/dist/agent/diagnosis-agent/index.d.ts +0 -11
  225. package/dist/agent/diagnosis-agent/index.d.ts.map +0 -1
  226. package/dist/agent/diagnosis-agent/index.js +0 -88
  227. package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts +0 -10
  228. package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts.map +0 -1
  229. package/dist/agent/diagnosis-agent/strict-mode-violation.js +0 -30
  230. package/dist/tools/commit-and-create-pr/index.d.ts.map +0 -1
  231. package/dist/tools/commit-and-create-pr/index.js +0 -83
  232. package/dist/tools/definitions/commit-and-create-pr.d.ts +0 -3
  233. package/dist/tools/definitions/commit-and-create-pr.d.ts.map +0 -1
  234. package/dist/tools/definitions/fetch-video-analysis.d.ts.map +0 -1
  235. package/dist/tools/definitions/fetch-video-analysis.js +0 -61
  236. package/dist/tools/fetch-video-analysis/index.d.ts +0 -5
  237. package/dist/tools/fetch-video-analysis/index.d.ts.map +0 -1
  238. package/dist/tools/fetch-video-analysis/index.js +0 -138
  239. package/dist/tools/fetch-video-analysis/local-ffmpeg-client.d.ts.map +0 -1
  240. package/dist/tools/fetch-video-analysis/local-ffmpeg-client.js +0 -247
  241. package/dist/tools/fetch-video-analysis/open-ai.d.ts +0 -6
  242. package/dist/tools/fetch-video-analysis/open-ai.d.ts.map +0 -1
  243. package/dist/tools/fetch-video-analysis/open-ai.js +0 -37
  244. package/dist/tools/fetch-video-analysis/utils.d.ts +0 -13
  245. package/dist/tools/fetch-video-analysis/utils.d.ts.map +0 -1
  246. package/dist/tools/fetch-video-analysis/utils.js +0 -98
  247. package/dist/tools/fetch-video-analysis/video-analysis.d.ts +0 -7
  248. package/dist/tools/fetch-video-analysis/video-analysis.d.ts.map +0 -1
  249. package/dist/tools/fetch-video-analysis/video-analysis.js +0 -54
  250. package/dist/tools/file-operations/shared/git-helper.d.ts +0 -4
  251. package/dist/tools/file-operations/shared/git-helper.d.ts.map +0 -1
  252. package/dist/tools/file-operations/shared/git-helper.js +0 -29
  253. package/eslint.config.mjs +0 -43
  254. /package/dist/tools/{commit-and-create-pr → create-pull-request}/index.d.ts +0 -0
@@ -0,0 +1,159 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CodeReviewAgent = exports.CodeReviewSeverity = exports.CodeReviewVerdict = void 0;
4
+ const tools_1 = require("../../tools");
5
+ const fetch_session_diff_1 = require("../../tools/fetch-session-diff");
6
+ const base_1 = require("../base");
7
+ const parser_1 = require("./parser");
8
+ const types_1 = require("./types");
9
+ Object.defineProperty(exports, "CodeReviewSeverity", { enumerable: true, get: function () { return types_1.CodeReviewSeverity; } });
10
+ Object.defineProperty(exports, "CodeReviewVerdict", { enumerable: true, get: function () { return types_1.CodeReviewVerdict; } });
11
/**
 * Agent that reviews a code change to Playwright tests and reports a
 * structured result (last commit, summary, line comments, verdict).
 *
 * The model is instructed to answer with XML tags; `getResult` hands the text
 * of the final message to `convertXmlToV2Format` for parsing.
 */
class CodeReviewAgent extends base_1.BaseAgent {
    /**
     * Tools offered to the model: the session-diff fetcher plus the text-view
     * tools for the selected model.
     *
     * @returns {{custom: Array, builtInTextEditor: *}} Custom tool list and the
     *   value of `hasBuiltInTextEditor` for the selected model.
     */
    getTools() {
        const custom = [
            fetch_session_diff_1.fetchSessionDiffTool,
            ...(0, tools_1.textViewToolsForModel)(this.selectedModel),
        ];
        return {
            custom,
            builtInTextEditor: (0, tools_1.hasBuiltInTextEditor)(this.selectedModel),
        };
    }
    /**
     * Parses the review out of the last message in the conversation.
     *
     * @returns The parsed review, or `undefined` when there are no messages or
     *   the last message has no non-empty text part.
     */
    getResult() {
        const messages = this.messages || [];
        const lastMessage = messages[messages.length - 1];
        // A single pass is enough: take the first part that carries non-empty text.
        const textPart = lastMessage?.parts.find((part) => "text" in part && !!part.text);
        const text = textPart?.text.trim();
        if (!text) {
            return undefined;
        }
        return (0, parser_1.convertXmlToV2Format)(text);
    }
    /**
     * Builds the system prompt: reviewer role, goals, the XML output format
     * that the parser reads, the severity rubric, and the repository context.
     *
     * @param repoContext Text interpolated under the "# Repo context" heading.
     * @returns {Promise<string>} The full system prompt.
     */
    async buildSystemPrompt(repoContext) {
        return `
You are an expert code reviewer that specializes in reviewing Playwright test code. You are
provided with tools to fetch diff and pull-request metadata for a code review, where a test has been added, test modified,
or some configuration has changed.

# Your goals
- Understand the purpose and scope of the code change. You can use available tools to gather context of the change.
- Identify critical issues that must be fixed before the code can be safely merged.
- Detect code smells, anti-patterns, and non-deterministic behaviors that reduce test reliability - see below.
- Call out test data assumptions or lack of clean up.
- Suggest improvements and best practices to enhance maintainability and readability.
- Form a definite conclusion on whether the code can be merged or not.
- Share your findings and conclusion in the structured format shared below

# Output format
- You are expected to return the following sections in your response: last_commit, describe_code_change, line_comments and verdict
- The last commit comes from the session diff tool call along with pull request metadata -- reproduce the commit sha as it is, without any additions (ignore the last commit timestamp) or bullet points
- describe_code_change: A brief summary of what the code change is doing. This should be 4-6 sentences in a bullet list, formatted in markdown where each bullet must begin with a hyphen followed by a space (- ).
Do not use any other character for bullets.
- line_comments: Individual comments for specific issues found in the code, attributed to one or more lines of problematic code. There can be multiple separate issues for each line, share them using separate comments of varying severity.
Each comment should correspond to one issue and include the following tags:
- file: The relative path to the file from repository root
- line-start: Starting line number of the issue
- line-end: Ending line number of the issue (same as line-start if the issue is in a single line of code)
- severity: Either "merge-blocking" or "warning". It is possible to have multiple issues in the same set of lines of either nature, in that case add multiple line_comments for those lines.
Here severity denotes the nature of the issue - any issue that is preventing the code from being safe to merge and should be considered high priority is "merge-blocking". Look for any of the specific
bits below or other red flags you might see in the code. Each comment should be 1-2 sentences. If no blocking issues are found, a review will have no "merge-blocking" line comments.
Alternatively, the severity should be "warning" for situations where best practices were not followed or contain minor issues or warnings that can be safely ignored.
- message: 1-2 sentences describing the specific issue and suggested fixes or improvements.
- verdict: "Approved" if code can be merged to production ie there are no "merge-blocking" line-comments or "Rejected" if the issues cannot be safely ignored. "Approved" or
"Rejected" are the only two possible values for this field.

Return these as XML tags with markdown inside them

<last_commit>
...
</last_commit>

<describe_code_change>
...
</describe_code_change>

<line_comments>
<comment>
<file>..</file>
<line-start>..</line-start>
<line-end>..</line-end>
<severity>..</severity>
<message>..</message>
</comment>

<comment>
<file>..</file>
<line-start>..</line-start>
<line-end>..</line-end>
<severity>..</severity>
<message>..</message>
</comment>
</line_comments>

<verdict>
...
</verdict>

# Severity: Merge blocking

## Functionality regression
- If the change is modifying an existing test, we need to ensure the functionality of the original test
is maintained in the new version. No hacking our way to get a green test!

## Exception handling
- Any form of try-catch or exception handling is a code smell in test code. If there's an
exception, the test should fail

## Conditionals
- Any conditionals (if, switch, ternary) in test code is a code smell. Tests are expected to be
deterministic. If you see conditionals, check if there's a comment explaining why it's needed.
Critically review the comment -- if it's not convincing, call it out as a code smell.

## Playwright common mistakes
- Don't use waitForLoadState or networkidle - these are not required since Playwright auto-waits after navigations. networkidle
can cause failures because modern web apps often have background network activity, which never settles.
- Use locators instead of selectors: waitForSelector, $, $$ are bad - use locators instead (e.g. locator.waitFor)
- If the test relies on some Playwright APIs that do not auto-wait (e.g. isVisible(), count()), we need to ensure
they are used AFTER some action that ensures the page has loaded. If nothing, at least it should have a waitForTimeout

## Deprecated patterns
- test.describe.serial(...) is not preferred: use test.describe.configure({ mode: "serial" }) if the tests need to be serial

## Repo conventions
- Tests are located in files in the tests/ directory (e.g. in tests/example.spec.ts)
- Helper methods (that are imported in the tests) should be in pages/ directory (e.g. pages/common.ts)
- Helper methods should be functional - not classes (conventional class-based page object models are NOT recommended - use functions!)

# Severity: Warning

## Call out test data assumptions
- If new test data is created (e.g. creating a new entity in the app, doing some actions on it) - it should be cleaned up
at the end of the test. If not, call it out.
- If the test data cannot be cleaned up, are we using some random names to ensure no conflicts in future test runs?
- If the test assumes some data exists (e.g. a user with a specific email) - call it out. It might fail across other
environments.
- No hard coded URLs - use relative URLs instead - that can work across environments.
- Dependency on static data that can change across environments (e.g. number of rows in a table) should be avoided.

## Remove debug artifacts
- If there are console.logs or page.screenshot usage, call it out.

## Extra waits
- Wait for timeout for static values are bad, but sometimes needed. Some apps are flaky and need additional waiting.

## Element locators
- CSS selectors can be brittle - prefer user facing selectors like getByRole, getByText

# Repo context
${repoContext}
`;
    }
}
159
+ exports.CodeReviewAgent = CodeReviewAgent;
@@ -0,0 +1,5 @@
1
+ import { type CodeReviewResultV2 } from "./types";
2
+ export type { CodeReviewLineComment, CodeReviewResultV0, CodeReviewResultV1, CodeReviewResultV2, } from "./types";
3
+ export { CodeReviewSeverity, CodeReviewVerdict } from "./types";
4
/**
 * Parses XML-tagged review text into a version "2.0" review result.
 *
 * @param output Raw text containing the review's XML sections.
 * @returns The structured review result.
 */
+ export declare function convertXmlToV2Format(output: string): CodeReviewResultV2;
5
+ //# sourceMappingURL=parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,kBAAkB,EAGxB,MAAM,SAAS,CAAC;AAEjB,YAAY,EACV,qBAAqB,EACrB,kBAAkB,EAClB,kBAAkB,EAClB,kBAAkB,GACnB,MAAM,SAAS,CAAC;AAEjB,OAAO,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAC;AAchE,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,MAAM,GAAG,kBAAkB,CAmFvE"}
@@ -0,0 +1,70 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CodeReviewVerdict = exports.CodeReviewSeverity = void 0;
4
+ exports.convertXmlToV2Format = convertXmlToV2Format;
5
+ const types_1 = require("./types");
6
+ var types_2 = require("./types");
7
+ Object.defineProperty(exports, "CodeReviewSeverity", { enumerable: true, get: function () { return types_2.CodeReviewSeverity; } });
8
+ Object.defineProperty(exports, "CodeReviewVerdict", { enumerable: true, get: function () { return types_2.CodeReviewVerdict; } });
9
/**
 * Derives a verdict from the parsed line comments.
 *
 * @param {Array<{severity: string}>} lineComments Parsed review comments.
 * @returns {string} `Rejected` when at least one comment is merge-blocking,
 *   otherwise `Approved` (including when the list is empty).
 */
function inferVerdictFromCommentsV2(lineComments) {
    const blockingSeverity = types_1.CodeReviewSeverity.MergeBlocking;
    const noneBlocking = lineComments.every((entry) => entry.severity !== blockingSeverity);
    if (noneBlocking) {
        return types_1.CodeReviewVerdict.Approved;
    }
    return types_1.CodeReviewVerdict.Rejected;
}
15
/**
 * Converts the reviewer model's XML-tagged response into a version "2.0" result.
 *
 * Sections read: <last_commit>, <describe_code_change>, <line_comments>
 * (holding <comment> entries) and <verdict>. Tag matching is case-insensitive
 * and takes the first occurrence of each tag.
 *
 * A <comment> is kept only when it has all of <file>, <line-start>,
 * <line-end>, <severity> and <message>, and its severity text is exactly
 * "merge-blocking" or "warning"; any other comment is dropped. Line numbers
 * are read with `parseInt(..., 10)` and are not validated further.
 *
 * Verdict: a non-empty <verdict> yields `Approved` only for the exact text
 * "Approved" and `Rejected` for anything else; a missing or empty <verdict>
 * is inferred from the line comments.
 *
 * @param {string} output Raw text of the model response.
 * @returns {{version: "2.0", last_commit: string, describe_code_change: string,
 *   line_comments: Array, verdict: string}} The structured review.
 * @throws {TypeError} When <last_commit> or <describe_code_change> is missing.
 */
function convertXmlToV2Format(output) {
    // Trimmed inner text of the first <tag>...</tag> in `source`, or undefined
    // when the tag is absent. Tag names used here contain no regex metacharacters.
    const readTag = (source, tag) => {
        const found = source.match(new RegExp(`<${tag}>([\\s\\S]*?)<\\/${tag}>`, "i"));
        return found ? found[1].trim() : undefined;
    };
    const lastCommit = readTag(output, "last_commit");
    const describeCodeChange = readTag(output, "describe_code_change");
    const missingTags = [
        lastCommit === undefined ? "<last_commit>" : null,
        describeCodeChange === undefined ? "<describe_code_change>" : null,
    ].filter((tag) => tag !== null);
    if (missingTags.length > 0) {
        // Previously these tags were dereferenced unguarded, which surfaced as
        // "Cannot read properties of null". The error stays a TypeError so that
        // existing handlers behave the same, but it now names the actual problem.
        throw new TypeError(`Code review output is missing required section(s): ${missingTags.join(", ")}`);
    }
    const lineComments = [];
    const commentsSection = readTag(output, "line_comments");
    if (commentsSection !== undefined) {
        for (const [, commentBody] of commentsSection.matchAll(/<comment>([\s\S]*?)<\/comment>/gi)) {
            const file = readTag(commentBody, "file");
            const lineStart = readTag(commentBody, "line-start");
            const lineEnd = readTag(commentBody, "line-end");
            const severityText = readTag(commentBody, "severity");
            const message = readTag(commentBody, "message");
            const isComplete = [file, lineStart, lineEnd, severityText, message].every((value) => value !== undefined);
            if (!isComplete) {
                continue;
            }
            const severity = severityText === "merge-blocking"
                ? types_1.CodeReviewSeverity.MergeBlocking
                : severityText === "warning"
                    ? types_1.CodeReviewSeverity.Warning
                    : null;
            if (severity === null) {
                // Unknown severity labels are ignored rather than guessed at.
                continue;
            }
            lineComments.push({
                file,
                line_start: parseInt(lineStart, 10),
                line_end: parseInt(lineEnd, 10),
                severity,
                message,
            });
        }
    }
    const verdictText = readTag(output, "verdict");
    const verdict = verdictText
        ? verdictText === types_1.CodeReviewVerdict.Approved
            ? types_1.CodeReviewVerdict.Approved
            : types_1.CodeReviewVerdict.Rejected
        : inferVerdictFromCommentsV2(lineComments);
    return {
        version: "2.0",
        last_commit: lastCommit,
        describe_code_change: describeCodeChange,
        line_comments: lineComments,
        verdict,
    };
}
@@ -0,0 +1,36 @@
1
/** Overall outcome of a code review. */
+ export declare enum CodeReviewVerdict {
2
+ Approved = "Approved",
3
+ Rejected = "Rejected"
4
+ }
5
/** Severity attached to an individual line comment. */
+ export declare enum CodeReviewSeverity {
6
+ MergeBlocking = "merge-blocking",
7
+ Warning = "warning"
8
+ }
9
/** Version "1.0" result shape: every section is a nullable string. */
+ export type CodeReviewResultV1 = {
10
+ version: "1.0";
11
+ last_commit: string | null;
12
+ code_review_comments: string | null;
13
+ describe_code_change: string | null;
14
+ merge_blocking_issues: string | null;
15
+ best_practices_and_warnings: string | null;
16
+ verdict: "Approved" | "Rejected" | null;
17
+ };
18
/** One review comment anchored to a file and a start/end line pair. */
+ export type CodeReviewLineComment = {
19
+ file: string;
20
+ line_start: number;
21
+ line_end: number;
22
+ severity: CodeReviewSeverity;
23
+ message: string;
24
+ };
25
/** Version "2.0" result shape: structured line comments plus an enum verdict. */
+ export type CodeReviewResultV2 = {
26
+ version: "2.0";
27
+ last_commit: string;
28
+ describe_code_change: string;
29
+ line_comments: CodeReviewLineComment[];
30
+ verdict: CodeReviewVerdict;
31
+ };
32
/** Version "0.1" result shape: a single result string. */
+ export type CodeReviewResultV0 = {
33
+ version: "0.1";
34
+ result: string;
35
+ };
36
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/types.ts"],"names":[],"mappings":"AAAA,oBAAY,iBAAiB;IAC3B,QAAQ,aAAa;IACrB,QAAQ,aAAa;CACtB;AAED,oBAAY,kBAAkB;IAC5B,aAAa,mBAAmB;IAChC,OAAO,YAAY;CACpB;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC/B,OAAO,EAAE,KAAK,CAAC;IACf,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,qBAAqB,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,2BAA2B,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,OAAO,EAAE,UAAU,GAAG,UAAU,GAAG,IAAI,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,kBAAkB,CAAC;IAC7B,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,OAAO,EAAE,KAAK,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,aAAa,EAAE,qBAAqB,EAAE,CAAC;IACvC,OAAO,EAAE,iBAAiB,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,OAAO,EAAE,KAAK,CAAC;IAEf,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC"}
@@ -0,0 +1,13 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CodeReviewSeverity = exports.CodeReviewVerdict = void 0;
4
/** Overall review outcome; each member's value is the same string as its name. */
var CodeReviewVerdict = {
    Approved: "Approved",
    Rejected: "Rejected",
};
exports.CodeReviewVerdict = CodeReviewVerdict;
9
/** Severity labels for line comments, keyed by member name. */
var CodeReviewSeverity = {
    MergeBlocking: "merge-blocking",
    Warning: "warning",
};
exports.CodeReviewSeverity = CodeReviewSeverity;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAmClC,MAAM,MAAM,kBAAkB,GAAG,KAAK,CAClC;IACE,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,iBAAiB,CAAC;CAC/B,CACJ,CAAC;AAEF,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,8BAAsC,GACvC,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,8BAA8B,CAAC,EAAE,OAAO,CAAC;CAC1C,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,KAAK,EAAE,KAAK,CAAC;CACd,CAAC,CAuND"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAoDlC,MAAM,MAAM,kBAAkB,GAAG,KAAK,CAClC;IACE,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,iBAAiB,CAAC;CAC/B,CACJ,CAAC;AAEF,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,8BAAsC,GACvC,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,8BAA8B,CAAC,EAAE,OAAO,CAAC;CAC1C,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,KAAK,EAAE,KAAK,CAAC;CACd,CAAC,CAuND"}
@@ -28,6 +28,22 @@ function artifact(screenshot, name) {
28
28
  data: Buffer.from(screenshot, "base64"),
29
29
  };
30
30
  }
31
+ function stateOfTheBrowser(page) { // Returns a markdown text block describing `page` (URL, title) and every page open in the same browser context.
32
+ const browserContext = page.context(); // Context that owns `page`; used to enumerate all open pages.
33
+ const pages = browserContext.pages(); // NOTE(review): the template below interpolates page.title() / p.title() directly. If `page` is a Playwright Page, title() returns a Promise, so the text would read "[object Promise]" - confirm. Fixing it requires making this helper async and awaiting it at its call sites.
34
+ return `
35
+ ## Browser window
36
+
37
+ ### Current page (what you are working on)
38
+ Current page URL: ${page.url()}
39
+ Current page title: ${page.title()}
40
+
41
+ ### All pages
42
+ Number of open pages: ${pages.length}
43
+
44
+ URLs and titles:
45
+ ${pages.map((p) => ` - ${p.url()} - ${p.title()}`).join("\n")}`;
46
+ }
31
47
  async function createTestUsingComputerUseAgent({ page, task, trace, prefersElementFromPointCodegen = false, }) {
32
48
  const codegen = await getCodegenInstance(prefersElementFromPointCodegen);
33
49
  await codegen.initialize(page);
@@ -53,7 +69,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, prefersEleme
53
69
  content: [
54
70
  {
55
71
  type: "input_text",
56
- text: `Task to execute: ${task}\n\nCurrent page URL: ${page.url()}`,
72
+ text: `Task to execute: ${task}\n\n${stateOfTheBrowser(page)}`,
57
73
  },
58
74
  {
59
75
  type: "input_image",
@@ -177,7 +193,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, prefersEleme
177
193
  content: [
178
194
  {
179
195
  type: "input_text",
180
- text: `Action executed: ${executedActionSummary || "None"}\nCurrent page URL: ${page.url()}`,
196
+ text: `Action executed: ${executedActionSummary || "None"}\n\n${stateOfTheBrowser(page)}`,
181
197
  },
182
198
  ],
183
199
  },
@@ -1 +1 @@
1
- {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;;;EAQrE"}
1
+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AAiClD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;;;EAQrE"}
@@ -9,7 +9,10 @@ For example, if the user message says "Click on Submit button", then
9
9
  you click on the submit button -- even if it looks like a scary action.
10
10
 
11
11
  If you have been asked to retrieve text or verify something on the UI, then communicate
12
- that in your responses so that the user can see your thinking process in its entirety.`;
12
+ that in your responses so that the user can see your thinking process in its entirety.
13
+
14
+ Your work is limited to the current browser page (tab) that you are provided with. You will
15
+ have to conclude your actions before the user can ask you to do actions on different pages (tabs).`;
13
16
  const pageGotoTool = {
14
17
  type: "function",
15
18
  name: "page_goto",
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/agent/cua/pw-codegen/pw-pause/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAGvC,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAKjD,OAAO,EAAE,2BAA2B,EAAE,sBAAsB,EAAE,MAAM,SAAS,CAAC;AAE9E,wBAAsB,kBAAkB,CAAC,OAAO,EAAE,MAAM,oBAwCvD;AAED,qBAAa,sBAAuB,YAAW,qBAAqB;IAClE,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,IAAI,CAAmB;IAC/B,OAAO,CAAC,MAAM,CAA4C;IAC1D,OAAO,CAAC,iBAAiB,CAAqB;;YAMhC,QAAQ;IAUhB,UAAU,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBrC,sBAAsB,CAAC,IAAI,EAAE,IAAI;IAqBjC,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAI7B,oBAAoB,IAAI,OAAO,CAAC,MAAM,CAAC;CAU9C"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/agent/cua/pw-codegen/pw-pause/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAGvC,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAMjD,OAAO,EAAE,2BAA2B,EAAE,sBAAsB,EAAE,MAAM,SAAS,CAAC;AAE9E,wBAAsB,kBAAkB,CAAC,OAAO,EAAE,MAAM,oBAwCvD;AAED,qBAAa,sBAAuB,YAAW,qBAAqB;IAClE,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,IAAI,CAAmB;IAC/B,OAAO,CAAC,MAAM,CAA4C;IAC1D,OAAO,CAAC,iBAAiB,CAAqB;;YAMhC,QAAQ;IAUhB,UAAU,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBrC,sBAAsB,CAAC,IAAI,EAAE,IAAI;IAqBjC,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAI7B,oBAAoB,IAAI,OAAO,CAAC,MAAM,CAAC;CAU9C"}
@@ -0,0 +1,10 @@
1
+ import type { AgentModeEnum } from "@empiricalrun/shared-types";
2
+ import { type AgentParams, BaseAgent } from "./base";
3
+ import { ChatAgent } from "./chat";
4
+ import { CodeReviewAgent } from "./code-review";
5
+ import { TriageAgent } from "./triage";
6
+ import { VideoAnalysisAgent } from "./video-analysis";
7
+ export declare const MODE_TO_AGENT_MAP: Record<AgentModeEnum, (params: AgentParams) => BaseAgent>; // One agent factory per AgentModeEnum key; each takes AgentParams and returns a BaseAgent.
8
+ export { BaseAgent, ChatAgent, CodeReviewAgent, TriageAgent, VideoAnalysisAgent, };
9
+ export type { AgentParams };
10
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAEhE,OAAO,EAAE,KAAK,WAAW,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACrD,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEtD,eAAO,MAAM,iBAAiB,EAAE,MAAM,CACpC,aAAa,EACb,CAAC,MAAM,EAAE,WAAW,KAAK,SAAS,CAMnC,CAAC;AAEF,OAAO,EACL,SAAS,EACT,SAAS,EACT,eAAe,EACf,WAAW,EACX,kBAAkB,GACnB,CAAC;AACF,YAAY,EAAE,WAAW,EAAE,CAAC"}
@@ -0,0 +1,19 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.VideoAnalysisAgent = exports.TriageAgent = exports.CodeReviewAgent = exports.ChatAgent = exports.BaseAgent = exports.MODE_TO_AGENT_MAP = void 0;
4
+ const base_1 = require("./base");
5
+ Object.defineProperty(exports, "BaseAgent", { enumerable: true, get: function () { return base_1.BaseAgent; } });
6
+ const chat_1 = require("./chat");
7
+ Object.defineProperty(exports, "ChatAgent", { enumerable: true, get: function () { return chat_1.ChatAgent; } });
8
+ const code_review_1 = require("./code-review");
9
+ Object.defineProperty(exports, "CodeReviewAgent", { enumerable: true, get: function () { return code_review_1.CodeReviewAgent; } });
10
+ const triage_1 = require("./triage");
11
+ Object.defineProperty(exports, "TriageAgent", { enumerable: true, get: function () { return triage_1.TriageAgent; } });
12
+ const video_analysis_1 = require("./video-analysis");
13
+ Object.defineProperty(exports, "VideoAnalysisAgent", { enumerable: true, get: function () { return video_analysis_1.VideoAnalysisAgent; } });
14
+ exports.MODE_TO_AGENT_MAP = { // Maps each agent mode key to a factory that constructs the matching agent class from `params`.
15
+ triage: (params) => new triage_1.TriageAgent(params),
16
+ chat: (params) => new chat_1.ChatAgent(params),
17
+ video: (params) => new video_analysis_1.VideoAnalysisAgent(params),
18
+ "code-review": (params) => new code_review_1.CodeReviewAgent(params),
19
+ };
@@ -0,0 +1,7 @@
1
+ import type { ToolsForLLM } from "@empiricalrun/shared-types";
2
+ import { BaseAgent } from "../base";
3
+ export declare class TriageAgent extends BaseAgent { // Declaration for the triage agent; overrides the tool set and the system prompt.
4
+ protected getTools(): ToolsForLLM; // Tools made available to the model for this agent.
5
+ protected buildSystemPrompt(repoContext?: string): Promise<string>; // `repoContext` is optional in this signature, but the compiled implementation (dist/agent/triage/index.js) throws when it is falsy.
6
+ }
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/triage/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAoB9D,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAGpC,qBAAa,WAAY,SAAQ,SAAS;IACxC,SAAS,CAAC,QAAQ,IAAI,WAAW;cA2BjB,iBAAiB,CAAC,WAAW,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAoEzE"}
@@ -0,0 +1,103 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.TriageAgent = void 0;
4
+ const tools_1 = require("../../tools");
5
+ const base_1 = require("../base");
6
+ const test_case_def_1 = require("../chat/prompt/test-case-def");
7
+ class TriageAgent extends base_1.BaseAgent {
8
+ getTools() {
9
+ const tools = [
10
+ // Common tools
11
+ tools_1.runTestTool,
12
+ tools_1.grepTool,
13
+ tools_1.fetchDiagnosisReportTool,
14
+ tools_1.listEnvironmentsTool,
15
+ tools_1.downloadBuildTool,
16
+ tools_1.fetchFileTool,
17
+ tools_1.traceDotZipTool,
18
+ // Triage specific tools
19
+ tools_1.listIssuesTool,
20
+ tools_1.createIssueTool,
21
+ tools_1.updateIssueTool,
22
+ tools_1.viewFailedTestRunReportTool,
23
+ tools_1.analyseVideo,
24
+ tools_1.fetchLastSuccessfulTestRunTool,
25
+ tools_1.sendTriageSummaryTool,
26
+ // Model-specific tools
27
+ ...(0, tools_1.textEditorToolsForModel)(this.selectedModel),
28
+ ];
29
+ return {
30
+ custom: tools,
31
+ builtInTextEditor: (0, tools_1.hasBuiltInTextEditor)(this.selectedModel),
32
+ };
33
+ }
34
+ async buildSystemPrompt(repoContext) {
35
+ if (!repoContext) {
36
+ throw new Error(`Triage agent needs repo context`);
37
+ }
38
+ return `
39
+ You are a helpful assistant that help with analysis of Playwright test reports. Your goal is to help the user analyse a test report and identify the root cause of the test failures, and log the unique failuers as issues so that the user can keep a track and fix them.
40
+
41
+ You are working on a test code repository that contains Playwright tests and other related files. Your working directory has been checked out on a git branch.
42
+
43
+ # Your capabilities
44
+
45
+ When provided with a test report URL, you can use these capabilities to triage the test failures in the report:
46
+
47
+ ## Fetch and view the test report
48
+
49
+ - Use viewFailedTestRunReportTool tool to get more information about all tests that failed in the run
50
+
51
+ ## Analyze each test case
52
+
53
+ You are provided with multiple tools to help you understand each failing test case better. Understanding each test case allows you to identify the root cause and create more accurate issues. These tools can also be called in parallel.
54
+
55
+ - Each test case generates artifacts: images, videos, playwright trace zip file. With your tools, you can fetch image, analyze the video frames and trace.zip to find out failing network requests and console logs
56
+ - Each video represents one browser tab of the test case (so multiple videos implies the test had multiple tabs or browser windows)
57
+ - Read the error stack and test file to understand what the test is doing
58
+ - Fetch the last successful run of the test case to understand the earlier flow. This report will contain image and video URLs that can also be analyzed with your available tools.
59
+ - If you think the issue is explained by a timing or intermittent issue, you can also re-run the test case
60
+
61
+ ## Listing, updating and creating issues
62
+
63
+ - Test failures will become issues that can be assigned to developers to fix the app or update the test. Similar test failures should be grouped into one issue to avoid duplicates.
64
+ - Before you create a new issue, you MUST list existing issues that have been created for this repo, to avoid creating duplicate issues.
65
+ - If you find duplicates, use the update issue tool to update the existing issue with new information from the test report
66
+ - When you are creating a new issue, use the description and title to clearly call out the error reason (share error stack, error message, relevant lines of code, etc.) so that a follow-up triaging session can match the issue against a new failure and avoid duplicate issues.
67
+ - What makes a good issue: accurate classification between app or test issue, accurate grouping, and a good auto-fix prompt - see more about this below
68
+
69
+ ## Classify tests as app or test issues
70
+ - An app issue is an issue in the application that is being tested. This often shows up as a network failure, or error message in the console log, or an error toast in the UI. Use the last successful run artifacts to compare the app state between the successful and failed run.
71
+ - A test issue is an issue in the test code. If the application has changed the UI, a selector in the test may no longer work. Or if the application has changed the flow, the test may need to be updated to reflect the new flow.
72
+
73
+ ## Grouping test failures
74
+ - Before you create issues, group the failures together so that we create useful issues
75
+ - What makes a good group: failures that have the same root cause - because of similar error stacks - and can be fixed with the same change to the app or test
76
+ - Both "type of failure" and "proposed fix" are important to determine if two failures belong to the same group
77
+ - Example: if two tests fail with strict mode violations, but for 2 different selectors, they are different groups because the proposed fixes are different
78
+ - What does not make a good group: the location of the test or the name of the test. Two tests that are located in the same file or have similar names should ONLY be grouped together if the root cause of failures is same
79
+
80
+ ## Crafting a good auto-fix prompt for test issues
81
+ - When you create issues with type "test", you are expected to share a test_issue_prompt which is your proposed change to the test to adapt to the new app state.
82
+ - This prompt is handed over to another agent to update the test code, and your prompt is the ONLY context that the agent has to update the test.
83
+ - Therefore, your prompt must contain:
84
+ - Which test cases to be updated - with test and describe block names, file name
85
+ - What failed in the test - error message, error stack, relevant lines of code, or bits to locate the failure
86
+ - Your suggested change to the test
87
+ - Emphasis to re-run the test after making the change, to ensure that the change works
88
+
89
+ ## Conclusion
90
+ - After you are done with triaging and creating issues, summarize the work done with a list of created issues for the user to review. Don't be too verbose - a bullet list of issues created or updated, with a small description is enough.
91
+ - It is important to show proof that you have gone through all of the failures in the test run report, so use numbers to call out 1. total failures, and 2. failures associated with each issue.
92
+
93
+ ${test_case_def_1.testCasesDefinitionPrompt}
94
+
95
+ # Repo context
96
+ ${repoContext}
97
+
98
+ # Reference
99
+ Today's date is ${new Date().toDateString()}
100
+ `;
101
+ }
102
+ }
103
+ exports.TriageAgent = TriageAgent;
@@ -0,0 +1,5 @@
1
+ import { BaseToolExecutor, BaseToolExecutorProps } from "../../../tools/executor/base";
2
+ export declare class VideoAnalysisToolExecutor extends BaseToolExecutor { // Declaration for a BaseToolExecutor subclass whose constructor does not accept `tools`.
3
+ constructor(params: Omit<BaseToolExecutorProps, "tools">); // Accepts every BaseToolExecutorProps field except `tools`.
4
+ }
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/agent/video-analysis/executor/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,gBAAgB,EAChB,qBAAqB,EACtB,MAAM,8BAA8B,CAAC;AAEtC,qBAAa,yBAA0B,SAAQ,gBAAgB;gBACjD,MAAM,EAAE,IAAI,CAAC,qBAAqB,EAAE,OAAO,CAAC;CAGzD"}
@@ -0,0 +1,10 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.VideoAnalysisToolExecutor = void 0;
4
+ const base_1 = require("../../../tools/executor/base");
5
+ class VideoAnalysisToolExecutor extends base_1.BaseToolExecutor { // Tool executor variant that always hands the base executor an empty `tools` list.
6
+ constructor(params) {
7
+ super({ ...params, tools: [] }); // `tools: []` comes after the spread, so it overrides any `tools` key present in `params`.
8
+ }
9
+ }
10
+ exports.VideoAnalysisToolExecutor = VideoAnalysisToolExecutor;
@@ -0,0 +1,7 @@
1
+ import type { ToolsForLLM } from "@empiricalrun/shared-types";
2
+ import { BaseAgent } from "../base";
3
+ export declare class VideoAnalysisAgent extends BaseAgent { // Declaration for the video analysis agent; overrides the tool set and the system prompt.
4
+ protected getTools(): ToolsForLLM; // Tools made available to the model for this agent.
5
+ protected buildSystemPrompt(): Promise<string>; // Takes no arguments, unlike TriageAgent's variant which accepts repoContext.
6
+ }
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/video-analysis/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAC9D,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,qBAAa,kBAAmB,SAAQ,SAAS;IAC/C,SAAS,CAAC,QAAQ,IAAI,WAAW;cAOjB,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC;CA+CrD"}
@@ -0,0 +1,60 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.VideoAnalysisAgent = void 0;
4
+ const base_1 = require("../base");
5
+ class VideoAnalysisAgent extends base_1.BaseAgent { // Agent that exposes no tools and instructs the model to summarise video frames in an XML format.
6
+ getTools() { // No custom tools and no built-in text editor for this agent.
7
+ return {
8
+ custom: [],
9
+ builtInTextEditor: false,
10
+ };
11
+ }
12
+ async buildSystemPrompt() { // Returns a fixed prompt string; nothing is interpolated into it.
13
+ return `
14
+ You are a video analysis agent specialized in analyzing screen recordings and user interface interactions.
15
+
16
+ You will receive individual video frames with their Frame IDs as user input for detailed visual analysis.
17
+
18
+ When analyzing the provided frames:
19
+ 1. Analyze each frame for UI elements, user actions, and state changes
20
+ 2. Provide specific observations about what's happening in each frame
21
+ 3. The Summary should be in the given XML format
22
+
23
+ Your analysis should be:
24
+ - Detailed and specific about UI elements and interactions
25
+ - Sequential, following the flow of actions in the video
26
+
27
+ CRITICAL: You MUST use the EXACT frame IDs that are provided with each frame. Each frame will be labeled with text like "Frame ID: frame_000000" - use this exact ID in your <key_frame> tags.
28
+
29
+ Note: The Last frame from the attachments should always be included in the <key_frame> tag
30
+
31
+ # Output format
32
+ <summary>
33
+ <section>
34
+ <key_frame>frame_id</key_frame>
35
+ <description>text description of the frame</description>
36
+ </section>
37
+ <section>
38
+ <key_frame>frame_id</key_frame>
39
+ <description>text description of the frame</description>
40
+ </section>
41
+ .
42
+ .
43
+ .
44
+ <section>
45
+ <key_frame>frame_id</key_frame>
46
+ <description>text description of the frame</description>
47
+ </section>
48
+ </summary>
49
+
50
+ ## Example
51
+ If the attachments include "frame_000000.png", "frame_000078.png", and "frame_000156.png", then:
52
+ - To reference the first frame, use: <key_frame>frame_000000</key_frame>
53
+ - To reference the second frame, use: <key_frame>frame_000078</key_frame>
54
+ - To reference the third frame, use: <key_frame>frame_000156</key_frame>
55
+
56
+ WRONG: <key_frame>frame_000001</key_frame> (unless there's actually a file named frame_000001.png in the attachments)
57
+ `;
58
+ }
59
+ }
60
+ exports.VideoAnalysisAgent = VideoAnalysisAgent;
@@ -46,7 +46,7 @@ export declare class UploadArtifactsQueue {
46
46
  private artifactResults;
47
47
  private uploadPromise;
48
48
  constructor(baseRepoPath: string, toolCallId: string);
49
- addTask(artifacts: ArtifactInput[]): Promise<void>;
49
+ addTask(artifacts: ArtifactInput[]): Promise<Artifact[]>;
50
50
  waitForCompletion(): Promise<Artifact[]>;
51
51
  }
52
52
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/artifacts/index.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,QAAQ,EACR,aAAa,EAId,MAAM,4BAA4B,CAAC;AAwBpC,wBAAgB,2BAA2B,uBAM1C;AA6HD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,aAAa,EAAE,EACvB,OAAO,EAAE,MAAM,EACf,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAsErB;AAED,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,eAAe,CAAkB;IACzC,OAAO,CAAC,aAAa,CAA8B;gBAEvC,YAAY,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM;IAKvC,OAAO,CAAC,SAAS,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAelD,iBAAiB,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;CAMtD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/artifacts/index.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,QAAQ,EACR,aAAa,EAId,MAAM,4BAA4B,CAAC;AAwBpC,wBAAgB,2BAA2B,uBAM1C;AA6HD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,aAAa,EAAE,EACvB,OAAO,EAAE,MAAM,EACf,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAsErB;AAED,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,eAAe,CAAkB;IACzC,OAAO,CAAC,aAAa,CAAoC;gBAE7C,YAAY,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM;IAK7C,OAAO,CAAC,SAAS,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAiBlD,iBAAiB,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;CAMtD"}