@empiricalrun/test-gen 0.76.0 → 0.77.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230) hide show
  1. package/CHANGELOG.md +33 -0
  2. package/dist/agent/base/index.d.ts +25 -21
  3. package/dist/agent/base/index.d.ts.map +1 -1
  4. package/dist/agent/base/index.js +48 -37
  5. package/dist/agent/browsing/run.d.ts +1 -2
  6. package/dist/agent/browsing/run.d.ts.map +1 -1
  7. package/dist/agent/browsing/run.js +3 -9
  8. package/dist/agent/browsing/utils.d.ts +2 -9
  9. package/dist/agent/browsing/utils.d.ts.map +1 -1
  10. package/dist/agent/browsing/utils.js +5 -109
  11. package/dist/agent/chat/agent-loop.d.ts +5 -5
  12. package/dist/agent/chat/agent-loop.d.ts.map +1 -1
  13. package/dist/agent/chat/agent-loop.js +3 -8
  14. package/dist/agent/chat/exports.d.ts +5 -4
  15. package/dist/agent/chat/exports.d.ts.map +1 -1
  16. package/dist/agent/chat/exports.js +4 -7
  17. package/dist/agent/chat/index.d.ts +2 -2
  18. package/dist/agent/chat/index.d.ts.map +1 -1
  19. package/dist/agent/chat/index.js +23 -35
  20. package/dist/agent/chat/models.d.ts +0 -2
  21. package/dist/agent/chat/models.d.ts.map +1 -1
  22. package/dist/agent/chat/models.js +12 -26
  23. package/dist/agent/chat/prompt/pw-utils-docs.d.ts +1 -1
  24. package/dist/agent/chat/prompt/pw-utils-docs.d.ts.map +1 -1
  25. package/dist/agent/chat/prompt/pw-utils-docs.js +52 -0
  26. package/dist/agent/chat/prompt/repo.d.ts.map +1 -1
  27. package/dist/agent/chat/prompt/repo.js +11 -22
  28. package/dist/agent/chat/prompt/test-case-def.d.ts +2 -0
  29. package/dist/agent/chat/prompt/test-case-def.d.ts.map +1 -0
  30. package/dist/agent/chat/prompt/test-case-def.js +44 -0
  31. package/dist/agent/chat/state.d.ts +7 -6
  32. package/dist/agent/chat/state.d.ts.map +1 -1
  33. package/dist/agent/chat/state.js +15 -45
  34. package/dist/agent/chat/utils.d.ts +2 -2
  35. package/dist/agent/chat/utils.d.ts.map +1 -1
  36. package/dist/agent/chat/utils.js +14 -7
  37. package/dist/agent/cli.d.ts.map +1 -1
  38. package/dist/agent/cli.js +62 -58
  39. package/dist/agent/code-review/executor/index.d.ts +5 -0
  40. package/dist/agent/code-review/executor/index.d.ts.map +1 -0
  41. package/dist/agent/code-review/executor/index.js +13 -0
  42. package/dist/agent/code-review/index.d.ts +8 -3
  43. package/dist/agent/code-review/index.d.ts.map +1 -1
  44. package/dist/agent/code-review/index.js +115 -21
  45. package/dist/agent/code-review/parser.d.ts +5 -0
  46. package/dist/agent/code-review/parser.d.ts.map +1 -0
  47. package/dist/agent/code-review/parser.js +70 -0
  48. package/dist/agent/code-review/types.d.ts +36 -0
  49. package/dist/agent/code-review/types.d.ts.map +1 -0
  50. package/dist/agent/code-review/types.js +13 -0
  51. package/dist/agent/cua/index.d.ts.map +1 -1
  52. package/dist/agent/cua/index.js +18 -2
  53. package/dist/agent/cua/model.d.ts.map +1 -1
  54. package/dist/agent/cua/model.js +4 -1
  55. package/dist/agent/cua/pw-codegen/pw-pause/index.d.ts.map +1 -1
  56. package/dist/agent/triage/index.d.ts +2 -2
  57. package/dist/agent/triage/index.d.ts.map +1 -1
  58. package/dist/agent/triage/index.js +8 -7
  59. package/dist/agent/video-analysis/executor/index.d.ts +5 -0
  60. package/dist/agent/video-analysis/executor/index.d.ts.map +1 -0
  61. package/dist/agent/video-analysis/executor/index.js +10 -0
  62. package/dist/agent/video-analysis/index.d.ts +2 -2
  63. package/dist/agent/video-analysis/index.d.ts.map +1 -1
  64. package/dist/agent/video-analysis/index.js +38 -13
  65. package/dist/artifacts/index.d.ts +1 -1
  66. package/dist/artifacts/index.d.ts.map +1 -1
  67. package/dist/artifacts/index.js +3 -1
  68. package/dist/artifacts/utils.d.ts.map +1 -1
  69. package/dist/bin/index.js +66 -21
  70. package/dist/constants/index.d.ts +14 -0
  71. package/dist/constants/index.d.ts.map +1 -1
  72. package/dist/constants/index.js +33 -1
  73. package/dist/file/server.d.ts +1 -3
  74. package/dist/file/server.d.ts.map +1 -1
  75. package/dist/file/server.js +0 -13
  76. package/dist/file-info/adapters/file-system/index.d.ts.map +1 -1
  77. package/dist/file-info/adapters/file-system/reader.d.ts.map +1 -1
  78. package/dist/file-info/adapters/file-system/reader.js +8 -1
  79. package/dist/file-info/adapters/github/index.d.ts.map +1 -1
  80. package/dist/file-info/adapters/github/reader.d.ts +1 -1
  81. package/dist/file-info/adapters/github/reader.d.ts.map +1 -1
  82. package/dist/file-info/adapters/github/reader.js +8 -5
  83. package/dist/index.d.ts.map +1 -1
  84. package/dist/tools/analyse-video/index.d.ts +5 -0
  85. package/dist/tools/analyse-video/index.d.ts.map +1 -0
  86. package/dist/tools/analyse-video/index.js +50 -0
  87. package/dist/tools/create-pull-request/index.js +4 -6
  88. package/dist/tools/create-pull-request/utils.d.ts +1 -1
  89. package/dist/tools/definitions/{fetch-video-analysis.d.ts → analyse-video.d.ts} +13 -8
  90. package/dist/tools/definitions/analyse-video.d.ts.map +1 -0
  91. package/dist/tools/definitions/analyse-video.js +60 -0
  92. package/dist/tools/definitions/review-pull-request.d.ts +3 -0
  93. package/dist/tools/definitions/review-pull-request.d.ts.map +1 -0
  94. package/dist/tools/definitions/review-pull-request.js +16 -0
  95. package/dist/tools/definitions/str_replace_editor.d.ts +1 -0
  96. package/dist/tools/definitions/str_replace_editor.d.ts.map +1 -1
  97. package/dist/tools/definitions/str_replace_editor.js +4 -1
  98. package/dist/tools/definitions/test-gen-browser.d.ts +0 -3
  99. package/dist/tools/definitions/test-gen-browser.d.ts.map +1 -1
  100. package/dist/tools/definitions/test-gen-browser.js +33 -8
  101. package/dist/tools/delete-file/index.d.ts.map +1 -1
  102. package/dist/tools/delete-file/index.js +1 -19
  103. package/dist/tools/executor/base.d.ts +32 -0
  104. package/dist/tools/executor/base.d.ts.map +1 -0
  105. package/dist/tools/executor/base.js +114 -0
  106. package/dist/tools/executor/index.d.ts +3 -22
  107. package/dist/tools/executor/index.d.ts.map +1 -1
  108. package/dist/tools/executor/index.js +7 -100
  109. package/dist/tools/executor/utils/checkpoint.d.ts +1 -1
  110. package/dist/tools/executor/utils/checkpoint.d.ts.map +1 -1
  111. package/dist/tools/executor/utils/checkpoint.js +6 -2
  112. package/dist/tools/executor/utils/git.d.ts +2 -2
  113. package/dist/tools/executor/utils/git.d.ts.map +1 -1
  114. package/dist/tools/executor/utils/git.js +7 -3
  115. package/dist/tools/executor/utils/index.d.ts.map +1 -1
  116. package/dist/tools/executor/utils/index.js +1 -1
  117. package/dist/tools/fetch-session-diff/index.js +2 -2
  118. package/dist/tools/file-operations/create.d.ts.map +1 -1
  119. package/dist/tools/file-operations/create.js +1 -4
  120. package/dist/tools/file-operations/index.d.ts +2 -1
  121. package/dist/tools/file-operations/index.d.ts.map +1 -1
  122. package/dist/tools/file-operations/index.js +4 -1
  123. package/dist/tools/file-operations/insert.d.ts +1 -2
  124. package/dist/tools/file-operations/insert.d.ts.map +1 -1
  125. package/dist/tools/file-operations/insert.js +1 -4
  126. package/dist/tools/file-operations/replace.d.ts.map +1 -1
  127. package/dist/tools/file-operations/replace.js +1 -4
  128. package/dist/tools/grep/index.d.ts.map +1 -1
  129. package/dist/tools/grep/index.js +18 -11
  130. package/dist/tools/index.d.ts +5 -5
  131. package/dist/tools/index.d.ts.map +1 -1
  132. package/dist/tools/index.js +17 -16
  133. package/dist/tools/merge-conflicts/index.d.ts.map +1 -1
  134. package/dist/tools/merge-conflicts/index.js +1 -1
  135. package/dist/tools/rename-file/index.js +1 -1
  136. package/dist/tools/review-pull-request/index.d.ts.map +1 -1
  137. package/dist/tools/review-pull-request/index.js +45 -59
  138. package/dist/tools/run-test.d.ts.map +1 -1
  139. package/dist/tools/run-test.js +25 -3
  140. package/dist/tools/test-gen-browser.d.ts.map +1 -1
  141. package/dist/tools/test-gen-browser.js +51 -47
  142. package/dist/utils/artifact-paths.d.ts +20 -0
  143. package/dist/utils/artifact-paths.d.ts.map +1 -0
  144. package/dist/utils/artifact-paths.js +16 -0
  145. package/dist/utils/dedup-image-fs.d.ts +2 -16
  146. package/dist/utils/dedup-image-fs.d.ts.map +1 -1
  147. package/dist/utils/dedup-image-fs.js +12 -16
  148. package/dist/utils/dedup-image.d.ts +1 -14
  149. package/dist/utils/dedup-image.d.ts.map +1 -1
  150. package/dist/utils/dedup-image.js +7 -62
  151. package/dist/utils/{local-ffmpeg-client.d.ts → ffmpeg/index.d.ts} +6 -7
  152. package/dist/utils/ffmpeg/index.d.ts.map +1 -0
  153. package/dist/utils/{local-ffmpeg-client.js → ffmpeg/index.js} +169 -53
  154. package/dist/utils/find-threshold.d.ts +8 -0
  155. package/dist/utils/find-threshold.d.ts.map +1 -0
  156. package/dist/utils/find-threshold.js +55 -0
  157. package/dist/utils/hash.d.ts +2 -0
  158. package/dist/utils/hash.d.ts.map +1 -0
  159. package/dist/utils/hash.js +24 -0
  160. package/dist/utils/model.d.ts +1 -1
  161. package/dist/utils/model.d.ts.map +1 -1
  162. package/dist/utils/model.js +7 -5
  163. package/dist/utils/repo-tree.d.ts +0 -1
  164. package/dist/utils/repo-tree.d.ts.map +1 -1
  165. package/dist/utils/repo-tree.js +2 -14
  166. package/dist/utils/slug.js +1 -1
  167. package/dist/video-core/agent-orchestrator.d.ts +14 -0
  168. package/dist/video-core/agent-orchestrator.d.ts.map +1 -0
  169. package/dist/video-core/agent-orchestrator.js +78 -0
  170. package/dist/video-core/analysis-server.d.ts +24 -0
  171. package/dist/video-core/analysis-server.d.ts.map +1 -0
  172. package/dist/video-core/analysis-server.js +398 -0
  173. package/dist/video-core/analysis-viewer.html +1374 -0
  174. package/dist/video-core/index.d.ts +44 -0
  175. package/dist/video-core/index.d.ts.map +1 -0
  176. package/dist/video-core/index.js +204 -0
  177. package/dist/video-core/model-limits.d.ts +4 -0
  178. package/dist/video-core/model-limits.d.ts.map +1 -0
  179. package/dist/video-core/model-limits.js +67 -0
  180. package/dist/video-core/storage-manager.d.ts +5 -0
  181. package/dist/video-core/storage-manager.d.ts.map +1 -0
  182. package/dist/video-core/storage-manager.js +55 -0
  183. package/dist/video-core/types.d.ts +13 -0
  184. package/dist/video-core/types.d.ts.map +1 -0
  185. package/dist/video-core/types.js +2 -0
  186. package/dist/video-core/utils.d.ts +25 -0
  187. package/dist/video-core/utils.d.ts.map +1 -0
  188. package/dist/video-core/utils.js +211 -0
  189. package/dist/video-core/xml-parser.d.ts +3 -0
  190. package/dist/video-core/xml-parser.d.ts.map +1 -0
  191. package/dist/video-core/xml-parser.js +27 -0
  192. package/package.json +5 -6
  193. package/tsconfig.tsbuildinfo +1 -1
  194. package/dist/agent/chat/prompt/index.d.ts +0 -6
  195. package/dist/agent/chat/prompt/index.d.ts.map +0 -1
  196. package/dist/agent/chat/prompt/index.js +0 -200
  197. package/dist/agent/code-review/prompt.d.ts +0 -2
  198. package/dist/agent/code-review/prompt.d.ts.map +0 -1
  199. package/dist/agent/code-review/prompt.js +0 -55
  200. package/dist/agent/diagnosis-agent/index.d.ts +0 -11
  201. package/dist/agent/diagnosis-agent/index.d.ts.map +0 -1
  202. package/dist/agent/diagnosis-agent/index.js +0 -88
  203. package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts +0 -10
  204. package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts.map +0 -1
  205. package/dist/agent/diagnosis-agent/strict-mode-violation.js +0 -30
  206. package/dist/tools/definitions/extract-frames-from-video.d.ts +0 -39
  207. package/dist/tools/definitions/extract-frames-from-video.d.ts.map +0 -1
  208. package/dist/tools/definitions/extract-frames-from-video.js +0 -60
  209. package/dist/tools/definitions/fetch-video-analysis.d.ts.map +0 -1
  210. package/dist/tools/definitions/fetch-video-analysis.js +0 -61
  211. package/dist/tools/extract-frames-from-video/index.d.ts +0 -7
  212. package/dist/tools/extract-frames-from-video/index.d.ts.map +0 -1
  213. package/dist/tools/extract-frames-from-video/index.js +0 -145
  214. package/dist/tools/fetch-video-analysis/index.d.ts +0 -5
  215. package/dist/tools/fetch-video-analysis/index.d.ts.map +0 -1
  216. package/dist/tools/fetch-video-analysis/index.js +0 -149
  217. package/dist/tools/fetch-video-analysis/open-ai.d.ts +0 -6
  218. package/dist/tools/fetch-video-analysis/open-ai.d.ts.map +0 -1
  219. package/dist/tools/fetch-video-analysis/open-ai.js +0 -37
  220. package/dist/tools/fetch-video-analysis/utils.d.ts +0 -16
  221. package/dist/tools/fetch-video-analysis/utils.d.ts.map +0 -1
  222. package/dist/tools/fetch-video-analysis/utils.js +0 -121
  223. package/dist/tools/fetch-video-analysis/video-analysis.d.ts +0 -7
  224. package/dist/tools/fetch-video-analysis/video-analysis.d.ts.map +0 -1
  225. package/dist/tools/fetch-video-analysis/video-analysis.js +0 -70
  226. package/dist/tools/file-operations/shared/git-helper.d.ts +0 -4
  227. package/dist/tools/file-operations/shared/git-helper.d.ts.map +0 -1
  228. package/dist/tools/file-operations/shared/git-helper.js +0 -29
  229. package/dist/utils/local-ffmpeg-client.d.ts.map +0 -1
  230. package/eslint.config.mjs +0 -43
@@ -1 +1 @@
1
- {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/agent/cli.ts"],"names":[],"mappings":"AAEA,OAAO,EACL,aAAa,EAMb,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AAwCpC,wBAAsB,yBAAyB,IAAI,OAAO,CACxD,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CACvB,CAkCA;AAED,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,SAAS,EACT,SAAS,EACT,UAAU,GACX,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;IACzC,SAAS,EAAE,aAAa,CAAC;IACzB,UAAU,EAAE,OAAO,CAAC;IACpB,SAAS,EAAE,OAAO,CAAC;CACpB,iBAoLA"}
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/agent/cli.ts"],"names":[],"mappings":"AACA,OAAO,EACL,aAAa,EAKb,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AA+BpC,wBAAsB,yBAAyB,IAAI,OAAO,CACxD,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CACvB,CAkCA;AAED,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,SAAS,EACT,SAAS,EACT,UAAU,GACX,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;IACzC,SAAS,EAAE,aAAa,CAAC;IACzB,UAAU,EAAE,OAAO,CAAC;IACpB,SAAS,EAAE,OAAO,CAAC;CACpB,iBA2KA"}
package/dist/agent/cli.js CHANGED
@@ -3,7 +3,6 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.fetchEnvironmentVariables = fetchEnvironmentVariables;
4
4
  exports.runChatAgentForCLI = runChatAgentForCLI;
5
5
  const llm_1 = require("@empiricalrun/llm");
6
- const chat_1 = require("@empiricalrun/llm/chat");
7
6
  const picocolors_1 = require("picocolors");
8
7
  const client_1 = require("../dashboard/client");
9
8
  const reader_1 = require("../file-info/adapters/file-system/reader");
@@ -11,17 +10,17 @@ const human_in_the_loop_1 = require("../human-in-the-loop");
11
10
  const validation_1 = require("../recorder/validation");
12
11
  const executor_1 = require("../tools/executor");
13
12
  const git_1 = require("../tools/executor/utils/git");
14
- const filesystem_cache_1 = require("./chat/filesystem-cache");
15
13
  const state_1 = require("./chat/state");
16
14
  const utils_1 = require("./chat/utils");
17
15
  const index_1 = require("./index");
18
16
  function stopCriteria(userPrompt) {
19
17
  return userPrompt?.toLowerCase() === "stop";
20
18
  }
21
- function concludeAgent(chatModel, useDiskForChatState, selectedModel, error) {
22
- console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + (0, state_1.getUsageSummary)(chatModel))}`);
19
+ function concludeAgent(agent, useDiskForChatState) {
20
+ const chatState = agent.chatState;
21
+ console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + (0, state_1.getUsageSummary)(chatState))}`);
23
22
  if (useDiskForChatState) {
24
- (0, state_1.saveToDisk)(chatModel.messages, selectedModel, chatModel.askUserForInput, error);
23
+ (0, state_1.saveToDisk)(chatState);
25
24
  }
26
25
  }
27
26
  async function fetchEnvironmentVariables() {
@@ -53,41 +52,47 @@ async function fetchEnvironmentVariables() {
53
52
  return envVars;
54
53
  }
55
54
  async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, agentMode, resetChat, useFSCache, }) {
56
- let chatState;
57
55
  const enableStreaming = !useFSCache;
58
- const cache = useFSCache ? new filesystem_cache_1.FilesystemLLMCache() : undefined;
59
- if (resetChat) {
60
- (0, state_1.clearChatState)();
61
- }
62
- if (useDiskForChatState) {
63
- chatState = (0, state_1.loadChatState)();
64
- }
56
+ // TODO: Implement cache support in BaseAgent
57
+ // const cache = useFSCache ? new FilesystemLLMCache() : undefined;
65
58
  // TODO: Store branch name in chat state so that we don't recreate it every time
66
59
  const randomId = crypto.randomUUID().substring(0, 8);
67
60
  const branchName = `branch-${randomId}`;
68
61
  await (0, git_1.checkoutBranch)(branchName, process.cwd());
69
- let messagesLoadedFromDisk = chatState?.messages || [];
70
- let chatModel = (0, chat_1.createChatModel)(messagesLoadedFromDisk, selectedModel, undefined, cache);
71
- chatModel.validateEnvVarsForAuth();
72
- if (initialPromptContent && chatModel.messages.length === 0) {
73
- chatModel.pushUserMessage(initialPromptContent, []);
74
- }
75
- else if (initialPromptContent && chatModel.messages.length > 0) {
76
- console.warn(`Ignoring initial prompt because we have existing messages.`);
77
- }
78
- if (chatModel.askUserForInput) {
79
- // Show last message to the user for context when we loaded from disk
80
- const latest = chatModel.getHumanReadableLatestMessage();
81
- if (latest) {
82
- console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
83
- }
62
+ let chatState;
63
+ if (useDiskForChatState) {
64
+ chatState = (0, state_1.loadChatState)({ resetChat });
84
65
  }
85
66
  if (chatState && chatState.error) {
86
67
  // Reset error state as we are attempting a retry
87
68
  chatState.error = null;
88
69
  }
70
+ if (initialPromptContent) {
71
+ if (!chatState) {
72
+ const { text, attachments } = (0, utils_1.extractAttachments)(initialPromptContent);
73
+ chatState = (0, state_1.createChatState)({
74
+ userPrompt: text,
75
+ attachments: attachments,
76
+ existingState: undefined,
77
+ selectedModel,
78
+ error: null,
79
+ });
80
+ }
81
+ else {
82
+ console.warn(`Ignoring initial prompt because we have existing chat state.`);
83
+ }
84
+ }
85
+ if (!chatState) {
86
+ chatState = (0, state_1.createChatState)({
87
+ userPrompt: undefined,
88
+ attachments: [],
89
+ selectedModel,
90
+ existingState: undefined,
91
+ error: null,
92
+ });
93
+ }
89
94
  const handleSigInt = () => {
90
- concludeAgent(chatModel, useDiskForChatState, selectedModel, null);
95
+ concludeAgent(agent, useDiskForChatState);
91
96
  process.exit(0);
92
97
  };
93
98
  process.once("SIGINT", handleSigInt);
@@ -95,7 +100,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
95
100
  let userPrompt;
96
101
  let reporterFunc = async (chatState, latest) => {
97
102
  if (useDiskForChatState) {
98
- (0, state_1.saveToDisk)(chatState.messages, selectedModel, chatState.askUserForInput, chatState.error);
103
+ (0, state_1.saveToDisk)(chatState);
99
104
  }
100
105
  if (latest) {
101
106
  if (!enableStreaming) {
@@ -122,13 +127,31 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
122
127
  const apiClient = new client_1.DashboardAPIClient({
123
128
  authType,
124
129
  });
130
+ const toolExecutor = new executor_1.ToolExecutor({
131
+ chatSession: { branchName },
132
+ repoPath: process.cwd(),
133
+ apiClient,
134
+ trace,
135
+ featureFlags: [],
136
+ environmentOverrides: await fetchEnvironmentVariables(),
137
+ });
125
138
  const fileInfoBuilder = () => (0, reader_1.getFileInfoFromFS)(process.cwd());
126
139
  const agentParams = {
127
140
  selectedModel,
141
+ featureFlags: [],
142
+ chatState,
143
+ toolExecutor,
128
144
  };
129
- const agent = index_1.MODE_TO_AGENT_MAP[agentMode](agentParams);
145
+ const agent = index_1.MODE_TO_AGENT_MAP[agentMode]({ ...agentParams });
146
+ if (agent.askUserForInput) {
147
+ // Show last message to the user for context when we loaded from disk
148
+ const latest = agent.getHumanReadableLatestMessage();
149
+ if (latest) {
150
+ console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
151
+ }
152
+ }
130
153
  while (!stopCriteria(userPrompt)) {
131
- if (chatModel.askUserForInput) {
154
+ if (agent.askUserForInput) {
132
155
  try {
133
156
  userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
134
157
  message: "User:",
@@ -137,33 +160,19 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
137
160
  catch (e) {
138
161
  // https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
139
162
  if (e instanceof Error && e.name === "ExitPromptError") {
140
- concludeAgent(chatModel, useDiskForChatState, selectedModel, null);
163
+ concludeAgent(agent, useDiskForChatState);
141
164
  process.exit(0);
142
165
  }
143
- concludeAgent(chatModel, useDiskForChatState, selectedModel, {
144
- message: e.message,
145
- stack: e.stack || "Stack trace not available",
146
- timestamp: new Date().toISOString(),
147
- });
166
+ concludeAgent(agent, useDiskForChatState);
148
167
  throw e;
149
168
  }
150
169
  if (!stopCriteria(userPrompt)) {
151
170
  const { text, attachments } = (0, utils_1.extractAttachments)(userPrompt);
152
- chatModel.pushUserMessage(text, attachments);
171
+ agent.pushUserMessage(text, attachments);
153
172
  }
154
173
  }
155
174
  else {
156
- const toolExecutor = new executor_1.ToolExecutor({
157
- chatSession: null,
158
- branchName,
159
- repoPath: process.cwd(),
160
- apiClient,
161
- trace,
162
- featureFlags: [],
163
- environmentOverrides: await fetchEnvironmentVariables(),
164
- });
165
175
  await agent.runLoop({
166
- messages: chatModel.messages,
167
176
  reporter: reporterFunc,
168
177
  streamingMessageReporter: (() => {
169
178
  if (!enableStreaming) {
@@ -192,18 +201,13 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
192
201
  trace,
193
202
  repoInfoBuilder: fileInfoBuilder,
194
203
  onPendingToolCall: async (toolCalls) => {
195
- const toolResults = await toolExecutor.execute(toolCalls);
196
- chatModel.pushToolResultsMessage(toolCalls, toolResults);
204
+ const { toolResults, checkpoint } = await toolExecutor.execute(toolCalls);
205
+ agent.processToolResults(toolCalls, toolResults, checkpoint);
197
206
  },
198
207
  });
199
- // Update the chatModel with the agent's final state for next iteration
200
- if (agent.messages) {
201
- chatModel = (0, chat_1.createChatModel)(agent.messages, selectedModel, undefined, cache);
202
- }
203
208
  }
204
209
  }
205
- trace?.update({ output: { messages: chatModel.messages } });
210
+ trace?.update({ output: { messages: agent.messages } });
206
211
  await llm_1.langfuseInstance?.flushAsync();
207
- const usageSummary = (0, state_1.getUsageSummary)(chatModel);
208
- console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
212
+ console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + (0, state_1.getUsageSummary)(agent.chatState))}`);
209
213
  }
@@ -0,0 +1,5 @@
1
+ import { BaseToolExecutor, BaseToolExecutorProps } from "../../../tools/executor/base";
2
+ export declare class CodeReviewToolExecutor extends BaseToolExecutor {
3
+ constructor(params: Omit<BaseToolExecutorProps, "tools">);
4
+ }
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/agent/code-review/executor/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,gBAAgB,EAChB,qBAAqB,EACtB,MAAM,8BAA8B,CAAC;AAItC,qBAAa,sBAAuB,SAAQ,gBAAgB;gBAC9C,MAAM,EAAE,IAAI,CAAC,qBAAqB,EAAE,OAAO,CAAC;CAIzD"}
@@ -0,0 +1,13 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CodeReviewToolExecutor = void 0;
4
+ const base_1 = require("../../../tools/executor/base");
5
+ const fetch_session_diff_1 = require("../../../tools/fetch-session-diff");
6
+ const file_operations_1 = require("../../../tools/file-operations");
7
+ class CodeReviewToolExecutor extends base_1.BaseToolExecutor {
8
+ constructor(params) {
9
+ const tools = [fetch_session_diff_1.fetchSessionDiffTool, ...file_operations_1.viewOnlyTools];
10
+ super({ ...params, tools });
11
+ }
12
+ }
13
+ exports.CodeReviewToolExecutor = CodeReviewToolExecutor;
@@ -1,7 +1,12 @@
1
- import type { ToolDefinition } from "@empiricalrun/shared-types";
1
+ import type { ToolsForLLM } from "@empiricalrun/shared-types";
2
2
  import { BaseAgent } from "../base";
3
+ import { type CodeReviewResultV0, type CodeReviewResultV1, type CodeReviewResultV2, CodeReviewSeverity, CodeReviewVerdict } from "./types";
4
+ export type { CodeReviewResultV1, CodeReviewResultV0, CodeReviewResultV2 };
5
+ export { CodeReviewVerdict, CodeReviewSeverity };
6
+ export type CodeReviewVersionedResult = CodeReviewResultV1 | CodeReviewResultV0 | CodeReviewResultV2;
3
7
  export declare class CodeReviewAgent extends BaseAgent {
4
- protected getTools(): ToolDefinition[];
5
- protected buildSystemPrompt(): Promise<string>;
8
+ protected getTools(): ToolsForLLM;
9
+ getResult(): CodeReviewVersionedResult | undefined;
10
+ protected buildSystemPrompt(repoContext: string): Promise<string>;
6
11
  }
7
12
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAGjE,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,qBAAa,eAAgB,SAAQ,SAAS;IAC5C,SAAS,CAAC,QAAQ,IAAI,cAAc,EAAE;cAItB,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC;CAsDrD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAmB,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAG/E,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,kBAAkB,EACvB,KAAK,kBAAkB,EACvB,kBAAkB,EAClB,iBAAiB,EAClB,MAAM,SAAS,CAAC;AAEjB,YAAY,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,CAAC;AAC3E,OAAO,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,CAAC;AACjD,MAAM,MAAM,yBAAyB,GACjC,kBAAkB,GAClB,kBAAkB,GAClB,kBAAkB,CAAC;AAEvB,qBAAa,eAAgB,SAAQ,SAAS;IAC5C,SAAS,CAAC,QAAQ,IAAI,WAAW;IAWjC,SAAS,IAAI,yBAAyB,GAAG,SAAS;cAmBlC,iBAAiB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAuHxE"}
@@ -1,52 +1,137 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.CodeReviewAgent = void 0;
3
+ exports.CodeReviewAgent = exports.CodeReviewSeverity = exports.CodeReviewVerdict = void 0;
4
4
  const tools_1 = require("../../tools");
5
+ const fetch_session_diff_1 = require("../../tools/fetch-session-diff");
5
6
  const base_1 = require("../base");
7
+ const parser_1 = require("./parser");
8
+ const types_1 = require("./types");
9
+ Object.defineProperty(exports, "CodeReviewSeverity", { enumerable: true, get: function () { return types_1.CodeReviewSeverity; } });
10
+ Object.defineProperty(exports, "CodeReviewVerdict", { enumerable: true, get: function () { return types_1.CodeReviewVerdict; } });
6
11
  class CodeReviewAgent extends base_1.BaseAgent {
7
12
  getTools() {
8
- return [tools_1.fetchSessionDiffTool];
13
+ const custom = [
14
+ fetch_session_diff_1.fetchSessionDiffTool,
15
+ ...(0, tools_1.textViewToolsForModel)(this.selectedModel),
16
+ ];
17
+ return {
18
+ custom,
19
+ builtInTextEditor: (0, tools_1.hasBuiltInTextEditor)(this.selectedModel),
20
+ };
9
21
  }
10
- async buildSystemPrompt() {
22
// Builds the structured review result from the agent's final message.
// Only the last entry of this.messages is inspected. Returns undefined when
// there are no messages or the last one has no non-empty text part.
// Otherwise the trimmed text is handed to convertXmlToV2Format, which throws
// if the <last_commit> or <describe_code_change> tags are missing.
+ getResult() {
23
+ const messages = this.messages || [];
24
+ const lastMessage = messages.length
25
+ ? messages[messages.length - 1]
26
+ : undefined;
27
// First part of the last message that carries a truthy `text` value.
+ const lastMessageTextPart = lastMessage
28
+ ? lastMessage.parts
29
+ .filter((p) => "text" in p)
30
+ .find((p) => "text" in p && !!p.text)
31
+ : undefined;
32
+ const textPart = lastMessageTextPart;
33
+ const text = textPart?.text.trim();
34
// Whitespace-only text is treated the same as no text at all.
+ if (!text) {
35
+ return undefined;
36
+ }
37
+ return (0, parser_1.convertXmlToV2Format)(text);
38
+ }
39
+ async buildSystemPrompt(repoContext) {
11
40
  return `
12
41
  You are an expert code reviewer that specializes in reviewing Playwright test code. You are
13
- provided with tools to fetch diff for a code review, where a test has been added, test modified,
42
+ provided with tools to fetch diff and pull-request metadata for a code review, where a test has been added, test modified,
14
43
  or some configuration has changed.
15
44
 
16
45
  # Your goals
17
- - Identify code smells in test code - see below
18
- - Call out test data assumptions or lack of clean up
46
+ - Understand the purpose and scope of the code change. You can use available tools to gather context of the change.
47
+ - Identify critical issues that must be fixed before the code can be safely merged.
48
+ - Detect code smells, anti-patterns, and non-deterministic behaviors that reduce test reliability - see below.
49
+ - Call out test data assumptions or lack of clean up.
50
+ - Suggest improvements and best practices to enhance maintainability and readability.
51
+ - Form a definite conclusion on whether the code can be merged or not.
52
+ - Share your findings and conclusion in the structured format shared below
19
53
 
20
54
  # Output format
21
- - You are expected to return two sections in your response: describe_code_change and code_review_comments
22
- - describe_code_change: A brief summary of what the code change is doing. This should be 4-6 sentences in a bullet list.
23
- - code_review_comments: A bulleted list of code review comments that catch for any of the specific bits below or other
24
- red flags you might see in the code. Each comment should be 1-2 sentences.
55
+ - You are expected to return the following sections in your response: last_commit, describe_code_change, line_comments, verdict and version
56
+ - The last commit comes from the session diff tool call along with pull request metadata -- reproduce the commit sha as it is, without any additions (ignore the last commit timestamp) or bullet points
57
+ - describe_code_change: A brief summary of what the code change is doing. This should be 4-6 sentences in a bullet list, formatted in markdown where each bullet must begin with a hyphen followed by a space (- ).
58
+ Do not use any other character for bullets.
59
+ - line_comments: Individual comments for specific issues found in the code, attributed to one or more lines of problematic code. There can be multiple separate issues for each line, share them using separate comments of varying severity.
60
+ Each comment should correspond to one issue and include the following tags:
61
+ - file: The relative path to the file from repository root
62
+ - line-start: Starting line number of the issue
63
+ - line-end: Ending line number of the issue (same as line-start for if issue is in a single line of code)
64
+ - severity: Either "merge-blocking" or "warning". It is possible to have multiple issues in the same set of lines of either nature, in that case add multiple line_comments for those lines.
65
+ Here severity denotes the nature of the issue - any issue that is preventing the code from being safe to merge and should be considered high priority is "merge-blocking". Look for any of the specific
66
+ bits below or other red flags you might see in the code. Each comment should be 1-2 sentences. If no blocking issues are found, a review will have no "merge-blocking" line comments.
67
+ Alternatively, the severity should be "warning" for situations where best practices were not followed or contain minor issues or warnings that can be safely ignored ie
68
+ - message: 1-2 sentences describing the specific issue and suggested fixes or improvements.
69
+ - verdict: "Approved" if code can be merged to production ie there are no "merge-blocking" line-comments or "Rejected" if the issues cannot be safely ignored. "Approved" or
70
+ "Rejected" are the only two possible values for this field.
25
71
 
26
72
  Return these as XML tags with markdown inside them
27
73
 
74
+ <last_commit>
75
+ ...
76
+ </last_commit>
77
+
28
78
  <describe_code_change>
29
- - ...
79
+ ...
30
80
  </describe_code_change>
31
81
 
32
- <code_review_comments>
33
- - ...
34
- </code_review_comments>
82
+ <line_comments>
83
+ <comment>
84
+ <file>..</file>
85
+ <line-start>..</line-start>
86
+ <line-end>..</line-end>
87
+ <severity>..</severity>
88
+ <message>..</message>
89
+ </comment>
90
+
91
+ <comment>
92
+ <file>..</file>
93
+ <line-start>..</line-start>
94
+ <line-end>..</line-end>
95
+ <severity>..</severity>
96
+ <message>..</message>
97
+ </comment>
98
+ </line_comments>
99
+
100
+ <verdict>
101
+ ...
102
+ </verdict>
35
103
 
36
- # Specific bits to catch in the code review
104
+ # Severity: Merge blocking
37
105
 
38
- ## Code smells to look for
39
- - Any form of try-catch or exception handling is a code smell in test code. If there's an
106
+ ## Functionality regression
107
+ - If the change is modifying an existing test, we need to ensure the functionality of the original test
108
+ is maintained in the new version. No hacking our way to get a green test!
109
+
110
+ ## Exception handling
111
+ - Any form of try-catch or exception handling is a code smell in test code. If there's an
40
112
  exception, the test should fail
41
- - Any conditionals (if, switch, ternary) in test code is a code smell. Tests are expected to be
113
+
114
+ ## Conditionals
115
+ - Any conditionals (if, switch, ternary) in test code is a code smell. Tests are expected to be
42
116
  deterministic. If you see conditionals, check if there's a comment explaining why it's needed.
43
117
  Critically review the comment -- if it's not convincing, call it out as a code smell.
44
118
 
45
- ## Ensure Playwright best practices
119
+ ## Playwright common mistakes
120
+ - Don't use waitForLoadState or networkidle - these are not required since Playwright auto-waits after navigations. networkidle
121
+ can cause failures because modern web apps often have background network activity, which never settles.
46
122
  - Use locators instead of selectors: waitForSelector, $, $$ are bad - use locators instead (e.g. locator.waitFor)
47
123
  - If the test relies on some Playwright APIs that do not auto-wait (e.g. isVisible(), count()), we need to ensure
48
124
  they are used AFTER some action that ensures the page has loaded. If nothing, at least it should have a waitForTimeout
49
- - Don't use waitForLoadState or networkidle - these are not required since Playwright auto-waits after navigations
125
+
126
+ ## Deprecated patterns
127
+ - test.describe.serial(...) is not prefered: use test.describe.configure({ mode: "serial" }) if the tests need to be serial
128
+
129
+ ## Repo conventions
130
+ - Tests are located in files in the tests/ directory (e.g. in tests/example.spec.ts)
131
+ - Helper methods (that are imported in the tests) should be in pages/ directory (e.g. pages/common.ts)
132
+ - Helper methods should be functional - not classes (conventional class-based page object models are NOT recommended - use functions!)
133
+
134
+ # Severity: Warning
50
135
 
51
136
  ## Call out test data assumptions
52
137
  - If new test data is created (e.g. creating a new entity in the app, doing some actions on it) - it should be cleaned up
@@ -58,7 +143,16 @@ Return these as XML tags with markdown inside them
58
143
  - Dependency on static data that can change across environments (e.g. number of rows in a table) should be avoided.
59
144
 
60
145
  ## Remove debug artifacts
61
- - If there are console.logs or page.screenshot usage, call it out. They should be removed before merging.
146
+ - If there are console.logs or page.screenshot usage, call it out.
147
+
148
+ ## Extra waits
149
+ - Wait for timeout for static values are bad, but sometimes needed. Some apps are flaky and need additional waiting.
150
+
151
+ ## Element locators
152
+ - CSS selectors can be brittle - prefer user facing selectors like getByRole, getByText
153
+
154
+ # Repo context
155
+ ${repoContext}
62
156
  `;
63
157
  }
64
158
  }
@@ -0,0 +1,5 @@
1
+ import { type CodeReviewResultV2 } from "./types";
2
+ export type { CodeReviewLineComment, CodeReviewResultV0, CodeReviewResultV1, CodeReviewResultV2, } from "./types";
3
+ export { CodeReviewSeverity, CodeReviewVerdict } from "./types";
4
/**
 * Parses the XML-tagged review text emitted by the code review agent into a
 * version "2.0" result. Throws when the <last_commit> or
 * <describe_code_change> tag is absent from `output`.
 */
+ export declare function convertXmlToV2Format(output: string): CodeReviewResultV2;
5
+ //# sourceMappingURL=parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,kBAAkB,EAGxB,MAAM,SAAS,CAAC;AAEjB,YAAY,EACV,qBAAqB,EACrB,kBAAkB,EAClB,kBAAkB,EAClB,kBAAkB,GACnB,MAAM,SAAS,CAAC;AAEjB,OAAO,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAC;AAchE,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,MAAM,GAAG,kBAAkB,CAmFvE"}
@@ -0,0 +1,70 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CodeReviewVerdict = exports.CodeReviewSeverity = void 0;
4
+ exports.convertXmlToV2Format = convertXmlToV2Format;
5
+ const types_1 = require("./types");
6
+ var types_2 = require("./types");
7
+ Object.defineProperty(exports, "CodeReviewSeverity", { enumerable: true, get: function () { return types_2.CodeReviewSeverity; } });
8
+ Object.defineProperty(exports, "CodeReviewVerdict", { enumerable: true, get: function () { return types_2.CodeReviewVerdict; } });
9
/**
 * Derives a verdict from the parsed line comments, used when the model did
 * not emit a usable <verdict> tag: any merge-blocking comment rejects the
 * change, otherwise it is approved.
 */
function inferVerdictFromCommentsV2(lineComments) {
    const hasMergeBlockingIssues = lineComments.some((comment) => comment.severity === types_1.CodeReviewSeverity.MergeBlocking);
    return hasMergeBlockingIssues
        ? types_1.CodeReviewVerdict.Rejected
        : types_1.CodeReviewVerdict.Approved;
}
/**
 * Returns the trimmed inner text of the first <tag>...</tag> pair found in
 * `xml` (tag names match case-insensitively), or undefined when the pair is
 * absent. `tag` must be a plain tag name; it is embedded in a RegExp as-is.
 */
function extractTagContent(xml, tag) {
    const match = xml.match(new RegExp(`<${tag}>([\\s\\S]*?)<\\/${tag}>`, "i"));
    return match ? match[1].trim() : undefined;
}
/**
 * Parses the XML-tagged review text produced by the code review agent into
 * a version "2.0" result object.
 *
 * @param {string} output Raw model text containing <last_commit>,
 *   <describe_code_change>, <line_comments> and <verdict> sections.
 * @returns {{version: "2.0", last_commit: string, describe_code_change: string,
 *   line_comments: Array, verdict: string}} The structured review.
 * @throws {Error} When <last_commit> or <describe_code_change> is missing.
 *   Failing loudly is deliberate: falling back to empty values would turn
 *   arbitrary non-review text into an "Approved" result with no comments.
 */
function convertXmlToV2Format(output) {
    const lastCommit = extractTagContent(output, "last_commit");
    const describeCodeChange = extractTagContent(output, "describe_code_change");
    const missingTags = [];
    if (lastCommit === undefined) {
        missingTags.push("last_commit");
    }
    if (describeCodeChange === undefined) {
        missingTags.push("describe_code_change");
    }
    if (missingTags.length > 0) {
        throw new Error(`Code review output is missing required tag(s): ${missingTags.join(", ")}`);
    }
    const lineComments = [];
    const commentsContent = extractTagContent(output, "line_comments");
    const commentBlocks = commentsContent?.match(/<comment>([\s\S]*?)<\/comment>/gi) ?? [];
    for (const commentBlock of commentBlocks) {
        const file = extractTagContent(commentBlock, "file");
        const lineStart = extractTagContent(commentBlock, "line-start");
        const lineEnd = extractTagContent(commentBlock, "line-end");
        const severityText = extractTagContent(commentBlock, "severity");
        const message = extractTagContent(commentBlock, "message");
        // A comment is only usable when all five child tags are present.
        if ([file, lineStart, lineEnd, severityText, message].includes(undefined)) {
            continue;
        }
        // Comments with an unrecognised severity are dropped.
        let severity = null;
        if (severityText === "merge-blocking") {
            severity = types_1.CodeReviewSeverity.MergeBlocking;
        }
        else if (severityText === "warning") {
            severity = types_1.CodeReviewSeverity.Warning;
        }
        if (severity === null) {
            continue;
        }
        lineComments.push({
            file,
            // NOTE: non-numeric line values yield NaN here; the comment is
            // kept so that its severity still counts towards the verdict.
            line_start: parseInt(lineStart, 10),
            line_end: parseInt(lineEnd, 10),
            severity,
            message,
        });
    }
    // An explicit verdict wins; anything other than the exact "Approved"
    // value is treated as a rejection. A missing or empty verdict is
    // inferred from the comments instead.
    const verdict = extractTagContent(output, "verdict");
    const finalVerdict = verdict
        ? verdict === types_1.CodeReviewVerdict.Approved
            ? types_1.CodeReviewVerdict.Approved
            : types_1.CodeReviewVerdict.Rejected
        : inferVerdictFromCommentsV2(lineComments);
    return {
        version: "2.0",
        last_commit: lastCommit,
        describe_code_change: describeCodeChange,
        line_comments: lineComments,
        verdict: finalVerdict,
    };
}
@@ -0,0 +1,36 @@
1
/**
 * Overall outcome of a code review. The review prompt defines "Approved" as
 * having no merge-blocking line comments and "Rejected" otherwise.
 */
+ export declare enum CodeReviewVerdict {
2
+ Approved = "Approved",
3
+ Rejected = "Rejected"
4
+ }
5
/**
 * Severity of a single line comment. The string values are the exact texts
 * the parser accepts inside a <severity> tag.
 */
+ export declare enum CodeReviewSeverity {
6
+ MergeBlocking = "merge-blocking",
7
+ Warning = "warning"
8
+ }
9
/**
 * Review result tagged `version: "1.0"`; every payload field may be null.
 * NOTE(review): nothing visible in this package diff produces this shape.
 * Presumably it is kept so previously stored results still type-check; confirm.
 */
+ export type CodeReviewResultV1 = {
10
+ version: "1.0";
11
+ last_commit: string | null;
12
+ code_review_comments: string | null;
13
+ describe_code_change: string | null;
14
+ merge_blocking_issues: string | null;
15
+ best_practices_and_warnings: string | null;
16
+ verdict: "Approved" | "Rejected" | null;
17
+ };
18
/**
 * One review comment attributed to a range of lines in a file. The parser
 * fills these fields from the <file>, <line-start>, <line-end>, <severity>
 * and <message> child tags of a <comment> element.
 */
+ export type CodeReviewLineComment = {
19
+ file: string;
20
+ line_start: number;
21
+ line_end: number;
22
+ severity: CodeReviewSeverity;
23
+ message: string;
24
+ };
25
/**
 * Review result tagged `version: "2.0"`: the shape returned by
 * convertXmlToV2Format, with per-line comments and a definite verdict.
 */
+ export type CodeReviewResultV2 = {
26
+ version: "2.0";
27
+ last_commit: string;
28
+ describe_code_change: string;
29
+ line_comments: CodeReviewLineComment[];
30
+ verdict: CodeReviewVerdict;
31
+ };
32
/**
 * Review result tagged `version: "0.1"`, carrying a single `result` string.
 * NOTE(review): no producer of this shape is visible in this diff; confirm
 * where it originates before relying on the contents of `result`.
 */
+ export type CodeReviewResultV0 = {
33
+ version: "0.1";
34
+ result: string;
35
+ };
36
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/types.ts"],"names":[],"mappings":"AAAA,oBAAY,iBAAiB;IAC3B,QAAQ,aAAa;IACrB,QAAQ,aAAa;CACtB;AAED,oBAAY,kBAAkB;IAC5B,aAAa,mBAAmB;IAChC,OAAO,YAAY;CACpB;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC/B,OAAO,EAAE,KAAK,CAAC;IACf,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,qBAAqB,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,2BAA2B,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,OAAO,EAAE,UAAU,GAAG,UAAU,GAAG,IAAI,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,kBAAkB,CAAC;IAC7B,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,OAAO,EAAE,KAAK,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,aAAa,EAAE,qBAAqB,EAAE,CAAC;IACvC,OAAO,EAAE,iBAAiB,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,OAAO,EAAE,KAAK,CAAC;IAEf,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC"}
@@ -0,0 +1,13 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CodeReviewSeverity = exports.CodeReviewVerdict = void 0;
4
// Runtime object backing the CodeReviewVerdict string enum; the same object
// is published on `exports` under the enum's name.
const CodeReviewVerdict = {
    Approved: "Approved",
    Rejected: "Rejected",
};
exports.CodeReviewVerdict = CodeReviewVerdict;
9
// Runtime object backing the CodeReviewSeverity string enum; the same object
// is published on `exports` under the enum's name.
const CodeReviewSeverity = {
    MergeBlocking: "merge-blocking",
    Warning: "warning",
};
exports.CodeReviewSeverity = CodeReviewSeverity;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAmClC,MAAM,MAAM,kBAAkB,GAAG,KAAK,CAClC;IACE,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,iBAAiB,CAAC;CAC/B,CACJ,CAAC;AAEF,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,8BAAsC,GACvC,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,8BAA8B,CAAC,EAAE,OAAO,CAAC;CAC1C,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,KAAK,EAAE,KAAK,CAAC;CACd,CAAC,CAuND"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAoDlC,MAAM,MAAM,kBAAkB,GAAG,KAAK,CAClC;IACE,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,iBAAiB,CAAC;CAC/B,CACJ,CAAC;AAEF,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,8BAAsC,GACvC,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,8BAA8B,CAAC,EAAE,OAAO,CAAC;CAC1C,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,KAAK,EAAE,KAAK,CAAC;CACd,CAAC,CAuND"}
@@ -28,6 +28,22 @@ function artifact(screenshot, name) {
28
28
  data: Buffer.from(screenshot, "base64"),
29
29
  };
30
30
  }
31
/**
 * Returns a page title that is safe to interpolate into a prompt string.
 *
 * If `title()` yields a string it is returned unchanged. If it yields a
 * thenable (Playwright's `page.title()` is asynchronous), the value cannot be
 * read synchronously, so a placeholder is returned instead of letting the
 * template render "[object Promise]". A no-op rejection handler is attached so
 * a page that closes mid-call cannot raise an unhandled rejection.
 */
function titleOrPlaceholder(p) {
    const title = p.title();
    if (typeof title === "string") {
        return title;
    }
    if (title && typeof title.then === "function") {
        title.then(undefined, () => { });
    }
    return "(title unavailable)";
}
/**
 * Renders a markdown summary of the browser for the model prompt: the URL and
 * title of the page being worked on, plus the URL and title of every page
 * open in the same browser context.
 *
 * This function stays synchronous because its call sites interpolate the
 * return value directly into a template literal. To show real titles for
 * asynchronous page objects, the titles would have to be resolved before
 * calling this function.
 *
 * @param page Page object exposing url(), title() and context().pages().
 * @returns {string} The markdown summary, starting with a newline.
 */
function stateOfTheBrowser(page) {
    const browserContext = page.context();
    const pages = browserContext.pages();
    return `
## Browser window

### Current page (what you are working on)
Current page URL: ${page.url()}
Current page title: ${titleOrPlaceholder(page)}

### All pages
Number of open pages: ${pages.length}

URLs and titles:
${pages.map((p) => ` - ${p.url()} - ${titleOrPlaceholder(p)}`).join("\n")}`;
}
31
47
  async function createTestUsingComputerUseAgent({ page, task, trace, prefersElementFromPointCodegen = false, }) {
32
48
  const codegen = await getCodegenInstance(prefersElementFromPointCodegen);
33
49
  await codegen.initialize(page);
@@ -53,7 +69,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, prefersEleme
53
69
  content: [
54
70
  {
55
71
  type: "input_text",
56
- text: `Task to execute: ${task}\n\nCurrent page URL: ${page.url()}`,
72
+ text: `Task to execute: ${task}\n\n${stateOfTheBrowser(page)}`,
57
73
  },
58
74
  {
59
75
  type: "input_image",
@@ -177,7 +193,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, prefersEleme
177
193
  content: [
178
194
  {
179
195
  type: "input_text",
180
- text: `Action executed: ${executedActionSummary || "None"}\nCurrent page URL: ${page.url()}`,
196
+ text: `Action executed: ${executedActionSummary || "None"}\n\n${stateOfTheBrowser(page)}`,
181
197
  },
182
198
  ],
183
199
  },
@@ -1 +1 @@
1
- {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;;;EAQrE"}
1
+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AAiClD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;;;EAQrE"}
@@ -9,7 +9,10 @@ For example, if the user message says "Click on Submit button", then
9
9
  you click on the submit button -- even if it looks like a scary action.
10
10
 
11
11
  If you have been asked to retrieve text or verify something on the UI, then communicate
12
- that in your responses so that the user can see your thinking process in its entirety.`;
12
+ that in your responses so that the user can see your thinking process in its entirety.
13
+
14
+ Your work is limited to the current browser page (tab) that you are provided with. You will
15
+ have to conclude your actions before the user can ask you to do actions on different pages (tabs).`;
13
16
  const pageGotoTool = {
14
17
  type: "function",
15
18
  name: "page_goto",