@empiricalrun/test-gen 0.76.0 → 0.77.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +33 -0
- package/dist/agent/base/index.d.ts +25 -21
- package/dist/agent/base/index.d.ts.map +1 -1
- package/dist/agent/base/index.js +48 -37
- package/dist/agent/browsing/run.d.ts +1 -2
- package/dist/agent/browsing/run.d.ts.map +1 -1
- package/dist/agent/browsing/run.js +3 -9
- package/dist/agent/browsing/utils.d.ts +2 -9
- package/dist/agent/browsing/utils.d.ts.map +1 -1
- package/dist/agent/browsing/utils.js +5 -109
- package/dist/agent/chat/agent-loop.d.ts +5 -5
- package/dist/agent/chat/agent-loop.d.ts.map +1 -1
- package/dist/agent/chat/agent-loop.js +3 -8
- package/dist/agent/chat/exports.d.ts +5 -4
- package/dist/agent/chat/exports.d.ts.map +1 -1
- package/dist/agent/chat/exports.js +4 -7
- package/dist/agent/chat/index.d.ts +2 -2
- package/dist/agent/chat/index.d.ts.map +1 -1
- package/dist/agent/chat/index.js +23 -35
- package/dist/agent/chat/models.d.ts +0 -2
- package/dist/agent/chat/models.d.ts.map +1 -1
- package/dist/agent/chat/models.js +12 -26
- package/dist/agent/chat/prompt/pw-utils-docs.d.ts +1 -1
- package/dist/agent/chat/prompt/pw-utils-docs.d.ts.map +1 -1
- package/dist/agent/chat/prompt/pw-utils-docs.js +52 -0
- package/dist/agent/chat/prompt/repo.d.ts.map +1 -1
- package/dist/agent/chat/prompt/repo.js +11 -22
- package/dist/agent/chat/prompt/test-case-def.d.ts +2 -0
- package/dist/agent/chat/prompt/test-case-def.d.ts.map +1 -0
- package/dist/agent/chat/prompt/test-case-def.js +44 -0
- package/dist/agent/chat/state.d.ts +7 -6
- package/dist/agent/chat/state.d.ts.map +1 -1
- package/dist/agent/chat/state.js +15 -45
- package/dist/agent/chat/utils.d.ts +2 -2
- package/dist/agent/chat/utils.d.ts.map +1 -1
- package/dist/agent/chat/utils.js +14 -7
- package/dist/agent/cli.d.ts.map +1 -1
- package/dist/agent/cli.js +62 -58
- package/dist/agent/code-review/executor/index.d.ts +5 -0
- package/dist/agent/code-review/executor/index.d.ts.map +1 -0
- package/dist/agent/code-review/executor/index.js +13 -0
- package/dist/agent/code-review/index.d.ts +8 -3
- package/dist/agent/code-review/index.d.ts.map +1 -1
- package/dist/agent/code-review/index.js +115 -21
- package/dist/agent/code-review/parser.d.ts +5 -0
- package/dist/agent/code-review/parser.d.ts.map +1 -0
- package/dist/agent/code-review/parser.js +70 -0
- package/dist/agent/code-review/types.d.ts +36 -0
- package/dist/agent/code-review/types.d.ts.map +1 -0
- package/dist/agent/code-review/types.js +13 -0
- package/dist/agent/cua/index.d.ts.map +1 -1
- package/dist/agent/cua/index.js +18 -2
- package/dist/agent/cua/model.d.ts.map +1 -1
- package/dist/agent/cua/model.js +4 -1
- package/dist/agent/cua/pw-codegen/pw-pause/index.d.ts.map +1 -1
- package/dist/agent/triage/index.d.ts +2 -2
- package/dist/agent/triage/index.d.ts.map +1 -1
- package/dist/agent/triage/index.js +8 -7
- package/dist/agent/video-analysis/executor/index.d.ts +5 -0
- package/dist/agent/video-analysis/executor/index.d.ts.map +1 -0
- package/dist/agent/video-analysis/executor/index.js +10 -0
- package/dist/agent/video-analysis/index.d.ts +2 -2
- package/dist/agent/video-analysis/index.d.ts.map +1 -1
- package/dist/agent/video-analysis/index.js +38 -13
- package/dist/artifacts/index.d.ts +1 -1
- package/dist/artifacts/index.d.ts.map +1 -1
- package/dist/artifacts/index.js +3 -1
- package/dist/artifacts/utils.d.ts.map +1 -1
- package/dist/bin/index.js +66 -21
- package/dist/constants/index.d.ts +14 -0
- package/dist/constants/index.d.ts.map +1 -1
- package/dist/constants/index.js +33 -1
- package/dist/file/server.d.ts +1 -3
- package/dist/file/server.d.ts.map +1 -1
- package/dist/file/server.js +0 -13
- package/dist/file-info/adapters/file-system/index.d.ts.map +1 -1
- package/dist/file-info/adapters/file-system/reader.d.ts.map +1 -1
- package/dist/file-info/adapters/file-system/reader.js +8 -1
- package/dist/file-info/adapters/github/index.d.ts.map +1 -1
- package/dist/file-info/adapters/github/reader.d.ts +1 -1
- package/dist/file-info/adapters/github/reader.d.ts.map +1 -1
- package/dist/file-info/adapters/github/reader.js +8 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/tools/analyse-video/index.d.ts +5 -0
- package/dist/tools/analyse-video/index.d.ts.map +1 -0
- package/dist/tools/analyse-video/index.js +50 -0
- package/dist/tools/create-pull-request/index.js +4 -6
- package/dist/tools/create-pull-request/utils.d.ts +1 -1
- package/dist/tools/definitions/{fetch-video-analysis.d.ts → analyse-video.d.ts} +13 -8
- package/dist/tools/definitions/analyse-video.d.ts.map +1 -0
- package/dist/tools/definitions/analyse-video.js +60 -0
- package/dist/tools/definitions/review-pull-request.d.ts +3 -0
- package/dist/tools/definitions/review-pull-request.d.ts.map +1 -0
- package/dist/tools/definitions/review-pull-request.js +16 -0
- package/dist/tools/definitions/str_replace_editor.d.ts +1 -0
- package/dist/tools/definitions/str_replace_editor.d.ts.map +1 -1
- package/dist/tools/definitions/str_replace_editor.js +4 -1
- package/dist/tools/definitions/test-gen-browser.d.ts +0 -3
- package/dist/tools/definitions/test-gen-browser.d.ts.map +1 -1
- package/dist/tools/definitions/test-gen-browser.js +33 -8
- package/dist/tools/delete-file/index.d.ts.map +1 -1
- package/dist/tools/delete-file/index.js +1 -19
- package/dist/tools/executor/base.d.ts +32 -0
- package/dist/tools/executor/base.d.ts.map +1 -0
- package/dist/tools/executor/base.js +114 -0
- package/dist/tools/executor/index.d.ts +3 -22
- package/dist/tools/executor/index.d.ts.map +1 -1
- package/dist/tools/executor/index.js +7 -100
- package/dist/tools/executor/utils/checkpoint.d.ts +1 -1
- package/dist/tools/executor/utils/checkpoint.d.ts.map +1 -1
- package/dist/tools/executor/utils/checkpoint.js +6 -2
- package/dist/tools/executor/utils/git.d.ts +2 -2
- package/dist/tools/executor/utils/git.d.ts.map +1 -1
- package/dist/tools/executor/utils/git.js +7 -3
- package/dist/tools/executor/utils/index.d.ts.map +1 -1
- package/dist/tools/executor/utils/index.js +1 -1
- package/dist/tools/fetch-session-diff/index.js +2 -2
- package/dist/tools/file-operations/create.d.ts.map +1 -1
- package/dist/tools/file-operations/create.js +1 -4
- package/dist/tools/file-operations/index.d.ts +2 -1
- package/dist/tools/file-operations/index.d.ts.map +1 -1
- package/dist/tools/file-operations/index.js +4 -1
- package/dist/tools/file-operations/insert.d.ts +1 -2
- package/dist/tools/file-operations/insert.d.ts.map +1 -1
- package/dist/tools/file-operations/insert.js +1 -4
- package/dist/tools/file-operations/replace.d.ts.map +1 -1
- package/dist/tools/file-operations/replace.js +1 -4
- package/dist/tools/grep/index.d.ts.map +1 -1
- package/dist/tools/grep/index.js +18 -11
- package/dist/tools/index.d.ts +5 -5
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +17 -16
- package/dist/tools/merge-conflicts/index.d.ts.map +1 -1
- package/dist/tools/merge-conflicts/index.js +1 -1
- package/dist/tools/rename-file/index.js +1 -1
- package/dist/tools/review-pull-request/index.d.ts.map +1 -1
- package/dist/tools/review-pull-request/index.js +45 -59
- package/dist/tools/run-test.d.ts.map +1 -1
- package/dist/tools/run-test.js +25 -3
- package/dist/tools/test-gen-browser.d.ts.map +1 -1
- package/dist/tools/test-gen-browser.js +51 -47
- package/dist/utils/artifact-paths.d.ts +20 -0
- package/dist/utils/artifact-paths.d.ts.map +1 -0
- package/dist/utils/artifact-paths.js +16 -0
- package/dist/utils/dedup-image-fs.d.ts +2 -16
- package/dist/utils/dedup-image-fs.d.ts.map +1 -1
- package/dist/utils/dedup-image-fs.js +12 -16
- package/dist/utils/dedup-image.d.ts +1 -14
- package/dist/utils/dedup-image.d.ts.map +1 -1
- package/dist/utils/dedup-image.js +7 -62
- package/dist/utils/{local-ffmpeg-client.d.ts → ffmpeg/index.d.ts} +6 -7
- package/dist/utils/ffmpeg/index.d.ts.map +1 -0
- package/dist/utils/{local-ffmpeg-client.js → ffmpeg/index.js} +169 -53
- package/dist/utils/find-threshold.d.ts +8 -0
- package/dist/utils/find-threshold.d.ts.map +1 -0
- package/dist/utils/find-threshold.js +55 -0
- package/dist/utils/hash.d.ts +2 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +24 -0
- package/dist/utils/model.d.ts +1 -1
- package/dist/utils/model.d.ts.map +1 -1
- package/dist/utils/model.js +7 -5
- package/dist/utils/repo-tree.d.ts +0 -1
- package/dist/utils/repo-tree.d.ts.map +1 -1
- package/dist/utils/repo-tree.js +2 -14
- package/dist/utils/slug.js +1 -1
- package/dist/video-core/agent-orchestrator.d.ts +14 -0
- package/dist/video-core/agent-orchestrator.d.ts.map +1 -0
- package/dist/video-core/agent-orchestrator.js +78 -0
- package/dist/video-core/analysis-server.d.ts +24 -0
- package/dist/video-core/analysis-server.d.ts.map +1 -0
- package/dist/video-core/analysis-server.js +398 -0
- package/dist/video-core/analysis-viewer.html +1374 -0
- package/dist/video-core/index.d.ts +44 -0
- package/dist/video-core/index.d.ts.map +1 -0
- package/dist/video-core/index.js +204 -0
- package/dist/video-core/model-limits.d.ts +4 -0
- package/dist/video-core/model-limits.d.ts.map +1 -0
- package/dist/video-core/model-limits.js +67 -0
- package/dist/video-core/storage-manager.d.ts +5 -0
- package/dist/video-core/storage-manager.d.ts.map +1 -0
- package/dist/video-core/storage-manager.js +55 -0
- package/dist/video-core/types.d.ts +13 -0
- package/dist/video-core/types.d.ts.map +1 -0
- package/dist/video-core/types.js +2 -0
- package/dist/video-core/utils.d.ts +25 -0
- package/dist/video-core/utils.d.ts.map +1 -0
- package/dist/video-core/utils.js +211 -0
- package/dist/video-core/xml-parser.d.ts +3 -0
- package/dist/video-core/xml-parser.d.ts.map +1 -0
- package/dist/video-core/xml-parser.js +27 -0
- package/package.json +5 -6
- package/tsconfig.tsbuildinfo +1 -1
- package/dist/agent/chat/prompt/index.d.ts +0 -6
- package/dist/agent/chat/prompt/index.d.ts.map +0 -1
- package/dist/agent/chat/prompt/index.js +0 -200
- package/dist/agent/code-review/prompt.d.ts +0 -2
- package/dist/agent/code-review/prompt.d.ts.map +0 -1
- package/dist/agent/code-review/prompt.js +0 -55
- package/dist/agent/diagnosis-agent/index.d.ts +0 -11
- package/dist/agent/diagnosis-agent/index.d.ts.map +0 -1
- package/dist/agent/diagnosis-agent/index.js +0 -88
- package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts +0 -10
- package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts.map +0 -1
- package/dist/agent/diagnosis-agent/strict-mode-violation.js +0 -30
- package/dist/tools/definitions/extract-frames-from-video.d.ts +0 -39
- package/dist/tools/definitions/extract-frames-from-video.d.ts.map +0 -1
- package/dist/tools/definitions/extract-frames-from-video.js +0 -60
- package/dist/tools/definitions/fetch-video-analysis.d.ts.map +0 -1
- package/dist/tools/definitions/fetch-video-analysis.js +0 -61
- package/dist/tools/extract-frames-from-video/index.d.ts +0 -7
- package/dist/tools/extract-frames-from-video/index.d.ts.map +0 -1
- package/dist/tools/extract-frames-from-video/index.js +0 -145
- package/dist/tools/fetch-video-analysis/index.d.ts +0 -5
- package/dist/tools/fetch-video-analysis/index.d.ts.map +0 -1
- package/dist/tools/fetch-video-analysis/index.js +0 -149
- package/dist/tools/fetch-video-analysis/open-ai.d.ts +0 -6
- package/dist/tools/fetch-video-analysis/open-ai.d.ts.map +0 -1
- package/dist/tools/fetch-video-analysis/open-ai.js +0 -37
- package/dist/tools/fetch-video-analysis/utils.d.ts +0 -16
- package/dist/tools/fetch-video-analysis/utils.d.ts.map +0 -1
- package/dist/tools/fetch-video-analysis/utils.js +0 -121
- package/dist/tools/fetch-video-analysis/video-analysis.d.ts +0 -7
- package/dist/tools/fetch-video-analysis/video-analysis.d.ts.map +0 -1
- package/dist/tools/fetch-video-analysis/video-analysis.js +0 -70
- package/dist/tools/file-operations/shared/git-helper.d.ts +0 -4
- package/dist/tools/file-operations/shared/git-helper.d.ts.map +0 -1
- package/dist/tools/file-operations/shared/git-helper.js +0 -29
- package/dist/utils/local-ffmpeg-client.d.ts.map +0 -1
- package/eslint.config.mjs +0 -43
package/dist/agent/cli.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/agent/cli.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/agent/cli.ts"],"names":[],"mappings":"AACA,OAAO,EACL,aAAa,EAKb,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AA+BpC,wBAAsB,yBAAyB,IAAI,OAAO,CACxD,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CACvB,CAkCA;AAED,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,SAAS,EACT,SAAS,EACT,UAAU,GACX,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;IACzC,SAAS,EAAE,aAAa,CAAC;IACzB,UAAU,EAAE,OAAO,CAAC;IACpB,SAAS,EAAE,OAAO,CAAC;CACpB,iBA2KA"}
|
package/dist/agent/cli.js
CHANGED
|
@@ -3,7 +3,6 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.fetchEnvironmentVariables = fetchEnvironmentVariables;
|
|
4
4
|
exports.runChatAgentForCLI = runChatAgentForCLI;
|
|
5
5
|
const llm_1 = require("@empiricalrun/llm");
|
|
6
|
-
const chat_1 = require("@empiricalrun/llm/chat");
|
|
7
6
|
const picocolors_1 = require("picocolors");
|
|
8
7
|
const client_1 = require("../dashboard/client");
|
|
9
8
|
const reader_1 = require("../file-info/adapters/file-system/reader");
|
|
@@ -11,17 +10,17 @@ const human_in_the_loop_1 = require("../human-in-the-loop");
|
|
|
11
10
|
const validation_1 = require("../recorder/validation");
|
|
12
11
|
const executor_1 = require("../tools/executor");
|
|
13
12
|
const git_1 = require("../tools/executor/utils/git");
|
|
14
|
-
const filesystem_cache_1 = require("./chat/filesystem-cache");
|
|
15
13
|
const state_1 = require("./chat/state");
|
|
16
14
|
const utils_1 = require("./chat/utils");
|
|
17
15
|
const index_1 = require("./index");
|
|
18
16
|
function stopCriteria(userPrompt) {
|
|
19
17
|
return userPrompt?.toLowerCase() === "stop";
|
|
20
18
|
}
|
|
21
|
-
function concludeAgent(
|
|
22
|
-
|
|
19
|
+
function concludeAgent(agent, useDiskForChatState) {
|
|
20
|
+
const chatState = agent.chatState;
|
|
21
|
+
console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + (0, state_1.getUsageSummary)(chatState))}`);
|
|
23
22
|
if (useDiskForChatState) {
|
|
24
|
-
(0, state_1.saveToDisk)(
|
|
23
|
+
(0, state_1.saveToDisk)(chatState);
|
|
25
24
|
}
|
|
26
25
|
}
|
|
27
26
|
async function fetchEnvironmentVariables() {
|
|
@@ -53,41 +52,47 @@ async function fetchEnvironmentVariables() {
|
|
|
53
52
|
return envVars;
|
|
54
53
|
}
|
|
55
54
|
async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, agentMode, resetChat, useFSCache, }) {
|
|
56
|
-
let chatState;
|
|
57
55
|
const enableStreaming = !useFSCache;
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
(0, state_1.clearChatState)();
|
|
61
|
-
}
|
|
62
|
-
if (useDiskForChatState) {
|
|
63
|
-
chatState = (0, state_1.loadChatState)();
|
|
64
|
-
}
|
|
56
|
+
// TODO: Implement cache support in BaseAgent
|
|
57
|
+
// const cache = useFSCache ? new FilesystemLLMCache() : undefined;
|
|
65
58
|
// TODO: Store branch name in chat state so that we don't recreate it every time
|
|
66
59
|
const randomId = crypto.randomUUID().substring(0, 8);
|
|
67
60
|
const branchName = `branch-${randomId}`;
|
|
68
61
|
await (0, git_1.checkoutBranch)(branchName, process.cwd());
|
|
69
|
-
let
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
if (initialPromptContent && chatModel.messages.length === 0) {
|
|
73
|
-
chatModel.pushUserMessage(initialPromptContent, []);
|
|
74
|
-
}
|
|
75
|
-
else if (initialPromptContent && chatModel.messages.length > 0) {
|
|
76
|
-
console.warn(`Ignoring initial prompt because we have existing messages.`);
|
|
77
|
-
}
|
|
78
|
-
if (chatModel.askUserForInput) {
|
|
79
|
-
// Show last message to the user for context when we loaded from disk
|
|
80
|
-
const latest = chatModel.getHumanReadableLatestMessage();
|
|
81
|
-
if (latest) {
|
|
82
|
-
console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
|
|
83
|
-
}
|
|
62
|
+
let chatState;
|
|
63
|
+
if (useDiskForChatState) {
|
|
64
|
+
chatState = (0, state_1.loadChatState)({ resetChat });
|
|
84
65
|
}
|
|
85
66
|
if (chatState && chatState.error) {
|
|
86
67
|
// Reset error state as we are attempting a retry
|
|
87
68
|
chatState.error = null;
|
|
88
69
|
}
|
|
70
|
+
if (initialPromptContent) {
|
|
71
|
+
if (!chatState) {
|
|
72
|
+
const { text, attachments } = (0, utils_1.extractAttachments)(initialPromptContent);
|
|
73
|
+
chatState = (0, state_1.createChatState)({
|
|
74
|
+
userPrompt: text,
|
|
75
|
+
attachments: attachments,
|
|
76
|
+
existingState: undefined,
|
|
77
|
+
selectedModel,
|
|
78
|
+
error: null,
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
console.warn(`Ignoring initial prompt because we have existing chat state.`);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
if (!chatState) {
|
|
86
|
+
chatState = (0, state_1.createChatState)({
|
|
87
|
+
userPrompt: undefined,
|
|
88
|
+
attachments: [],
|
|
89
|
+
selectedModel,
|
|
90
|
+
existingState: undefined,
|
|
91
|
+
error: null,
|
|
92
|
+
});
|
|
93
|
+
}
|
|
89
94
|
const handleSigInt = () => {
|
|
90
|
-
concludeAgent(
|
|
95
|
+
concludeAgent(agent, useDiskForChatState);
|
|
91
96
|
process.exit(0);
|
|
92
97
|
};
|
|
93
98
|
process.once("SIGINT", handleSigInt);
|
|
@@ -95,7 +100,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
95
100
|
let userPrompt;
|
|
96
101
|
let reporterFunc = async (chatState, latest) => {
|
|
97
102
|
if (useDiskForChatState) {
|
|
98
|
-
(0, state_1.saveToDisk)(chatState
|
|
103
|
+
(0, state_1.saveToDisk)(chatState);
|
|
99
104
|
}
|
|
100
105
|
if (latest) {
|
|
101
106
|
if (!enableStreaming) {
|
|
@@ -122,13 +127,31 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
122
127
|
const apiClient = new client_1.DashboardAPIClient({
|
|
123
128
|
authType,
|
|
124
129
|
});
|
|
130
|
+
const toolExecutor = new executor_1.ToolExecutor({
|
|
131
|
+
chatSession: { branchName },
|
|
132
|
+
repoPath: process.cwd(),
|
|
133
|
+
apiClient,
|
|
134
|
+
trace,
|
|
135
|
+
featureFlags: [],
|
|
136
|
+
environmentOverrides: await fetchEnvironmentVariables(),
|
|
137
|
+
});
|
|
125
138
|
const fileInfoBuilder = () => (0, reader_1.getFileInfoFromFS)(process.cwd());
|
|
126
139
|
const agentParams = {
|
|
127
140
|
selectedModel,
|
|
141
|
+
featureFlags: [],
|
|
142
|
+
chatState,
|
|
143
|
+
toolExecutor,
|
|
128
144
|
};
|
|
129
|
-
const agent = index_1.MODE_TO_AGENT_MAP[agentMode](agentParams);
|
|
145
|
+
const agent = index_1.MODE_TO_AGENT_MAP[agentMode]({ ...agentParams });
|
|
146
|
+
if (agent.askUserForInput) {
|
|
147
|
+
// Show last message to the user for context when we loaded from disk
|
|
148
|
+
const latest = agent.getHumanReadableLatestMessage();
|
|
149
|
+
if (latest) {
|
|
150
|
+
console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
130
153
|
while (!stopCriteria(userPrompt)) {
|
|
131
|
-
if (
|
|
154
|
+
if (agent.askUserForInput) {
|
|
132
155
|
try {
|
|
133
156
|
userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
|
|
134
157
|
message: "User:",
|
|
@@ -137,33 +160,19 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
137
160
|
catch (e) {
|
|
138
161
|
// https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
|
|
139
162
|
if (e instanceof Error && e.name === "ExitPromptError") {
|
|
140
|
-
concludeAgent(
|
|
163
|
+
concludeAgent(agent, useDiskForChatState);
|
|
141
164
|
process.exit(0);
|
|
142
165
|
}
|
|
143
|
-
concludeAgent(
|
|
144
|
-
message: e.message,
|
|
145
|
-
stack: e.stack || "Stack trace not available",
|
|
146
|
-
timestamp: new Date().toISOString(),
|
|
147
|
-
});
|
|
166
|
+
concludeAgent(agent, useDiskForChatState);
|
|
148
167
|
throw e;
|
|
149
168
|
}
|
|
150
169
|
if (!stopCriteria(userPrompt)) {
|
|
151
170
|
const { text, attachments } = (0, utils_1.extractAttachments)(userPrompt);
|
|
152
|
-
|
|
171
|
+
agent.pushUserMessage(text, attachments);
|
|
153
172
|
}
|
|
154
173
|
}
|
|
155
174
|
else {
|
|
156
|
-
const toolExecutor = new executor_1.ToolExecutor({
|
|
157
|
-
chatSession: null,
|
|
158
|
-
branchName,
|
|
159
|
-
repoPath: process.cwd(),
|
|
160
|
-
apiClient,
|
|
161
|
-
trace,
|
|
162
|
-
featureFlags: [],
|
|
163
|
-
environmentOverrides: await fetchEnvironmentVariables(),
|
|
164
|
-
});
|
|
165
175
|
await agent.runLoop({
|
|
166
|
-
messages: chatModel.messages,
|
|
167
176
|
reporter: reporterFunc,
|
|
168
177
|
streamingMessageReporter: (() => {
|
|
169
178
|
if (!enableStreaming) {
|
|
@@ -192,18 +201,13 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
192
201
|
trace,
|
|
193
202
|
repoInfoBuilder: fileInfoBuilder,
|
|
194
203
|
onPendingToolCall: async (toolCalls) => {
|
|
195
|
-
const toolResults = await toolExecutor.execute(toolCalls);
|
|
196
|
-
|
|
204
|
+
const { toolResults, checkpoint } = await toolExecutor.execute(toolCalls);
|
|
205
|
+
agent.processToolResults(toolCalls, toolResults, checkpoint);
|
|
197
206
|
},
|
|
198
207
|
});
|
|
199
|
-
// Update the chatModel with the agent's final state for next iteration
|
|
200
|
-
if (agent.messages) {
|
|
201
|
-
chatModel = (0, chat_1.createChatModel)(agent.messages, selectedModel, undefined, cache);
|
|
202
|
-
}
|
|
203
208
|
}
|
|
204
209
|
}
|
|
205
|
-
trace?.update({ output: { messages:
|
|
210
|
+
trace?.update({ output: { messages: agent.messages } });
|
|
206
211
|
await llm_1.langfuseInstance?.flushAsync();
|
|
207
|
-
|
|
208
|
-
console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
|
|
212
|
+
console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + (0, state_1.getUsageSummary)(agent.chatState))}`);
|
|
209
213
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/agent/code-review/executor/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,gBAAgB,EAChB,qBAAqB,EACtB,MAAM,8BAA8B,CAAC;AAItC,qBAAa,sBAAuB,SAAQ,gBAAgB;gBAC9C,MAAM,EAAE,IAAI,CAAC,qBAAqB,EAAE,OAAO,CAAC;CAIzD"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CodeReviewToolExecutor = void 0;
|
|
4
|
+
const base_1 = require("../../../tools/executor/base");
|
|
5
|
+
const fetch_session_diff_1 = require("../../../tools/fetch-session-diff");
|
|
6
|
+
const file_operations_1 = require("../../../tools/file-operations");
|
|
7
|
+
class CodeReviewToolExecutor extends base_1.BaseToolExecutor {
|
|
8
|
+
constructor(params) {
|
|
9
|
+
const tools = [fetch_session_diff_1.fetchSessionDiffTool, ...file_operations_1.viewOnlyTools];
|
|
10
|
+
super({ ...params, tools });
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
exports.CodeReviewToolExecutor = CodeReviewToolExecutor;
|
|
@@ -1,7 +1,12 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { ToolsForLLM } from "@empiricalrun/shared-types";
|
|
2
2
|
import { BaseAgent } from "../base";
|
|
3
|
+
import { type CodeReviewResultV0, type CodeReviewResultV1, type CodeReviewResultV2, CodeReviewSeverity, CodeReviewVerdict } from "./types";
|
|
4
|
+
export type { CodeReviewResultV1, CodeReviewResultV0, CodeReviewResultV2 };
|
|
5
|
+
export { CodeReviewVerdict, CodeReviewSeverity };
|
|
6
|
+
export type CodeReviewVersionedResult = CodeReviewResultV1 | CodeReviewResultV0 | CodeReviewResultV2;
|
|
3
7
|
export declare class CodeReviewAgent extends BaseAgent {
|
|
4
|
-
protected getTools():
|
|
5
|
-
|
|
8
|
+
protected getTools(): ToolsForLLM;
|
|
9
|
+
getResult(): CodeReviewVersionedResult | undefined;
|
|
10
|
+
protected buildSystemPrompt(repoContext: string): Promise<string>;
|
|
6
11
|
}
|
|
7
12
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAmB,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAG/E,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,kBAAkB,EACvB,KAAK,kBAAkB,EACvB,kBAAkB,EAClB,iBAAiB,EAClB,MAAM,SAAS,CAAC;AAEjB,YAAY,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,CAAC;AAC3E,OAAO,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,CAAC;AACjD,MAAM,MAAM,yBAAyB,GACjC,kBAAkB,GAClB,kBAAkB,GAClB,kBAAkB,CAAC;AAEvB,qBAAa,eAAgB,SAAQ,SAAS;IAC5C,SAAS,CAAC,QAAQ,IAAI,WAAW;IAWjC,SAAS,IAAI,yBAAyB,GAAG,SAAS;cAmBlC,iBAAiB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAuHxE"}
|
|
@@ -1,52 +1,137 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.CodeReviewAgent = void 0;
|
|
3
|
+
exports.CodeReviewAgent = exports.CodeReviewSeverity = exports.CodeReviewVerdict = void 0;
|
|
4
4
|
const tools_1 = require("../../tools");
|
|
5
|
+
const fetch_session_diff_1 = require("../../tools/fetch-session-diff");
|
|
5
6
|
const base_1 = require("../base");
|
|
7
|
+
const parser_1 = require("./parser");
|
|
8
|
+
const types_1 = require("./types");
|
|
9
|
+
Object.defineProperty(exports, "CodeReviewSeverity", { enumerable: true, get: function () { return types_1.CodeReviewSeverity; } });
|
|
10
|
+
Object.defineProperty(exports, "CodeReviewVerdict", { enumerable: true, get: function () { return types_1.CodeReviewVerdict; } });
|
|
6
11
|
class CodeReviewAgent extends base_1.BaseAgent {
|
|
7
12
|
getTools() {
|
|
8
|
-
|
|
13
|
+
const custom = [
|
|
14
|
+
fetch_session_diff_1.fetchSessionDiffTool,
|
|
15
|
+
...(0, tools_1.textViewToolsForModel)(this.selectedModel),
|
|
16
|
+
];
|
|
17
|
+
return {
|
|
18
|
+
custom,
|
|
19
|
+
builtInTextEditor: (0, tools_1.hasBuiltInTextEditor)(this.selectedModel),
|
|
20
|
+
};
|
|
9
21
|
}
|
|
10
|
-
|
|
22
|
+
getResult() {
|
|
23
|
+
const messages = this.messages || [];
|
|
24
|
+
const lastMessage = messages.length
|
|
25
|
+
? messages[messages.length - 1]
|
|
26
|
+
: undefined;
|
|
27
|
+
const lastMessageTextPart = lastMessage
|
|
28
|
+
? lastMessage.parts
|
|
29
|
+
.filter((p) => "text" in p)
|
|
30
|
+
.find((p) => "text" in p && !!p.text)
|
|
31
|
+
: undefined;
|
|
32
|
+
const textPart = lastMessageTextPart;
|
|
33
|
+
const text = textPart?.text.trim();
|
|
34
|
+
if (!text) {
|
|
35
|
+
return undefined;
|
|
36
|
+
}
|
|
37
|
+
return (0, parser_1.convertXmlToV2Format)(text);
|
|
38
|
+
}
|
|
39
|
+
async buildSystemPrompt(repoContext) {
|
|
11
40
|
return `
|
|
12
41
|
You are an expert code reviewer that specializes in reviewing Playwright test code. You are
|
|
13
|
-
provided with tools to fetch diff for a code review, where a test has been added, test modified,
|
|
42
|
+
provided with tools to fetch diff and pull-request metadata for a code review, where a test has been added, test modified,
|
|
14
43
|
or some configuration has changed.
|
|
15
44
|
|
|
16
45
|
# Your goals
|
|
17
|
-
-
|
|
18
|
-
-
|
|
46
|
+
- Understand the purpose and scope of the code change. You can use available tools to gather context of the change.
|
|
47
|
+
- Identify critical issues that must be fixed before the code can be safely merged.
|
|
48
|
+
- Detect code smells, anti-patterns, and non-deterministic behaviors that reduce test reliability - see below.
|
|
49
|
+
- Call out test data assumptions or lack of clean up.
|
|
50
|
+
- Suggest improvements and best practices to enhance maintainability and readability.
|
|
51
|
+
- Form a definite conclusion on whether the code can be merged or not.
|
|
52
|
+
- Share your findings and conclusion in the structured format shared below
|
|
19
53
|
|
|
20
54
|
# Output format
|
|
21
|
-
- You are expected to return
|
|
22
|
-
-
|
|
23
|
-
-
|
|
24
|
-
|
|
55
|
+
- You are expected to return the following sections in your response: last_commit, describe_code_change, line_comments, verdict and version
|
|
56
|
+
- The last commit comes from the session diff tool call along with pull request metadata -- reproduce the commit sha as it is, without any additions (ignore the last commit timestamp) or bullet points
|
|
57
|
+
- describe_code_change: A brief summary of what the code change is doing. This should be 4-6 sentences in a bullet list, formatted in markdown where each bullet must begin with a hyphen followed by a space (- ).
|
|
58
|
+
Do not use any other character for bullets.
|
|
59
|
+
- line_comments: Individual comments for specific issues found in the code, attributed to one or more lines of problematic code. There can be multiple separate issues for each line, share them using separate comments of varying severity.
|
|
60
|
+
Each comment should correspond to one issue and include the following tags:
|
|
61
|
+
- file: The relative path to the file from repository root
|
|
62
|
+
- line-start: Starting line number of the issue
|
|
63
|
+
- line-end: Ending line number of the issue (same as line-start for if issue is in a single line of code)
|
|
64
|
+
- severity: Either "merge-blocking" or "warning". It is possible to have multiple issues in the same set of lines of either nature, in that case add multiple line_comments for those lines.
|
|
65
|
+
Here severity denotes the nature of the issue - any issue that is preventing the code from being safe to merge and should be considered high priority is "merge-blocking". Look for any of the specific
|
|
66
|
+
bits below or other red flags you might see in the code. Each comment should be 1-2 sentences. If no blocking issues are found, a review will have no "merge-blocking" line comments.
|
|
67
|
+
Alternatively, the severity should be "warning" for situations where best practices were not followed or contain minor issues or warnings that can be safely ignored ie
|
|
68
|
+
- message: 1-2 sentences describing the specific issue and suggested fixes or improvements.
|
|
69
|
+
- verdict: "Approved" if code can be merged to production ie there are no "merge-blocking" line-comments or "Rejected" if the issues cannot be safely ignored. "Approved" or
|
|
70
|
+
"Rejected" are the only two possible values for this field.
|
|
25
71
|
|
|
26
72
|
Return these as XML tags with markdown inside them
|
|
27
73
|
|
|
74
|
+
<last_commit>
|
|
75
|
+
...
|
|
76
|
+
</last_commit>
|
|
77
|
+
|
|
28
78
|
<describe_code_change>
|
|
29
|
-
|
|
79
|
+
...
|
|
30
80
|
</describe_code_change>
|
|
31
81
|
|
|
32
|
-
<
|
|
33
|
-
|
|
34
|
-
|
|
82
|
+
<line_comments>
|
|
83
|
+
<comment>
|
|
84
|
+
<file>..</file>
|
|
85
|
+
<line-start>..</line-start>
|
|
86
|
+
<line-end>..</line-end>
|
|
87
|
+
<severity>..</severity>
|
|
88
|
+
<message>..</message>
|
|
89
|
+
</comment>
|
|
90
|
+
|
|
91
|
+
<comment>
|
|
92
|
+
<file>..</file>
|
|
93
|
+
<line-start>..</line-start>
|
|
94
|
+
<line-end>..</line-end>
|
|
95
|
+
<severity>..</severity>
|
|
96
|
+
<message>..</message>
|
|
97
|
+
</comment>
|
|
98
|
+
</line_comments>
|
|
99
|
+
|
|
100
|
+
<verdict>
|
|
101
|
+
...
|
|
102
|
+
</verdict>
|
|
35
103
|
|
|
36
|
-
#
|
|
104
|
+
# Severity: Merge blocking
|
|
37
105
|
|
|
38
|
-
##
|
|
39
|
-
-
|
|
106
|
+
## Functionality regression
|
|
107
|
+
- If the change is modifying an existing test, we need to ensure the functionality of the original test
|
|
108
|
+
is maintained in the new version. No hacking our way to get a green test!
|
|
109
|
+
|
|
110
|
+
## Exception handling
|
|
111
|
+
- Any form of try-catch or exception handling is a code smell in test code. If there's an
|
|
40
112
|
exception, the test should fail
|
|
41
|
-
|
|
113
|
+
|
|
114
|
+
## Conditionals
|
|
115
|
+
- Any conditionals (if, switch, ternary) in test code is a code smell. Tests are expected to be
|
|
42
116
|
deterministic. If you see conditionals, check if there's a comment explaining why it's needed.
|
|
43
117
|
Critically review the comment -- if it's not convincing, call it out as a code smell.
|
|
44
118
|
|
|
45
|
-
##
|
|
119
|
+
## Playwright common mistakes
|
|
120
|
+
- Don't use waitForLoadState or networkidle - these are not required since Playwright auto-waits after navigations. networkidle
|
|
121
|
+
can cause failures because modern web apps often have background network activity, which never settles.
|
|
46
122
|
- Use locators instead of selectors: waitForSelector, $, $$ are bad - use locators instead (e.g. locator.waitFor)
|
|
47
123
|
- If the test relies on some Playwright APIs that do not auto-wait (e.g. isVisible(), count()), we need to ensure
|
|
48
124
|
they are used AFTER some action that ensures the page has loaded. If nothing, at least it should have a waitForTimeout
|
|
49
|
-
|
|
125
|
+
|
|
126
|
+
## Deprecated patterns
|
|
127
|
+
- test.describe.serial(...) is not prefered: use test.describe.configure({ mode: "serial" }) if the tests need to be serial
|
|
128
|
+
|
|
129
|
+
## Repo conventions
|
|
130
|
+
- Tests are located in files in the tests/ directory (e.g. in tests/example.spec.ts)
|
|
131
|
+
- Helper methods (that are imported in the tests) should be in pages/ directory (e.g. pages/common.ts)
|
|
132
|
+
- Helper methods should be functional - not classes (conventional class-based page object models are NOT recommended - use functions!)
|
|
133
|
+
|
|
134
|
+
# Severity: Warning
|
|
50
135
|
|
|
51
136
|
## Call out test data assumptions
|
|
52
137
|
- If new test data is created (e.g. creating a new entity in the app, doing some actions on it) - it should be cleaned up
|
|
@@ -58,7 +143,16 @@ Return these as XML tags with markdown inside them
|
|
|
58
143
|
- Dependency on static data that can change across environments (e.g. number of rows in a table) should be avoided.
|
|
59
144
|
|
|
60
145
|
## Remove debug artifacts
|
|
61
|
-
- If there are console.logs or page.screenshot usage, call it out.
|
|
146
|
+
- If there are console.logs or page.screenshot usage, call it out.
|
|
147
|
+
|
|
148
|
+
## Extra waits
|
|
149
|
+
- Waits with static timeout values are bad, but sometimes needed. Some apps are flaky and need additional waiting.
|
|
150
|
+
|
|
151
|
+
## Element locators
|
|
152
|
+
- CSS selectors can be brittle - prefer user facing selectors like getByRole, getByText
|
|
153
|
+
|
|
154
|
+
# Repo context
|
|
155
|
+
${repoContext}
|
|
62
156
|
`;
|
|
63
157
|
}
|
|
64
158
|
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import { type CodeReviewResultV2 } from "./types";
|
|
2
|
+
export type { CodeReviewLineComment, CodeReviewResultV0, CodeReviewResultV1, CodeReviewResultV2, } from "./types";
|
|
3
|
+
export { CodeReviewSeverity, CodeReviewVerdict } from "./types";
|
|
4
|
+
export declare function convertXmlToV2Format(output: string): CodeReviewResultV2;
|
|
5
|
+
//# sourceMappingURL=parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,kBAAkB,EAGxB,MAAM,SAAS,CAAC;AAEjB,YAAY,EACV,qBAAqB,EACrB,kBAAkB,EAClB,kBAAkB,EAClB,kBAAkB,GACnB,MAAM,SAAS,CAAC;AAEjB,OAAO,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAC;AAchE,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,MAAM,GAAG,kBAAkB,CAmFvE"}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CodeReviewVerdict = exports.CodeReviewSeverity = void 0;
|
|
4
|
+
exports.convertXmlToV2Format = convertXmlToV2Format;
|
|
5
|
+
const types_1 = require("./types");
|
|
6
|
+
var types_2 = require("./types");
|
|
7
|
+
Object.defineProperty(exports, "CodeReviewSeverity", { enumerable: true, get: function () { return types_2.CodeReviewSeverity; } });
|
|
8
|
+
Object.defineProperty(exports, "CodeReviewVerdict", { enumerable: true, get: function () { return types_2.CodeReviewVerdict; } });
|
|
9
|
+
/**
 * Derives an overall verdict from parsed line comments when the review output
 * carries no explicit verdict.
 *
 * @param {Array<{severity: string}>} lineComments - Parsed review comments.
 * @returns {string} `CodeReviewVerdict.Rejected` when at least one comment is
 *   merge-blocking, otherwise `CodeReviewVerdict.Approved`.
 */
function inferVerdictFromCommentsV2(lineComments) {
    for (const { severity } of lineComments) {
        // A single merge-blocking comment is enough to reject the change.
        if (severity === types_1.CodeReviewSeverity.MergeBlocking) {
            return types_1.CodeReviewVerdict.Rejected;
        }
    }
    return types_1.CodeReviewVerdict.Approved;
}
|
|
15
|
+
/**
 * Converts the XML-tagged text of a code review into a version "2.0" result.
 *
 * Tag names are matched case-insensitively and only the first occurrence of
 * each top-level tag is used.
 *
 * @param {string} output - Raw review text containing `<last_commit>`,
 *   `<describe_code_change>`, and optionally `<verdict>` and `<line_comments>`.
 * @returns {{version: "2.0", last_commit: string, describe_code_change: string,
 *   line_comments: Array<{file: string, line_start: number, line_end: number,
 *   severity: string, message: string}>, verdict: string}} The parsed result.
 * @throws {Error} When `<last_commit>` or `<describe_code_change>` is missing.
 *   Previously this surfaced as an opaque TypeError from reading `[1]` of null.
 */
function convertXmlToV2Format(output) {
    // Trimmed inner text of the first <tag>...</tag> pair, or null when absent.
    const readTag = (text, tag) => {
        const match = text.match(new RegExp(`<${tag}>([\\s\\S]*?)<\\/${tag}>`, "i"));
        return match ? match[1].trim() : null;
    };
    // A Map (not an object literal) so texts like "constructor" cannot resolve
    // to inherited prototype properties.
    const severityByText = new Map([
        ["merge-blocking", types_1.CodeReviewSeverity.MergeBlocking],
        ["warning", types_1.CodeReviewSeverity.Warning],
    ]);

    const lastCommit = readTag(output, "last_commit");
    if (lastCommit === null) {
        throw new Error("Code review output is missing the <last_commit> tag");
    }
    const describeCodeChange = readTag(output, "describe_code_change");
    if (describeCodeChange === null) {
        throw new Error("Code review output is missing the <describe_code_change> tag");
    }

    const lineComments = [];
    const commentsContent = readTag(output, "line_comments");
    const commentBlocks = commentsContent === null
        ? []
        : commentsContent.match(/<comment>([\s\S]*?)<\/comment>/gi) ?? [];
    for (const commentBlock of commentBlocks) {
        const file = readTag(commentBlock, "file");
        const lineStart = readTag(commentBlock, "line-start");
        const lineEnd = readTag(commentBlock, "line-end");
        const severityText = readTag(commentBlock, "severity");
        const message = readTag(commentBlock, "message");
        // A comment is kept only when all five fields are present.
        if (file === null ||
            lineStart === null ||
            lineEnd === null ||
            severityText === null ||
            message === null) {
            continue;
        }
        // Comments with an unrecognised severity are dropped.
        const severity = severityByText.get(severityText);
        if (severity === undefined) {
            continue;
        }
        lineComments.push({
            file,
            // Non-numeric line values parse to NaN and are passed through as-is.
            line_start: Number.parseInt(lineStart, 10),
            line_end: Number.parseInt(lineEnd, 10),
            severity,
            message,
        });
    }

    // A missing or empty verdict is inferred from comment severities; any
    // explicit text other than "Approved" counts as a rejection.
    const verdictText = readTag(output, "verdict");
    let finalVerdict;
    if (!verdictText) {
        finalVerdict = inferVerdictFromCommentsV2(lineComments);
    }
    else if (verdictText === types_1.CodeReviewVerdict.Approved) {
        finalVerdict = types_1.CodeReviewVerdict.Approved;
    }
    else {
        finalVerdict = types_1.CodeReviewVerdict.Rejected;
    }

    return {
        version: "2.0",
        last_commit: lastCommit,
        describe_code_change: describeCodeChange,
        line_comments: lineComments,
        verdict: finalVerdict,
    };
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
export declare enum CodeReviewVerdict {
|
|
2
|
+
Approved = "Approved",
|
|
3
|
+
Rejected = "Rejected"
|
|
4
|
+
}
|
|
5
|
+
export declare enum CodeReviewSeverity {
|
|
6
|
+
MergeBlocking = "merge-blocking",
|
|
7
|
+
Warning = "warning"
|
|
8
|
+
}
|
|
9
|
+
export type CodeReviewResultV1 = {
|
|
10
|
+
version: "1.0";
|
|
11
|
+
last_commit: string | null;
|
|
12
|
+
code_review_comments: string | null;
|
|
13
|
+
describe_code_change: string | null;
|
|
14
|
+
merge_blocking_issues: string | null;
|
|
15
|
+
best_practices_and_warnings: string | null;
|
|
16
|
+
verdict: "Approved" | "Rejected" | null;
|
|
17
|
+
};
|
|
18
|
+
export type CodeReviewLineComment = {
|
|
19
|
+
file: string;
|
|
20
|
+
line_start: number;
|
|
21
|
+
line_end: number;
|
|
22
|
+
severity: CodeReviewSeverity;
|
|
23
|
+
message: string;
|
|
24
|
+
};
|
|
25
|
+
export type CodeReviewResultV2 = {
|
|
26
|
+
version: "2.0";
|
|
27
|
+
last_commit: string;
|
|
28
|
+
describe_code_change: string;
|
|
29
|
+
line_comments: CodeReviewLineComment[];
|
|
30
|
+
verdict: CodeReviewVerdict;
|
|
31
|
+
};
|
|
32
|
+
export type CodeReviewResultV0 = {
|
|
33
|
+
version: "0.1";
|
|
34
|
+
result: string;
|
|
35
|
+
};
|
|
36
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/types.ts"],"names":[],"mappings":"AAAA,oBAAY,iBAAiB;IAC3B,QAAQ,aAAa;IACrB,QAAQ,aAAa;CACtB;AAED,oBAAY,kBAAkB;IAC5B,aAAa,mBAAmB;IAChC,OAAO,YAAY;CACpB;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC/B,OAAO,EAAE,KAAK,CAAC;IACf,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,qBAAqB,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,2BAA2B,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,OAAO,EAAE,UAAU,GAAG,UAAU,GAAG,IAAI,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,kBAAkB,CAAC;IAC7B,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,OAAO,EAAE,KAAK,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,aAAa,EAAE,qBAAqB,EAAE,CAAC;IACvC,OAAO,EAAE,iBAAiB,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,OAAO,EAAE,KAAK,CAAC;IAEf,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CodeReviewSeverity = exports.CodeReviewVerdict = void 0;
|
|
4
|
+
// Overall outcome of a code review; each member's value equals its name.
const CodeReviewVerdict = {
    Approved: "Approved",
    Rejected: "Rejected",
};
exports.CodeReviewVerdict = CodeReviewVerdict;
|
|
9
|
+
// Severity levels a review comment can carry, keyed by member name.
const CodeReviewSeverity = {
    MergeBlocking: "merge-blocking",
    Warning: "warning",
};
exports.CodeReviewSeverity = CodeReviewSeverity;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAoDlC,MAAM,MAAM,kBAAkB,GAAG,KAAK,CAClC;IACE,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,iBAAiB,CAAC;CAC/B,CACJ,CAAC;AAEF,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,8BAAsC,GACvC,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,8BAA8B,CAAC,EAAE,OAAO,CAAC;CAC1C,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,KAAK,EAAE,KAAK,CAAC;CACd,CAAC,CAuND"}
|
package/dist/agent/cua/index.js
CHANGED
|
@@ -28,6 +28,22 @@ function artifact(screenshot, name) {
|
|
|
28
28
|
data: Buffer.from(screenshot, "base64"),
|
|
29
29
|
};
|
|
30
30
|
}
|
|
31
|
+
/**
 * Builds a markdown summary of the browser window: the URL and title of the
 * current page, followed by a count and listing of every page in its context.
 *
 * NOTE(review): `title()` is interpolated without `await`. If `page` is a
 * Playwright Page (where `title()` returns a Promise), the text will contain
 * "[object Promise]" instead of the title. Fixing that requires making this
 * function async and awaiting it at its call sites.
 *
 * @param {object} page - Page exposing `context()`, `url()` and `title()`.
 * @returns {string} The summary text, starting with a blank line.
 */
function stateOfTheBrowser(page) {
    const openPages = page.context().pages();
    const currentUrl = page.url();
    const currentTitle = page.title();
    const listing = openPages
        .map((openPage) => ` - ${openPage.url()} - ${openPage.title()}`)
        .join("\n");
    return [
        "",
        "## Browser window",
        "",
        "### Current page (what you are working on)",
        `Current page URL: ${currentUrl}`,
        `Current page title: ${currentTitle}`,
        "",
        "### All pages",
        `Number of open pages: ${openPages.length}`,
        "",
        "URLs and titles:",
        listing,
    ].join("\n");
}
|
|
31
47
|
async function createTestUsingComputerUseAgent({ page, task, trace, prefersElementFromPointCodegen = false, }) {
|
|
32
48
|
const codegen = await getCodegenInstance(prefersElementFromPointCodegen);
|
|
33
49
|
await codegen.initialize(page);
|
|
@@ -53,7 +69,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, prefersEleme
|
|
|
53
69
|
content: [
|
|
54
70
|
{
|
|
55
71
|
type: "input_text",
|
|
56
|
-
text: `Task to execute: ${task}\n\
|
|
72
|
+
text: `Task to execute: ${task}\n\n${stateOfTheBrowser(page)}`,
|
|
57
73
|
},
|
|
58
74
|
{
|
|
59
75
|
type: "input_image",
|
|
@@ -177,7 +193,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, prefersEleme
|
|
|
177
193
|
content: [
|
|
178
194
|
{
|
|
179
195
|
type: "input_text",
|
|
180
|
-
text: `Action executed: ${executedActionSummary || "None"}\
|
|
196
|
+
text: `Action executed: ${executedActionSummary || "None"}\n\n${stateOfTheBrowser(page)}`,
|
|
181
197
|
},
|
|
182
198
|
],
|
|
183
199
|
},
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;
|
|
1
|
+
{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AAiClD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;;;EAQrE"}
|
package/dist/agent/cua/model.js
CHANGED
|
@@ -9,7 +9,10 @@ For example, if the user message says "Click on Submit button", then
|
|
|
9
9
|
you click on the submit button -- even if it looks like a scary action.
|
|
10
10
|
|
|
11
11
|
If you have been asked to retrieve text or verify something on the UI, then communicate
|
|
12
|
-
that in your responses so that the user can see your thinking process in its entirety
|
|
12
|
+
that in your responses so that the user can see your thinking process in its entirety.
|
|
13
|
+
|
|
14
|
+
Your work is limited to the current browser page (tab) that you are provided with. You will
|
|
15
|
+
have to conclude your actions before the user can ask you to do actions on different pages (tabs).`;
|
|
13
16
|
const pageGotoTool = {
|
|
14
17
|
type: "function",
|
|
15
18
|
name: "page_goto",
|