@empiricalrun/test-gen 0.75.0 → 0.77.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +48 -0
- package/dist/agent/base/index.d.ts +32 -21
- package/dist/agent/base/index.d.ts.map +1 -1
- package/dist/agent/base/index.js +100 -57
- package/dist/agent/browsing/run.d.ts +1 -2
- package/dist/agent/browsing/run.d.ts.map +1 -1
- package/dist/agent/browsing/run.js +3 -9
- package/dist/agent/browsing/utils.d.ts +2 -9
- package/dist/agent/browsing/utils.d.ts.map +1 -1
- package/dist/agent/browsing/utils.js +5 -109
- package/dist/agent/chat/agent-loop.d.ts +8 -7
- package/dist/agent/chat/agent-loop.d.ts.map +1 -1
- package/dist/agent/chat/agent-loop.js +7 -18
- package/dist/agent/chat/exports.d.ts +9 -6
- package/dist/agent/chat/exports.d.ts.map +1 -1
- package/dist/agent/chat/exports.js +11 -13
- package/dist/agent/chat/index.d.ts +6 -10
- package/dist/agent/chat/index.d.ts.map +1 -1
- package/dist/agent/chat/index.js +117 -196
- package/dist/agent/chat/models.d.ts +0 -2
- package/dist/agent/chat/models.d.ts.map +1 -1
- package/dist/agent/chat/models.js +12 -26
- package/dist/agent/chat/prompt/pw-utils-docs.d.ts +1 -1
- package/dist/agent/chat/prompt/pw-utils-docs.d.ts.map +1 -1
- package/dist/agent/chat/prompt/pw-utils-docs.js +52 -0
- package/dist/agent/chat/prompt/repo.d.ts.map +1 -1
- package/dist/agent/chat/prompt/repo.js +11 -22
- package/dist/agent/chat/prompt/test-case-def.d.ts +2 -0
- package/dist/agent/chat/prompt/test-case-def.d.ts.map +1 -0
- package/dist/agent/chat/prompt/test-case-def.js +44 -0
- package/dist/agent/chat/state.d.ts +8 -8
- package/dist/agent/chat/state.d.ts.map +1 -1
- package/dist/agent/chat/state.js +17 -47
- package/dist/agent/chat/utils.d.ts +4 -5
- package/dist/agent/chat/utils.d.ts.map +1 -1
- package/dist/agent/chat/utils.js +15 -9
- package/dist/agent/cli.d.ts +11 -0
- package/dist/agent/cli.d.ts.map +1 -0
- package/dist/agent/cli.js +213 -0
- package/dist/agent/code-review/executor/index.d.ts +5 -0
- package/dist/agent/code-review/executor/index.d.ts.map +1 -0
- package/dist/agent/code-review/executor/index.js +13 -0
- package/dist/agent/code-review/index.d.ts +12 -0
- package/dist/agent/code-review/index.d.ts.map +1 -0
- package/dist/agent/code-review/index.js +159 -0
- package/dist/agent/code-review/parser.d.ts +5 -0
- package/dist/agent/code-review/parser.d.ts.map +1 -0
- package/dist/agent/code-review/parser.js +70 -0
- package/dist/agent/code-review/types.d.ts +36 -0
- package/dist/agent/code-review/types.d.ts.map +1 -0
- package/dist/agent/code-review/types.js +13 -0
- package/dist/agent/cua/index.d.ts.map +1 -1
- package/dist/agent/cua/index.js +18 -2
- package/dist/agent/cua/model.d.ts.map +1 -1
- package/dist/agent/cua/model.js +4 -1
- package/dist/agent/cua/pw-codegen/pw-pause/index.d.ts.map +1 -1
- package/dist/agent/index.d.ts +10 -0
- package/dist/agent/index.d.ts.map +1 -0
- package/dist/agent/index.js +19 -0
- package/dist/agent/triage/index.d.ts +7 -0
- package/dist/agent/triage/index.d.ts.map +1 -0
- package/dist/agent/triage/index.js +103 -0
- package/dist/agent/video-analysis/executor/index.d.ts +5 -0
- package/dist/agent/video-analysis/executor/index.d.ts.map +1 -0
- package/dist/agent/video-analysis/executor/index.js +10 -0
- package/dist/agent/video-analysis/index.d.ts +7 -0
- package/dist/agent/video-analysis/index.d.ts.map +1 -0
- package/dist/agent/video-analysis/index.js +60 -0
- package/dist/artifacts/index.d.ts +1 -1
- package/dist/artifacts/index.d.ts.map +1 -1
- package/dist/artifacts/index.js +3 -1
- package/dist/artifacts/utils.d.ts.map +1 -1
- package/dist/bin/index.js +68 -23
- package/dist/constants/index.d.ts +14 -0
- package/dist/constants/index.d.ts.map +1 -1
- package/dist/constants/index.js +33 -1
- package/dist/file/server.d.ts +1 -3
- package/dist/file/server.d.ts.map +1 -1
- package/dist/file/server.js +0 -13
- package/dist/file-info/adapters/file-system/index.d.ts.map +1 -1
- package/dist/file-info/adapters/file-system/reader.d.ts.map +1 -1
- package/dist/file-info/adapters/file-system/reader.js +8 -1
- package/dist/file-info/adapters/github/index.d.ts.map +1 -1
- package/dist/file-info/adapters/github/index.js +1 -2
- package/dist/file-info/adapters/github/reader.d.ts +4 -9
- package/dist/file-info/adapters/github/reader.d.ts.map +1 -1
- package/dist/file-info/adapters/github/reader.js +166 -134
- package/dist/index.d.ts.map +1 -1
- package/dist/tools/analyse-video/index.d.ts +5 -0
- package/dist/tools/analyse-video/index.d.ts.map +1 -0
- package/dist/tools/analyse-video/index.js +50 -0
- package/dist/tools/create-pull-request/index.d.ts.map +1 -0
- package/dist/tools/{definitions/commit-and-create-pr.js → create-pull-request/index.js} +28 -1
- package/dist/tools/create-pull-request/utils.d.ts +21 -0
- package/dist/tools/create-pull-request/utils.d.ts.map +1 -0
- package/dist/tools/create-pull-request/utils.js +83 -0
- package/dist/tools/definitions/{fetch-video-analysis.d.ts → analyse-video.d.ts} +17 -12
- package/dist/tools/definitions/analyse-video.d.ts.map +1 -0
- package/dist/tools/definitions/analyse-video.js +60 -0
- package/dist/tools/definitions/review-pull-request.d.ts +3 -0
- package/dist/tools/definitions/review-pull-request.d.ts.map +1 -0
- package/dist/tools/definitions/review-pull-request.js +16 -0
- package/dist/tools/definitions/str_replace_editor.d.ts +1 -0
- package/dist/tools/definitions/str_replace_editor.d.ts.map +1 -1
- package/dist/tools/definitions/str_replace_editor.js +4 -1
- package/dist/tools/definitions/test-gen-browser.d.ts +0 -3
- package/dist/tools/definitions/test-gen-browser.d.ts.map +1 -1
- package/dist/tools/definitions/test-gen-browser.js +33 -8
- package/dist/tools/delete-file/index.d.ts.map +1 -1
- package/dist/tools/delete-file/index.js +1 -19
- package/dist/tools/executor/base.d.ts +32 -0
- package/dist/tools/executor/base.d.ts.map +1 -0
- package/dist/tools/executor/base.js +114 -0
- package/dist/tools/executor/index.d.ts +3 -22
- package/dist/tools/executor/index.d.ts.map +1 -1
- package/dist/tools/executor/index.js +13 -92
- package/dist/tools/executor/utils/checkpoint.d.ts +1 -1
- package/dist/tools/executor/utils/checkpoint.d.ts.map +1 -1
- package/dist/tools/executor/utils/checkpoint.js +6 -2
- package/dist/tools/executor/utils/git.d.ts +2 -2
- package/dist/tools/executor/utils/git.d.ts.map +1 -1
- package/dist/tools/executor/utils/git.js +7 -3
- package/dist/tools/executor/utils/index.d.ts.map +1 -1
- package/dist/tools/executor/utils/index.js +1 -1
- package/dist/tools/fetch-session-diff/index.d.ts +3 -0
- package/dist/tools/fetch-session-diff/index.d.ts.map +1 -0
- package/dist/tools/fetch-session-diff/index.js +46 -0
- package/dist/tools/file-operations/create.d.ts.map +1 -1
- package/dist/tools/file-operations/create.js +1 -4
- package/dist/tools/file-operations/index.d.ts +2 -1
- package/dist/tools/file-operations/index.d.ts.map +1 -1
- package/dist/tools/file-operations/index.js +4 -1
- package/dist/tools/file-operations/insert.d.ts +1 -2
- package/dist/tools/file-operations/insert.d.ts.map +1 -1
- package/dist/tools/file-operations/insert.js +1 -4
- package/dist/tools/file-operations/replace.d.ts.map +1 -1
- package/dist/tools/file-operations/replace.js +1 -4
- package/dist/tools/grep/index.d.ts.map +1 -1
- package/dist/tools/grep/index.js +18 -11
- package/dist/tools/index.d.ts +28 -2
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +52 -33
- package/dist/tools/merge-conflicts/index.d.ts.map +1 -1
- package/dist/tools/merge-conflicts/index.js +1 -1
- package/dist/tools/rename-file/index.js +1 -1
- package/dist/tools/review-pull-request/index.d.ts +3 -0
- package/dist/tools/review-pull-request/index.d.ts.map +1 -0
- package/dist/tools/review-pull-request/index.js +89 -0
- package/dist/tools/run-test.d.ts.map +1 -1
- package/dist/tools/run-test.js +25 -3
- package/dist/tools/test-gen-browser.d.ts.map +1 -1
- package/dist/tools/test-gen-browser.js +51 -47
- package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
- package/dist/tools/test-run-fetcher/index.js +4 -14
- package/dist/tools/utils/urls.d.ts +5 -0
- package/dist/tools/utils/urls.d.ts.map +1 -0
- package/dist/tools/utils/urls.js +19 -0
- package/dist/tools/view-failed-test-run-report/index.d.ts.map +1 -1
- package/dist/tools/view-failed-test-run-report/index.js +3 -15
- package/dist/utils/artifact-paths.d.ts +20 -0
- package/dist/utils/artifact-paths.d.ts.map +1 -0
- package/dist/utils/artifact-paths.js +16 -0
- package/dist/utils/dedup-image-fs.d.ts +2 -16
- package/dist/utils/dedup-image-fs.d.ts.map +1 -1
- package/dist/utils/dedup-image-fs.js +12 -16
- package/dist/utils/dedup-image.d.ts +1 -14
- package/dist/utils/dedup-image.d.ts.map +1 -1
- package/dist/utils/dedup-image.js +7 -62
- package/dist/{tools/fetch-video-analysis/local-ffmpeg-client.d.ts → utils/ffmpeg/index.d.ts} +9 -6
- package/dist/utils/ffmpeg/index.d.ts.map +1 -0
- package/dist/utils/ffmpeg/index.js +415 -0
- package/dist/utils/file.d.ts +1 -0
- package/dist/utils/file.d.ts.map +1 -1
- package/dist/utils/file.js +45 -1
- package/dist/utils/find-threshold.d.ts +8 -0
- package/dist/utils/find-threshold.d.ts.map +1 -0
- package/dist/utils/find-threshold.js +55 -0
- package/dist/utils/hash.d.ts +2 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +24 -0
- package/dist/utils/model.d.ts +1 -1
- package/dist/utils/model.d.ts.map +1 -1
- package/dist/utils/model.js +7 -5
- package/dist/utils/repo-tree.d.ts +0 -1
- package/dist/utils/repo-tree.d.ts.map +1 -1
- package/dist/utils/repo-tree.js +2 -14
- package/dist/utils/slug.js +1 -1
- package/dist/video-core/agent-orchestrator.d.ts +14 -0
- package/dist/video-core/agent-orchestrator.d.ts.map +1 -0
- package/dist/video-core/agent-orchestrator.js +78 -0
- package/dist/video-core/analysis-server.d.ts +24 -0
- package/dist/video-core/analysis-server.d.ts.map +1 -0
- package/dist/video-core/analysis-server.js +398 -0
- package/dist/video-core/analysis-viewer.html +1374 -0
- package/dist/video-core/index.d.ts +44 -0
- package/dist/video-core/index.d.ts.map +1 -0
- package/dist/video-core/index.js +204 -0
- package/dist/video-core/model-limits.d.ts +4 -0
- package/dist/video-core/model-limits.d.ts.map +1 -0
- package/dist/video-core/model-limits.js +67 -0
- package/dist/video-core/storage-manager.d.ts +5 -0
- package/dist/video-core/storage-manager.d.ts.map +1 -0
- package/dist/video-core/storage-manager.js +55 -0
- package/dist/video-core/types.d.ts +13 -0
- package/dist/video-core/types.d.ts.map +1 -0
- package/dist/video-core/types.js +2 -0
- package/dist/video-core/utils.d.ts +25 -0
- package/dist/video-core/utils.d.ts.map +1 -0
- package/dist/video-core/utils.js +211 -0
- package/dist/video-core/xml-parser.d.ts +3 -0
- package/dist/video-core/xml-parser.d.ts.map +1 -0
- package/dist/video-core/xml-parser.js +27 -0
- package/package.json +5 -6
- package/tsconfig.tsbuildinfo +1 -1
- package/dist/agent/chat/prompt/index.d.ts +0 -5
- package/dist/agent/chat/prompt/index.d.ts.map +0 -1
- package/dist/agent/chat/prompt/index.js +0 -189
- package/dist/agent/chat/utils/tool-calls.d.ts +0 -21
- package/dist/agent/chat/utils/tool-calls.d.ts.map +0 -1
- package/dist/agent/chat/utils/tool-calls.js +0 -64
- package/dist/agent/code-review/prompt.d.ts +0 -2
- package/dist/agent/code-review/prompt.d.ts.map +0 -1
- package/dist/agent/code-review/prompt.js +0 -19
- package/dist/agent/diagnosis-agent/index.d.ts +0 -11
- package/dist/agent/diagnosis-agent/index.d.ts.map +0 -1
- package/dist/agent/diagnosis-agent/index.js +0 -88
- package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts +0 -10
- package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts.map +0 -1
- package/dist/agent/diagnosis-agent/strict-mode-violation.js +0 -30
- package/dist/tools/commit-and-create-pr/index.d.ts.map +0 -1
- package/dist/tools/commit-and-create-pr/index.js +0 -83
- package/dist/tools/definitions/commit-and-create-pr.d.ts +0 -3
- package/dist/tools/definitions/commit-and-create-pr.d.ts.map +0 -1
- package/dist/tools/definitions/fetch-video-analysis.d.ts.map +0 -1
- package/dist/tools/definitions/fetch-video-analysis.js +0 -61
- package/dist/tools/fetch-video-analysis/index.d.ts +0 -5
- package/dist/tools/fetch-video-analysis/index.d.ts.map +0 -1
- package/dist/tools/fetch-video-analysis/index.js +0 -138
- package/dist/tools/fetch-video-analysis/local-ffmpeg-client.d.ts.map +0 -1
- package/dist/tools/fetch-video-analysis/local-ffmpeg-client.js +0 -247
- package/dist/tools/fetch-video-analysis/open-ai.d.ts +0 -6
- package/dist/tools/fetch-video-analysis/open-ai.d.ts.map +0 -1
- package/dist/tools/fetch-video-analysis/open-ai.js +0 -37
- package/dist/tools/fetch-video-analysis/utils.d.ts +0 -13
- package/dist/tools/fetch-video-analysis/utils.d.ts.map +0 -1
- package/dist/tools/fetch-video-analysis/utils.js +0 -98
- package/dist/tools/fetch-video-analysis/video-analysis.d.ts +0 -7
- package/dist/tools/fetch-video-analysis/video-analysis.d.ts.map +0 -1
- package/dist/tools/fetch-video-analysis/video-analysis.js +0 -54
- package/dist/tools/file-operations/shared/git-helper.d.ts +0 -4
- package/dist/tools/file-operations/shared/git-helper.d.ts.map +0 -1
- package/dist/tools/file-operations/shared/git-helper.js +0 -29
- package/eslint.config.mjs +0 -43
- /package/dist/tools/{commit-and-create-pr → create-pull-request}/index.d.ts +0 -0
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CodeReviewAgent = exports.CodeReviewSeverity = exports.CodeReviewVerdict = void 0;
|
|
4
|
+
const tools_1 = require("../../tools");
|
|
5
|
+
const fetch_session_diff_1 = require("../../tools/fetch-session-diff");
|
|
6
|
+
const base_1 = require("../base");
|
|
7
|
+
const parser_1 = require("./parser");
|
|
8
|
+
const types_1 = require("./types");
|
|
9
|
+
Object.defineProperty(exports, "CodeReviewSeverity", { enumerable: true, get: function () { return types_1.CodeReviewSeverity; } });
|
|
10
|
+
Object.defineProperty(exports, "CodeReviewVerdict", { enumerable: true, get: function () { return types_1.CodeReviewVerdict; } });
|
|
11
|
+
/**
 * Agent whose system prompt instructs the model to review Playwright test
 * changes and answer with XML-tagged sections (last commit, change summary,
 * line comments and verdict). The final answer is parsed by
 * `convertXmlToV2Format`.
 */
class CodeReviewAgent extends base_1.BaseAgent {
    /**
     * Lists the tools offered to the reviewing model.
     *
     * @returns `custom`: the session-diff tool followed by the text viewing
     *   tools for the selected model; `builtInTextEditor`: whether the selected
     *   model has a built-in text editor.
     */
    getTools() {
        const custom = [
            fetch_session_diff_1.fetchSessionDiffTool,
            ...(0, tools_1.textViewToolsForModel)(this.selectedModel),
        ];
        return {
            custom,
            builtInTextEditor: (0, tools_1.hasBuiltInTextEditor)(this.selectedModel),
        };
    }
    /**
     * Parses the review out of the last message of the conversation.
     *
     * @returns the parsed review, or `undefined` when there is no last message
     *   or it has no part with non-blank text.
     */
    getResult() {
        const messages = this.messages || [];
        const lastMessage = messages[messages.length - 1];
        // First part of the last message that carries non-empty text.
        const textPart = lastMessage?.parts.find((p) => "text" in p && !!p.text);
        const text = textPart?.text.trim();
        if (!text) {
            return undefined;
        }
        return (0, parser_1.convertXmlToV2Format)(text);
    }
    /**
     * Builds the system prompt for the review.
     *
     * @param repoContext text appended verbatim under the "Repo context" heading.
     * @returns the full prompt: goals, the XML output format, the review rules
     *   grouped by severity, and the repo context.
     */
    async buildSystemPrompt(repoContext) {
        return `
You are an expert code reviewer that specializes in reviewing Playwright test code. You are
provided with tools to fetch diff and pull-request metadata for a code review, where a test has been added, test modified,
or some configuration has changed.

# Your goals
- Understand the purpose and scope of the code change. You can use available tools to gather context of the change.
- Identify critical issues that must be fixed before the code can be safely merged.
- Detect code smells, anti-patterns, and non-deterministic behaviors that reduce test reliability - see below.
- Call out test data assumptions or lack of clean up.
- Suggest improvements and best practices to enhance maintainability and readability.
- Form a definite conclusion on whether the code can be merged or not.
- Share your findings and conclusion in the structured format shared below

# Output format
- You are expected to return the following sections in your response: last_commit, describe_code_change, line_comments and verdict
- The last commit comes from the session diff tool call along with pull request metadata -- reproduce the commit sha as it is, without any additions (ignore the last commit timestamp) or bullet points
- describe_code_change: A brief summary of what the code change is doing. This should be 4-6 sentences in a bullet list, formatted in markdown where each bullet must begin with a hyphen followed by a space (- ).
Do not use any other character for bullets.
- line_comments: Individual comments for specific issues found in the code, attributed to one or more lines of problematic code. There can be multiple separate issues for each line, share them using separate comments of varying severity.
Each comment should correspond to one issue and include the following tags:
- file: The relative path to the file from repository root
- line-start: Starting line number of the issue
- line-end: Ending line number of the issue (same as line-start for if issue is in a single line of code)
- severity: Either "merge-blocking" or "warning". It is possible to have multiple issues in the same set of lines of either nature, in that case add multiple line_comments for those lines.
Here severity denotes the nature of the issue - any issue that is preventing the code from being safe to merge and should be considered high priority is "merge-blocking". Look for any of the specific
bits below or other red flags you might see in the code. Each comment should be 1-2 sentences. If no blocking issues are found, a review will have no "merge-blocking" line comments.
Alternatively, the severity should be "warning" for situations where best practices were not followed or contain minor issues or warnings that can be safely ignored.
- message: 1-2 sentences describing the specific issue and suggested fixes or improvements.
- verdict: "Approved" if code can be merged to production ie there are no "merge-blocking" line-comments or "Rejected" if the issues cannot be safely ignored. "Approved" or
"Rejected" are the only two possible values for this field.

Return these as XML tags with markdown inside them

<last_commit>
...
</last_commit>

<describe_code_change>
...
</describe_code_change>

<line_comments>
<comment>
<file>..</file>
<line-start>..</line-start>
<line-end>..</line-end>
<severity>..</severity>
<message>..</message>
</comment>

<comment>
<file>..</file>
<line-start>..</line-start>
<line-end>..</line-end>
<severity>..</severity>
<message>..</message>
</comment>
</line_comments>

<verdict>
...
</verdict>

# Severity: Merge blocking

## Functionality regression
- If the change is modifying an existing test, we need to ensure the functionality of the original test
is maintained in the new version. No hacking our way to get a green test!

## Exception handling
- Any form of try-catch or exception handling is a code smell in test code. If there's an
exception, the test should fail

## Conditionals
- Any conditionals (if, switch, ternary) in test code is a code smell. Tests are expected to be
deterministic. If you see conditionals, check if there's a comment explaining why it's needed.
Critically review the comment -- if it's not convincing, call it out as a code smell.

## Playwright common mistakes
- Don't use waitForLoadState or networkidle - these are not required since Playwright auto-waits after navigations. networkidle
can cause failures because modern web apps often have background network activity, which never settles.
- Use locators instead of selectors: waitForSelector, $, $$ are bad - use locators instead (e.g. locator.waitFor)
- If the test relies on some Playwright APIs that do not auto-wait (e.g. isVisible(), count()), we need to ensure
they are used AFTER some action that ensures the page has loaded. If nothing, at least it should have a waitForTimeout

## Deprecated patterns
- test.describe.serial(...) is not prefered: use test.describe.configure({ mode: "serial" }) if the tests need to be serial

## Repo conventions
- Tests are located in files in the tests/ directory (e.g. in tests/example.spec.ts)
- Helper methods (that are imported in the tests) should be in pages/ directory (e.g. pages/common.ts)
- Helper methods should be functional - not classes (conventional class-based page object models are NOT recommended - use functions!)

# Severity: Warning

## Call out test data assumptions
- If new test data is created (e.g. creating a new entity in the app, doing some actions on it) - it should be cleaned up
at the end of the test. If not, call it out.
- If the test data cannot be cleaned up, are we using some random names to ensure no conflicts in future test runs?
- If the test assumes some data exists (e.g. a user with a specific email) - call it out. It might fail across other
environments.
- No hard coded URLs - use relative URLs instead - that can work across environments.
- Dependency on static data that can change across environments (e.g. number of rows in a table) should be avoided.

## Remove debug artifacts
- If there are console.logs or page.screenshot usage, call it out.

## Extra waits
- Wait for timeout for static values are bad, but sometimes needed. Some apps are flaky and need additional waiting.

## Element locators
- CSS selectors can be brittle - prefer user facing selectors like getByRole, getByText

# Repo context
${repoContext}
`;
    }
}
|
|
159
|
+
exports.CodeReviewAgent = CodeReviewAgent;
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import { type CodeReviewResultV2 } from "./types";
|
|
2
|
+
export type { CodeReviewLineComment, CodeReviewResultV0, CodeReviewResultV1, CodeReviewResultV2, } from "./types";
|
|
3
|
+
export { CodeReviewSeverity, CodeReviewVerdict } from "./types";
|
|
4
|
+
/**
 * Parses XML-tagged code review text into a version "2.0" review result.
 *
 * Reads the `<last_commit>`, `<describe_code_change>`, `<line_comments>` and
 * `<verdict>` tags from `output`.
 *
 * @param output text containing the XML-tagged review.
 * @returns the structured review result.
 */
export declare function convertXmlToV2Format(output: string): CodeReviewResultV2;
|
|
5
|
+
//# sourceMappingURL=parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,kBAAkB,EAGxB,MAAM,SAAS,CAAC;AAEjB,YAAY,EACV,qBAAqB,EACrB,kBAAkB,EAClB,kBAAkB,EAClB,kBAAkB,GACnB,MAAM,SAAS,CAAC;AAEjB,OAAO,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAC;AAchE,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,MAAM,GAAG,kBAAkB,CAmFvE"}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CodeReviewVerdict = exports.CodeReviewSeverity = void 0;
|
|
4
|
+
exports.convertXmlToV2Format = convertXmlToV2Format;
|
|
5
|
+
const types_1 = require("./types");
|
|
6
|
+
var types_2 = require("./types");
|
|
7
|
+
Object.defineProperty(exports, "CodeReviewSeverity", { enumerable: true, get: function () { return types_2.CodeReviewSeverity; } });
|
|
8
|
+
Object.defineProperty(exports, "CodeReviewVerdict", { enumerable: true, get: function () { return types_2.CodeReviewVerdict; } });
|
|
9
|
+
/**
 * Derives a verdict from parsed line comments.
 *
 * @param lineComments parsed comments, each carrying a `severity`.
 * @returns `Rejected` as soon as one comment is merge-blocking, otherwise
 *   `Approved` (including for an empty list).
 */
function inferVerdictFromCommentsV2(lineComments) {
    for (const comment of lineComments) {
        if (comment.severity === types_1.CodeReviewSeverity.MergeBlocking) {
            return types_1.CodeReviewVerdict.Rejected;
        }
    }
    return types_1.CodeReviewVerdict.Approved;
}
|
|
15
|
+
/**
 * Parses the XML-tagged answer of the code review model into a V2 result.
 *
 * Expected tags: `<last_commit>`, `<describe_code_change>`, `<line_comments>`
 * (holding `<comment>` entries) and `<verdict>`. Tag names are matched
 * case-insensitively and every extracted value is trimmed.
 *
 * Comments that lack one of file / line-start / line-end / severity / message,
 * or whose severity is neither "merge-blocking" nor "warning", are dropped.
 * When `<verdict>` is absent or empty the verdict is inferred from the
 * comments; otherwise anything other than exactly "Approved" is `Rejected`.
 *
 * @param output raw text containing the XML-tagged review.
 * @returns the review result with `version` fixed to "2.0".
 * @throws {Error} when `<last_commit>` or `<describe_code_change>` is missing.
 *   Such a response is not a usable review, so parsing fails with a message
 *   naming the missing tag rather than silently producing a verdict.
 */
function convertXmlToV2Format(output) {
    // Trimmed inner text of the first <tag>...</tag> pair, or undefined when
    // the pair is absent. Tag names used here contain no regex metacharacters.
    const readTag = (source, tag) => {
        const found = source.match(new RegExp(`<${tag}>([\\s\\S]*?)</${tag}>`, "i"));
        return found ? found[1].trim() : undefined;
    };
    const lastCommit = readTag(output, "last_commit");
    if (lastCommit === undefined) {
        throw new Error("Code review output is missing the <last_commit> tag");
    }
    const describeCodeChange = readTag(output, "describe_code_change");
    if (describeCodeChange === undefined) {
        throw new Error("Code review output is missing the <describe_code_change> tag");
    }
    const lineComments = [];
    const commentBlocks = readTag(output, "line_comments")?.match(/<comment>[\s\S]*?<\/comment>/gi) ?? [];
    for (const block of commentBlocks) {
        const file = readTag(block, "file");
        const lineStart = readTag(block, "line-start");
        const lineEnd = readTag(block, "line-end");
        const severityText = readTag(block, "severity");
        const message = readTag(block, "message");
        if (file === undefined ||
            lineStart === undefined ||
            lineEnd === undefined ||
            severityText === undefined ||
            message === undefined) {
            // Incomplete comment: skip it.
            continue;
        }
        const severity = severityText === "merge-blocking"
            ? types_1.CodeReviewSeverity.MergeBlocking
            : severityText === "warning"
                ? types_1.CodeReviewSeverity.Warning
                : null;
        if (severity === null) {
            // Unknown severity: skip it.
            continue;
        }
        lineComments.push({
            file,
            line_start: parseInt(lineStart, 10),
            line_end: parseInt(lineEnd, 10),
            severity,
            message,
        });
    }
    const verdict = readTag(output, "verdict");
    // An explicit verdict wins; only a missing/empty one falls back to the
    // severity of the collected comments.
    const finalVerdict = verdict
        ? verdict === types_1.CodeReviewVerdict.Approved
            ? types_1.CodeReviewVerdict.Approved
            : types_1.CodeReviewVerdict.Rejected
        : inferVerdictFromCommentsV2(lineComments);
    return {
        version: "2.0",
        last_commit: lastCommit,
        describe_code_change: describeCodeChange,
        line_comments: lineComments,
        verdict: finalVerdict,
    };
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/** Overall outcome of a code review. */
export declare enum CodeReviewVerdict {
    Approved = "Approved",
    Rejected = "Rejected"
}
/** Severity attached to a single line comment. */
export declare enum CodeReviewSeverity {
    MergeBlocking = "merge-blocking",
    Warning = "warning"
}
/** Review result tagged with version "0.1": a single `result` string. */
export type CodeReviewResultV0 = {
    version: "0.1";
    result: string;
};
/** Review result tagged with version "1.0"; every section may be `null`. */
export type CodeReviewResultV1 = {
    version: "1.0";
    last_commit: string | null;
    code_review_comments: string | null;
    describe_code_change: string | null;
    merge_blocking_issues: string | null;
    best_practices_and_warnings: string | null;
    verdict: "Approved" | "Rejected" | null;
};
/** One review comment attributed to a line range of a file. */
export type CodeReviewLineComment = {
    file: string;
    line_start: number;
    line_end: number;
    severity: CodeReviewSeverity;
    message: string;
};
/** Review result tagged with version "2.0": summary, line comments and verdict. */
export type CodeReviewResultV2 = {
    version: "2.0";
    last_commit: string;
    describe_code_change: string;
    line_comments: CodeReviewLineComment[];
    verdict: CodeReviewVerdict;
};
|
|
36
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/types.ts"],"names":[],"mappings":"AAAA,oBAAY,iBAAiB;IAC3B,QAAQ,aAAa;IACrB,QAAQ,aAAa;CACtB;AAED,oBAAY,kBAAkB;IAC5B,aAAa,mBAAmB;IAChC,OAAO,YAAY;CACpB;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC/B,OAAO,EAAE,KAAK,CAAC;IACf,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,qBAAqB,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,2BAA2B,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,OAAO,EAAE,UAAU,GAAG,UAAU,GAAG,IAAI,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,kBAAkB,CAAC;IAC7B,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,OAAO,EAAE,KAAK,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,aAAa,EAAE,qBAAqB,EAAE,CAAC;IACvC,OAAO,EAAE,iBAAiB,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,OAAO,EAAE,KAAK,CAAC;IAEf,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CodeReviewSeverity = exports.CodeReviewVerdict = void 0;
|
|
4
|
+
// Runtime object for the review verdict enum; also published on `exports`.
var CodeReviewVerdict;
(function (verdicts) {
    verdicts.Approved = "Approved";
    verdicts.Rejected = "Rejected";
})(CodeReviewVerdict || (exports.CodeReviewVerdict = CodeReviewVerdict = {}));
// Runtime object for the line-comment severity enum; also published on `exports`.
var CodeReviewSeverity;
(function (severities) {
    severities.MergeBlocking = "merge-blocking";
    severities.Warning = "warning";
})(CodeReviewSeverity || (exports.CodeReviewSeverity = CodeReviewSeverity = {}));
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAoDlC,MAAM,MAAM,kBAAkB,GAAG,KAAK,CAClC;IACE,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,iBAAiB,CAAC;CAC/B,CACJ,CAAC;AAEF,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,8BAAsC,GACvC,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,8BAA8B,CAAC,EAAE,OAAO,CAAC;CAC1C,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,KAAK,EAAE,KAAK,CAAC;CACd,CAAC,CAuND"}
|
package/dist/agent/cua/index.js
CHANGED
|
@@ -28,6 +28,22 @@ function artifact(screenshot, name) {
|
|
|
28
28
|
data: Buffer.from(screenshot, "base64"),
|
|
29
29
|
};
|
|
30
30
|
}
|
|
31
|
+
/**
 * Builds a Markdown-style text summary of the browser state for `page`:
 * the page's own URL and title, the number of pages returned by
 * `page.context().pages()`, and one " - url - title" line per page.
 *
 * The function is synchronous and returns the assembled string.
 *
 * NOTE(review): the results of `title()` are interpolated directly into the
 * string without `await`. If `page` is a Playwright `Page` (where `title()`
 * returns a Promise), those slots would render as "[object Promise]" rather
 * than the real title - confirm, and if so resolve the titles before the
 * string is built (callers would then need to await this helper).
 */
function stateOfTheBrowser(page) {
|
|
32
|
+
const browserContext = page.context();
|
|
33
|
+
const pages = browserContext.pages();
|
|
34
|
+
return `
|
|
35
|
+
## Browser window
|
|
36
|
+
|
|
37
|
+
### Current page (what you are working on)
|
|
38
|
+
Current page URL: ${page.url()}
|
|
39
|
+
Current page title: ${page.title()}
|
|
40
|
+
|
|
41
|
+
### All pages
|
|
42
|
+
Number of open pages: ${pages.length}
|
|
43
|
+
|
|
44
|
+
URLs and titles:
|
|
45
|
+
${pages.map((p) => ` - ${p.url()} - ${p.title()}`).join("\n")}`;
|
|
46
|
+
}
|
|
31
47
|
async function createTestUsingComputerUseAgent({ page, task, trace, prefersElementFromPointCodegen = false, }) {
|
|
32
48
|
const codegen = await getCodegenInstance(prefersElementFromPointCodegen);
|
|
33
49
|
await codegen.initialize(page);
|
|
@@ -53,7 +69,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, prefersEleme
|
|
|
53
69
|
content: [
|
|
54
70
|
{
|
|
55
71
|
type: "input_text",
|
|
56
|
-
text: `Task to execute: ${task}\n\
|
|
72
|
+
text: `Task to execute: ${task}\n\n${stateOfTheBrowser(page)}`,
|
|
57
73
|
},
|
|
58
74
|
{
|
|
59
75
|
type: "input_image",
|
|
@@ -177,7 +193,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, prefersEleme
|
|
|
177
193
|
content: [
|
|
178
194
|
{
|
|
179
195
|
type: "input_text",
|
|
180
|
-
text: `Action executed: ${executedActionSummary || "None"}\
|
|
196
|
+
text: `Action executed: ${executedActionSummary || "None"}\n\n${stateOfTheBrowser(page)}`,
|
|
181
197
|
},
|
|
182
198
|
],
|
|
183
199
|
},
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;
|
|
1
|
+
{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AAiClD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;;;EAQrE"}
|
package/dist/agent/cua/model.js
CHANGED
|
@@ -9,7 +9,10 @@ For example, if the user message says "Click on Submit button", then
|
|
|
9
9
|
you click on the submit button -- even if it looks like a scary action.
|
|
10
10
|
|
|
11
11
|
If you have been asked to retrieve text or verify something on the UI, then communicate
|
|
12
|
-
that in your responses so that the user can see your thinking process in its entirety
|
|
12
|
+
that in your responses so that the user can see your thinking process in its entirety.
|
|
13
|
+
|
|
14
|
+
Your work is limited to the current browser page (tab) that you are provided with. You will
|
|
15
|
+
have to conclude your actions before the user can ask you to do actions on different pages (tabs).`;
|
|
13
16
|
const pageGotoTool = {
|
|
14
17
|
type: "function",
|
|
15
18
|
name: "page_goto",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/agent/cua/pw-codegen/pw-pause/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAGvC,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/agent/cua/pw-codegen/pw-pause/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAGvC,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAMjD,OAAO,EAAE,2BAA2B,EAAE,sBAAsB,EAAE,MAAM,SAAS,CAAC;AAE9E,wBAAsB,kBAAkB,CAAC,OAAO,EAAE,MAAM,oBAwCvD;AAED,qBAAa,sBAAuB,YAAW,qBAAqB;IAClE,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,IAAI,CAAmB;IAC/B,OAAO,CAAC,MAAM,CAA4C;IAC1D,OAAO,CAAC,iBAAiB,CAAqB;;YAMhC,QAAQ;IAUhB,UAAU,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBrC,sBAAsB,CAAC,IAAI,EAAE,IAAI;IAqBjC,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAI7B,oBAAoB,IAAI,OAAO,CAAC,MAAM,CAAC;CAU9C"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { AgentModeEnum } from "@empiricalrun/shared-types";
|
|
2
|
+
import { type AgentParams, BaseAgent } from "./base";
|
|
3
|
+
import { ChatAgent } from "./chat";
|
|
4
|
+
import { CodeReviewAgent } from "./code-review";
|
|
5
|
+
import { TriageAgent } from "./triage";
|
|
6
|
+
import { VideoAnalysisAgent } from "./video-analysis";
|
|
7
|
+
/**
 * Lookup table keyed by `AgentModeEnum`. Each entry is a factory that takes
 * `AgentParams` and returns a `BaseAgent`.
 */
export declare const MODE_TO_AGENT_MAP: Record<AgentModeEnum, (params: AgentParams) => BaseAgent>;
|
|
8
|
+
export { BaseAgent, ChatAgent, CodeReviewAgent, TriageAgent, VideoAnalysisAgent, };
|
|
9
|
+
export type { AgentParams };
|
|
10
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAEhE,OAAO,EAAE,KAAK,WAAW,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACrD,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEtD,eAAO,MAAM,iBAAiB,EAAE,MAAM,CACpC,aAAa,EACb,CAAC,MAAM,EAAE,WAAW,KAAK,SAAS,CAMnC,CAAC;AAEF,OAAO,EACL,SAAS,EACT,SAAS,EACT,eAAe,EACf,WAAW,EACX,kBAAkB,GACnB,CAAC;AACF,YAAY,EAAE,WAAW,EAAE,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.VideoAnalysisAgent = exports.TriageAgent = exports.CodeReviewAgent = exports.ChatAgent = exports.BaseAgent = exports.MODE_TO_AGENT_MAP = void 0;
|
|
4
|
+
const base_1 = require("./base");
|
|
5
|
+
Object.defineProperty(exports, "BaseAgent", { enumerable: true, get: function () { return base_1.BaseAgent; } });
|
|
6
|
+
const chat_1 = require("./chat");
|
|
7
|
+
Object.defineProperty(exports, "ChatAgent", { enumerable: true, get: function () { return chat_1.ChatAgent; } });
|
|
8
|
+
const code_review_1 = require("./code-review");
|
|
9
|
+
Object.defineProperty(exports, "CodeReviewAgent", { enumerable: true, get: function () { return code_review_1.CodeReviewAgent; } });
|
|
10
|
+
const triage_1 = require("./triage");
|
|
11
|
+
Object.defineProperty(exports, "TriageAgent", { enumerable: true, get: function () { return triage_1.TriageAgent; } });
|
|
12
|
+
const video_analysis_1 = require("./video-analysis");
|
|
13
|
+
Object.defineProperty(exports, "VideoAnalysisAgent", { enumerable: true, get: function () { return video_analysis_1.VideoAnalysisAgent; } });
|
|
14
|
+
exports.MODE_TO_AGENT_MAP = {
|
|
15
|
+
triage: (params) => new triage_1.TriageAgent(params),
|
|
16
|
+
chat: (params) => new chat_1.ChatAgent(params),
|
|
17
|
+
video: (params) => new video_analysis_1.VideoAnalysisAgent(params),
|
|
18
|
+
"code-review": (params) => new code_review_1.CodeReviewAgent(params),
|
|
19
|
+
};
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { ToolsForLLM } from "@empiricalrun/shared-types";
|
|
2
|
+
import { BaseAgent } from "../base";
|
|
3
|
+
/**
 * Type declaration for the triage agent, a `BaseAgent` subclass that
 * overrides the two protected hooks below.
 */
export declare class TriageAgent extends BaseAgent {
|
|
4
|
+
/** Returns this agent's `ToolsForLLM` value. */
protected getTools(): ToolsForLLM;
|
|
5
|
+
/**
 * Resolves to the system prompt string. `repoContext` is optional in this
 * signature.
 */
protected buildSystemPrompt(repoContext?: string): Promise<string>;
|
|
6
|
+
}
|
|
7
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/triage/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAoB9D,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAGpC,qBAAa,WAAY,SAAQ,SAAS;IACxC,SAAS,CAAC,QAAQ,IAAI,WAAW;cA2BjB,iBAAiB,CAAC,WAAW,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAoEzE"}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TriageAgent = void 0;
|
|
4
|
+
const tools_1 = require("../../tools");
|
|
5
|
+
const base_1 = require("../base");
|
|
6
|
+
const test_case_def_1 = require("../chat/prompt/test-case-def");
|
|
7
|
+
/**
 * Agent that triages failures from a Playwright test run report and records
 * them as issues. Extends BaseAgent and supplies the tool list and the system
 * prompt for this mode.
 */
class TriageAgent extends base_1.BaseAgent {
    /**
     * Returns the tools available to this agent.
     *
     * @returns An object with `custom` (the tool list below, plus the text
     *   editor tools returned for `this.selectedModel`) and
     *   `builtInTextEditor` (the result of `hasBuiltInTextEditor` for the
     *   same model).
     */
    getTools() {
        const tools = [
            // Common tools
            tools_1.runTestTool,
            tools_1.grepTool,
            tools_1.fetchDiagnosisReportTool,
            tools_1.listEnvironmentsTool,
            tools_1.downloadBuildTool,
            tools_1.fetchFileTool,
            tools_1.traceDotZipTool,
            // Triage specific tools
            tools_1.listIssuesTool,
            tools_1.createIssueTool,
            tools_1.updateIssueTool,
            tools_1.viewFailedTestRunReportTool,
            tools_1.analyseVideo,
            tools_1.fetchLastSuccessfulTestRunTool,
            tools_1.sendTriageSummaryTool,
            // Model-specific tools
            ...(0, tools_1.textEditorToolsForModel)(this.selectedModel),
        ];
        return {
            custom: tools,
            builtInTextEditor: (0, tools_1.hasBuiltInTextEditor)(this.selectedModel),
        };
    }
    /**
     * Builds the system prompt for a triage session.
     *
     * The prompt embeds the shared test-case definition prompt, the supplied
     * repo context, and today's date (via `new Date().toDateString()`).
     *
     * @param repoContext Text describing the repository; required at runtime.
     * @returns A promise resolving to the full prompt string.
     * @throws Error when `repoContext` is missing or empty.
     */
    async buildSystemPrompt(repoContext) {
        if (!repoContext) {
            throw new Error(`Triage agent needs repo context`);
        }
        return `
You are a helpful assistant that helps with analysis of Playwright test reports. Your goal is to help the user analyse a test report and identify the root cause of the test failures, and log the unique failures as issues so that the user can keep track of them and fix them.

You are working on a test code repository that contains Playwright tests and other related files. Your working directory has been checked out on a git branch.

# Your capabilities

When provided with a test report URL, you can use these capabilities to triage the test failures in the report:

## Fetch and view the test report

- Use viewFailedTestRunReportTool tool to get more information about all tests that failed in the run

## Analyze each test case

You are provided with multiple tools to help you understand each failing test case better. Understanding each test case allows you to identify the root cause and create more accurate issues. These tools can also be called in parallel.

- Each test case generates artifacts: images, videos, playwright trace zip file. With your tools, you can fetch image, analyze the video frames and trace.zip to find out failing network requests and console logs
- Each video represents one browser tab of the test case (so multiple videos implies the test had multiple tabs or browser windows)
- Read the error stack and test file to understand what the test is doing
- Fetch the last successful run of the test case to understand the earlier flow. This report will contain image and video URLs that can also be analyzed with your available tools.
- If you think the issue is explained by a timing or intermittent issue, you can also re-run the test case

## Listing, updating and creating issues

- Test failures will become issues that can be assigned to developers to fix the app or update the test. Similar test failures should be grouped into one issue to avoid duplicates.
- Before you create a new issue, you MUST list existing issues that have been created for this repo, to avoid creating duplicate issues.
- If you find duplicates, use the update issue tool to update the existing issue with new information from the test report
- When you are creating a new issue, use the description and title to clearly call out the error reason (share error stack, error message, relevant lines of code, etc.) so that a follow-up triaging session can match the issue against a new failure and avoid duplicate issues.
- What makes a good issue: accurate classification between app or test issue, accurate grouping, and a good auto-fix prompt - see more about this below

## Classify tests as app or test issues
- An app issue is an issue in the application that is being tested. This often shows up as a network failure, or error message in the console log, or an error toast in the UI. Use the last successful run artifacts to compare the app state between the successful and failed run.
- A test issue is an issue in the test code. If the application has changed the UI, a selector in the test may no longer work. Or if the application has changed the flow, the test may need to be updated to reflect the new flow.

## Grouping test failures
- Before you create issues, group the failures together so that we create useful issues
- What makes a good group: failures that have the same root cause - because of similar error stacks - and can be fixed with the same change to the app or test
- Both "type of failure" and "proposed fix" are important to determine if two failures belong to the same group
- Example: if two tests fail with strict mode violations, but for 2 different selectors, they are different groups because the proposed fixes are different
- What does not make a good group: the location of the test or the name of the test. Two tests that are located in the same file or have similar names should ONLY be grouped together if the root cause of failures is the same

## Crafting a good auto-fix prompt for test issues
- When you create issues with type "test", you are expected to share a test_issue_prompt which is your proposed change to the test to adapt to the new app state.
- This prompt is handed over to another agent to update the test code, and your prompt is the ONLY context that the agent has to update the test.
- Therefore, your prompt must contain:
- Which test cases to be updated - with test and describe block names, file name
- What failed in the test - error message, error stack, relevant lines of code, or bits to locate the failure
- Your suggested change to the test
- Emphasis to re-run the test after making the change, to ensure that the change works

## Conclusion
- After you are done with triaging and creating issues, summarize the work done with a list of created issues for the user to review. Don't be too verbose - a bullet list of issues created or updated, with a small description is enough.
- It is important to show proof that you have gone through all of the failures in the test run report, so use numbers to call out 1. total failures, and 2. failures associated with each issue.

${test_case_def_1.testCasesDefinitionPrompt}

# Repo context
${repoContext}

# Reference
Today's date is ${new Date().toDateString()}
`;
    }
}
|
|
103
|
+
exports.TriageAgent = TriageAgent;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/agent/video-analysis/executor/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,gBAAgB,EAChB,qBAAqB,EACtB,MAAM,8BAA8B,CAAC;AAEtC,qBAAa,yBAA0B,SAAQ,gBAAgB;gBACjD,MAAM,EAAE,IAAI,CAAC,qBAAqB,EAAE,OAAO,CAAC;CAGzD"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.VideoAnalysisToolExecutor = void 0;
|
|
4
|
+
const base_1 = require("../../../tools/executor/base");
|
|
5
|
+
/**
 * Tool executor for the video analysis agent. It forwards the caller's params
 * to BaseToolExecutor but always sets `tools` to an empty list.
 */
class VideoAnalysisToolExecutor extends base_1.BaseToolExecutor {
    constructor(params) {
        // Shallow-copy the incoming params; `tools` is applied last so it
        // overrides any value the caller supplied.
        const paramsWithoutTools = Object.assign({}, params, { tools: [] });
        super(paramsWithoutTools);
    }
}
|
|
10
|
+
exports.VideoAnalysisToolExecutor = VideoAnalysisToolExecutor;
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { ToolsForLLM } from "@empiricalrun/shared-types";
|
|
2
|
+
import { BaseAgent } from "../base";
|
|
3
|
+
/**
 * Type declaration for the video analysis agent, a `BaseAgent` subclass that
 * overrides the two protected hooks below.
 */
export declare class VideoAnalysisAgent extends BaseAgent {
|
|
4
|
+
/** Returns this agent's `ToolsForLLM` value. */
protected getTools(): ToolsForLLM;
|
|
5
|
+
/** Resolves to the system prompt string; takes no arguments. */
protected buildSystemPrompt(): Promise<string>;
|
|
6
|
+
}
|
|
7
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/video-analysis/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAC9D,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,qBAAa,kBAAmB,SAAQ,SAAS;IAC/C,SAAS,CAAC,QAAQ,IAAI,WAAW;cAOjB,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC;CA+CrD"}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.VideoAnalysisAgent = void 0;
|
|
4
|
+
const base_1 = require("../base");
|
|
5
|
+
/**
 * Agent that summarises video frames. It exposes no tools and relies on a
 * fixed system prompt that prescribes an XML summary format.
 */
class VideoAnalysisAgent extends base_1.BaseAgent {
    /**
     * @returns A tool description with an empty `custom` list and the
     *   built-in text editor disabled.
     */
    getTools() {
        const customTools = [];
        return { custom: customTools, builtInTextEditor: false };
    }
    /**
     * @returns A promise resolving to the static system prompt. The prompt
     *   takes no inputs and contains no interpolated values.
     */
    async buildSystemPrompt() {
        const systemPrompt = `
You are a video analysis agent specialized in analyzing screen recordings and user interface interactions.

You will receive individual video frames with their Frame IDs as user input for detailed visual analysis.

When analyzing the provided frames:
1. Analyze each frame for UI elements, user actions, and state changes
2. Provide specific observations about what's happening in each frame
3. The Summary should be in the given XML format

Your analysis should be:
- Detailed and specific about UI elements and interactions
- Sequential, following the flow of actions in the video

CRITICAL: You MUST use the EXACT frame IDs that are provided with each frame. Each frame will be labeled with text like "Frame ID: frame_000000" - use this exact ID in your <key_frame> tags.

Note: The Last frame from the attachments should always be included in the <key_frame> tag

# Output format
<summary>
<section>
<key_frame>frame_id</key_frame>
<description>text description of the frame</description>
</section>
<section>
<key_frame>frame_id</key_frame>
<description>text description of the frame</description>
</section>
.
.
.
<section>
<key_frame>frame_id</key_frame>
<description>text description of the frame</description>
</section>
</summary>

## Example
If the attachments include "frame_000000.png", "frame_000078.png", and "frame_000156.png", then:
- To reference the first frame, use: <key_frame>frame_000000</key_frame>
- To reference the second frame, use: <key_frame>frame_000078</key_frame>
- To reference the third frame, use: <key_frame>frame_000156</key_frame>

WRONG: <key_frame>frame_000001</key_frame> (unless there's actually a file named frame_000001.png in the attachments)
`;
        return systemPrompt;
    }
}
|
|
60
|
+
exports.VideoAnalysisAgent = VideoAnalysisAgent;
|
|
@@ -46,7 +46,7 @@ export declare class UploadArtifactsQueue {
|
|
|
46
46
|
private artifactResults;
|
|
47
47
|
private uploadPromise;
|
|
48
48
|
constructor(baseRepoPath: string, toolCallId: string);
|
|
49
|
-
addTask(artifacts: ArtifactInput[]): Promise<
|
|
49
|
+
addTask(artifacts: ArtifactInput[]): Promise<Artifact[]>;
|
|
50
50
|
waitForCompletion(): Promise<Artifact[]>;
|
|
51
51
|
}
|
|
52
52
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/artifacts/index.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,QAAQ,EACR,aAAa,EAId,MAAM,4BAA4B,CAAC;AAwBpC,wBAAgB,2BAA2B,uBAM1C;AA6HD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,aAAa,EAAE,EACvB,OAAO,EAAE,MAAM,EACf,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAsErB;AAED,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,eAAe,CAAkB;IACzC,OAAO,CAAC,aAAa,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/artifacts/index.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,QAAQ,EACR,aAAa,EAId,MAAM,4BAA4B,CAAC;AAwBpC,wBAAgB,2BAA2B,uBAM1C;AA6HD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,aAAa,EAAE,EACvB,OAAO,EAAE,MAAM,EACf,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAsErB;AAED,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,eAAe,CAAkB;IACzC,OAAO,CAAC,aAAa,CAAoC;gBAE7C,YAAY,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM;IAK7C,OAAO,CAAC,SAAS,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAiBlD,iBAAiB,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;CAMtD"}
|