@skyramp/mcp 0.2.0-rc.1 → 0.2.0-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +4 -2
- package/build/prompts/code-reuse.js +106 -7
- package/build/prompts/pom-aware-code-reuse.js +106 -7
- package/build/prompts/startTraceCollectionPrompts.js +37 -15
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +26 -31
- package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +40 -1
- package/build/prompts/test-maintenance/driftAnalysisSections.js +90 -86
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +286 -163
- package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -45
- package/build/prompts/test-recommendation/diffExecutionPlan.js +215 -117
- package/build/prompts/test-recommendation/promptPlan.js +290 -0
- package/build/prompts/test-recommendation/promptPlan.test.js +336 -0
- package/build/prompts/test-recommendation/recommendationSections.js +3 -1
- package/build/prompts/test-recommendation/recommendationShared.js +23 -1
- package/build/prompts/test-recommendation/scopeAssessment.js +65 -14
- package/build/prompts/test-recommendation/scopeAssessment.test.js +93 -2
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +36 -12
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +222 -1
- package/build/prompts/testbot/testbot-prompts.js +18 -62
- package/build/prompts/testbot/testbot-prompts.test.js +65 -31
- package/build/services/ScenarioGenerationService.js +11 -1
- package/build/services/TestExecutionService.js +73 -15
- package/build/services/TestExecutionService.test.js +105 -0
- package/build/services/TestGenerationService.js +11 -1
- package/build/tools/executeSkyrampTestTool.js +1 -10
- package/build/tools/test-management/actionsTool.js +152 -63
- package/build/tools/test-management/analyzeChangesTool.js +171 -63
- package/build/tools/test-management/analyzeChangesTool.test.js +103 -16
- package/build/tools/test-management/analyzeTestHealthTool.js +30 -81
- package/build/tools/test-management/index.js +1 -0
- package/build/tools/test-management/uiAnalyzeChangesTool.js +149 -0
- package/build/tools/test-management/uiAnalyzeChangesTool.test.js +100 -0
- package/build/tools/trace/resolveSaveStoragePath.js +16 -0
- package/build/tools/trace/resolveSaveStoragePath.test.js +17 -0
- package/build/tools/trace/resolveSessionPaths.js +39 -0
- package/build/tools/trace/resolveSessionPaths.test.js +103 -0
- package/build/tools/trace/sessionState.js +14 -0
- package/build/tools/trace/sessionState.test.js +17 -0
- package/build/tools/trace/startTraceCollectionTool.js +84 -14
- package/build/tools/trace/stopTraceCollectionTool.js +9 -2
- package/build/types/TestAnalysis.js +50 -0
- package/build/types/TestRecommendation.js +6 -58
- package/build/types/TestTypes.js +1 -1
- package/build/utils/AnalysisStateManager.js +22 -11
- package/build/utils/branchDiff.js +11 -2
- package/build/utils/docker.test.js +1 -1
- package/build/utils/gitStaging.js +52 -3
- package/build/utils/gitStaging.test.js +19 -1
- package/build/utils/repoScanner.js +18 -10
- package/build/utils/repoScanner.test.js +92 -0
- package/build/utils/routeParsers.js +168 -25
- package/build/utils/routeParsers.test.js +180 -1
- package/build/utils/scenarioDrafting.js +220 -17
- package/build/utils/scenarioDrafting.test.js +182 -9
- package/build/utils/sourceRouteExtractor.js +806 -0
- package/build/utils/sourceRouteExtractor.test.js +565 -0
- package/build/utils/uiPageEnumerator.js +319 -0
- package/build/utils/uiPageEnumerator.test.js +422 -0
- package/build/utils/utils.js +27 -0
- package/build/utils/versions.js +1 -1
- package/build/utils/workspaceAuth.js +33 -4
- package/node_modules/playwright/lib/dom-analyzer/blueprint.js +54 -5
- package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.js +4 -0
- package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.test.js +6 -0
- package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.js +150 -0
- package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.test.js +470 -0
- package/node_modules/playwright/lib/mcp/browser/tab.js +1 -1
- package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.js +21 -4
- package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.test.js +3 -0
- package/node_modules/playwright/package.json +1 -1
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.4.tgz +0 -0
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.5.tgz +0 -0
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.6.tgz +0 -0
- package/package.json +3 -3
- package/build/services/TestHealthService.js +0 -694
- package/build/services/TestHealthService.test.js +0 -241
- package/build/types/TestDriftAnalysis.js +0 -1
- package/build/types/TestHealth.js +0 -4
|
@@ -2,40 +2,48 @@
|
|
|
2
2
|
* Modular section builders for the Drift Analysis prompt,
|
|
3
3
|
* mirroring the recommendationSections.ts pattern.
|
|
4
4
|
*/
|
|
5
|
-
export function buildDriftScoringGuide() {
|
|
6
|
-
return `## Drift Score Guide (0–100)
|
|
7
|
-
|
|
8
|
-
| Score | Label | Meaning |
|
|
9
|
-
|-------|-------|---------|
|
|
10
|
-
| 0–20 | IGNORE | No meaningful drift — test is still valid as-is |
|
|
11
|
-
| 21–40 | VERIFY | Minor changes detected — review but likely fine |
|
|
12
|
-
| 41–70 | UPDATE | Breaking changes detected — test needs edits |
|
|
13
|
-
| 71–100 | REGENERATE | Major structural changes — regenerate from scratch |
|
|
14
|
-
| 80–100 | DELETE | ALL endpoints the test covers were removed — test is obsolete |
|
|
15
|
-
|
|
16
|
-
DELETE and REGENERATE overlap in the 80–100 range. The distinction is cause, not score: DELETE when the endpoints no longer exist, REGENERATE when they still exist but changed drastically.
|
|
17
|
-
|
|
18
|
-
Assign each existing test a score based on how much the codebase has changed relative to what the test expects.`;
|
|
19
|
-
}
|
|
20
5
|
export function buildActionDecisionMatrix() {
|
|
21
|
-
return
|
|
6
|
+
return `<decision_rules>
|
|
7
|
+
## Action Decision Tree
|
|
22
8
|
|
|
23
|
-
For each test,
|
|
9
|
+
For each existing test, work through these checks in order — the first match wins:
|
|
24
10
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
11
|
+
1. **All endpoints the test covers were removed** → **DELETE**
|
|
12
|
+
2. **Some endpoints removed, some renamed** → **UPDATE**
|
|
13
|
+
3. **New response field added to a covered endpoint** → **UPDATE** — the test needs a new assertion even if existing assertions still pass
|
|
14
|
+
4. **Shape change breaks assertions (field-level: ≤2 fields changed, renamed, or type-swapped)** → **UPDATE**
|
|
15
|
+
**Shape change restructures the root response (flat→nested, new wrapper object, root key renamed, ≥50% of test assertions broken)** → **REGENERATE**
|
|
16
|
+
5. **Auth added or auth method changed** → **UPDATE**
|
|
17
|
+
**Auth removed** → **VERIFY**
|
|
18
|
+
6. **No breaking changes detected** → **IGNORE** or **VERIFY** for minor drift
|
|
33
19
|
|
|
34
20
|
Rules:
|
|
35
|
-
-
|
|
21
|
+
- DELETE when all covered endpoints no longer exist; REGENERATE when they still exist but changed drastically.
|
|
22
|
+
- REGENERATE means: the top-level response shape changed (flat→nested, new wrapper object added, root key renamed), OR ≥50% of the test's assertions reference fields that were removed or restructured. In all other cases, prefer UPDATE.
|
|
23
|
+
- Prefer UPDATE over REGENERATE when changes are field-level (≤2 fields added, removed, renamed, or type-swapped).
|
|
36
24
|
- Prefer IGNORE over VERIFY when all changed files are unrelated to the test's endpoint.
|
|
37
|
-
-
|
|
38
|
-
|
|
25
|
+
- ADD actions belong in the next step — complete this assessment with IGNORE / VERIFY / UPDATE / REGENERATE / DELETE only.
|
|
26
|
+
|
|
27
|
+
<examples>
|
|
28
|
+
<example>
|
|
29
|
+
Diff adds one field to a response object and renames a URL path segment:
|
|
30
|
+
\`\`\`
|
|
31
|
+
- @app.route("/users/<id>/orders")
|
|
32
|
+
+ @app.route("/users/<id>/purchases")
|
|
33
|
+
+ "total_items": len(order.items)
|
|
34
|
+
\`\`\`
|
|
35
|
+
→ **UPDATE**: path rename + one new field — both are field-level changes. Patch the URL and add an assertion for \`total_items\`.
|
|
36
|
+
</example>
|
|
37
|
+
<example>
|
|
38
|
+
Diff wraps the entire response in a new envelope object:
|
|
39
|
+
\`\`\`
|
|
40
|
+
- return Response({"id": ..., "status": ..., "items": [...]})
|
|
41
|
+
+ return Response({"data": {"id": ..., "status": ..., "items": [...]}, "meta": {"page": 1}})
|
|
42
|
+
\`\`\`
|
|
43
|
+
→ **REGENERATE**: root shape changed from a flat object to \`{data, meta}\`. Every existing assertion (e.g. \`response["id"]\`, \`response["status"]\`) is broken — rewrite the test from scratch.
|
|
44
|
+
</example>
|
|
45
|
+
</examples>
|
|
46
|
+
</decision_rules>`;
|
|
39
47
|
}
|
|
40
48
|
export function buildBreakingChangePatterns() {
|
|
41
49
|
return `## Breaking Change Patterns to Detect
|
|
@@ -78,45 +86,48 @@ For each existing test file, run these checks:
|
|
|
78
86
|
|
|
79
87
|
### Check A: Endpoint existence
|
|
80
88
|
Does the endpoint the test targets still exist in the codebase?
|
|
81
|
-
- If ALL endpoints the test covers were removed →
|
|
82
|
-
- If SOME methods were removed but others remain →
|
|
83
|
-
- If the endpoint was renamed →
|
|
89
|
+
- If ALL endpoints the test covers were removed → action: DELETE (the entire test file is obsolete)
|
|
90
|
+
- If SOME methods were removed but others remain → action: UPDATE (remove the test functions for deleted methods, keep the rest)
|
|
91
|
+
- If the endpoint was renamed → action: UPDATE (path substitution)
|
|
84
92
|
|
|
85
93
|
### Check B: Request/response shape (breaking changes)
|
|
86
94
|
Has the request body or response structure changed in a way that breaks the test?
|
|
87
95
|
- Compare test's expected fields against current schema/model definitions
|
|
88
|
-
- Type changes (string→int, int→string)
|
|
89
|
-
-
|
|
90
|
-
-
|
|
96
|
+
- Type changes (string→int, int→string) on individual fields → action: UPDATE
|
|
97
|
+
- Type change restructures the root object or makes the entire request body invalid → action: REGENERATE
|
|
98
|
+
- New required fields the test doesn't send → action: UPDATE
|
|
99
|
+
- Response fields the test asserts on have been removed → action: UPDATE
|
|
100
|
+
- ≥50% of the test's assertions reference fields that were removed or restructured → action: REGENERATE
|
|
101
|
+
|
|
102
|
+
**UPDATE vs REGENERATE:** choose UPDATE when changes are field-level (≤2 fields added, removed, renamed, or type-swapped). Choose REGENERATE only when the root response shape changed (flat→nested, new wrapper object, root key renamed) or ≥50% of assertions are broken.
|
|
91
103
|
|
|
92
104
|
### Check B2: Additive response field changes (coverage gaps)
|
|
93
105
|
**Even if existing assertions still pass**, does the diff add a new field to the response of an endpoint this test already covers?
|
|
94
106
|
- Look at the diff for lines like \`+ "newField":\` or \`+ newField =\` inside a view/serializer this test hits
|
|
95
|
-
- If YES →
|
|
107
|
+
- If YES → action: UPDATE
|
|
96
108
|
- This applies even when the test only checks status codes — the test should be extended to cover the new field
|
|
97
|
-
-
|
|
109
|
+
- A new response field on a covered endpoint always triggers UPDATE — even when existing assertions still pass.
|
|
98
110
|
|
|
99
111
|
### Check C: Auth changes
|
|
100
112
|
Has the authentication mechanism for this endpoint changed?
|
|
101
|
-
- Auth added where none existed →
|
|
102
|
-
- Auth method changed (bearer→cookie) →
|
|
103
|
-
- Auth removed →
|
|
104
|
-
|
|
105
|
-
### Check D: Assign
|
|
106
|
-
Based on the above,
|
|
107
|
-
|
|
108
|
-
- If Check B2 flagged an additive field → score must be ≥ 30 and action must be UPDATE, even if Checks B/C found no breaking changes.`;
|
|
113
|
+
- Auth added where none existed → action: UPDATE
|
|
114
|
+
- Auth method changed (bearer→cookie) → action: UPDATE
|
|
115
|
+
- Auth removed → action: VERIFY
|
|
116
|
+
|
|
117
|
+
### Check D: Assign action
|
|
118
|
+
Based on the above, choose the action (IGNORE / VERIFY / UPDATE / REGENERATE / DELETE) and provide a 1-2 sentence rationale.
|
|
119
|
+
- If Check B2 flagged an additive field → action must be UPDATE, even if Checks B/C found no breaking changes.`;
|
|
109
120
|
}
|
|
110
121
|
export function buildAddRecommendationGuidelines() {
|
|
111
122
|
return `## ADD — New Tests for New Endpoints
|
|
112
123
|
|
|
113
|
-
**
|
|
124
|
+
**ADD applies only when:**
|
|
114
125
|
- The diff introduces a brand-new route that has **no existing test coverage at all**, OR
|
|
115
126
|
- The diff introduces a new auth path, error branch, or fundamentally separate scenario that no existing test covers.
|
|
116
127
|
|
|
117
|
-
**
|
|
118
|
-
- The resource already has existing tests and the diff only adds a new HTTP method —
|
|
119
|
-
- The endpoint existed before this diff but lacks tests —
|
|
128
|
+
**Use UPDATE instead of ADD when:**
|
|
129
|
+
- The resource already has existing tests and the diff only adds a new HTTP method — add the new method's test cases to the existing file.
|
|
130
|
+
- The endpoint existed before this diff but lacks tests — log it in \`additionalRecommendations\` and skip it; pre-existing coverage gaps are out of scope for ADD.
|
|
120
131
|
|
|
121
132
|
**Test type priority by HTTP method:**
|
|
122
133
|
| Method | Recommended test types |
|
|
@@ -125,28 +136,28 @@ export function buildAddRecommendationGuidelines() {
|
|
|
125
136
|
| GET | contract, smoke |
|
|
126
137
|
| DELETE | integration, smoke |
|
|
127
138
|
|
|
128
|
-
Use a unique descriptive filename for every new test file.
|
|
139
|
+
Use a unique descriptive filename for every new test file. For a resource with existing tests, update the existing file — always prefer UPDATE over creating a new file.`;
|
|
129
140
|
}
|
|
130
141
|
export function buildUpdateExecutionRules() {
|
|
131
|
-
return
|
|
142
|
+
return `<execution_rules>
|
|
143
|
+
## Update Execution Rules
|
|
132
144
|
|
|
133
145
|
When applying UPDATE actions to existing test files, follow these rules in addition to the drift-detected changes:
|
|
134
146
|
|
|
135
|
-
### Test file ordering
|
|
147
|
+
### Test file ordering
|
|
136
148
|
Place mutation test functions (PATCH, PUT, POST) **before** any DELETE test function targeting the same resource. DELETE removes the resource — any mutation call after it will 404. When inserting a new mutation test, place it above the DELETE function and above the DELETE call in the \`if __name__ == "__main__"\` block (or equivalent runner entrypoint).
|
|
137
149
|
|
|
138
|
-
### Happy path first
|
|
139
|
-
When adding a new HTTP method (PUT, PATCH, POST) to an existing test file, always
|
|
150
|
+
### Happy path first
|
|
151
|
+
When adding a new HTTP method (PUT, PATCH, POST) to an existing test file, always include a 2xx success assertion first. Error-path tests (404, 422) may follow, but the happy path case is required.
|
|
140
152
|
|
|
141
|
-
### All test files for a resource
|
|
142
|
-
When a diff adds a new HTTP method to a resource, UPDATE covers **all** existing test files for that resource — contract, integration, and UI.
|
|
153
|
+
### All test files for a resource
|
|
154
|
+
When a diff adds a new HTTP method to a resource, UPDATE covers **all** existing test files for that resource — contract, integration, and UI. Apply UPDATE to every file the analyze tool reported for that resource path; do not stop after updating the first one.
|
|
143
155
|
|
|
144
|
-
### PATCH/PUT with child collections
|
|
145
|
-
|
|
146
|
-
1.
|
|
156
|
+
### PATCH/PUT with child collections
|
|
157
|
+
Child collection arrays (e.g. \`items\`, \`products\`, \`line_items\`) drive computed totals — a test that omits them cannot catch the most common mutation bugs. When the request/response includes a child collection:
|
|
158
|
+
1. Include the child array with at least one item containing the FK field (e.g. \`product_id\`) and a \`quantity\` field.
|
|
147
159
|
2. Assert each item's FK field and \`quantity\` match the sent values.
|
|
148
160
|
3. Assert the top-level computed total (e.g. \`total_amount\`) equals the expected math from the items.
|
|
149
|
-
A test that only sends/asserts metadata (discount, status, notes) without asserting the items array is INCOMPLETE and will produce false passes even when the items/total logic is broken.
|
|
150
161
|
|
|
151
162
|
### REGENERATE
|
|
152
163
|
Call the appropriate generation tool to replace the existing test from scratch. Use the same filename so it overwrites the old file.
|
|
@@ -154,49 +165,39 @@ Call the appropriate generation tool to replace the existing test from scratch.
|
|
|
154
165
|
### DELETE
|
|
155
166
|
Remove the test file when ALL endpoints it covers were removed from the codebase. If only SOME methods were removed, use UPDATE instead — remove the test functions for deleted methods and keep the rest.
|
|
156
167
|
|
|
157
|
-
### Test data isolation
|
|
168
|
+
### Test data isolation
|
|
158
169
|
Never use hardcoded resource IDs (e.g. \`order_id=1\`) in any test step, including GET or DELETE steps. Always create required resources via prior POST steps and chain IDs dynamically. Use timestamp-based unique names for created resources (e.g. \`"Product-\${int(time.time())}"\`) to prevent collisions across test runs.
|
|
159
170
|
|
|
160
|
-
### Enhance assertions after UPDATE
|
|
171
|
+
### Enhance assertions after UPDATE
|
|
161
172
|
Call \`skyramp_enhance_assertions\` with \`testFile\` set to the absolute path of the test file you just updated, \`enhanceType: "maintenance"\`, and the matching \`testType\` based on the file you are editing:
|
|
162
173
|
- **Integration test file** (multi-step chained requests): call with \`testType: "integration"\`
|
|
163
174
|
- **Contract-provider test file** (single endpoint with \`beforeAll\`/\`afterAll\` setup, provider mode): call with \`testType: "contract"\`. Skip for consumer-mode contract tests.
|
|
164
175
|
- **UI test file** (imports \`@playwright/test\`, uses \`page.\` calls): call with \`testType: "ui"\`
|
|
165
176
|
|
|
166
|
-
Then apply every instruction returned by the tool to the test file
|
|
177
|
+
Then apply every instruction returned by the tool to the test file.
|
|
178
|
+
</execution_rules>`;
|
|
167
179
|
}
|
|
168
180
|
export function buildDriftOutputChecklist(existingTestCount, newEndpointCount, inlineMode = false, stateFile) {
|
|
169
181
|
const finalStep = inlineMode
|
|
170
182
|
? `### Final step
|
|
171
|
-
Apply all maintenance actions (UPDATE / REGENERATE / DELETE) directly by editing the test files.
|
|
183
|
+
Apply all maintenance actions (UPDATE / REGENERATE / DELETE) directly by editing the test files. Apply IGNORE, VERIFY, UPDATE, REGENERATE, or DELETE only — ADD is handled in the next task.`
|
|
172
184
|
: `### Final step
|
|
173
|
-
After completing all assessments above, call \`skyramp_actions\` with \`stateFile: "${stateFile}"\` to
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
For each existing test reported by \`skyramp_analyze_changes\`:
|
|
181
|
-
- **IGNORE/VERIFY tests**: list on a single line: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY (score {N})\`. Do NOT write detailed rationale.
|
|
182
|
-
- **UPDATE/REGENERATE/DELETE tests**: output the full block:
|
|
183
|
-
\`\`\`
|
|
184
|
-
Test: {testFile}
|
|
185
|
-
Drift Score: {0-100}
|
|
186
|
-
Action: {UPDATE | REGENERATE | DELETE}
|
|
187
|
-
Rationale: {1-2 sentence explanation}
|
|
188
|
-
\`\`\`
|
|
189
|
-
Focus your analysis on tests that need action — do not spend time analyzing unchanged tests.`
|
|
190
|
-
: `### Existing tests (${existingTestCount} total)
|
|
185
|
+
After completing all assessments above, call \`skyramp_actions\` with \`stateFile: "${stateFile ?? "<stateFile>"}"\` and a \`recommendations\` entry for every test assessed. For each entry include: \`testFile\` (absolute path as reported by the analysis tools), \`action\`, \`rationale\`, \`updateInstructions\` (free-form summary of what this test must change — new fields to assert, constraint details, auth changes, new request params, or any other drift specifics; \`skyramp_actions\` passes this directly to the downstream LLM editing the file), and \`renamedEndpoints\` (for path-rename updates).
|
|
186
|
+
|
|
187
|
+
Call \`skyramp_actions\` as the sole final action — skip all other file writes.`;
|
|
188
|
+
const existingTestHeader = inlineMode
|
|
189
|
+
? "### Existing tests (reported by skyramp_analyze_changes)"
|
|
190
|
+
: `### Existing tests (${existingTestCount} total)`;
|
|
191
|
+
const existingTestSection = `${existingTestHeader}
|
|
191
192
|
For each existing test:
|
|
192
|
-
- **IGNORE/VERIFY tests**:
|
|
193
|
+
- **IGNORE/VERIFY tests**: one line each: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY\`. Rationale omitted for brevity.
|
|
193
194
|
- **UPDATE/REGENERATE/DELETE tests**: output the full block:
|
|
194
195
|
\`\`\`
|
|
195
196
|
Test: {testFile}
|
|
196
|
-
Drift Score: {0-100}
|
|
197
197
|
Action: {UPDATE | REGENERATE | DELETE}
|
|
198
198
|
Rationale: {1-2 sentence explanation}
|
|
199
|
-
|
|
199
|
+
\`\`\`
|
|
200
|
+
Focus your analysis on tests that need action — keep reasoning for unchanged tests to a single line.`;
|
|
200
201
|
const newEndpointSection = inlineMode
|
|
201
202
|
? ""
|
|
202
203
|
: newEndpointCount > 0
|
|
@@ -211,9 +212,12 @@ Rationale: {1 sentence}
|
|
|
211
212
|
: `### New endpoints
|
|
212
213
|
No new endpoints detected in this diff.`;
|
|
213
214
|
const sections = [existingTestSection, newEndpointSection, finalStep].filter(s => s.length > 0);
|
|
214
|
-
return
|
|
215
|
+
return `<output_format>
|
|
216
|
+
## Output Checklist
|
|
215
217
|
|
|
216
218
|
Complete ALL of the following:
|
|
217
219
|
|
|
218
|
-
${sections.join("\n\n")}
|
|
220
|
+
${sections.join("\n\n")}
|
|
221
|
+
Be brief. Decide the action for each test and apply edits immediately. Limit reasoning for IGNORE'd tests to a single line.
|
|
222
|
+
</output_format>`;
|
|
219
223
|
}
|