@skyramp/mcp 0.0.60-rc.1 → 0.0.60-rc.2
This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/build/prompts/test-recommendation/recommendationSections.js +119 -102
- package/build/prompts/testbot/testbot-prompts.js +96 -132
- package/build/services/ScenarioGenerationService.js +1 -3
- package/build/tools/generate-tests/generateIntegrationRestTool.js +8 -5
- package/build/tools/generate-tests/generateScenarioRestTool.js +1 -4
- package/build/tools/submitReportTool.js +4 -0
- package/package.json +1 -1
|
@@ -62,62 +62,60 @@ export function buildTestPatternGuidelines() {
|
|
|
62
62
|
effects occur (e.g., POST /orders triggers email notification)`;
|
|
63
63
|
}
|
|
64
64
|
export function buildTestQualityCriteria() {
|
|
65
|
-
return `##
|
|
66
|
-
|
|
67
|
-
**
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
search/filter → add to cart → checkout. NOT just "navigate to /products and check 200".
|
|
81
|
-
2. **Frontend-to-backend validation** — Verify that frontend actions trigger the correct API
|
|
82
|
-
calls and that the UI reflects the backend state correctly.
|
|
83
|
-
3. **Domain-specific scenarios** — If the PR adds search, test: enter search term → verify
|
|
84
|
-
results appear → click result → verify detail page loads with correct data.
|
|
85
|
-
|
|
86
|
-
**What makes a MEANINGFUL UI test:**
|
|
87
|
-
1. **Component behavior** — Test that UI components render correctly and respond to user
|
|
88
|
-
interactions (clicks, typing, form submissions).
|
|
89
|
-
2. **Visual state changes** — Test loading states, error states, empty states.
|
|
90
|
-
3. **User interaction flows** — Test complete interaction sequences: fill form → validate
|
|
91
|
-
inputs → submit → see confirmation. NOT just "page renders".
|
|
92
|
-
4. **Accessibility** — Verify keyboard navigation, ARIA labels, and focus management.`;
|
|
65
|
+
return `## What Makes a Good Test
|
|
66
|
+
|
|
67
|
+
**Integration tests** should demonstrate cross-resource data flow — step A creates data
|
|
68
|
+
that step B depends on (e.g., create product \u2192 create order referencing that product's ID \u2192
|
|
69
|
+
verify order contains correct product). Single-resource CRUD alone is not an integration test.
|
|
70
|
+
Use realistic request bodies from source code schemas and verify response data, not just
|
|
71
|
+
status codes.
|
|
72
|
+
|
|
73
|
+
**E2E tests** should follow realistic user journeys end-to-end: browse products \u2192 search \u2192
|
|
74
|
+
add to cart \u2192 checkout. Verify that frontend actions trigger the correct API calls and
|
|
75
|
+
that the UI reflects backend state.
|
|
76
|
+
|
|
77
|
+
**UI tests** should exercise component behavior and interaction flows: fill form \u2192 validate
|
|
78
|
+
inputs \u2192 submit \u2192 see confirmation. Include visual state changes (loading, error, empty)
|
|
79
|
+
and accessibility checks.`;
|
|
93
80
|
}
|
|
94
81
|
export function buildGenerationRules(isUIOnlyPR) {
|
|
95
|
-
return
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
**
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
**
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
82
|
+
return `## Generation Guidelines
|
|
83
|
+
|
|
84
|
+
**Scenario fidelity:** Every workflow scenario should reflect the actual resource
|
|
85
|
+
relationships in the code. If the pre-drafted scenarios don't match the real data model,
|
|
86
|
+
replace them with accurate ones.
|
|
87
|
+
|
|
88
|
+
**Priority ordering by PR type:**
|
|
89
|
+
${isUIOnlyPR ? `This is a **UI-only PR**. The most valuable tests are UI and E2E tests.
|
|
90
|
+
|
|
91
|
+
If Playwright traces exist for the changed pages, prioritize UI/E2E tests in the top 4.
|
|
92
|
+
If no traces exist, UI/E2E tests are still the highest-value recommendations — rank them
|
|
93
|
+
in the top 7 with scenario steps and trace recording instructions. The testbot will not
|
|
94
|
+
generate tests without traces, so all 7 become additionalRecommendations.
|
|
95
|
+
|
|
96
|
+
1. **UI tests** — per changed component/page
|
|
97
|
+
2. **E2E tests** — per user flow spanning frontend to backend
|
|
98
|
+
3. **Integration tests** — only when the changed UI calls backend APIs
|
|
99
|
+
` : `1. **Multi-resource integration tests** — one per cross-resource workflow (2-3 max).
|
|
100
|
+
2. **Fuzz tests** — per POST/PUT endpoint with complex request bodies. Tests boundary values,
|
|
101
|
+
type coercion, missing/extra fields, and edge cases the schema allows.
|
|
102
|
+
3. **Contract tests** — per endpoint with new/changed response schemas. Validates the response
|
|
103
|
+
structure matches expectations (field types, required fields, nested objects).
|
|
104
|
+
4. **E2E tests** — per distinct user flow if the API serves a frontend or client
|
|
105
|
+
5. **CRUD lifecycle integration tests** — only for resources with new/changed endpoints
|
|
106
|
+
where multi-resource tests don't already cover them.
|
|
107
|
+
`}When no Playwright trace exists, still recommend the test with instructions for recording
|
|
108
|
+
a trace using \`skyramp_start_trace_collection\` with \`playwright: true\`.
|
|
109
|
+
|
|
110
|
+
**Mixed PRs with frontend changes:** Include at least 1 E2E or UI test in the top 7,
|
|
111
|
+
ranked by value regardless of trace availability. If traces exist, place it in the top 4.
|
|
112
|
+
If no traces, it can still rank highly — the testbot will handle trace-dependent generation.
|
|
113
|
+
|
|
114
|
+
**Before finalizing:** Check that the top 4 aren't filled with CRUD tests for unchanged
|
|
115
|
+
resources when PR-relevant tests exist lower in the ranking. Swap if needed.
|
|
116
|
+
|
|
117
|
+
**No duplicate coverage.** If an existing test already covers an endpoint + test type,
|
|
118
|
+
recommend a multi-resource workflow that includes that endpoint alongside others instead.`;
|
|
121
119
|
}
|
|
122
120
|
export function buildToolWorkflows(authHeaderValue) {
|
|
123
121
|
return `## How to Generate Tests — Tool Workflows
|
|
@@ -126,20 +124,24 @@ export function buildToolWorkflows(authHeaderValue) {
|
|
|
126
124
|
|
|
127
125
|
**For multi-endpoint workflows (integration tests) — Scenario → Integration pipeline:**
|
|
128
126
|
1. Call \`skyramp_scenario_test_generation\` once per step: \`scenarioName\`, \`destination\`,
|
|
129
|
-
\`baseURL\`, \`method\`, \`path\`, \`requestBody\`, \`
|
|
127
|
+
\`baseURL\`, \`method\`, \`path\`, \`requestBody\`, \`authHeader: "${authHeaderValue}"\`.
|
|
128
|
+
\`statusCode\` is optional — defaults: POST→201, DELETE→204, GET/PUT/PATCH→200. Only override for non-standard codes.
|
|
130
129
|
**OpenAPI spec is NOT required.** \`apiSchema\` is OPTIONAL — omit it if no spec exists.
|
|
131
|
-
\`requestBody\`
|
|
132
|
-
|
|
130
|
+
\`requestBody\` should use realistic field values from source code schemas (Zod, Pydantic, DTOs).
|
|
131
|
+
Inspect the source code to determine the correct request body shape — avoid sending \`{}\`.
|
|
133
132
|
Use unique names with timestamp suffix to avoid conflicts on re-runs.
|
|
134
133
|
For GET/PUT/DELETE with path IDs, use a placeholder — chaining resolves the real ID.
|
|
135
|
-
2. Produces a \`scenario_<name>.json\`
|
|
134
|
+
2. Produces a \`scenario_<name>.json\` in the same \`outputDir\` as the test files (not \`.skyramp/\`).
|
|
136
135
|
3. Call \`skyramp_integration_test_generation\` with \`scenarioFile\` AND \`authHeader: "${authHeaderValue}"\`.
|
|
137
|
-
Do NOT pass \`chainingKey\` —
|
|
136
|
+
Do NOT pass \`chainingKey\` — defaults to \`response.id\`. After generation, the testbot
|
|
137
|
+
will verify and fix path param chaining in the generated test.
|
|
138
138
|
|
|
139
139
|
**For single-endpoint tests (contract/fuzz):**
|
|
140
140
|
\`skyramp_{type}_test_generation\` with \`endpointURL\` (full URL incl. base + path), \`method\`,
|
|
141
141
|
\`authHeader: "${authHeaderValue}"\`, and \`requestData\` from source code schemas.
|
|
142
|
-
|
|
142
|
+
If an OpenAPI spec exists, ALSO pass \`apiSchema\` — it enables schema-aware validation
|
|
143
|
+
(contract tests verify response structure, fuzz tests generate smarter boundary values).
|
|
144
|
+
Without a spec, \`endpointURL\` alone is sufficient.
|
|
143
145
|
|
|
144
146
|
**For UI tests (no Playwright recording):**
|
|
145
147
|
1. \`skyramp_start_trace_collection\` (playwright: true)
|
|
@@ -152,58 +154,73 @@ Same trace flow, pass both trace file and playwright zip to \`skyramp_e2e_test_g
|
|
|
152
154
|
}
|
|
153
155
|
export function buildCoverageChecklist(openApiSpec, isUIOnlyPR, hasFrontendChanges, authHeaderValue, topN) {
|
|
154
156
|
const specNote = openApiSpec
|
|
155
|
-
? `\n**OpenAPI Spec**: \`${openApiSpec.path}\`
|
|
157
|
+
? `\n**OpenAPI Spec available**: \`${openApiSpec.path}\`
|
|
158
|
+
Use it actively:
|
|
159
|
+
- **Contract tests**: pass \`apiSchema: "${openApiSpec.path}"\` — the CLI validates response schemas against the spec.
|
|
160
|
+
- **Fuzz tests**: pass \`apiSchema: "${openApiSpec.path}"\` — the CLI generates boundary values from schema constraints.
|
|
161
|
+
- **Integration tests**: pass \`apiSchema\` to \`skyramp_scenario_test_generation\` — it extracts destination and request/response shapes.
|
|
162
|
+
- **Single-endpoint tests**: pass both \`endpointURL\` AND \`apiSchema\` for schema-aware generation.
|
|
163
|
+
\n`
|
|
156
164
|
: "";
|
|
157
165
|
const distribution = isUIOnlyPR
|
|
158
|
-
? `-
|
|
166
|
+
? `- Prioritize UI tests (≥3), then E2E tests (≥2), then integration only if UI calls APIs. 0% smoke.`
|
|
159
167
|
: hasFrontendChanges
|
|
160
|
-
? `-
|
|
161
|
-
: `-
|
|
162
|
-
const skipUI = isUIOnlyPR ? " (Skip for UI-only PRs)" : "";
|
|
168
|
+
? `- Mix: integration (2-3), E2E (1-2), UI (1-2), fuzz or contract (1). 0% smoke.`
|
|
169
|
+
: `- Mix: integration (2-3, multi-resource first), fuzz (1-2), contract (1-2), E2E (1 if user-facing flows exist). 0% smoke.`;
|
|
163
170
|
return `## Coverage Checklist
|
|
164
171
|
${specNote}
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
172
|
+
${isUIOnlyPR ? `**UI-only PR** — This PR has no backend changes. Focus on UI and E2E tests.
|
|
173
|
+
|
|
174
|
+
With Playwright traces: prioritize UI tests (one per changed component) and E2E tests
|
|
175
|
+
(one per page-level user flow). Integration tests are relevant only if the UI calls APIs.
|
|
176
|
+
|
|
177
|
+
Without traces: recommend UI/E2E tests with scenario steps and trace recording instructions
|
|
178
|
+
(\`skyramp_start_trace_collection\` with \`playwright: true\`). The testbot will skip generation
|
|
179
|
+
entirely for frontend-only PRs without traces — all recommendations become additional
|
|
180
|
+
recommendations in the report. Skip fuzz, contract, and smoke tests.
|
|
181
|
+
` : `For each endpoint, recommend the most valuable test types — aim for variety:
|
|
182
|
+
1. **Integration** — multi-resource workflows (not just single-resource CRUD)
|
|
183
|
+
2. **Fuzz** — POST/PUT endpoints with request bodies (validates edge cases, type safety)
|
|
184
|
+
3. **Contract** — endpoints with new/changed response schemas (validates structure)
|
|
185
|
+
4. **E2E** — user flows spanning frontend to backend${hasFrontendChanges ? " (include at least 1 for this PR)" : ""}
|
|
186
|
+
5. **UI** — changed frontend components${hasFrontendChanges ? " (include at least 1)" : ""}
|
|
187
|
+
6. No smoke tests.
|
|
188
|
+
Do NOT recommend 7 integration tests — diversify across test types.
|
|
189
|
+
`}
|
|
190
|
+
|
|
191
|
+
## For Each Recommendation Include:
|
|
174
192
|
1. Test type 2. Priority (high/medium/low) 3. Target endpoint/scenario
|
|
175
|
-
4.
|
|
176
|
-
5.
|
|
193
|
+
4. What it validates (business logic, not just "tests the endpoint")
|
|
194
|
+
5. Skyramp tool call details — exact tool + key params for zero-editing execution
|
|
177
195
|
6. For integration/E2E: reference draftedScenario by scenarioName
|
|
178
196
|
|
|
179
|
-
## When Artifacts Are Missing
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
- **No Playwright recording**
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
197
|
+
## When Artifacts Are Missing
|
|
198
|
+
Recommend the test anyway — never mark it "blocked":
|
|
199
|
+
- **No OpenAPI spec** \u2192 use \`endpointURL\` and \`requestBody\` from source code
|
|
200
|
+
- **No Playwright recording** \u2192 provide trace recording instructions
|
|
201
|
+
- **No backend trace** \u2192 use the scenario generation pipeline
|
|
202
|
+
|
|
203
|
+
## Select the Top ${topN}
|
|
204
|
+
Consider all possible tests (endpoints \u00d7 interaction types + scenarios), then select the
|
|
205
|
+
top ${topN} most valuable. Include \`totalConsidered\` count in your output. The top 4 will
|
|
206
|
+
be generated; recommendations #5-${topN} will appear in the report but won't be generated,
|
|
207
|
+
so ensure the top 4 are the highest-impact tests.
|
|
208
|
+
|
|
209
|
+
**Before outputting, verify:**
|
|
210
|
+
${isUIOnlyPR ? `- If traces exist, at least 2 of the top 4 should be UI/E2E tests.
|
|
211
|
+
- Without traces, all 7 become additionalRecommendations (no generation). Rank UI/E2E highest.
|
|
212
|
+
- Avoid CRUD tests for unchanged resources the UI doesn't call.` : `- If the PR includes frontend changes, include at least 1 E2E/UI test in the top 4.
|
|
213
|
+
- CRUD tests for unchanged resources should not displace PR-relevant tests in the top 4.`}
|
|
214
|
+
- Each integration scenario's step sequence should be logically valid — preconditions
|
|
215
|
+
met by prior steps.
|
|
216
|
+
|
|
217
|
+
Preferred ordering: ${isUIOnlyPR ? "UI \u2192 E2E \u2192 integration (if UI calls APIs)." : "integration \u2192 fuzz \u2192 contract \u2192 E2E \u2192 UI."}
|
|
196
218
|
${distribution}
|
|
197
219
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
7. Every recommendation = enough detail for direct tool invocation.
|
|
203
|
-
|
|
204
|
-
**FINAL CHECK:** Count: workflow scenarios → integration tests, resources → CRUD tests,
|
|
205
|
-
user flows → E2E, components → UI, POST/PUT → fuzz${skipUI}, schemas → contract${skipUI}.
|
|
206
|
-
Total must be ≥ ${topN}.
|
|
220
|
+
Each recommendation should include enough detail for direct tool invocation.
|
|
221
|
+
Reference draftedScenarios by name and interactions by description.
|
|
222
|
+
Use "high"/"medium"/"low" for priority — no numeric scores.
|
|
223
|
+
Total candidates should be \u2265 ${topN}.
|
|
207
224
|
|
|
208
225
|
Generate recommendations now.`;
|
|
209
226
|
}
|
|
@@ -9,138 +9,102 @@ function getTestbotPrompt(prTitle, prDescription, diffFile, testDirectory, summa
|
|
|
9
9
|
<TEST DIRECTORY>${testDirectory}</TEST DIRECTORY>
|
|
10
10
|
<REPOSITORY PATH>${repositoryPath}</REPOSITORY PATH>
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
- \`
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
- **
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
## Task 3: Submit Report (MANDATORY)
|
|
109
|
-
|
|
110
|
-
**CHECKPOINT: Before submitting, verify you completed BOTH tasks:**
|
|
111
|
-
- Task 1: Did you call \`skyramp_recommend_tests\` and generate tests? (or skip if no application code changed?)
|
|
112
|
-
- Task 2: Did you call \`skyramp_discover_tests\`? If it found tests, did you run drift analysis and report before/after results in \`testMaintenance\`?
|
|
113
|
-
**If you skipped Task 2, GO BACK and complete it now before submitting.**
|
|
114
|
-
|
|
115
|
-
After completing Tasks 1 and 2, you MUST call the Skyramp MCP tool "skyramp_submit_report" to submit your report.
|
|
116
|
-
Pass '${summaryOutputFile}' as the summaryOutputFile parameter.
|
|
117
|
-
|
|
118
|
-
For the commitMessage parameter, write a succinct summary (under 72 chars) of what you did, without any prefix. Examples:
|
|
119
|
-
- "add contract tests for /products endpoint"
|
|
120
|
-
- "add multiple integration tests including cross-resource workflow"
|
|
121
|
-
- "add integration and e2e tests for new /reviews endpoint"
|
|
122
|
-
|
|
123
|
-
Do NOT write the report to a file yourself. Do NOT skip this step. The skyramp_submit_report tool is the ONLY way to submit the report.
|
|
124
|
-
|
|
125
|
-
**additionalRecommendations:** For the remaining recommendations (ranked #5-#7 from step 2) that you did NOT generate, include them in the \`additionalRecommendations\` array. For each, provide:
|
|
126
|
-
- \`testType\`, \`scenarioName\`, \`priority\` (high/medium/low), \`description\` (why it is valuable)
|
|
127
|
-
- \`steps\`: ordered sequence — each step has \`description\`, and for API steps: \`method\`, \`path\`, \`expectedStatusCode\`, \`requestBody\` (example values), \`responseBody\` (key fields to verify)
|
|
128
|
-
- \`openApiSpec\`: path to spec file if one exists in the repo
|
|
129
|
-
- \`backendTrace\`: path to backend trace file if found (used by E2E and integration tests)
|
|
130
|
-
- \`frontendTrace\`: path to Playwright/UI trace file if found (used by UI tests; E2E tests need BOTH backend + frontend traces) of what the test would cover and why it is valuable.
|
|
131
|
-
|
|
132
|
-
## Report Guidelines
|
|
133
|
-
|
|
134
|
-
**businessCaseAnalysis:** Base this ONLY on facts from the PR title, description, and what the tools reported. If \`skyramp_analyze_repository\` reported 0 new endpoints, do NOT claim new endpoints were added — instead describe the change accurately (e.g. "frontend changes to consume existing API endpoints", "refactored service layer", "updated test configuration"). Never infer new backend endpoints from frontend fetch/API calls in the diff.
|
|
135
|
-
|
|
136
|
-
When reporting test results, if you chose to skip executing a test, you MUST explain WHY you skipped it.
|
|
137
|
-
NEVER use the phrase "CI timeout" or imply a timeout occurred unless a tool call actually timed out.
|
|
138
|
-
Instead, set the status to "Skipped" and provide an honest reason in the details, for example:
|
|
139
|
-
- "Skipped: no code changes affect this endpoint"
|
|
140
|
-
- "Skipped: skyramp_discover_tests found no existing Skyramp tests"
|
|
141
|
-
- "Skipped: only CI/config changes in this PR, no API changes"
|
|
142
|
-
|
|
143
|
-
Reminder: Use the Skyramp MCP tools available to you for test analysis, generation, and execution.`;
|
|
12
|
+
Use the Skyramp MCP server tools for all tasks below.
|
|
13
|
+
|
|
14
|
+
## Task 1: Recommend & Generate New Tests
|
|
15
|
+
|
|
16
|
+
Read the diff at \`${diffFile}\`. Skip Task 1 if all changed files are non-application
|
|
17
|
+
(CI/CD, docs, lock files, config). Otherwise proceed:
|
|
18
|
+
|
|
19
|
+
### Steps
|
|
20
|
+
|
|
21
|
+
1. Call \`skyramp_analyze_repository\` with \`repositoryPath\`: "${repositoryPath}", \`analysisScope\`: "current_branch_diff"${baseBranch ? `\n , \`baseBranch\`: "${baseBranch}"` : ''}
|
|
22
|
+
2. Call \`skyramp_recommend_tests\` with the returned \`sessionId\`.
|
|
23
|
+
It returns 7 ranked recommendations. Generate the top 4, report the remaining 3
|
|
24
|
+
as \`additionalRecommendations\`.
|
|
25
|
+
|
|
26
|
+
3. **Generate** at most 4 tests from the top 4 recommendations. Stop after 4.
|
|
27
|
+
Keep a list of every file the CLI creates (test files AND scenario JSON files).
|
|
28
|
+
|
|
29
|
+
**Frontend-only PRs** (no backend/API changes): only generate tests if relevant
|
|
30
|
+
Playwright traces exist. If no traces are available, skip generation entirely and
|
|
31
|
+
move all 7 recommendations to \`additionalRecommendations\` with scenario steps and
|
|
32
|
+
trace recording instructions. Do not generate integration tests for unchanged backend
|
|
33
|
+
APIs just to fill the quota — those tests don't validate the PR's changes.
|
|
34
|
+
|
|
35
|
+
**How to generate each type:**
|
|
36
|
+
- **Integration**: call \`skyramp_scenario_test_generation\` per step, then
|
|
37
|
+
\`skyramp_integration_test_generation\` with the scenario file.
|
|
38
|
+
The scenario JSON is written to the same \`outputDir\` as the test files
|
|
39
|
+
(e.g. \`tests/scenario_<name>.json\`), not \`.skyramp/\`.
|
|
40
|
+
- **Contract**: call \`skyramp_contract_test_generation\` with \`endpointURL\`, \`method\`,
|
|
41
|
+
and \`requestData\` for POST/PUT endpoints.
|
|
42
|
+
Pass \`apiSchema\` if an OpenAPI spec exists — it validates response structure.
|
|
43
|
+
- **Fuzz**: call \`skyramp_fuzz_test_generation\` with \`endpointURL\`, \`method\`, \`requestData\`.
|
|
44
|
+
Pass \`apiSchema\` if available — it generates smarter boundary values.
|
|
45
|
+
- **E2E/UI**: only generate when relevant Playwright traces exist (see step 5).
|
|
46
|
+
Without traces, move the test to \`additionalRecommendations\` with scenario steps
|
|
47
|
+
and trace recording instructions instead.
|
|
48
|
+
- Skip smoke tests entirely.
|
|
49
|
+
|
|
50
|
+
**Scenario quality:** Before generating, verify each step's preconditions are met by
|
|
51
|
+
prior steps. For example, you can't update a membership that was never created — check
|
|
52
|
+
the controller code for existence checks and ensure the scenario creates records first.
|
|
53
|
+
|
|
54
|
+
**Filenames:** Pass a descriptive \`--output\` name per test to avoid CLI overwrites.
|
|
55
|
+
|
|
56
|
+
4. **Execute** the generated tests and record results.
|
|
57
|
+
|
|
58
|
+
5. **Trace search** for E2E/UI: look in \`\${testDirectory}\`, repo root, and \`.skyramp/\` for
|
|
59
|
+
trace files (\`*trace*.json\`, \`*playwright*.zip\`). Only use a trace if it covers code
|
|
60
|
+
changed in this PR and targets localhost — skip traces for external hosts or unrelated code.
|
|
61
|
+
|
|
62
|
+
With relevant traces: backend + Playwright → \`skyramp_e2e_test_generation\`,
|
|
63
|
+
Playwright only → \`skyramp_ui_test_generation\`.
|
|
64
|
+
|
|
65
|
+
**After generation, fix chaining only.** The CLI may use literal/hardcoded IDs instead
|
|
66
|
+
of dynamic values from prior responses. Fix these two cases:
|
|
67
|
+
1. **Path params:** variables like \`product_id = 'product_id'\` → use the response accessor
|
|
68
|
+
(e.g. \`getResponseValue(response, "response.id")\` in TS, \`skyramp.get_response_value(response, "id")\` in Python).
|
|
69
|
+
2. **Request body refs:** hardcoded IDs in request bodies (e.g. \`"product_id": 1\`) → replace
|
|
70
|
+
with the dynamic ID extracted from the prior POST response (e.g. \`product_id\` variable or
|
|
71
|
+
\`dataOverride\`/\`data_override\` for the field).
|
|
72
|
+
|
|
73
|
+
Change ONLY chaining-related values (path param assignments and body ID references).
|
|
74
|
+
Preserve everything else exactly as the CLI generated it — headers, auth code, assertions,
|
|
75
|
+
imports, and all other request body fields.
|
|
76
|
+
|
|
77
|
+
## Task 2: Existing Test Maintenance
|
|
78
|
+
|
|
79
|
+
Run this task regardless of Task 1 outcome — even if Task 1 was skipped or generated zero tests.
|
|
80
|
+
|
|
81
|
+
1. Call \`skyramp_discover_tests\` with \`repositoryPath\`: "${repositoryPath}".
|
|
82
|
+
2. If zero Skyramp tests found, report \`testMaintenance\` as an empty array with
|
|
83
|
+
a note in \`issuesFound\`: "No existing Skyramp tests found for maintenance."
|
|
84
|
+
3. If tests exist:
|
|
85
|
+
a. Baseline them (from CI status or by executing).
|
|
86
|
+
b. Run \`skyramp_analyze_test_drift\` → \`skyramp_calculate_health_scores\` → \`skyramp_actions\`.
|
|
87
|
+
c. Apply actions (path renames, schema updates) in-place. Do not regenerate.
|
|
88
|
+
d. Execute modified tests. Report before/after in \`testMaintenance\`.
|
|
89
|
+
|
|
90
|
+
## Task 3: Submit Report
|
|
91
|
+
|
|
92
|
+
Verify Tasks 1 and 2 are complete, then call \`skyramp_submit_report\` with
|
|
93
|
+
\`summaryOutputFile\`: "${summaryOutputFile}".
|
|
94
|
+
|
|
95
|
+
\`commitMessage\`: under 72 chars, e.g. "add integration tests for /products and /orders"
|
|
96
|
+
|
|
97
|
+
**newTestsCreated** — list every generated test file (at most 4):
|
|
98
|
+
\`testType\`, \`endpoint\`, \`fileName\`, \`description\`, \`scenarioFile\`, \`traceFile\`, \`frontendTrace\`
|
|
99
|
+
Use the actual file path returned by the generation tool for \`scenarioFile\`.
|
|
100
|
+
Include scenario JSON files in the git commit alongside test files.
|
|
101
|
+
Every test file in the commit should appear here. If you over-generated, delete extras first.
|
|
102
|
+
If no tests were generated (e.g. frontend-only PR without traces), pass an empty array.
|
|
103
|
+
|
|
104
|
+
**additionalRecommendations** — remaining recommendations not generated:
|
|
105
|
+
\`testType\`, \`scenarioName\`, \`priority\`, \`description\`, \`steps\`, artifact paths
|
|
106
|
+
|
|
107
|
+
**businessCaseAnalysis** — based only on PR data and tool outputs.`;
|
|
144
108
|
}
|
|
145
109
|
export function registerTestbotPrompt(server) {
|
|
146
110
|
logger.info("Registering testbot prompt");
|
|
@@ -21,8 +21,6 @@ export class ScenarioGenerationService {
|
|
|
21
21
|
};
|
|
22
22
|
}
|
|
23
23
|
// Handle file writing
|
|
24
|
-
//add hyphen to the scenario name
|
|
25
|
-
//make file in tmp directory
|
|
26
24
|
const scenarioName = params.scenarioName.replace(/ /g, "-").toLowerCase();
|
|
27
25
|
const fileName = `scenario_${scenarioName}.json`;
|
|
28
26
|
const filePath = path.join(params.outputDir, fileName);
|
|
@@ -130,7 +128,7 @@ ${JSON.stringify(traceRequest, null, 2)}
|
|
|
130
128
|
const timestamp = new Date().toISOString();
|
|
131
129
|
const method = params.method;
|
|
132
130
|
const path = params.path;
|
|
133
|
-
const statusCode = params.statusCode
|
|
131
|
+
const statusCode = params.statusCode;
|
|
134
132
|
const requestBody = params.requestBody ||
|
|
135
133
|
(method === "GET" || method === "DELETE" ? "" : "{}");
|
|
136
134
|
let responseBody = params.responseBody;
|
|
@@ -17,7 +17,6 @@ const integrationTestSchema = z
|
|
|
17
17
|
.describe("Path to the scenario file to be used for test generation. This file is generated by the skyramp_scenario_test_generation tool.")
|
|
18
18
|
.optional(),
|
|
19
19
|
...codeRefactoringSchema.shape,
|
|
20
|
-
...baseTestSchema,
|
|
21
20
|
endpointURL: baseTestSchema.endpointURL.default(""),
|
|
22
21
|
})
|
|
23
22
|
.omit({ method: true }).shape;
|
|
@@ -56,12 +55,16 @@ multi-resource workflows often need **different chaining per step**. You MUST:
|
|
|
56
55
|
(e.g., \`/products/{product_id}\` uses the product's ID, not the order's ID)
|
|
57
56
|
- POST calls that create a child resource must include the **parent's ID** in the request body
|
|
58
57
|
or path (e.g., \`POST /orders\` body should include \`product_id\` from the products POST response)
|
|
59
|
-
4. **Fix
|
|
60
|
-
- Replace hardcoded IDs (like \`/products/1\`) with the dynamic variable from the POST response
|
|
58
|
+
4. **Fix ONLY chaining** (path params AND request body ID references) — nothing else:
|
|
59
|
+
- Replace hardcoded path IDs (like \`/products/1\`) with the dynamic variable from the POST response
|
|
60
|
+
- Replace hardcoded IDs in request bodies (like \`"product_id": 1\`) with the dynamic variable
|
|
61
|
+
(use \`data_override\`/\`dataOverride\` or direct variable substitution)
|
|
61
62
|
- Rename duplicate variable names so each resource has its own ID variable
|
|
62
|
-
- Ensure request bodies reference the correct chained IDs
|
|
63
63
|
- For Python: use \`skyramp.get_response_value(response_N, "id")\` to extract and f-strings for paths
|
|
64
|
-
- For TypeScript: use \`
|
|
64
|
+
- For TypeScript: use \`getResponseValue(response, "response.id")\` or the appropriate accessor
|
|
65
|
+
⚠️ **Preserve everything else exactly as generated** — do not add, remove, or modify
|
|
66
|
+
auth headers, cookies, tokens, env vars, imports, assertions, or non-chaining request body fields.
|
|
67
|
+
The CLI output for auth/headers is intentional.
|
|
65
68
|
|
|
66
69
|
**Example fix for a products → orders workflow:**
|
|
67
70
|
\`\`\`
|
|
@@ -36,8 +36,7 @@ const scenarioTestSchema = {
|
|
|
36
36
|
.describe("JSON string of the response body parsed by AI from the scenario"),
|
|
37
37
|
statusCode: z
|
|
38
38
|
.number()
|
|
39
|
-
.
|
|
40
|
-
.describe("HTTP status code (e.g., 200, 201, 204) parsed by AI from the scenario"),
|
|
39
|
+
.describe("Expected HTTP status code. Read status codes from the API schema file else defaults: POST→201, DELETE→204, GET/PUT/PATCH→200."),
|
|
41
40
|
outputDir: baseSchema.shape.outputDir,
|
|
42
41
|
authHeader: z
|
|
43
42
|
.string()
|
|
@@ -83,8 +82,6 @@ The AI should parse the natural language scenario and provide:
|
|
|
83
82
|
- AI-parsed HTTP method and path (required)
|
|
84
83
|
- AI-parsed request/response bodies (optional)
|
|
85
84
|
|
|
86
|
-
**IMPORTANT: If an apiSchema parameter (OpenAPI/Swagger file path or URL) is provided, DO NOT attempt to read or analyze the file contents. These files can be very large. Simply pass the path/URL to the tool - the backend will handle reading and processing the schema file.**
|
|
87
|
-
|
|
88
85
|
**Note:** This tool generates one request at a time. Call multiple times for multi-step scenarios.
|
|
89
86
|
|
|
90
87
|
**CRITICAL - Integration Test Generation After Scenario Creation:**
|
|
@@ -15,6 +15,10 @@ const newTestSchema = z.object({
|
|
|
15
15
|
testType: z.string().describe("Type of test created: Smoke, Contract, Integration, etc."),
|
|
16
16
|
endpoint: z.string().describe("HTTP verb and path, e.g. 'GET /api/v1/products'"),
|
|
17
17
|
fileName: z.string().describe("Name of the generated test file"),
|
|
18
|
+
description: z.string().optional().describe("What the test scenario covers, e.g. 'Creates a collection, adds a link, then verifies the link exists'"),
|
|
19
|
+
scenarioFile: z.string().optional().describe("Path to the scenario JSON file if one was generated (e.g. 'tests/scenario_collections-links.json')"),
|
|
20
|
+
traceFile: z.string().optional().describe("Path to the backend trace file if used or created"),
|
|
21
|
+
frontendTrace: z.string().optional().describe("Path to the Playwright/UI trace file if used or created"),
|
|
18
22
|
});
|
|
19
23
|
const descriptionSchema = z.object({
|
|
20
24
|
description: z.string().describe("One-line description"),
|