@skyramp/mcp 0.2.1-rc.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/playwright/registerPlaywrightTools.js +1 -0
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +98 -87
- package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +92 -60
- package/build/prompts/test-maintenance/driftAnalysisSections.js +139 -197
- package/build/prompts/testbot/testbot-prompts.js +4 -7
- package/build/prompts/testbot/testbot-prompts.test.js +17 -22
- package/build/services/TestDiscoveryService.js +39 -9
- package/build/tools/test-management/actionsTool.js +166 -148
- package/build/tools/test-management/analyzeChangesTool.js +2 -10
- package/build/tools/test-management/analyzeTestHealthTool.js +10 -22
- package/build/utils/docker.test.js +1 -1
- package/build/utils/versions.js +1 -1
- package/node_modules/playwright/lib/mcp/skyramp/assertApiRequestTool.js +46 -0
- package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +298 -51
- package/node_modules/playwright/lib/mcp/test/skyRampExport.js +5 -0
- package/package.json +2 -2
- package/node_modules/playwright/lib/mcp/browser/tools/domAnalyzer.js +0 -261
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.3.tgz +0 -0
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.4.tgz +0 -0
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.5.tgz +0 -0
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.6.tgz +0 -0
|
@@ -2,42 +2,37 @@
|
|
|
2
2
|
* Modular section builders for the Drift Analysis prompt,
|
|
3
3
|
* mirroring the recommendationSections.ts pattern.
|
|
4
4
|
*/
|
|
5
|
-
|
|
6
|
-
export function buildActionDecisionTree() {
|
|
5
|
+
export function buildActionDecisionMatrix() {
|
|
7
6
|
return `<decision_rules>
|
|
8
|
-
|
|
7
|
+
## Action Decision Matrix
|
|
9
8
|
|
|
10
|
-
|
|
9
|
+
For each existing test, work through these checks in order — the first match wins:
|
|
11
10
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
- Scope narrowed: \`+ requireRole\`, \`+ raise PermissionError\`, \`+ if not is_owner\`, \`+ [x for x in xs if x.owner == caller_id]\`
|
|
21
|
-
- Behavioral: \`+ raise ValidationError\`/\`+ HTTPException(409)\` on new \`if\`, \`+ VALID_TRANSITIONS\`, sync→async (\`- return 200\`/\`+ return 202\`), formula change (\`- total = a - b\`/\`+ total = a + tax - b\`)
|
|
22
|
-
|
|
23
|
-
Then, for each test where the changed code *is* reachable, work through the individual checks below using your pre-built detection list. Collect **all** matching signals, then assign the single highest-severity action across all matches. Severity order (highest first): **DELETE > REGENERATE > UPDATE > VERIFY > IGNORE**. **Before assigning UPDATE, REGENERATE, or DELETE, quote the specific diff line(s) that triggered it in the rationale. If you cannot point to a diff line this test's endpoint can observe, the action is IGNORE or VERIFY, not UPDATE.**
|
|
11
|
+
1. **All endpoints the test covers were removed** → **DELETE**
|
|
12
|
+
2. **Some (but not all) covered endpoints removed, or a covered endpoint renamed** → **UPDATE**
|
|
13
|
+
3. **New response field added to a covered endpoint** → **UPDATE** — the test needs a new assertion even if existing assertions still pass
|
|
14
|
+
4. **Shape change breaks assertions (field-level: ≤2 fields changed, renamed, or type-swapped)** → **UPDATE**
|
|
15
|
+
**Shape change restructures the root response (flat→nested, new wrapper object, root key renamed, ≥50% of test assertions broken)** → **REGENERATE**
|
|
16
|
+
5. **Auth added or auth method changed** → **UPDATE**
|
|
17
|
+
**Auth removed** → **VERIFY**
|
|
18
|
+
6. **No breaking changes detected** → **IGNORE** or **VERIFY** for minor drift
|
|
24
19
|
|
|
25
20
|
Rules:
|
|
26
|
-
-
|
|
27
|
-
-
|
|
28
|
-
- REGENERATE when
|
|
29
|
-
- Prefer IGNORE over VERIFY when all changed files are unrelated to the test's endpoint.
|
|
21
|
+
- DELETE when none of the covered endpoints exist any longer; REGENERATE when they still exist but changed drastically.
|
|
22
|
+
- REGENERATE means: the top-level response shape changed (flat→nested, new wrapper object added, root key renamed), OR ≥50% of the test's assertions reference fields that were removed or restructured. In all other cases, prefer UPDATE.
|
|
23
|
+
- Prefer UPDATE over REGENERATE when changes are field-level (≤2 fields added, removed, renamed, or type-swapped).
|
|
24
|
+
- Prefer IGNORE over VERIFY when all changed files are unrelated to the test's endpoint.
|
|
30
25
|
- ADD actions belong in the next step — complete this assessment with IGNORE / VERIFY / UPDATE / REGENERATE / DELETE only.
|
|
31
26
|
|
|
32
27
|
<examples>
|
|
33
28
|
<example>
|
|
34
|
-
Diff adds one field to a
|
|
29
|
+
Diff adds one field to a response object and renames a URL path segment:
|
|
35
30
|
\`\`\`
|
|
36
31
|
- @app.route("/users/<id>/orders")
|
|
37
32
|
+ @app.route("/users/<id>/purchases")
|
|
38
|
-
+ "total_items": len(order.items)
|
|
33
|
+
+ "total_items": len(order.items)
|
|
39
34
|
\`\`\`
|
|
40
|
-
→ **UPDATE**:
|
|
35
|
+
→ **UPDATE**: path rename + one new field — both are field-level changes. Patch the URL and add an assertion for \`total_items\`.
|
|
41
36
|
</example>
|
|
42
37
|
<example>
|
|
43
38
|
Diff wraps the entire response in a new envelope object:
|
|
@@ -45,197 +40,135 @@ Diff wraps the entire response in a new envelope object:
|
|
|
45
40
|
- return Response({"id": ..., "status": ..., "items": [...]})
|
|
46
41
|
+ return Response({"data": {"id": ..., "status": ..., "items": [...]}, "meta": {"page": 1}})
|
|
47
42
|
\`\`\`
|
|
48
|
-
→ **REGENERATE**: root
|
|
49
|
-
</example>
|
|
50
|
-
<example>
|
|
51
|
-
Diff adds a field to a model/migration only — project uses explicit serializers (DRF, FastAPI, etc.):
|
|
52
|
-
\`\`\`
|
|
53
|
-
+ sort_order = Column(Integer, nullable=True) # in models.py
|
|
54
|
-
\`\`\`
|
|
55
|
-
No serializer or field-inclusion list changed.
|
|
56
|
-
→ **VERIFY**: model-only signal in a project with explicit serializers — cannot confirm from the diff whether the field is included in the serializer's \`fields\` list and therefore exposed in API responses.
|
|
57
|
-
</example>
|
|
58
|
-
<example>
|
|
59
|
-
Diff adds a field to a schema/model only — project has no explicit serializer layer (ORM fields passed through directly):
|
|
60
|
-
\`\`\`
|
|
61
|
-
+ sort_order: {type: 'integer', nullable: true} # in db/schema.js
|
|
62
|
-
\`\`\`
|
|
63
|
-
No serializer file changed. No \`fields =\` or field-exclusion list exists for this resource.
|
|
64
|
-
→ **UPDATE**: project has no serializer gate, so new schema columns are auto-exposed in API responses. Augment the test to assert the new field is present and round-trips correctly.
|
|
65
|
-
</example>
|
|
66
|
-
<example>
|
|
67
|
-
Diff changes a status code in the handler:
|
|
68
|
-
\`\`\`
|
|
69
|
-
- res.status(200).json(...)
|
|
70
|
-
+ res.status(201).json(...)
|
|
71
|
-
\`\`\`
|
|
72
|
-
→ **UPDATE**: the test asserts \`toBe(200)\` which now fails. Patch the status assertion.
|
|
73
|
-
</example>
|
|
74
|
-
<example>
|
|
75
|
-
Diff adds a role gate to a route the test covers:
|
|
76
|
-
\`\`\`
|
|
77
|
-
+ if (user.role !== "owner") {
|
|
78
|
-
+ return res.status(403).json({ error: "forbidden_role" });
|
|
79
|
-
+ }
|
|
80
|
-
\`\`\`
|
|
81
|
-
→ **UPDATE**: the test's existing token now gets 403. Send a token with sufficient role and add a 403 negative assertion for the restricted role. (Authorization scope narrowed — not an auth-mechanism change; the Auth/AuthZ and Behavioral Contract checks cover this.)
|
|
82
|
-
</example>
|
|
83
|
-
<example>
|
|
84
|
-
Diff adds a state-transition guard:
|
|
85
|
-
\`\`\`
|
|
86
|
-
+ const VALID_TRANSITIONS = { draft: ["review"], review: ["published"] };
|
|
87
|
-
+ if (!VALID_TRANSITIONS[currentStatus]?.includes(newStatus)) {
|
|
88
|
-
+ throw new HTTPException(409, "invalid_transition");
|
|
89
|
-
+ }
|
|
90
|
-
\`\`\`
|
|
91
|
-
→ **UPDATE**: an integration test that previously posted \`draft→published\` directly now gets 409. Chain through the valid states (draft→review→published) and add a 409 assertion for the direct skip.
|
|
43
|
+
→ **REGENERATE**: root shape changed from a flat object to \`{data, meta}\`. Every existing assertion (e.g. \`response["id"]\`, \`response["status"]\`) is broken — rewrite the test from scratch.
|
|
92
44
|
</example>
|
|
93
45
|
</examples>
|
|
94
46
|
</decision_rules>`;
|
|
95
47
|
}
|
|
96
|
-
// Retained for backwards compatibility — no longer rendered in the prompt.
|
|
97
|
-
// Diff signals are now inlined into each individual check function.
|
|
98
|
-
/** @deprecated use the individual check functions; this function is no longer part of the prompt */
|
|
99
48
|
export function buildBreakingChangePatterns() {
|
|
100
|
-
return
|
|
101
|
-
}
|
|
102
|
-
export function buildCheckEndpointExistence() {
|
|
103
|
-
return `Does the endpoint the test targets still exist in the codebase?
|
|
49
|
+
return `## Breaking Change Patterns to Detect
|
|
104
50
|
|
|
105
|
-
|
|
106
|
-
- Route removed: \`- @app.route("/path")\`, \`- router.get("/path")\`, \`- @GetMapping("/path")\`
|
|
107
|
-
- Route renamed: paired \`-\` and \`+\` on a route decorator with a different path
|
|
51
|
+
Scan the diff lines for these high-signal patterns:
|
|
108
52
|
|
|
109
|
-
|
|
110
|
-
-
|
|
111
|
-
-
|
|
112
|
-
-
|
|
113
|
-
|
|
114
|
-
export function buildCheckResponseShape() {
|
|
115
|
-
return `Has the request body or response structure changed in a way that breaks the test?
|
|
53
|
+
### Endpoint-level breaking changes
|
|
54
|
+
- \`- @app.route("/old-path")\` / \`+ @app.route("/new-path")\` — renamed endpoint
|
|
55
|
+
- \`- router.get("/old")\` / \`+ router.get("/new")\` — renamed route
|
|
56
|
+
- \`- @GetMapping("/old")\` / \`+ @GetMapping("/new")\` — Spring rename
|
|
57
|
+
- Lines removing a route decorator entirely (endpoint removed)
|
|
116
58
|
|
|
117
|
-
|
|
118
|
-
- Field type
|
|
119
|
-
- Required
|
|
120
|
-
- Required query param added with no default
|
|
59
|
+
### Request/response shape changes
|
|
60
|
+
- Field type changes: \`- field: int\` → \`+ field: string\`
|
|
61
|
+
- Required field added: \`+ required: [..., "newField"]\`
|
|
121
62
|
- Response field removed: \`- "responseField":\`
|
|
122
|
-
- Enum value
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
-
|
|
132
|
-
-
|
|
133
|
-
|
|
63
|
+
- Enum value changes: \`- status: "active"\` → \`+ status: "enabled"\`
|
|
64
|
+
|
|
65
|
+
### Auth changes
|
|
66
|
+
- \`+ @require_auth\`, \`+ @login_required\`, \`+ middleware(authMiddleware)\`
|
|
67
|
+
- \`- @require_auth\` (auth removed)
|
|
68
|
+
- Token type changed: Bearer → Cookie
|
|
69
|
+
|
|
70
|
+
### Status code changes
|
|
71
|
+
- \`- return 200\` → \`+ return 201\`
|
|
72
|
+
- \`- status_code=200\` → \`+ status_code=204\`
|
|
73
|
+
- \`- res.status(201)\` → \`+ res.status(200)\`
|
|
74
|
+
|
|
75
|
+
### Additive response field changes (non-breaking but coverage gap)
|
|
76
|
+
These do NOT break existing assertions but leave the new field untested. Always flag as UPDATE for covered endpoints.
|
|
77
|
+
- \`+ "newField": queryset.filter(...).count()\` added inside a \`Response({...})\` or \`res.json({...})\`
|
|
78
|
+
- \`+ newField = serializers.XXXField()\` added to a serializer used by a tested endpoint
|
|
79
|
+
- \`+ "newField":\` added to a response body dict returned by the endpoint
|
|
80
|
+
- New key added inside an existing dict/object returned by the endpoint`;
|
|
134
81
|
}
|
|
135
|
-
export function
|
|
136
|
-
return
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
-
|
|
144
|
-
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
82
|
+
export function buildTestAssessmentGuidelines() {
|
|
83
|
+
return `## Per-Test Assessment (Checks A–D)
|
|
84
|
+
|
|
85
|
+
For each existing test file, run these checks:
|
|
86
|
+
|
|
87
|
+
### Check A: Endpoint existence
|
|
88
|
+
Does the endpoint the test targets still exist in the codebase?
|
|
89
|
+
- If ALL endpoints the test covers were removed → action: DELETE (the entire test file is obsolete)
|
|
90
|
+
- If SOME methods were removed but others remain → action: UPDATE (remove the test functions for deleted methods, keep the rest)
|
|
91
|
+
- If the endpoint was renamed → action: UPDATE (path substitution)
|
|
92
|
+
|
|
93
|
+
### Check B: Request/response shape (breaking changes)
|
|
94
|
+
Has the request body or response structure changed in a way that breaks the test?
|
|
95
|
+
- Compare test's expected fields against current schema/model definitions
|
|
96
|
+
- Type changes (string→int, int→string) on individual fields → action: UPDATE
|
|
97
|
+
- Type change restructures the root object or makes the entire request body invalid → action: REGENERATE
|
|
98
|
+
- New required fields the test doesn't send → action: UPDATE
|
|
99
|
+
- Response fields the test asserts on have been removed → action: UPDATE
|
|
100
|
+
- ≥50% of the test's assertions reference fields that were removed or restructured → action: REGENERATE
|
|
101
|
+
|
|
102
|
+
**UPDATE vs REGENERATE:** choose UPDATE when changes are field-level (≤2 fields added, removed, renamed, or type-swapped). Choose REGENERATE only when the root response shape changed (flat→nested, new wrapper object, root key renamed) or ≥50% of assertions are broken.
|
|
103
|
+
|
|
104
|
+
### Check B2: Additive response field changes (coverage gaps)
|
|
105
|
+
**Even if existing assertions still pass**, does the diff add a new field to the response of an endpoint this test already covers?
|
|
106
|
+
- Look at the diff for lines like \`+ "newField":\` or \`+ newField =\` inside a view/serializer this test hits
|
|
107
|
+
- If YES → action: UPDATE
|
|
108
|
+
- This applies even when the test only checks status codes — the test should be extended to cover the new field
|
|
109
|
+
- A new response field on a covered endpoint always triggers UPDATE — even when existing assertions still pass.
|
|
110
|
+
|
|
111
|
+
### Check C: Auth changes
|
|
112
|
+
Has the authentication mechanism for this endpoint changed?
|
|
113
|
+
- Auth added where none existed → action: UPDATE
|
|
114
|
+
- Auth method changed (bearer→cookie) → action: UPDATE
|
|
115
|
+
- Auth removed → action: VERIFY
|
|
116
|
+
|
|
117
|
+
### Check D: Assign action
|
|
118
|
+
Based on the above, choose the action (IGNORE / VERIFY / UPDATE / REGENERATE / DELETE) and provide a 1-2 sentence rationale.
|
|
119
|
+
- If Check B2 flagged an additive field → action must be UPDATE, even if Checks B/C found no breaking changes.`;
|
|
157
120
|
}
|
|
158
|
-
export function
|
|
159
|
-
return
|
|
160
|
-
|
|
161
|
-
Diff signals and actions:
|
|
162
|
-
- **Validation tightened**: \`+ raise ValidationError\`/\`+ throw new ValidationError\` gated on field value, \`+ Field(pattern=...)\`/\`ge=\`/\`le=\`/\`max_length=\` on an existing field → UPDATE (fix the payload to satisfy the new constraint; add the 4xx negative case)
|
|
163
|
-
- **New conditional rejection / state guard**: \`+ raise HTTPException(status_code=409)\`/\`+ res.status(409)\` inside a new \`if\`, \`+ VALID_TRANSITIONS\`, \`+ allowed_states = ...\` → UPDATE (chain through valid states; assert the rejection status for the now-illegal path)
|
|
164
|
-
- **Sync → async**: \`- return 200 result\` / \`+ return 202 {job_id}\` → UPDATE (assert \`202\` and the job/id field; remove old result-field assertions from the immediate response)
|
|
165
|
-
- **Computed-field formula changed**: \`- total = a - b\` / \`+ total = a + tax - b\` on an existing asserted field → UPDATE; describe the new formula in \`updateInstructions\` and provide the recomputed expected value where inputs are known from the diff
|
|
166
|
-
- **Behavior gated on a request header**: old shape returned only when a version header is sent; new shape is now the default → UPDATE (migrate assertions to the new default shape, or pin the old shape by sending the version header)
|
|
121
|
+
export function buildAddRecommendationGuidelines() {
|
|
122
|
+
return `## ADD — New Tests for New Endpoints
|
|
167
123
|
|
|
168
|
-
**
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
return `Based on the above checks, choose the action (IGNORE / VERIFY / UPDATE / REGENERATE / DELETE) and provide a 1-2 sentence rationale.
|
|
172
|
-
|
|
173
|
-
**Every action requires a specific rationale — including IGNORE:**
|
|
174
|
-
- UPDATE / REGENERATE / DELETE: quote the specific diff line that triggered it.
|
|
175
|
-
- VERIFY: name the uncertain element (e.g. "shared serializer, cannot confirm field exposure without reading the file").
|
|
176
|
-
- IGNORE: name the specific reason the changed code cannot reach this test's endpoint (e.g. "diff only touches \`auth/session-service.js\` — this test targets \`/api/v1/orders\` which has no session dependency"). Generic "unrelated endpoint" or "service boundary" without a diff reference is not sufficient.
|
|
177
|
-
|
|
178
|
-
- If the Additive Fields check flagged a new field with serializer signal confirmed in the diff → action is UPDATE. If the Additive Fields check returned VERIFY (model/schema only, serializer not in diff) → action remains VERIFY.
|
|
179
|
-
- **Service/layer scope gate is terminal:** If the changed code is clearly not reachable through the service or base URL this test targets, assign IGNORE — this overrides all other checks. When reachability is uncertain, assign VERIFY rather than IGNORE.
|
|
180
|
-
- **Pre-commit verification — confirm all three before finalizing UPDATE/REGENERATE/DELETE:**
|
|
181
|
-
1. You can quote a specific diff line this test's endpoint observes that triggered the action.
|
|
182
|
-
2. The changed code is reachable through this test's service and base URL.
|
|
183
|
-
3. For REGENERATE: every assertion in the file is invalid, not just some — if you can patch N paths, it is UPDATE.
|
|
184
|
-
If any check fails, downgrade to VERIFY or IGNORE.
|
|
185
|
-
- **For user-written (external) tests** marked \`[external]\` in the test list:
|
|
186
|
-
- UPDATE is permitted — targeted edits (fix renamed URL, add assertion for new field).
|
|
187
|
-
- REGENERATE and DELETE are **not permitted** — assign those actions in your recommendations but \`skyramp_actions\` will surface them as report-only findings for the developer to act on. Do NOT attempt to rewrite or delete a user-authored test file.`;
|
|
188
|
-
}
|
|
189
|
-
export function buildCheckAdditiveFields() {
|
|
190
|
-
return `Even if existing assertions still pass, new response fields on a covered endpoint may need a new assertion.
|
|
124
|
+
**ADD applies only when:**
|
|
125
|
+
- The diff introduces a brand-new route that has **no existing test coverage at all**, OR
|
|
126
|
+
- The diff introduces a new auth path, error branch, or fundamentally separate scenario that no existing test covers.
|
|
191
127
|
|
|
192
|
-
|
|
193
|
-
-
|
|
194
|
-
-
|
|
195
|
-
- \`+ newField = Column(...)\` or \`+ newField:\` in a model/migration only, with no serializer change → model-only signal
|
|
128
|
+
**Use UPDATE instead of ADD when:**
|
|
129
|
+
- The resource already has existing tests and the diff only adds a new HTTP method — add the new method's test cases to the existing file.
|
|
130
|
+
- The endpoint existed before this diff but lacks tests — this is neither ADD nor UPDATE: log it in \`additionalRecommendations\` and skip it; pre-existing coverage gaps are out of scope for ADD.
|
|
196
131
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
132
|
+
**Test type priority by HTTP method:**
|
|
133
|
+
| Method | Recommended test types |
|
|
134
|
+
|--------|----------------------|
|
|
135
|
+
| POST / PUT / PATCH | integration, contract |
|
|
136
|
+
| GET | contract, smoke |
|
|
137
|
+
| DELETE | integration, smoke |
|
|
201
138
|
|
|
202
|
-
|
|
139
|
+
Use a unique descriptive filename for every new test file. For a resource with existing tests, update the existing file — always prefer UPDATE over creating a new file.`;
|
|
203
140
|
}
|
|
204
141
|
export function buildUpdateExecutionRules() {
|
|
205
142
|
return `<execution_rules>
|
|
206
|
-
|
|
207
|
-
UPDATE instructs you to modify the existing file using the Edit tool. Do NOT call \`skyramp_contract_test_generation\`, \`skyramp_integration_test_generation\`, or any other generation tool for an UPDATE action — generation tools create a new file and will overwrite or duplicate the existing one. Only use generation tools for REGENERATE actions.
|
|
143
|
+
## Update Execution Rules
|
|
208
144
|
|
|
209
145
|
When applying UPDATE actions to existing test files, follow these rules in addition to the drift-detected changes:
|
|
210
146
|
|
|
211
|
-
|
|
147
|
+
### Test file ordering
|
|
212
148
|
Place mutation test functions (PATCH, PUT, POST) **before** any DELETE test function targeting the same resource. DELETE removes the resource — any mutation call after it will 404. When inserting a new mutation test, place it above the DELETE function and above the DELETE call in the \`if __name__ == "__main__"\` block (or equivalent runner entrypoint).
|
|
213
149
|
|
|
214
|
-
|
|
150
|
+
### Happy path first
|
|
215
151
|
When adding a new HTTP method (PUT, PATCH, POST) to an existing test file, always include a 2xx success assertion first. Error-path tests (404, 422) may follow, but the happy path case is required.
|
|
216
152
|
|
|
217
|
-
|
|
153
|
+
### All test files for a resource
|
|
218
154
|
When a diff adds a new HTTP method to a resource, UPDATE covers **all** existing test files for that resource — contract, integration, and UI. Apply UPDATE to every file the analyze tool reported for that resource path; do not stop after updating the first one.
|
|
219
155
|
|
|
220
|
-
|
|
156
|
+
### PATCH/PUT with child collections
|
|
221
157
|
Child collection arrays (e.g. \`items\`, \`products\`, \`line_items\`) drive computed totals — a test that omits them cannot catch the most common mutation bugs. When the request/response includes a child collection:
|
|
222
158
|
1. Include the child array with at least one item containing the FK field (e.g. \`product_id\`) and a \`quantity\` field.
|
|
223
159
|
2. Assert each item's FK field and \`quantity\` match the sent values.
|
|
224
160
|
3. Assert the top-level computed total (e.g. \`total_amount\`) equals the expected math from the items.
|
|
225
161
|
|
|
226
|
-
|
|
162
|
+
### REGENERATE
|
|
227
163
|
Call the appropriate generation tool to replace the existing test from scratch. Use the same filename so it overwrites the old file.
|
|
228
164
|
|
|
229
|
-
|
|
230
|
-
|
|
165
|
+
### DELETE
|
|
166
|
+
Remove the test file when ALL endpoints it covers were removed from the codebase. If only SOME methods were removed, use UPDATE instead — remove the test functions for deleted methods and keep the rest.
|
|
231
167
|
|
|
232
|
-
|
|
168
|
+
### Test data isolation
|
|
233
169
|
Never use hardcoded resource IDs (e.g. \`order_id=1\`) in any test step, including GET or DELETE steps. Always create required resources via prior POST steps and chain IDs dynamically. Use timestamp-based unique names for created resources (e.g. \`"Product-\${int(time.time())}"\`) to prevent collisions across test runs.
|
|
234
170
|
|
|
235
|
-
|
|
236
|
-
When adding assertions, assert response *values* (field equals expected), not just field presence or status code — match the assertion depth the test already uses for other fields.
|
|
237
|
-
|
|
238
|
-
**Enhance assertions after UPDATE**
|
|
171
|
+
### Enhance assertions after UPDATE
|
|
239
172
|
Call \`skyramp_enhance_assertions\` with \`testFile\` set to the absolute path of the test file you just updated, \`enhanceType: "maintenance"\`, and the matching \`testType\` based on the file you are editing:
|
|
240
173
|
- **Integration test file** (multi-step chained requests): call with \`testType: "integration"\`
|
|
241
174
|
- **Contract-provider test file** (single endpoint with \`beforeAll\`/\`afterAll\` setup, provider mode): call with \`testType: "contract"\`. Skip for consumer-mode contract tests.
|
|
@@ -244,35 +177,44 @@ Call \`skyramp_enhance_assertions\` with \`testFile\` set to the absolute path o
|
|
|
244
177
|
Then apply every instruction returned by the tool to the test file.
|
|
245
178
|
</execution_rules>`;
|
|
246
179
|
}
|
|
247
|
-
export function buildDriftOutputChecklist(existingTestCount, newEndpointCount, stateFile) {
|
|
248
|
-
const finalStep =
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
const existingTestSection = `**Existing tests (${existingTestCount} total)**
|
|
180
|
+
export function buildDriftOutputChecklist(existingTestCount, newEndpointCount, inlineMode = false, stateFile) {
|
|
181
|
+
const finalStep = inlineMode
|
|
182
|
+
? `### Final step
|
|
183
|
+
Apply all maintenance actions (UPDATE / REGENERATE / DELETE) directly by editing the test files. Apply IGNORE, VERIFY, UPDATE, REGENERATE, or DELETE only — ADD is handled in the next task.`
|
|
184
|
+
: `### Final step
|
|
185
|
+
After completing all assessments above, call \`skyramp_actions\` with \`stateFile: "${stateFile ?? "<stateFile>"}"\` and a \`recommendations\` entry for every test assessed. For each entry include: \`testFile\` (absolute path as reported by the analysis tools), \`action\`, \`rationale\`, \`updateInstructions\` (free-form summary of what this test must change — new fields to assert, constraint details, auth changes, new request params, or any other drift specifics; \`skyramp_actions\` passes this directly to the downstream LLM editing the file), and \`renamedEndpoints\` (for path-rename updates).
|
|
186
|
+
|
|
187
|
+
Call \`skyramp_actions\` as the sole final action — skip all other file writes.`;
|
|
188
|
+
const existingTestHeader = inlineMode
|
|
189
|
+
? "### Existing tests (reported by skyramp_analyze_changes)"
|
|
190
|
+
: `### Existing tests (${existingTestCount} total)`;
|
|
191
|
+
const existingTestSection = `${existingTestHeader}
|
|
260
192
|
For each existing test:
|
|
261
193
|
- **IGNORE/VERIFY tests**: one line each: \`{testFile} — IGNORE\` or \`{testFile} — VERIFY\`. Rationale omitted for brevity.
|
|
262
194
|
- **UPDATE/REGENERATE/DELETE tests**: output the full block:
|
|
263
195
|
\`\`\`
|
|
264
196
|
Test: {testFile}
|
|
265
197
|
Action: {UPDATE | REGENERATE | DELETE}
|
|
266
|
-
Rationale: {
|
|
198
|
+
Rationale: {1-2 sentence explanation}
|
|
267
199
|
\`\`\`
|
|
268
200
|
Focus your analysis on tests that need action — keep reasoning for unchanged tests to a single line.`;
|
|
269
|
-
const newEndpointSection =
|
|
270
|
-
?
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
201
|
+
const newEndpointSection = inlineMode
|
|
202
|
+
? ""
|
|
203
|
+
: newEndpointCount > 0
|
|
204
|
+
? `### New endpoints (${newEndpointCount} detected)
|
|
205
|
+
For EACH new endpoint, output:
|
|
206
|
+
\`\`\`
|
|
207
|
+
Endpoint: {METHOD} {path}
|
|
208
|
+
Action: ADD
|
|
209
|
+
Test types: {contract | integration | smoke | ...}
|
|
210
|
+
Rationale: {1 sentence}
|
|
211
|
+
\`\`\``
|
|
212
|
+
: `### New endpoints
|
|
213
|
+
No new endpoints detected in this diff.`;
|
|
214
|
+
const sections = [existingTestSection, newEndpointSection, finalStep].filter(s => s.length > 0);
|
|
275
215
|
return `<output_format>
|
|
216
|
+
## Output Checklist
|
|
217
|
+
|
|
276
218
|
Complete ALL of the following:
|
|
277
219
|
|
|
278
220
|
${sections.join("\n\n")}
|
|
@@ -2,6 +2,7 @@ import { z } from "zod";
|
|
|
2
2
|
import { logger } from "../../utils/logger.js";
|
|
3
3
|
import { AnalyticsService } from "../../services/AnalyticsService.js";
|
|
4
4
|
import { MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, PATH_PARAM_UUID_GUIDANCE, AUTH_CONFLICT_ERROR_MSG, } from "../test-recommendation/recommendationSections.js";
|
|
5
|
+
import { buildDriftAnalysisPrompt } from "../test-maintenance/drift-analysis-prompt.js";
|
|
5
6
|
import { getTraceRecordingPromptText } from "../../playwright/traceRecordingPrompt.js";
|
|
6
7
|
import { isContractConsumerModeEnabled } from "../../utils/featureFlags.js";
|
|
7
8
|
import { resolveServiceDetailsRef } from "../../utils/utils.js";
|
|
@@ -65,13 +66,9 @@ Use those recommendations as your baseline. Only add or remove tests that the us
|
|
|
65
66
|
**If \`skyramp_analyze_changes\` returns an error:** retry once only if the error is transient (timeout, network blip, temporary unavailability) — do NOT retry for permanent errors (invalid repository path, missing required parameter, authentication failure). If it fails again, call \`skyramp_submit_report\` with a minimal valid payload: leave all test arrays empty and add the error to \`issuesFound\`. Refer to the \`skyramp_submit_report\` schema for required fields. Do NOT attempt Task 2 without a valid stateFile.
|
|
66
67
|
**If all changed files are non-application** (CI/CD, docs, lock files, config) → skip to Task 3 (Submit Report) with empty arrays and a single \`issuesFound\` entry explaining why (same format as the zero-test path below).
|
|
67
68
|
|
|
68
|
-
3. **Maintain existing tests
|
|
69
|
+
3. **Maintain existing tests** using the rules in \`<drift_analysis_rules>\` below. For each existing test reported by \`skyramp_analyze_changes\`, score it and choose the action exactly as directed by the Action Decision Matrix in \`<drift_analysis_rules>\`. Only read test files that require action per that matrix — do NOT read files that will be IGNORED. **Do NOT read source files (routers, models, CRUD, components) — all the information you need is in the \`skyramp_analyze_changes\` output and the diff.** When reading multiple test files, **read them all in a single parallel batch** — do NOT read them one at a time. Apply actions directly. Results go in \`testMaintenance\`.
|
|
69
70
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
b. For each test scored UPDATE or REGENERATE, write \`updateInstructions\` (a concise description of what must change) **before** calling \`skyramp_actions\`. This articulation step prevents the LLM from letting file content override diff-based reasoning.
|
|
73
|
-
|
|
74
|
-
c. Call \`skyramp_actions\` with \`stateFile\` (from step 2) and your \`recommendations[]\` — one entry per test assessed, including IGNORE and VERIFY. The tool returns file content for each UPDATE/REGENERATE test — apply the edits. Results go in \`testMaintenance\`.
|
|
71
|
+
${buildDriftAnalysisPrompt({ existingTests: [], scannedEndpoints: [], repositoryPath })}
|
|
75
72
|
|
|
76
73
|
4. **Code review:** From the \`skyramp_analyze_changes\` output and the existing test files you read for maintenance, note any logic bugs. Do NOT read additional source files just for code review — use what is already available from the analysis and test file reads. Common patterns to flag:
|
|
77
74
|
- Computed fields not recalculated after mutation (e.g. \`total_amount\` unchanged after items are added/removed)
|
|
@@ -334,7 +331,7 @@ Call \`skyramp_submit_report\` with \`summaryOutputFile\`: "${summaryOutputFile}
|
|
|
334
331
|
- **additionalRecommendations**: AT MOST ${maxRecommendations - maxGenerate} items.
|
|
335
332
|
- For \`testType: "contract"\` entries: **\`primaryEndpoint\` is required** (e.g. \`"GET /api/v1/users/{user_id}"\`). The tool will reject the submission without it — do not omit it or you will be forced to resubmit.
|
|
336
333
|
- For \`testType: "integration"\` or \`"e2e"\` entries: omit \`primaryEndpoint\` — use \`description\` to list the endpoints involved instead.
|
|
337
|
-
- **testMaintenance**: Use \`[]\` **only** if no existing Skyramp tests were found in the repository. If existing tests were found (any score), include one entry per test. Set \`action\` to the exact drift action
|
|
334
|
+
- **testMaintenance**: Use \`[]\` **only** if no existing Skyramp tests were found in the repository. If existing tests were found (any score), include one entry per test. Set \`action\` to the exact drift action you chose from the Action Decision Matrix (\`UPDATE\`, \`REGENERATE\`, \`DELETE\`, \`VERIFY\`, or \`IGNORE\`). For UPDATE/REGENERATE/DELETE tests that were modified and executed, populate all fields from real before/after execution results. For VERIFY/IGNORE tests (not modified), derive \`beforeStatus\` from the \`skyramp_analyze_test_health\` health score (typically \`"Pass"\` if drift score is 0 and no health issues were flagged), set \`afterStatus\` to \`"Skipped"\`, and use \`afterDetails\` to explain why (e.g. "IGNORE: drift score 0 — endpoint not modified in this PR"). Do **not** add entries for tests that were not returned by the health analysis.
|
|
338
335
|
|
|
339
336
|
---
|
|
340
337
|
|
|
@@ -202,40 +202,35 @@ describe("uiCredentials in getTestbotPrompt", () => {
|
|
|
202
202
|
.toThrow("</ui-credentials>");
|
|
203
203
|
});
|
|
204
204
|
});
|
|
205
|
-
describe("drift analysis
|
|
206
|
-
|
|
207
|
-
|
|
205
|
+
describe("drift analysis inline embedding", () => {
|
|
206
|
+
beforeAll(() => { process.env.SKYRAMP_FEATURE_TESTBOT = "1"; });
|
|
207
|
+
afterAll(() => { delete process.env.SKYRAMP_FEATURE_TESTBOT; });
|
|
208
208
|
function basePrompt() {
|
|
209
209
|
return getTestbotPrompt(baseArgs.prTitle, baseArgs.prDescription, baseArgs.summaryOutputFile, baseArgs.repositoryPath);
|
|
210
210
|
}
|
|
211
|
-
it("
|
|
211
|
+
it("wraps inline drift rules in XML tags", () => {
|
|
212
212
|
const prompt = basePrompt();
|
|
213
|
-
expect(prompt).toContain("
|
|
213
|
+
expect(prompt).toContain("<drift_analysis_rules>");
|
|
214
|
+
expect(prompt).toContain("</drift_analysis_rules>");
|
|
214
215
|
});
|
|
215
|
-
it("
|
|
216
|
+
it("does not include a persona statement inside the inline XML block", () => {
|
|
216
217
|
const prompt = basePrompt();
|
|
217
|
-
|
|
218
|
-
|
|
218
|
+
const start = prompt.indexOf("<drift_analysis_rules>");
|
|
219
|
+
const end = prompt.indexOf("</drift_analysis_rules>");
|
|
220
|
+
const block = prompt.slice(start, end);
|
|
221
|
+
expect(block).not.toContain("You are acting as a Skyramp Integration Architect");
|
|
219
222
|
});
|
|
220
|
-
it("
|
|
223
|
+
it("drift_analysis_rules block appears inside Task 1, before Task 2", () => {
|
|
221
224
|
const prompt = basePrompt();
|
|
222
225
|
const task1Pos = prompt.indexOf("## Task 1");
|
|
223
|
-
const
|
|
226
|
+
const rulesPos = prompt.indexOf("<drift_analysis_rules>");
|
|
224
227
|
const task2Pos = prompt.indexOf("## Task 2");
|
|
225
|
-
expect(
|
|
226
|
-
expect(
|
|
228
|
+
expect(rulesPos).toBeGreaterThan(task1Pos);
|
|
229
|
+
expect(rulesPos).toBeLessThan(task2Pos);
|
|
227
230
|
});
|
|
228
|
-
it("
|
|
229
|
-
// The rules are now fetched at runtime via skyramp_analyze_test_health —
|
|
230
|
-
// the <drift_analysis_rules> tag may appear as a reference in prose,
|
|
231
|
-
// but the actual rule content (Action Decision Tree) must not be baked in.
|
|
231
|
+
it("Task 1 step 3 prose references drift_analysis_rules tag", () => {
|
|
232
232
|
const prompt = basePrompt();
|
|
233
|
-
expect(prompt).
|
|
234
|
-
expect(prompt).not.toContain("Update Execution Rules\n\nWhen applying UPDATE actions");
|
|
235
|
-
});
|
|
236
|
-
it("does not contain a persona statement (no nested identity from old embed)", () => {
|
|
237
|
-
const prompt = basePrompt();
|
|
238
|
-
expect(prompt).not.toContain("You are acting as a Skyramp Integration Architect");
|
|
233
|
+
expect(prompt).toContain("rules in `<drift_analysis_rules>`");
|
|
239
234
|
});
|
|
240
235
|
});
|
|
241
236
|
describe("UI grounding via Task 2 capture-act-capture", () => {
|