@skyramp/mcp 0.0.64-rc.4 → 0.0.64-rc.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +182 -16
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +580 -11
- package/build/prompts/testbot/testbot-prompts.js +6 -6
- package/build/utils/trace-parser.js +21 -3
- package/build/utils/trace-parser.test.js +140 -0
- package/package.json +1 -1
|
@@ -44,7 +44,16 @@ function computeTiebreakerSeed(endpoints, diffFiles) {
|
|
|
44
44
|
return crypto.createHash("sha256").update(canonical).digest("hex").slice(0, 8);
|
|
45
45
|
}
|
|
46
46
|
// ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
|
|
47
|
-
function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false) {
|
|
47
|
+
function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, isDiffScope = false, isFrontendProject = false, isFrontendOnlyProject = false) {
|
|
48
|
+
// Full-repo mode only — percentage-based UI/E2E slot targets (15% each, floor 1).
|
|
49
|
+
// Capped so E2E+UI together never exceed topN.
|
|
50
|
+
// Referenced in supplementNote below, but the ternary conditions that use them
|
|
51
|
+
// (`isFrontendProject && !isDiffScope`) are always false in PR/diff mode.
|
|
52
|
+
const rawE2E = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
|
|
53
|
+
const rawUI = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
|
|
54
|
+
const slotsFloor = Math.floor(topN / 2);
|
|
55
|
+
const minE2ESlots = Math.min(rawE2E, slotsFloor);
|
|
56
|
+
const minUISlots = Math.min(rawUI, Math.max(0, topN - minE2ESlots));
|
|
48
57
|
const generateItems = scored.slice(0, Math.min(maxGen, scored.length));
|
|
49
58
|
const additionalItems = scored.slice(maxGen, topN);
|
|
50
59
|
const authRef = authHeaderValue
|
|
@@ -109,9 +118,10 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
109
118
|
prereqNote);
|
|
110
119
|
}
|
|
111
120
|
}).join("\n\n");
|
|
112
|
-
//
|
|
113
|
-
// traces
|
|
114
|
-
|
|
121
|
+
// Reserve slots for UI/E2E ADDITIONAL recommendations on mixed PRs.
|
|
122
|
+
// E2E requires traces to generate — only reserve the slot when traces are available.
|
|
123
|
+
// UI can be recommended without traces (agent can record inline).
|
|
124
|
+
const needsE2ESlot = hasFrontendChanges && !isUIOnlyPR && hasTraces;
|
|
115
125
|
const needsUISlot = hasFrontendChanges && !isUIOnlyPR;
|
|
116
126
|
const frontendSlots = (needsE2ESlot ? 1 : 0) + (needsUISlot ? 1 : 0);
|
|
117
127
|
const backendAdditionalItems = frontendSlots > 0
|
|
@@ -143,8 +153,159 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
143
153
|
})() : "";
|
|
144
154
|
const supplementCount = topN - generateItems.length - backendAdditionalItems.length - frontendSlots;
|
|
145
155
|
const supplementNote = supplementCount > 0
|
|
146
|
-
? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them
|
|
156
|
+
? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them in this priority order — exhaust each tier before moving to the next:\n\n**Tier 1:** Edge-case and error-path tests for endpoints already in the ${isDiffScope ? "GENERATE set" : "list"} — boundary values for numeric fields (e.g. 0%, 100%, >100% discount), invalid/non-existent IDs (→ 404), empty arrays where a minimum is required, missing required fields (→ 422), auth boundary (call without Authorization header → 403/401).\n\n**Tier 2:** Auth-boundary contract tests for any endpoint not yet covered.\n\n**Tier 3:** Cross-resource integration tests — ONLY when one resource's POST body contains the other's \`_id\` field. NEVER pair resources where neither POST body has the other's ID.\n\n**Tier 4:** CRUD lifecycle tests for any resource not yet covered.\n\nUse the same 5-dimension rubric to assign priority (HIGH/MEDIUM/LOW), testType, and category. For each supplement item, apply the same source-code enrichment from Step 1 — use real field names from the route handler, not generic placeholders.${isFrontendOnlyProject && !isDiffScope ? ` Since this is a frontend repo, the supplement MUST include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.` : isFrontendProject && !isDiffScope ? ` Since this is a full-stack repo, the supplement MUST include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\` — full browser-to-backend flow) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\` — component-level interaction flows). Add these before exhausting backend tiers.` : hasFrontendChanges && !isUIOnlyPR ? 
" Since this PR has frontend changes, at least 1 of these should be a UI or E2E test targeting the changed components." : ""} Do NOT produce fewer than ${topN} total.`
|
|
147
157
|
: "";
|
|
158
|
+
// ── Full-repo mode: recommendations only, no execution ──────────────────
|
|
159
|
+
if (!isDiffScope) {
|
|
160
|
+
const toTitle = (name) => name.replace(/-/g, " ").replace(/\b\w/g, c => c.toUpperCase());
|
|
161
|
+
// Coverage ranking (highest to lowest breadth):
|
|
162
|
+
// E2E first: full browser-to-backend flow — exercises both frontend and backend.
|
|
163
|
+
// UI second: frontend components call backend APIs — also exercises backend.
|
|
164
|
+
// Integration third: backend API chains validated directly.
|
|
165
|
+
// Contract last: single-endpoint boundary only.
|
|
166
|
+
const TYPE_ORDER = ["e2e", "ui", "integration", "contract"];
|
|
167
|
+
const TYPE_LABEL = {
|
|
168
|
+
e2e: "E2E", ui: "UI", integration: "Integration", contract: "Contract",
|
|
169
|
+
};
|
|
170
|
+
// All scored items up to topN, already sorted by priority/novelty
|
|
171
|
+
const allItems = scored.slice(0, topN);
|
|
172
|
+
// Group by test type while preserving priority ordering within each group
|
|
173
|
+
const byType = new Map();
|
|
174
|
+
for (const t of TYPE_ORDER)
|
|
175
|
+
byType.set(t, []);
|
|
176
|
+
for (const item of allItems) {
|
|
177
|
+
const t = item.scenario.testType ?? (item.scenario.steps.length === 1 ? "contract" : "integration");
|
|
178
|
+
if (!byType.has(t))
|
|
179
|
+
byType.set(t, []);
|
|
180
|
+
byType.get(t).push(item);
|
|
181
|
+
}
|
|
182
|
+
const renderItem = (item, rank) => {
|
|
183
|
+
const s = item.scenario;
|
|
184
|
+
const testType = s.testType ?? (s.steps.length === 1 ? "contract" : "integration");
|
|
185
|
+
const title = toTitle(s.scenarioName);
|
|
186
|
+
if (testType === "contract") {
|
|
187
|
+
const step = s.steps[0];
|
|
188
|
+
const endpointURL = `${baseUrl}${step.path}`;
|
|
189
|
+
const isBodyMethod = ["POST", "PUT", "PATCH"].includes(step.method);
|
|
190
|
+
const dataParam = isBodyMethod
|
|
191
|
+
? `, requestData: <${step.method} ${step.path} required fields from source code>`
|
|
192
|
+
: "";
|
|
193
|
+
return [
|
|
194
|
+
`**${rank}. ${title}**`,
|
|
195
|
+
` ${s.description}`,
|
|
196
|
+
` ${step.method} ${step.path} → ${step.expectedStatusCode}`,
|
|
197
|
+
` Tool: \`skyramp_contract_test_generation({ endpointURL: "${endpointURL}", method: "${step.method}"${authRef}${dataParam} })\``,
|
|
198
|
+
` From source: fill in requestData field names and the specific production boundary this validates`,
|
|
199
|
+
].join("\n");
|
|
200
|
+
}
|
|
201
|
+
else {
|
|
202
|
+
const stepLines = s.steps.map(st => {
|
|
203
|
+
const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
|
|
204
|
+
const bodyHint = isBody ? ` — body: <${st.method} ${st.path} required fields from source>` : "";
|
|
205
|
+
return ` ${st.order}. ${st.method} ${st.path} → ${st.expectedStatusCode}: ${st.description}${bodyHint}`;
|
|
206
|
+
}).join("\n");
|
|
207
|
+
const toolCalls = s.steps.map(st => {
|
|
208
|
+
const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
|
|
209
|
+
const dataParam = isBody
|
|
210
|
+
? `, requestBody: <${st.method} ${st.path} required fields from source>`
|
|
211
|
+
: "";
|
|
212
|
+
return ` skyramp_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${s.scenarioName}", baseURL: "${baseUrl}", method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${scenarioAuthRef}${dataParam} })`;
|
|
213
|
+
}).join("\n");
|
|
214
|
+
// E2E and UI use trace-based generation, not the scenario pipeline.
|
|
215
|
+
// Only emit per-step skyramp_scenario_test_generation calls for integration type.
|
|
216
|
+
const isTraceBased = testType === "e2e" || testType === "ui";
|
|
217
|
+
const finalTool = testType === "e2e"
|
|
218
|
+
? `skyramp_e2e_test_generation({ playwrightZip: "<trace zip path>", traceFile: "<backend trace path>"${authHeaderOnlyRef} })`
|
|
219
|
+
: testType === "ui"
|
|
220
|
+
? `skyramp_ui_test_generation({ playwrightZip: "<trace zip path>"${authHeaderOnlyRef} })`
|
|
221
|
+
: `skyramp_integration_test_generation({ scenarioFile: "scenario_${s.scenarioName}.json"${authHeaderOnlyRef} })`;
|
|
222
|
+
const toolCallsBlock = isTraceBased
|
|
223
|
+
? ` ${finalTool}`
|
|
224
|
+
: `${toolCalls}\n ${finalTool}`;
|
|
225
|
+
return [
|
|
226
|
+
`**${rank}. ${title}**`,
|
|
227
|
+
` ${s.description}`,
|
|
228
|
+
` Steps:`,
|
|
229
|
+
stepLines,
|
|
230
|
+
` Tool calls:`,
|
|
231
|
+
toolCallsBlock,
|
|
232
|
+
` From source: fill in requestBody field values and assert all computed response fields`,
|
|
233
|
+
].join("\n");
|
|
234
|
+
}
|
|
235
|
+
};
|
|
236
|
+
const sections = TYPE_ORDER
|
|
237
|
+
.filter(t => (byType.get(t) ?? []).length > 0)
|
|
238
|
+
.map(t => {
|
|
239
|
+
const items = byType.get(t);
|
|
240
|
+
const label = TYPE_LABEL[t];
|
|
241
|
+
let globalRank = 0;
|
|
242
|
+
for (const prev of TYPE_ORDER) {
|
|
243
|
+
if (prev === t)
|
|
244
|
+
break;
|
|
245
|
+
globalRank += (byType.get(prev) ?? []).length;
|
|
246
|
+
}
|
|
247
|
+
const entries = items.map((item, i) => renderItem(item, globalRank + i + 1)).join("\n\n");
|
|
248
|
+
return `### ${label} (${items.length})\n\n${entries}`;
|
|
249
|
+
})
|
|
250
|
+
.join("\n\n");
|
|
251
|
+
const repoSupplementNote = supplementNote; // already built above with isDiffScope=false tier ordering
|
|
252
|
+
return `## Test Recommendations (${topN} total)
|
|
253
|
+
|
|
254
|
+
> **Repo mode — no tests are executed.** Use the tool calls below to generate any recommendation on demand.
|
|
255
|
+
> Highest-value tests appear first within each type. Use the "From source" hint in each item to fill in field names and assertions before calling the tool.
|
|
256
|
+
|
|
257
|
+
**Step 1 — Source-Code Enrichment (MANDATORY before presenting anything)**
|
|
258
|
+
For each endpoint listed in the Repository Context above, read the route handler source code. Look for:
|
|
259
|
+
- **All required request body fields** (names and types) for POST/PUT/PATCH — use in step descriptions and tool call params
|
|
260
|
+
- **Computed/derived response fields** (e.g. \`total_amount\`, \`discount_percent\`) and their formulas — assert these
|
|
261
|
+
- **Auth middleware** (HTTPBearer, \`Depends(get_current_user)\`, \`@UseGuards\`, \`jwt.verify\`) — set \`authHeader\`/\`authScheme\`; FastAPI HTTPBearer → **403** (not 401)
|
|
262
|
+
- **Storage backend** — if Redis or schema-less, discard unique-constraint and cascade-delete scenarios
|
|
263
|
+
- **Delete behavior** — hard-delete → 204; soft-delete/cancel → 200
|
|
264
|
+
|
|
265
|
+
Do NOT present generic placeholders. Replace every \`<... from source>\` hint with actual field names and realistic values.
|
|
266
|
+
|
|
267
|
+
${buildTestPatternGuidelines()}
|
|
268
|
+
|
|
269
|
+
${buildTestExamples()}
|
|
270
|
+
|
|
271
|
+
**Cascade vs referential integrity:** Keep only the scenario that matches the actual FK delete policy in source (CASCADE → cascade-delete; RESTRICT/no cascade → delete-blocked). Remove the other silently — do NOT create a "Removed Recommendations" or "Not Applicable" section.
|
|
272
|
+
|
|
273
|
+
**Unique constraints:** If the storage backend is Redis, in-memory, or schema-less with no explicit \`UNIQUE\` index, discard unique-constraint scenarios entirely and replace them with a different high-value test. Do NOT list removed scenarios.
|
|
274
|
+
|
|
275
|
+
**NEVER create a "Removed Recommendations", "Not Applicable", or similar section.** If a scenario is inapplicable, silently replace it with an equivalent-priority scenario from the supplement tiers. The output must contain ONLY the ${topN} recommendations.
|
|
276
|
+
|
|
277
|
+
${buildTestQualityCriteria()}
|
|
278
|
+
|
|
279
|
+
**5-dimension rubric — use to assign priority for supplement items:**
|
|
280
|
+
| Dimension | What to assess |
|
|
281
|
+
| Production Safety | Guards a critical boundary (auth, unique constraint, cascade delete, data integrity, breaking migration)? → HIGH |
|
|
282
|
+
| Bug-Finding Potential | Targets a known failure mode (race condition, data consistency, state transition, cascade effect)? → HIGH |
|
|
283
|
+
| User Journey Relevance | Reflects how real users interact (from traces, business flows, critical paths)? → HIGH or MEDIUM |
|
|
284
|
+
| Coverage Gap | Addresses an area with zero existing test coverage? → bump up one tier |
|
|
285
|
+
| Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
|
|
286
|
+
|
|
287
|
+
**Per-recommendation format (apply to ALL items):**
|
|
288
|
+
- Title and one-sentence description of what it validates (business rule, not just "tests the endpoint")
|
|
289
|
+
- Steps with concrete field names and realistic values derived from source code
|
|
290
|
+
- Ready-to-use tool call — replace all \`<...>\` placeholders with real values before presenting
|
|
291
|
+
- "From source" note — the specific production risk or business rule this prevents
|
|
292
|
+
|
|
293
|
+
**MANDATORY: Every pre-ranked item listed above MUST appear in your output — do not drop or skip any.**
|
|
294
|
+
|
|
295
|
+
${sections}
|
|
296
|
+
${repoSupplementNote}
|
|
297
|
+
|
|
298
|
+
**Test type mix — MANDATORY:**
|
|
299
|
+
${isFrontendOnlyProject
|
|
300
|
+
? `This is a frontend repo. Focus on E2E and UI tests only — E2E covers the full browser-to-backend flow (highest coverage), UI exercises frontend components that call backend APIs. Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.`
|
|
301
|
+
: isFrontendProject
|
|
302
|
+
? `This is a full-stack repo. Coverage ranking: E2E (full browser-to-backend flow) > UI (frontend exercises backend APIs) > Integration (backend chains) > Contract (single endpoint). Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`), in addition to backend integration and contract tests.`
|
|
303
|
+
: `Focus on integration and contract tests for all API endpoints.`}
|
|
304
|
+
**No smoke tests. No fuzz tests.**
|
|
305
|
+
|
|
306
|
+
**You MUST present EXACTLY ${topN} recommendations. Do NOT execute any tests. Do NOT produce fewer than ${topN}.**`;
|
|
307
|
+
}
|
|
308
|
+
// ── PR / branch-diff mode: execution plan ────────────────────────────────
|
|
148
309
|
return `## Execution Plan
|
|
149
310
|
Seed: ${seed} | Endpoints: ${endpointCount} | Budget: ${generateItems.length} generate + ${Math.max(topN - generateItems.length, 0)} additional = ${topN} total
|
|
150
311
|
|
|
@@ -370,7 +531,7 @@ ${detailBlocks}
|
|
|
370
531
|
const errorA = a.scenario.steps.some(s => s.interactionType === "error" || s.interactionType === "edge-case") ? 1 : 0;
|
|
371
532
|
const errorB = b.scenario.steps.some(s => s.interactionType === "error" || s.interactionType === "edge-case") ? 1 : 0;
|
|
372
533
|
if (errorB !== errorA)
|
|
373
|
-
return
|
|
534
|
+
return errorA - errorB;
|
|
374
535
|
// Use locale-independent comparison to avoid runtime-locale non-determinism
|
|
375
536
|
const nameA = a.scenario.scenarioName;
|
|
376
537
|
const nameB = b.scenario.scenarioName;
|
|
@@ -427,7 +588,10 @@ Do not churn recommendations without cause.
|
|
|
427
588
|
`;
|
|
428
589
|
}
|
|
429
590
|
else if (scored.length > 0) {
|
|
430
|
-
|
|
591
|
+
const projectType = analysis.projectClassification.projectType;
|
|
592
|
+
const isFrontendProject = projectType === "full-stack" || projectType === "frontend";
|
|
593
|
+
const isFrontendOnlyProject = projectType === "frontend";
|
|
594
|
+
mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, isDiffScope, isFrontendProject, isFrontendOnlyProject);
|
|
431
595
|
}
|
|
432
596
|
else {
|
|
433
597
|
mainSection = `
|
|
@@ -501,13 +665,12 @@ and adjust the test approach if needed.
|
|
|
501
665
|
historyBody += `
|
|
502
666
|
### Previously Recommended (not generated)
|
|
503
667
|
${recLines}
|
|
504
|
-
**Stability rule**:
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
Only add NEW recommendations for code paths introduced in the latest commit.
|
|
668
|
+
**Stability rule**: Carry forward previously recommended tests unchanged in
|
|
669
|
+
additionalRecommendations if they still apply — match by scenarioName (for multi-step
|
|
670
|
+
scenarios) or by endpoint (for single-endpoint tests). Re-derive category and priority
|
|
671
|
+
from the test content. Drop only if the underlying endpoint was removed, business logic
|
|
672
|
+
changed, or the test is now covered by a generated test.
|
|
673
|
+
GENERATE items are always executed regardless of prior recommendations — do not suppress them.
|
|
511
674
|
`;
|
|
512
675
|
}
|
|
513
676
|
prHistorySection = `
|
|
@@ -515,8 +678,11 @@ Only add NEW recommendations for code paths introduced in the latest commit.
|
|
|
515
678
|
Tests from prior bot runs are still in the working tree — the maintenance pipeline
|
|
516
679
|
(Task 2) keeps them up to date. Use the history below to **avoid duplicating** existing
|
|
517
680
|
coverage and to fill gaps:
|
|
518
|
-
- **
|
|
519
|
-
|
|
681
|
+
- **GENERATE section is unaffected by prior history** — always execute ALL pre-ranked
|
|
682
|
+
GENERATE items regardless of what was generated in prior runs. The execution pipeline
|
|
683
|
+
handles deduplication at the file level.
|
|
684
|
+
- Tests listed under "Previously Generated Tests" are maintained automatically by Task 2 —
|
|
685
|
+
do NOT include them in additionalRecommendations.
|
|
520
686
|
- **Carry forward** previously recommended-but-not-generated tests unchanged in
|
|
521
687
|
additionalRecommendations if they still apply. Promote the highest-priority ones
|
|
522
688
|
into generation slots if capacity allows.
|
|
@@ -202,14 +202,15 @@ describe("buildRecommendationPrompt — PR History section", () => {
|
|
|
202
202
|
expect(prompt).toContain("Promote the highest-priority ones");
|
|
203
203
|
expect(prompt).toContain("into generation slots if capacity allows");
|
|
204
204
|
});
|
|
205
|
-
it("
|
|
205
|
+
it("instructs that GENERATE is unaffected by prior history for implemented tests (Gap 4)", () => {
|
|
206
206
|
const ctx = makePRContext({
|
|
207
207
|
previousRecommendations: [
|
|
208
208
|
{ testType: "contract", endpoint: "GET /api/items", status: "implemented", commentId: "1" },
|
|
209
209
|
],
|
|
210
210
|
});
|
|
211
211
|
const prompt = buildRecommendationPrompt(minimalAnalysis(), "current_branch_diff", 10, ctx);
|
|
212
|
-
expect(prompt).toContain("
|
|
212
|
+
expect(prompt).toContain("GENERATE section is unaffected by prior history");
|
|
213
|
+
expect(prompt).not.toContain("Do NOT re-recommend");
|
|
213
214
|
expect(prompt).toContain("Previously Generated Tests");
|
|
214
215
|
});
|
|
215
216
|
it("de-duplicates multi-step scenario entries to one line per scenario", () => {
|
|
@@ -281,29 +282,31 @@ function minimalScenario(overrides = {}) {
|
|
|
281
282
|
};
|
|
282
283
|
}
|
|
283
284
|
describe("buildRecommendationPrompt — Stability and supplement section", () => {
|
|
284
|
-
|
|
285
|
+
// Recommendation Stability is a PR-mode (branch_diff) concept — carry-forward across bot runs.
|
|
286
|
+
// Full-repo mode is presentation-only; there is no previous-run state to carry forward.
|
|
287
|
+
it("includes Recommendation Stability section in output when scenarios exist (PR mode)", () => {
|
|
285
288
|
const analysis = minimalAnalysis({
|
|
286
289
|
businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
|
|
287
290
|
});
|
|
288
|
-
const prompt = buildRecommendationPrompt(analysis, "
|
|
291
|
+
const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10);
|
|
289
292
|
expect(prompt).toContain("## Recommendation Stability");
|
|
290
293
|
});
|
|
291
|
-
it("stability section uses scenarioName/endpoint matching strategy", () => {
|
|
294
|
+
it("stability section uses scenarioName/endpoint matching strategy (PR mode)", () => {
|
|
292
295
|
const analysis = minimalAnalysis({
|
|
293
296
|
businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
|
|
294
297
|
});
|
|
295
|
-
const prompt = buildRecommendationPrompt(analysis, "
|
|
298
|
+
const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10);
|
|
296
299
|
const stabilityStart = prompt.indexOf("## Recommendation Stability");
|
|
297
300
|
const stabilityBlock = prompt.slice(stabilityStart, stabilityStart + 500);
|
|
298
301
|
expect(stabilityBlock).toContain("scenarioName");
|
|
299
302
|
expect(stabilityBlock).toContain("endpoint");
|
|
300
303
|
expect(stabilityBlock).toContain("Re-derive category and priority");
|
|
301
304
|
});
|
|
302
|
-
it("stability section specifies when to drop a recommendation", () => {
|
|
305
|
+
it("stability section specifies when to drop a recommendation (PR mode)", () => {
|
|
303
306
|
const analysis = minimalAnalysis({
|
|
304
307
|
businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
|
|
305
308
|
});
|
|
306
|
-
const prompt = buildRecommendationPrompt(analysis, "
|
|
309
|
+
const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10);
|
|
307
310
|
expect(prompt).toContain("target endpoint was removed");
|
|
308
311
|
expect(prompt).toContain("business logic changed");
|
|
309
312
|
expect(prompt).toContain("covered by a generated test");
|
|
@@ -321,12 +324,12 @@ describe("buildRecommendationPrompt — Stability and supplement section", () =>
|
|
|
321
324
|
it("MAX_TESTS_TO_GENERATE is 3", () => {
|
|
322
325
|
expect(MAX_TESTS_TO_GENERATE).toBe(3);
|
|
323
326
|
});
|
|
324
|
-
it("uses MAX_CRITICAL_TESTS in category-aware selection rules", () => {
|
|
327
|
+
it("uses MAX_CRITICAL_TESTS in category-aware selection rules (PR mode)", () => {
|
|
325
328
|
const analysis = minimalAnalysis({
|
|
326
329
|
businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
|
|
327
330
|
});
|
|
328
|
-
|
|
329
|
-
|
|
331
|
+
// MAX_CRITICAL_TESTS applies to PR mode (GENERATE items) — full_repo mode only presents, does not execute
|
|
332
|
+
const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10);
|
|
330
333
|
expect(prompt).toContain("GENERATE items MUST be from HIGH-priority categories");
|
|
331
334
|
});
|
|
332
335
|
});
|
|
@@ -369,3 +372,569 @@ describe("PATH_PARAM_UUID_GUIDANCE — no hardcoded UUID anchor", () => {
|
|
|
369
372
|
expect(prompt).not.toMatch(UUID_V4_REGEX);
|
|
370
373
|
});
|
|
371
374
|
});
|
|
375
|
+
// ---------------------------------------------------------------------------
|
|
376
|
+
// Regression tests — PR #110 quality baseline
|
|
377
|
+
//
|
|
378
|
+
// Guard against regressions in recommendation quality. These tests assert that
|
|
379
|
+
// the key signals that made PR #110's recommendations excellent are present in
|
|
380
|
+
// both full_repo and branch_diff (PR) modes.
|
|
381
|
+
// Baseline: https://github.com/letsramp/demoshop-fullstack/pull/110
|
|
382
|
+
// ---------------------------------------------------------------------------
|
|
383
|
+
function mockDiffScenario(overrides = {}) {
|
|
384
|
+
return {
|
|
385
|
+
scenarioName: "orders-update-with-discount",
|
|
386
|
+
description: "PUT /api/v1/orders/{order_id} with discount_percent — verifies total_amount formula",
|
|
387
|
+
category: "business_rule",
|
|
388
|
+
priority: "high",
|
|
389
|
+
steps: [
|
|
390
|
+
{ order: 1, method: "POST", path: "/api/v1/products", expectedStatusCode: 201, description: "Create product", interactionType: "success" },
|
|
391
|
+
{ order: 2, method: "POST", path: "/api/v1/orders", expectedStatusCode: 201, description: "Create order", interactionType: "success", chainsFrom: { sourceField: "id", sourceStep: 1, sourceLocation: "body", targetParam: "product_id", targetLocation: "body" } },
|
|
392
|
+
{ order: 3, method: "PUT", path: "/api/v1/orders/{order_id}", expectedStatusCode: 200, description: "Apply discount", interactionType: "success", chainsFrom: { sourceField: "order_id", sourceStep: 2, sourceLocation: "body", targetParam: "order_id", targetLocation: "path" } },
|
|
393
|
+
],
|
|
394
|
+
chainingKeys: ["id", "order_id"],
|
|
395
|
+
requiresAuth: true,
|
|
396
|
+
estimatedComplexity: "moderate",
|
|
397
|
+
testType: "integration",
|
|
398
|
+
...overrides,
|
|
399
|
+
};
|
|
400
|
+
}
|
|
401
|
+
function analysisWithScenario(scope) {
|
|
402
|
+
const base = minimalAnalysis({
|
|
403
|
+
businessContext: {
|
|
404
|
+
mainPurpose: "E-commerce demo",
|
|
405
|
+
userFlows: [],
|
|
406
|
+
dataFlows: [],
|
|
407
|
+
integrationPatterns: [],
|
|
408
|
+
draftedScenarios: [mockDiffScenario()],
|
|
409
|
+
},
|
|
410
|
+
});
|
|
411
|
+
if (scope === "current_branch_diff") {
|
|
412
|
+
return {
|
|
413
|
+
...base,
|
|
414
|
+
branchDiffContext: {
|
|
415
|
+
currentBranch: "shiny/edit-order",
|
|
416
|
+
baseBranch: "main",
|
|
417
|
+
changedFiles: ["backend/app/routers/orders.py"],
|
|
418
|
+
newEndpoints: [{
|
|
419
|
+
path: "/api/v1/orders/{order_id}",
|
|
420
|
+
methods: [{ method: "PUT", sourceFile: "orders.py", interactionCount: 3 }],
|
|
421
|
+
}],
|
|
422
|
+
modifiedEndpoints: [],
|
|
423
|
+
affectedServices: ["orders"],
|
|
424
|
+
},
|
|
425
|
+
};
|
|
426
|
+
}
|
|
427
|
+
return base;
|
|
428
|
+
}
|
|
429
|
+
describe("PR #110 quality baseline — full_repo mode", () => {
|
|
430
|
+
let prompt;
|
|
431
|
+
beforeAll(() => { prompt = buildRecommendationPrompt(analysisWithScenario("full_repo"), "full_repo", 20); });
|
|
432
|
+
it("source enrichment targets each endpoint's route handler, not 'changed files'", () => {
|
|
433
|
+
expect(prompt).toContain("Source-Code Enrichment");
|
|
434
|
+
expect(prompt).toContain("route handler");
|
|
435
|
+
expect(prompt).not.toContain("Read the source code for ALL changed files");
|
|
436
|
+
});
|
|
437
|
+
it("includes test pattern guidelines for quality anchoring", () => {
|
|
438
|
+
expect(prompt).toContain("Test Pattern Guidelines");
|
|
439
|
+
});
|
|
440
|
+
it("includes concrete impressive/deprioritise examples", () => {
|
|
441
|
+
expect(prompt).toContain("Impressive (these catch prod bugs)");
|
|
442
|
+
expect(prompt).toContain("Deprioritise");
|
|
443
|
+
});
|
|
444
|
+
it("supplement ordering puts edge cases before cross-resource (Tier 1 before Tier 3)", () => {
|
|
445
|
+
const tier1Idx = prompt.indexOf("Tier 1");
|
|
446
|
+
const tier3Idx = prompt.indexOf("Tier 3");
|
|
447
|
+
expect(tier1Idx).toBeGreaterThan(-1);
|
|
448
|
+
expect(tier3Idx).toBeGreaterThan(-1);
|
|
449
|
+
expect(tier1Idx).toBeLessThan(tier3Idx);
|
|
450
|
+
});
|
|
451
|
+
it("supplement Tier 1 calls out boundary values and invalid IDs explicitly", () => {
|
|
452
|
+
expect(prompt).toMatch(/Tier 1.*boundary values/s);
|
|
453
|
+
expect(prompt).toMatch(/Tier 1.*invalid.*non-existent IDs/s);
|
|
454
|
+
});
|
|
455
|
+
it("includes 5-dimension quality rubric", () => {
|
|
456
|
+
expect(prompt).toContain("Production Safety");
|
|
457
|
+
expect(prompt).toContain("Bug-Finding Potential");
|
|
458
|
+
expect(prompt).toContain("Coverage Gap");
|
|
459
|
+
});
|
|
460
|
+
it("includes per-recommendation format instruction", () => {
|
|
461
|
+
// Full-repo mode hides category/priority from user output — check for format label and key fields
|
|
462
|
+
expect(prompt).toContain("Per-recommendation format");
|
|
463
|
+
expect(prompt).toContain("tool call");
|
|
464
|
+
expect(prompt).toContain("From source");
|
|
465
|
+
});
|
|
466
|
+
it("includes unique-constraint storage gating for Redis", () => {
|
|
467
|
+
expect(prompt).toContain("Unique constraints");
|
|
468
|
+
expect(prompt).toContain("Redis");
|
|
469
|
+
});
|
|
470
|
+
});
|
|
471
|
+
// ---------------------------------------------------------------------------
|
|
472
|
+
// Tests — full_repo output format and execution guardrails
|
|
473
|
+
//
|
|
474
|
+
// Guard that full_repo mode:
|
|
475
|
+
// - never emits execution/GENERATE language
|
|
476
|
+
// - groups items by test type with section headers
|
|
477
|
+
// - hides category/priority labels from user-facing rendered items
|
|
478
|
+
// - emits "Do NOT execute any tests"
|
|
479
|
+
// - renders pre-ranked item names
|
|
480
|
+
// - includes cascade guidance
|
|
481
|
+
// - scopes Tier 1 supplement to "list" (not "GENERATE set")
|
|
482
|
+
// ---------------------------------------------------------------------------
|
|
483
|
+
function fullRepoAnalysisWithScenarios(overrides = {}, scenarios = []) {
|
|
484
|
+
return minimalAnalysis({
|
|
485
|
+
businessContext: {
|
|
486
|
+
mainPurpose: "E-commerce API",
|
|
487
|
+
userFlows: [],
|
|
488
|
+
dataFlows: [],
|
|
489
|
+
integrationPatterns: [],
|
|
490
|
+
draftedScenarios: scenarios.length > 0 ? scenarios : [mockDiffScenario()],
|
|
491
|
+
},
|
|
492
|
+
...overrides,
|
|
493
|
+
});
|
|
494
|
+
}
|
|
495
|
+
function makeContractScenario() {
|
|
496
|
+
return {
|
|
497
|
+
scenarioName: "create-product-contract",
|
|
498
|
+
description: "POST /api/v1/products auth boundary",
|
|
499
|
+
category: "security_boundary",
|
|
500
|
+
priority: "high",
|
|
501
|
+
steps: [{ order: 1, method: "POST", path: "/api/v1/products", expectedStatusCode: 201, description: "Create product", interactionType: "success" }],
|
|
502
|
+
chainingKeys: [],
|
|
503
|
+
requiresAuth: true,
|
|
504
|
+
estimatedComplexity: "simple",
|
|
505
|
+
testType: "contract",
|
|
506
|
+
};
|
|
507
|
+
}
|
|
508
|
+
/**
 * Guardrails for the prompt built in "full_repo" mode (topN = 10) from
 * `mockDiffScenario()` plus `makeContractScenario()`: no PR-mode execution
 * language, grouping by test type, no user-visible category/priority labels,
 * and the cascade / supplement guidance is present.
 */
describe("full_repo mode — output format and execution guardrails", () => {
    let prompt;
    // Index of a markdown header inside the prompt, or -1 when it is absent.
    const headerPos = (header) => prompt.indexOf(header);

    beforeAll(() => {
        const scenarios = [mockDiffScenario(), makeContractScenario()];
        const analysis = fullRepoAnalysisWithScenarios({}, scenarios);
        prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
    });

    it("does NOT contain GENERATE execution language", () => {
        const executionPhrases = [
            "### GENERATE",
            "execute these in order",
            "one retry on failure then skip",
        ];
        for (const phrase of executionPhrases) {
            expect(prompt).not.toContain(phrase);
        }
    });

    it("does NOT contain the PR-mode ADDITIONAL section header", () => {
        // '### ADDITIONAL (list in additionalRecommendations...)' is a PR-mode structural
        // header, so the grouped full_repo output must never include it.
        expect(prompt).not.toContain("### ADDITIONAL (list in additionalRecommendations");
    });

    it("contains explicit 'Do NOT execute any tests' instruction", () => {
        expect(prompt).toContain("Do NOT execute any tests");
    });

    it("contains 'Repo mode' header or preamble", () => {
        expect(prompt).toContain("Repo mode");
    });

    it("groups items by test type — Integration section header present", () => {
        // Either grouped header satisfies this check.
        expect(prompt).toMatch(/### (Integration|Contract)/);
    });

    it("E2E section appears before Integration section (E2E ranked highest coverage)", () => {
        const e2ePos = headerPos("### E2E");
        // Ordering is only asserted for sections that actually exist in the prompt.
        for (const laterHeader of ["### Integration", "### Contract"]) {
            const laterPos = headerPos(laterHeader);
            if (e2ePos !== -1 && laterPos !== -1) {
                expect(e2ePos).toBeLessThan(laterPos);
            }
        }
    });

    it("UI section appears before Integration and Contract sections", () => {
        const uiPos = headerPos("### UI");
        // Same rule as above: skip the comparison when either header is missing.
        for (const laterHeader of ["### Integration", "### Contract"]) {
            const laterPos = headerPos(laterHeader);
            if (uiPos !== -1 && laterPos !== -1) {
                expect(uiPos).toBeLessThan(laterPos);
            }
        }
    });

    it("prompt forbids the LLM from creating a 'Removed Recommendations' section", () => {
        // The 'NEVER create' instruction must be there so the LLM does not add such a section...
        expect(prompt).toContain("NEVER create a");
        // ...while no real heading with that title may exist. The phrase itself can still
        // occur inside the NEVER instruction, hence the line-anchored heading patterns.
        expect(prompt).not.toMatch(/^##+ Removed Recommendations/m);
        expect(prompt).not.toMatch(/^##+ Not Applicable/m);
    });

    it("rendered item does NOT contain 'priority=' label visible to user", () => {
        // 'priority=' is a PR-mode label and must stay out of the rendered sections.
        expect(prompt).not.toMatch(/priority=(HIGH|MEDIUM|LOW|CRITICAL)/);
    });

    it("rendered item does NOT contain pipe-delimited category label", () => {
        // The '| category |' pattern belongs to PR-mode GENERATE blocks.
        expect(prompt).not.toMatch(/\| (security_boundary|business_rule|data_integrity|crud|workflow) \|/);
    });

    it("renders the pre-ranked scenario name in the output", () => {
        expect(prompt).toContain("orders-update-with-discount");
    });

    it("includes cascade vs referential integrity guidance", () => {
        expect(prompt).toContain("Cascade vs referential integrity");
    });

    it("supplement Tier 1 is scoped to 'list' (not 'GENERATE set') in full_repo", () => {
        // full_repo has no GENERATE set, so the supplement refers to the pre-ranked list.
        expect(prompt).toMatch(/Tier 1.*list/s);
        expect(prompt).not.toMatch(/Tier 1.*GENERATE set/s);
    });

    it("supplement note references 5-dimension rubric for priority assignment", () => {
        expect(prompt).toContain("5-dimension rubric");
    });

    it("cascade guidance instructs silent removal — no 'Removed Recommendations' section", () => {
        // Removal must be silent; removed items are not to be listed anywhere.
        expect(prompt).toContain("silently");
        expect(prompt).toContain("Do NOT list removed scenarios");
    });
});
|
|
592
|
+
// ---------------------------------------------------------------------------
|
|
593
|
+
// Tests — full_repo mode: full-stack vs backend-only test mix
|
|
594
|
+
// ---------------------------------------------------------------------------
|
|
595
|
+
/**
 * Checks how the full_repo prompt mixes test types depending on the project
 * classification: full-stack and frontend projects get "at least N" UI/E2E
 * mandates that scale with topN, backend-only projects do not, and smoke/fuzz
 * tests are excluded for both.
 */
describe("full_repo mode — full-stack repo test mix", () => {
    // Each factory returns a brand-new analysis so tests never share fixture objects.
    const analysisFor = (projectType, primaryLanguage, primaryFramework, deploymentPattern) =>
        fullRepoAnalysisWithScenarios({
            projectClassification: { projectType, primaryLanguage, primaryFramework, deploymentPattern },
        });
    const fullStackAnalysis = () => analysisFor("full-stack", "TypeScript", "Next.js", "full-stack");
    const backendOnlyAnalysis = () => analysisFor("rest-api", "Python", "FastAPI", "traditional");
    const frontendOnlyAnalysis = () => analysisFor("frontend", "TypeScript", "React", "traditional");
    const promptFor = (analysis, topN) => buildRecommendationPrompt(analysis, "full_repo", topN);

    // topN=10 → 15% × 10 = 1.5 → round → 2 for both E2E and UI
    it("full-stack repo mandates percentage-based UI slots (topN=10 → ≥2)", () => {
        const prompt = promptFor(fullStackAnalysis(), 10);
        expect(prompt).toContain("skyramp_ui_test_generation");
        expect(prompt).toMatch(/at least 2 UI test/);
    });

    it("full-stack repo mandates percentage-based E2E slots (topN=10 → ≥2)", () => {
        const prompt = promptFor(fullStackAnalysis(), 10);
        expect(prompt).toContain("skyramp_e2e_test_generation");
        expect(prompt).toMatch(/at least 2 E2E test/);
    });

    // topN=20 → 15% × 20 = 3 for both E2E and UI (scales up vs fixed ≥1/≥2)
    it("full-stack repo scales to ≥3 E2E and ≥3 UI at topN=20", () => {
        const prompt = promptFor(fullStackAnalysis(), 20);
        expect(prompt).toMatch(/at least 3 E2E test/);
        expect(prompt).toMatch(/at least 3 UI test/);
    });

    // topN=5 → 15% × 5 = 0.75 → round → 1, floor at 1
    it("full-stack repo floors at ≥1 E2E and ≥1 UI for small topN=5", () => {
        const prompt = promptFor(fullStackAnalysis(), 5);
        expect(prompt).toMatch(/at least 1 E2E test/);
        expect(prompt).toMatch(/at least 1 UI test/);
    });

    it("full-stack repo explicitly excludes smoke and fuzz tests", () => {
        const prompt = promptFor(fullStackAnalysis(), 10);
        expect(prompt).toContain("No smoke tests");
        expect(prompt).toContain("No fuzz tests");
    });

    it("backend-only (rest-api) repo does NOT mandate UI/E2E tests", () => {
        const prompt = promptFor(backendOnlyAnalysis(), 10);
        // Tool names also occur in the generic buildToolWorkflows docs, so look for
        // the mandate wording rather than the tool names.
        expect(prompt).not.toMatch(/at least \d+ (UI|E2E) test/);
        expect(prompt).not.toContain("supplement MUST include");
        expect(prompt).not.toContain("full-stack repo");
    });

    it("backend-only repo focuses on integration and contract tests", () => {
        const prompt = promptFor(backendOnlyAnalysis(), 10);
        expect(prompt).toContain("integration and contract tests");
    });

    it("backend-only repo still excludes smoke and fuzz tests", () => {
        const prompt = promptFor(backendOnlyAnalysis(), 10);
        expect(prompt).toContain("No smoke tests");
        expect(prompt).toContain("No fuzz tests");
    });

    it("'frontend' project type focuses on UI/E2E only — NOT backend tests", () => {
        const prompt = promptFor(frontendOnlyAnalysis(), 10);
        // topN=10 → 15% × 10 = 1.5 → round → 2 for both
        expect(prompt).toMatch(/at least 2 UI test/);
        expect(prompt).toMatch(/at least 2 E2E test/);
        // The full-stack "in addition to backend ..." wording must be absent...
        expect(prompt).not.toContain("in addition to backend integration and contract tests");
        // ...and integration/contract tests must be ruled out explicitly.
        expect(prompt).toContain("Do NOT add integration or contract tests");
    });

    it("'frontend' project type says 'frontend repo' not 'full-stack repo'", () => {
        const prompt = promptFor(frontendOnlyAnalysis(), 10);
        expect(prompt).toContain("frontend repo");
        expect(prompt).not.toContain("full-stack repo");
    });

    it("'full-stack' project type includes BOTH backend and frontend tests", () => {
        const prompt = promptFor(fullStackAnalysis(), 10);
        expect(prompt).toContain("full-stack repo");
        expect(prompt).toContain("in addition to backend integration and contract tests");
    });

    it("full-stack repo explains E2E > UI > Integration > Contract coverage ranking", () => {
        const prompt = promptFor(fullStackAnalysis(), 10);
        for (const expectedText of ["Coverage ranking", "E2E", "UI"]) {
            expect(prompt).toContain(expectedText);
        }
    });

    // Critical: scenarioDrafting.ts NEVER generates UI or E2E testType — those only
    // come from the LLM supplement. The supplement note MUST therefore tell the LLM
    // explicitly to add UI/E2E for full-stack repos; otherwise it fills the supplement
    // with backend-only tiers (edge cases, CRUD) and never produces UI/E2E
    // recommendations (PR #110 regression risk).
    it("full-stack supplement note explicitly mandates UI and E2E with percentage-based counts (PR #110 regression guard)", () => {
        // topN=20, 15% → 3 E2E + 3 UI mandated in the supplement note
        const prompt = promptFor(fullStackAnalysis(), 20);
        // The tool name has to show up inside the supplement, not only in the test-mix footer.
        const supplementStart = prompt.indexOf("REQUIRED — You MUST add");
        const e2eToolPos = prompt.indexOf("skyramp_e2e_test_generation");
        expect(supplementStart).toBeGreaterThan(-1);
        expect(e2eToolPos).toBeGreaterThan(-1);
        expect(e2eToolPos).toBeGreaterThan(supplementStart); // inside supplement note
        // Percentage-based count: topN=20 → 3
        expect(prompt).toMatch(/at least 3 E2E test/);
        expect(prompt).toMatch(/at least 3 UI test/);
    });

    it("backend-only repo supplement note does NOT add UI/E2E mandate", () => {
        const prompt = promptFor(backendOnlyAnalysis(), 20);
        const supplementStart = prompt.indexOf("REQUIRED — You MUST add");
        if (supplementStart === -1) {
            return; // no supplement needed
        }
        // Only the first 800 characters of the supplement are inspected.
        const supplementBlock = prompt.slice(supplementStart, supplementStart + 800);
        // Backend-only repos should NOT mandate UI/E2E in the supplement tiers
        expect(supplementBlock).not.toContain("full-stack repo, the supplement MUST include");
    });
});
|
|
746
|
+
// ---------------------------------------------------------------------------
|
|
747
|
+
// Tests — full_repo mode: PR mode must NOT be affected by these changes
|
|
748
|
+
// ---------------------------------------------------------------------------
|
|
749
|
+
/**
 * Confirms the "current_branch_diff" (PR) prompt keeps its execution-oriented
 * wording and does not pick up any of the full_repo-only phrasing.
 */
describe("full_repo mode — PR mode unchanged by full_repo changes", () => {
    const PR_SCOPE = "current_branch_diff";
    let prPrompt;

    beforeAll(() => {
        const analysis = analysisWithScenario(PR_SCOPE);
        prPrompt = buildRecommendationPrompt(analysis, PR_SCOPE, 10);
    });

    it("PR mode still contains GENERATE execution language", () => {
        expect(prPrompt).toContain("### GENERATE");
    });

    it("PR mode still shows priority= labels on GENERATE items", () => {
        expect(prPrompt).toMatch(/priority=(HIGH|MEDIUM|LOW|CRITICAL)/);
    });

    it("PR mode does not show 'Do NOT execute any tests'", () => {
        expect(prPrompt).not.toContain("Do NOT execute any tests");
    });

    it("PR mode does not show 'Repo mode' preamble", () => {
        expect(prPrompt).not.toContain("Repo mode — no tests are executed");
    });
});
|
|
767
|
+
/**
 * Baseline expectations for the "current_branch_diff" prompt at topN = 20:
 * source enrichment wording, supplement tier scoping and ordering, cascade
 * guidance, per-recommendation format, and the GENERATE block.
 */
describe("PR #110 quality baseline — branch_diff (PR) mode", () => {
    const PR_SCOPE = "current_branch_diff";
    let prompt;

    beforeAll(() => {
        const analysis = analysisWithScenario(PR_SCOPE);
        prompt = buildRecommendationPrompt(analysis, PR_SCOPE, 20);
    });

    it("source enrichment references changed files (not 'each endpoint')", () => {
        expect(prompt).toContain("Source-Code Enrichment");
        expect(prompt).toContain("changed files");
        expect(prompt).not.toContain("For each endpoint listed in the Repository Context above, read the route handler");
    });

    it("supplement Tier 1 scoped to GENERATE set", () => {
        expect(prompt).toMatch(/Tier 1.*GENERATE set/s);
    });

    it("supplement ordering puts edge cases before cross-resource", () => {
        const firstTierPos = prompt.indexOf("Tier 1");
        const thirdTierPos = prompt.indexOf("Tier 3");
        // Both tiers must exist before their relative order means anything.
        expect(firstTierPos).toBeGreaterThan(-1);
        expect(thirdTierPos).toBeGreaterThan(-1);
        expect(firstTierPos).toBeLessThan(thirdTierPos);
    });

    it("includes cascade vs referential integrity guidance", () => {
        expect(prompt).toContain("Cascade vs referential integrity");
    });

    it("includes per-recommendation format requirements", () => {
        expect(prompt).toContain("Per-recommendation format");
    });

    it("GENERATE block present for the business_rule scenario", () => {
        expect(prompt).toContain("GENERATE");
        expect(prompt).toContain("orders-update-with-discount");
    });
});
|
|
796
|
+
// ---------------------------------------------------------------------------
|
|
797
|
+
// Regression tests — v3 gap fixes
|
|
798
|
+
// ---------------------------------------------------------------------------
|
|
799
|
+
/**
 * Regression test for ranking: given an error-path scenario listed first and a
 * happy-path scenario listed second, the happy-path name must still appear
 * earlier in the generated PR-mode prompt.
 */
describe("Gap 1 — happy-path ranking: success scenarios ranked before error/edge-case scenarios", () => {
    // Three-step scenario whose steps all share one interactionType; non-success
    // variants expect 404 on every step.
    const makeScenarioByInteraction = (name, interactionType) => {
        const succeeds = interactionType === "success";
        const step = (order, method, route, successStatus, description) => ({
            order,
            method,
            path: route,
            expectedStatusCode: succeeds ? successStatus : 404,
            description,
            interactionType,
        });
        return mockDiffScenario({
            scenarioName: name,
            steps: [
                step(1, "POST", "/api/items", 201, "step"),
                step(2, "GET", "/api/items/{id}", 200, "verify"),
                step(3, "DELETE", "/api/items/{id}", 204, "cleanup"),
            ],
        });
    };

    it("happy-path scenario ranked before error-path scenario in GENERATE block", () => {
        const base = analysisWithScenario("current_branch_diff");
        // The error path is deliberately drafted first so that input order alone
        // cannot make the assertion pass.
        const draftedScenarios = [
            makeScenarioByInteraction("error-path-scenario", "error"),
            makeScenarioByInteraction("happy-path-scenario", "success"),
        ];
        const analysis = {
            ...base,
            businessContext: {
                mainPurpose: "Test",
                userFlows: [],
                dataFlows: [],
                integrationPatterns: [],
                draftedScenarios,
            },
        };
        const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 5);
        const happyPos = prompt.indexOf("happy-path-scenario");
        const errorPos = prompt.indexOf("error-path-scenario");
        expect(happyPos).toBeGreaterThan(-1);
        expect(errorPos).toBeGreaterThan(-1);
        // A lower index means the name shows up earlier in the output.
        expect(happyPos).toBeLessThan(errorPos);
    });
});
|
|
831
|
+
/**
 * Regression tests for the E2E [ADDITIONAL] slot in PR mode: with frontend and
 * API changes in the diff, the slot is expected only when trace files exist.
 */
describe("Gap 2 — E2E ADDITIONAL slot gated on hasTraces", () => {
    // Mixed PR fixture (frontend + backend files, one new endpoint); the only
    // variable is whether a trace file artifact is present.
    const makeMixedPRAnalysis = (hasTraceFiles) => {
        // draftedScenarios are required so scored.length > 0 and buildExecutionPlan is reached
        const base = analysisWithScenario("current_branch_diff");
        const traceFiles = hasTraceFiles
            ? [{ path: "/repo/tests/trace.json", format: "skyramp" }]
            : [];
        const artifacts = {
            openApiSpecs: [],
            playwrightRecordings: [],
            traceFiles,
            notFound: [],
        };
        const branchDiffContext = {
            currentBranch: "test",
            baseBranch: "main",
            // frontend/components/.tsx triggers hasFrontendChanges; newEndpoints makes it a mixed PR (not UI-only)
            changedFiles: ["frontend/components/App.tsx", "backend/routers/orders.py"],
            newEndpoints: [{ path: "/api/v1/orders/{order_id}", methods: [{ method: "PUT", sourceFile: "orders.py", interactionCount: 3 }] }],
            modifiedEndpoints: [],
            affectedServices: ["orders"],
        };
        return { ...base, artifacts, branchDiffContext };
    };
    const promptFor = (hasTraceFiles) =>
        buildRecommendationPrompt(makeMixedPRAnalysis(hasTraceFiles), "current_branch_diff", 10);

    it("E2E [ADDITIONAL] slot present when hasTraces=true and frontend+API changes exist", () => {
        expect(promptFor(true)).toMatch(/\[ADDITIONAL\].*E2E/s);
    });

    it("E2E [ADDITIONAL] slot absent when hasTraces=false and frontend+API changes exist", () => {
        const prompt = promptFor(false);
        // The UI slot stays; only the E2E slot is dropped.
        expect(prompt).toMatch(/\[ADDITIONAL\].*UI/s);
        // Tool docs mention "E2E", so check for the exact [ADDITIONAL] label instead.
        expect(prompt).not.toContain("[ADDITIONAL] | E2E |");
    });
});
|
|
866
|
+
/**
 * Regression tests for repeat runs on a PR: prior recommendation history must
 * not suppress GENERATE items, and the old suppression wording must be gone.
 */
describe("Gap 4 — PR history does NOT suppress GENERATE items on 2nd+ run", () => {
    // Builds the PR-mode prompt (topN = 5) with the given prior recommendations.
    const promptWithHistory = (previousRecommendations) => {
        const ctx = makePRContext({ previousRecommendations });
        return buildRecommendationPrompt(analysisWithScenario("current_branch_diff"), "current_branch_diff", 5, ctx);
    };

    it("prompt contains GENERATE-unaffected instruction when prior history exists", () => {
        const prompt = promptWithHistory([
            { testType: "integration", endpoint: "POST /api/v1/orders", scenarioName: "orders-update-with-discount", status: "implemented", commentId: "1" },
        ]);
        expect(prompt).toContain("GENERATE section is unaffected by prior history");
    });

    it("prompt does NOT contain old suppression text 'Do NOT re-recommend'", () => {
        const prompt = promptWithHistory([
            { testType: "integration", endpoint: "POST /api/v1/orders", status: "implemented", commentId: "1" },
        ]);
        expect(prompt).not.toContain("Do NOT re-recommend");
    });
});
|
|
886
|
+
/**
 * Verifies that, in full_repo mode, each drafted scenario is rendered with the
 * generation tool matching its testType, and that only integration scenarios
 * reference the per-step scenario tool.
 */
describe("renderItem — correct tool for E2E and UI testTypes in full_repo mode", () => {
    // Two-step (list, then create) scenario tagged with the requested testType.
    const makeTypedScenario = (testType) =>
        mockDiffScenario({
            scenarioName: `${testType}-scenario`,
            testType,
            steps: [
                { order: 1, method: "GET", path: "/api/items", expectedStatusCode: 200, description: "list items", interactionType: "success" },
                { order: 2, method: "POST", path: "/api/items", expectedStatusCode: 201, description: "create item", interactionType: "success" },
            ],
        });

    // Full prompt for an analysis whose only drafted scenario has the given testType.
    const promptForType = (testType) => {
        const analysis = minimalAnalysis({
            businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [makeTypedScenario(testType)] },
        });
        return buildRecommendationPrompt(analysis, "full_repo", 5);
    };

    // Recommendation content only: everything before the Tool Workflows docs,
    // which list every tool regardless of the scenarios.
    const recommendationContent = (prompt) => {
        const toolWorkflowsIdx = prompt.indexOf("## How to Generate Tests");
        if (toolWorkflowsIdx > 0) {
            return prompt.slice(0, toolWorkflowsIdx);
        }
        return prompt;
    };

    it("integration scenario uses skyramp_integration_test_generation in full_repo", () => {
        const prompt = promptForType("integration");
        expect(prompt).toContain("skyramp_integration_test_generation");
    });

    it("e2e scenario uses skyramp_e2e_test_generation and omits scenario step calls in full_repo", () => {
        const mainContent = recommendationContent(promptForType("e2e"));
        expect(mainContent).toContain("skyramp_e2e_test_generation");
        expect(mainContent).not.toContain("skyramp_integration_test_generation");
        // E2E does not use per-step scenario pipeline
        expect(mainContent).not.toContain("skyramp_scenario_test_generation");
    });

    it("ui scenario uses skyramp_ui_test_generation and omits scenario step calls in full_repo", () => {
        const mainContent = recommendationContent(promptForType("ui"));
        expect(mainContent).toContain("skyramp_ui_test_generation");
        expect(mainContent).not.toContain("skyramp_integration_test_generation");
        // UI does not use per-step scenario pipeline
        expect(mainContent).not.toContain("skyramp_scenario_test_generation");
    });

    it("integration scenario still emits per-step skyramp_scenario_test_generation calls in full_repo", () => {
        const mainContent = recommendationContent(promptForType("integration"));
        expect(mainContent).toContain("skyramp_scenario_test_generation");
        expect(mainContent).toContain("skyramp_integration_test_generation");
    });
});
|
|
@@ -120,6 +120,7 @@ Generate a net-new test. Use a unique descriptive filename to avoid overwriting
|
|
|
120
120
|
**How to generate each type (for ADD and REGENERATE):**
|
|
121
121
|
- **Integration**: call \`skyramp_scenario_test_generation\` per step (sequentially), then \`skyramp_integration_test_generation\` with the scenario file.
|
|
122
122
|
Scenario JSON goes in the same \`outputDir\` (e.g. \`tests/scenario_<name>.json\`), not \`.skyramp/\`.
|
|
123
|
+
**Required fields (MANDATORY before generating any scenario step):** For every POST/PUT/PATCH step — including prerequisite/setup steps (e.g. create a product before creating an order) — read the route handler source code or OpenAPI schema to identify ALL required request body fields. Include every required field with a realistic value. Do NOT omit fields just because they are not the focus of the test.
|
|
123
124
|
- **Contract**: call \`skyramp_contract_test_generation\` with \`endpointURL\`, \`method\`, and \`requestData\` for POST/PUT/PATCH.
|
|
124
125
|
Pass \`apiSchema\` if an OpenAPI spec exists.
|
|
125
126
|
For internal/microservice APIs: add \`providerMode: true\` to verify implementation matches the contract.
|
|
@@ -159,11 +160,10 @@ await page.waitForTimeout(1500);
|
|
|
159
160
|
\`\`\`
|
|
160
161
|
Then re-run the test. This is a common issue with SSR/SPA frameworks where the DOM is rendered but not yet interactive.
|
|
161
162
|
|
|
162
|
-
**After generation,
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
- Change ONLY chaining values and enhance assertions. Preserve everything else exactly as generated.
|
|
163
|
+
**After generation, you MUST do exactly two things — nothing more, nothing less:**
|
|
164
|
+
1. **Fix chaining**: replace hardcoded IDs with dynamic response values — path params like \`id = 'id'\` → \`skyramp.get_response_value(prev_response, "id")\`, and hardcoded IDs in request bodies → dynamic values from prior responses.
|
|
165
|
+
2. **Enhance assertions**: for integration tests and contract provider tests, follow the assertion enhancement instructions returned in the tool output. Add response body assertions for every request. This step is MANDATORY — do NOT skip it even if chaining is already correct.
|
|
166
|
+
Do not make any other changes to the generated test file.
|
|
167
167
|
|
|
168
168
|
After all actions, execute ONLY the test files you created (ADD), regenerated (REGENERATE),
|
|
169
169
|
or edited (UPDATE). Do NOT execute VERIFY'd tests — they are unaffected by the diff and do not
|
|
@@ -191,7 +191,7 @@ Call \`skyramp_submit_report\` with \`summaryOutputFile\`: "${summaryOutputFile}
|
|
|
191
191
|
VERIFY: note that the test was verified as unaffected by the diff — no file changes made.
|
|
192
192
|
Do NOT include files that were newly created in this run (those go in \`newTestsCreated\`).
|
|
193
193
|
|
|
194
|
-
**additionalRecommendations** —
|
|
194
|
+
**additionalRecommendations** — remaining recommendations from the ranked list (MUST contain EXACTLY ${maxRecommendations - maxGenerate} items):
|
|
195
195
|
\`testId\` (human-readable kebab-case, e.g. \`integration-products-orders-workflow\`), \`testType\`, \`category\`, \`scenarioName\`, \`priority\` (high/medium/low — used for sorting, not displayed), \`description\`, \`steps\`, \`reasoning\`
|
|
196
196
|
Keep each \`description\` to one sentence. Omit \`requestBody\` and \`responseBody\` from steps.
|
|
197
197
|
Include at most 3 steps per recommendation.
|
|
@@ -156,6 +156,8 @@ export async function parseTraceFile(filePath) {
|
|
|
156
156
|
return { entries, userFlows, format };
|
|
157
157
|
}
|
|
158
158
|
const SKIP_DIRS = new Set(["node_modules", ".git", "dist", "build", ".next", ".nuxt", "coverage", "__pycache__", ".venv", "venv"]);
|
|
159
|
+
/** Known test-artifact directories where testbot-generated traces are written. */
|
|
160
|
+
const TRACE_SCAN_DIRS = [".skyramp", "tests", "test", "e2e", "playwright"];
|
|
159
161
|
/**
|
|
160
162
|
* Recursively scan a directory for files matching a predicate, up to maxDepth levels.
|
|
161
163
|
*/
|
|
@@ -180,6 +182,22 @@ function scanDir(dir, predicate, maxDepth, results) {
|
|
|
180
182
|
}
|
|
181
183
|
}
|
|
182
184
|
}
|
|
185
|
+
/**
 * Collect matching files from the repository root and from the known
 * test-artifact directories only, instead of walking the whole repository.
 *
 * The root is scanned with maxDepth 0 (root-level files); each existing
 * directory named in TRACE_SCAN_DIRS is scanned with maxDepth 5. Restricting
 * the walk this way keeps committed demo assets (e.g. frontend/public/traces/)
 * from being picked up.
 *
 * @param {string} repositoryPath - Repository root to scan.
 * @param {(name: string) => boolean} predicate - File-name filter handed to scanDir.
 * @param {string[]} results - Accumulator that scanDir appends matches to.
 */
function scanTraceArtifactDirs(repositoryPath, predicate, results) {
    const artifactDirMaxDepth = 5;
    // Files sitting directly in the repository root.
    scanDir(repositoryPath, predicate, 0, results);
    // Known artifact directories, skipping the ones this repository lacks.
    for (const dirName of TRACE_SCAN_DIRS) {
        const artifactDir = path.join(repositoryPath, dirName);
        if (!fs.existsSync(artifactDir)) {
            continue;
        }
        scanDir(artifactDir, predicate, artifactDirMaxDepth, results);
    }
}
|
|
183
201
|
/**
|
|
184
202
|
* Discover trace JSON files in a repository path.
|
|
185
203
|
*/
|
|
@@ -191,12 +209,12 @@ export function discoverTraceFiles(repositoryPath) {
|
|
|
191
209
|
if (fs.existsSync(full))
|
|
192
210
|
found.push(full);
|
|
193
211
|
}
|
|
194
|
-
// Recursive scan: any *trace*.json|har,
|
|
212
|
+
// Recursive scan scoped to test-artifact dirs: any *trace*.json|har, excluding scenario/test output files
|
|
195
213
|
const isTraceJson = (name) => /\.(json|har)$/i.test(name) &&
|
|
196
214
|
/trace/i.test(name) &&
|
|
197
215
|
!/^scenario_/i.test(name) &&
|
|
198
216
|
!/_test\.(json|har)$/i.test(name);
|
|
199
|
-
|
|
217
|
+
scanTraceArtifactDirs(repositoryPath, isTraceJson, found);
|
|
200
218
|
// Deduplicate and sort for deterministic ordering
|
|
201
219
|
return [...new Set(found)].sort();
|
|
202
220
|
}
|
|
@@ -209,6 +227,6 @@ export function discoverPlaywrightZips(repositoryPath) {
|
|
|
209
227
|
const isPlaywrightZip = (name) => /\.zip$/i.test(name) && (/playwright/i.test(name) ||
|
|
210
228
|
/_trace\.zip$/i.test(name) ||
|
|
211
229
|
name.toLowerCase() === "trace.zip");
|
|
212
|
-
|
|
230
|
+
scanTraceArtifactDirs(repositoryPath, isPlaywrightZip, found);
|
|
213
231
|
return [...new Set(found)].sort();
|
|
214
232
|
}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for trace-parser.ts — specifically the scanTraceArtifactDirs scoping
|
|
3
|
+
* introduced to prevent demo/fixture files (e.g. frontend/public/traces/) from being
|
|
4
|
+
* misidentified as testbot-generated traces.
|
|
5
|
+
*/
|
|
6
|
+
import * as fs from "fs";
|
|
7
|
+
import * as os from "os";
|
|
8
|
+
import * as path from "path";
|
|
9
|
+
import { discoverTraceFiles, discoverPlaywrightZips } from "./trace-parser.js";
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Helpers
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
/**
 * Create `dir`, including any missing parent directories.
 * Does nothing if the directory already exists.
 */
function mkdirp(dir) {
    const createParents = { recursive: true };
    fs.mkdirSync(dir, createParents);
}
|
|
16
|
+
/**
 * Create an empty file at `file`, creating its parent directories first.
 * An existing file at that path is overwritten with empty content.
 */
function touch(file) {
    const parentDir = path.dirname(file);
    mkdirp(parentDir);
    const emptyContent = "";
    fs.writeFileSync(file, emptyContent);
}
|
|
20
|
+
/**
 * Run `fn` against a freshly created temporary directory that stands in for a repository.
 * The directory is removed recursively once `fn` returns or throws.
 * `fn` is invoked synchronously and its return value is discarded.
 */
function withTempRepo(fn) {
    const tempPrefix = path.join(os.tmpdir(), "trace-parser-test-");
    const repoRoot = fs.mkdtempSync(tempPrefix);
    const removeOptions = { recursive: true, force: true };
    try {
        fn(repoRoot);
    }
    finally {
        // Always clean up, even when the callback (e.g. a failed expectation) throws.
        fs.rmSync(repoRoot, removeOptions);
    }
}
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// discoverPlaywrightZips — scoping tests
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
describe("discoverPlaywrightZips — scanTraceArtifactDirs scoping", () => {
    // Each row: [test title, zip path segments relative to the repo root, whether the zip must be discovered].
    // Rows are registered in order, so the suite runs the same cases in the same sequence as before.
    const scopingCases = [
        [
            "does NOT discover playwright zip in frontend/public/traces/ (demo fixture dir)",
            ["frontend", "public", "traces", "ui_test_playwright.zip"],
            false,
        ],
        [
            "discovers playwright zip in tests/ (test-artifact dir)",
            ["tests", "ui_test_playwright.zip"],
            true,
        ],
        [
            "discovers playwright zip in .skyramp/ (test-artifact dir)",
            [".skyramp", "recording_playwright.zip"],
            true,
        ],
        [
            "discovers playwright zip in e2e/ (test-artifact dir)",
            ["e2e", "flow_playwright.zip"],
            true,
        ],
        [
            "discovers playwright zip in playwright/ (test-artifact dir)",
            ["playwright", "trace.zip"],
            true,
        ],
        [
            "does NOT discover zip in src/ (not a test-artifact dir)",
            ["src", "recordings", "ui_playwright.zip"],
            false,
        ],
        [
            "does NOT discover zip in deeply nested non-test dir",
            ["frontend", "src", "assets", "demo_playwright.zip"],
            false,
        ],
    ];
    for (const [title, segments, shouldBeFound] of scopingCases) {
        it(title, () => {
            withTempRepo((repo) => {
                const zipPath = path.join(repo, ...segments);
                touch(zipPath);
                const discovered = discoverPlaywrightZips(repo);
                if (shouldBeFound) {
                    expect(discovered).toContain(zipPath);
                }
                else {
                    // Out-of-scope zips must leave the result completely empty.
                    expect(discovered).toEqual([]);
                }
            });
        });
    }
});
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
// discoverTraceFiles — scoping tests
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
describe("discoverTraceFiles — scanTraceArtifactDirs scoping", () => {
    // Creates an empty file at the given repo-relative segments and asserts it is reported.
    const expectDiscovered = (...segments) => {
        withTempRepo((repo) => {
            const tracePath = path.join(repo, ...segments);
            touch(tracePath);
            expect(discoverTraceFiles(repo)).toContain(tracePath);
        });
    };
    // Creates an empty file at the given repo-relative segments and asserts nothing at all is reported.
    const expectNothingDiscovered = (...segments) => {
        withTempRepo((repo) => {
            touch(path.join(repo, ...segments));
            expect(discoverTraceFiles(repo)).toEqual([]);
        });
    };
    it("does NOT discover trace.json nested under frontend/public/traces/", () => {
        withTempRepo((repo) => {
            const demoTrace = path.join(repo, "frontend", "public", "traces", "backend_trace.json");
            touch(demoTrace);
            // fixed-name root candidates don't match "backend_trace.json", and scan won't reach frontend/
            const leakedIntoResults = discoverTraceFiles(repo).some((entry) => entry.includes("frontend"));
            expect(leakedIntoResults).toBe(false);
        });
    });
    it("discovers trace.json in tests/ dir", () => {
        expectDiscovered("tests", "backend_trace.json");
    });
    it("discovers trace.json in .skyramp/ dir", () => {
        expectDiscovered(".skyramp", "skyramp_trace.json");
    });
    it("discovers root-level trace.json", () => {
        expectDiscovered("trace.json");
    });
    it("discovers root-level skyramp_traces.json via fixed-name check", () => {
        expectDiscovered("skyramp_traces.json");
    });
    it("does NOT discover scenario_ json files (excluded by predicate)", () => {
        expectNothingDiscovered("tests", "scenario_orders_trace.json");
    });
    it("does NOT discover _test.json files (excluded by predicate)", () => {
        expectNothingDiscovered("tests", "orders_trace_test.json");
    });
    it("results are deduplicated when fixed-name and scan both find the same root file", () => {
        withTempRepo((repo) => {
            const rootTrace = path.join(repo, "trace.json");
            touch(rootTrace);
            const occurrences = discoverTraceFiles(repo).filter((entry) => entry === rootTrace);
            expect(occurrences).toHaveLength(1);
        });
    });
});
|