@skyramp/mcp 0.0.64-rc.6 → 0.0.64-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +16 -182
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +11 -580
- package/build/prompts/testbot/testbot-prompts.js +1 -2
- package/build/utils/trace-parser.js +3 -21
- package/package.json +1 -1
- package/build/utils/trace-parser.test.js +0 -140
|
@@ -44,16 +44,7 @@ function computeTiebreakerSeed(endpoints, diffFiles) {
|
|
|
44
44
|
return crypto.createHash("sha256").update(canonical).digest("hex").slice(0, 8);
|
|
45
45
|
}
|
|
46
46
|
// ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
|
|
47
|
-
function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false
|
|
48
|
-
// Full-repo mode only — percentage-based UI/E2E slot targets (15% each, floor 1).
|
|
49
|
-
// Capped so E2E+UI together never exceed topN.
|
|
50
|
-
// Referenced in supplementNote below, but the ternary conditions that use them
|
|
51
|
-
// (`isFrontendProject && !isDiffScope`) are always false in PR/diff mode.
|
|
52
|
-
const rawE2E = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
|
|
53
|
-
const rawUI = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
|
|
54
|
-
const slotsFloor = Math.floor(topN / 2);
|
|
55
|
-
const minE2ESlots = Math.min(rawE2E, slotsFloor);
|
|
56
|
-
const minUISlots = Math.min(rawUI, Math.max(0, topN - minE2ESlots));
|
|
47
|
+
function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false) {
|
|
57
48
|
const generateItems = scored.slice(0, Math.min(maxGen, scored.length));
|
|
58
49
|
const additionalItems = scored.slice(maxGen, topN);
|
|
59
50
|
const authRef = authHeaderValue
|
|
@@ -118,10 +109,9 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
118
109
|
prereqNote);
|
|
119
110
|
}
|
|
120
111
|
}).join("\n\n");
|
|
121
|
-
//
|
|
122
|
-
//
|
|
123
|
-
|
|
124
|
-
const needsE2ESlot = hasFrontendChanges && !isUIOnlyPR && hasTraces;
|
|
112
|
+
// For mixed PRs, always reserve slots for UI and E2E recommendations regardless of whether
|
|
113
|
+
// traces already exist — the user can record them later or the bot can record during the run.
|
|
114
|
+
const needsE2ESlot = hasFrontendChanges && !isUIOnlyPR;
|
|
125
115
|
const needsUISlot = hasFrontendChanges && !isUIOnlyPR;
|
|
126
116
|
const frontendSlots = (needsE2ESlot ? 1 : 0) + (needsUISlot ? 1 : 0);
|
|
127
117
|
const backendAdditionalItems = frontendSlots > 0
|
|
@@ -153,159 +143,8 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
153
143
|
})() : "";
|
|
154
144
|
const supplementCount = topN - generateItems.length - backendAdditionalItems.length - frontendSlots;
|
|
155
145
|
const supplementNote = supplementCount > 0
|
|
156
|
-
? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them
|
|
146
|
+
? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them from endpoint interactions and source code patterns not yet covered. Use the same 5-dimension rubric and quality gate to assign priority (HIGH/MEDIUM/LOW), testType, and category.${hasFrontendChanges && !isUIOnlyPR ? " Since this PR has frontend changes, at least 1 of these should be a UI or E2E test targeting the changed components." : ""} Do NOT produce fewer than ${topN} total.`
|
|
157
147
|
: "";
|
|
158
|
-
// ── Full-repo mode: recommendations only, no execution ──────────────────
|
|
159
|
-
if (!isDiffScope) {
|
|
160
|
-
const toTitle = (name) => name.replace(/-/g, " ").replace(/\b\w/g, c => c.toUpperCase());
|
|
161
|
-
// Coverage ranking (highest to lowest breadth):
|
|
162
|
-
// E2E first: full browser-to-backend flow — exercises both frontend and backend.
|
|
163
|
-
// UI second: frontend components call backend APIs — also exercises backend.
|
|
164
|
-
// Integration third: backend API chains validated directly.
|
|
165
|
-
// Contract last: single-endpoint boundary only.
|
|
166
|
-
const TYPE_ORDER = ["e2e", "ui", "integration", "contract"];
|
|
167
|
-
const TYPE_LABEL = {
|
|
168
|
-
e2e: "E2E", ui: "UI", integration: "Integration", contract: "Contract",
|
|
169
|
-
};
|
|
170
|
-
// All scored items up to topN, already sorted by priority/novelty
|
|
171
|
-
const allItems = scored.slice(0, topN);
|
|
172
|
-
// Group by test type while preserving priority ordering within each group
|
|
173
|
-
const byType = new Map();
|
|
174
|
-
for (const t of TYPE_ORDER)
|
|
175
|
-
byType.set(t, []);
|
|
176
|
-
for (const item of allItems) {
|
|
177
|
-
const t = item.scenario.testType ?? (item.scenario.steps.length === 1 ? "contract" : "integration");
|
|
178
|
-
if (!byType.has(t))
|
|
179
|
-
byType.set(t, []);
|
|
180
|
-
byType.get(t).push(item);
|
|
181
|
-
}
|
|
182
|
-
const renderItem = (item, rank) => {
|
|
183
|
-
const s = item.scenario;
|
|
184
|
-
const testType = s.testType ?? (s.steps.length === 1 ? "contract" : "integration");
|
|
185
|
-
const title = toTitle(s.scenarioName);
|
|
186
|
-
if (testType === "contract") {
|
|
187
|
-
const step = s.steps[0];
|
|
188
|
-
const endpointURL = `${baseUrl}${step.path}`;
|
|
189
|
-
const isBodyMethod = ["POST", "PUT", "PATCH"].includes(step.method);
|
|
190
|
-
const dataParam = isBodyMethod
|
|
191
|
-
? `, requestData: <${step.method} ${step.path} required fields from source code>`
|
|
192
|
-
: "";
|
|
193
|
-
return [
|
|
194
|
-
`**${rank}. ${title}**`,
|
|
195
|
-
` ${s.description}`,
|
|
196
|
-
` ${step.method} ${step.path} → ${step.expectedStatusCode}`,
|
|
197
|
-
` Tool: \`skyramp_contract_test_generation({ endpointURL: "${endpointURL}", method: "${step.method}"${authRef}${dataParam} })\``,
|
|
198
|
-
` From source: fill in requestData field names and the specific production boundary this validates`,
|
|
199
|
-
].join("\n");
|
|
200
|
-
}
|
|
201
|
-
else {
|
|
202
|
-
const stepLines = s.steps.map(st => {
|
|
203
|
-
const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
|
|
204
|
-
const bodyHint = isBody ? ` — body: <${st.method} ${st.path} required fields from source>` : "";
|
|
205
|
-
return ` ${st.order}. ${st.method} ${st.path} → ${st.expectedStatusCode}: ${st.description}${bodyHint}`;
|
|
206
|
-
}).join("\n");
|
|
207
|
-
const toolCalls = s.steps.map(st => {
|
|
208
|
-
const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
|
|
209
|
-
const dataParam = isBody
|
|
210
|
-
? `, requestBody: <${st.method} ${st.path} required fields from source>`
|
|
211
|
-
: "";
|
|
212
|
-
return ` skyramp_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${s.scenarioName}", baseURL: "${baseUrl}", method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${scenarioAuthRef}${dataParam} })`;
|
|
213
|
-
}).join("\n");
|
|
214
|
-
// E2E and UI use trace-based generation, not the scenario pipeline.
|
|
215
|
-
// Only emit per-step skyramp_scenario_test_generation calls for integration type.
|
|
216
|
-
const isTraceBased = testType === "e2e" || testType === "ui";
|
|
217
|
-
const finalTool = testType === "e2e"
|
|
218
|
-
? `skyramp_e2e_test_generation({ playwrightZip: "<trace zip path>", traceFile: "<backend trace path>"${authHeaderOnlyRef} })`
|
|
219
|
-
: testType === "ui"
|
|
220
|
-
? `skyramp_ui_test_generation({ playwrightZip: "<trace zip path>"${authHeaderOnlyRef} })`
|
|
221
|
-
: `skyramp_integration_test_generation({ scenarioFile: "scenario_${s.scenarioName}.json"${authHeaderOnlyRef} })`;
|
|
222
|
-
const toolCallsBlock = isTraceBased
|
|
223
|
-
? ` ${finalTool}`
|
|
224
|
-
: `${toolCalls}\n ${finalTool}`;
|
|
225
|
-
return [
|
|
226
|
-
`**${rank}. ${title}**`,
|
|
227
|
-
` ${s.description}`,
|
|
228
|
-
` Steps:`,
|
|
229
|
-
stepLines,
|
|
230
|
-
` Tool calls:`,
|
|
231
|
-
toolCallsBlock,
|
|
232
|
-
` From source: fill in requestBody field values and assert all computed response fields`,
|
|
233
|
-
].join("\n");
|
|
234
|
-
}
|
|
235
|
-
};
|
|
236
|
-
const sections = TYPE_ORDER
|
|
237
|
-
.filter(t => (byType.get(t) ?? []).length > 0)
|
|
238
|
-
.map(t => {
|
|
239
|
-
const items = byType.get(t);
|
|
240
|
-
const label = TYPE_LABEL[t];
|
|
241
|
-
let globalRank = 0;
|
|
242
|
-
for (const prev of TYPE_ORDER) {
|
|
243
|
-
if (prev === t)
|
|
244
|
-
break;
|
|
245
|
-
globalRank += (byType.get(prev) ?? []).length;
|
|
246
|
-
}
|
|
247
|
-
const entries = items.map((item, i) => renderItem(item, globalRank + i + 1)).join("\n\n");
|
|
248
|
-
return `### ${label} (${items.length})\n\n${entries}`;
|
|
249
|
-
})
|
|
250
|
-
.join("\n\n");
|
|
251
|
-
const repoSupplementNote = supplementNote; // already built above with isDiffScope=false tier ordering
|
|
252
|
-
return `## Test Recommendations (${topN} total)
|
|
253
|
-
|
|
254
|
-
> **Repo mode — no tests are executed.** Use the tool calls below to generate any recommendation on demand.
|
|
255
|
-
> Highest-value tests appear first within each type. Use the "From source" hint in each item to fill in field names and assertions before calling the tool.
|
|
256
|
-
|
|
257
|
-
**Step 1 — Source-Code Enrichment (MANDATORY before presenting anything)**
|
|
258
|
-
For each endpoint listed in the Repository Context above, read the route handler source code. Look for:
|
|
259
|
-
- **All required request body fields** (names and types) for POST/PUT/PATCH — use in step descriptions and tool call params
|
|
260
|
-
- **Computed/derived response fields** (e.g. \`total_amount\`, \`discount_percent\`) and their formulas — assert these
|
|
261
|
-
- **Auth middleware** (HTTPBearer, \`Depends(get_current_user)\`, \`@UseGuards\`, \`jwt.verify\`) — set \`authHeader\`/\`authScheme\`; FastAPI HTTPBearer → **403** (not 401)
|
|
262
|
-
- **Storage backend** — if Redis or schema-less, discard unique-constraint and cascade-delete scenarios
|
|
263
|
-
- **Delete behavior** — hard-delete → 204; soft-delete/cancel → 200
|
|
264
|
-
|
|
265
|
-
Do NOT present generic placeholders. Replace every \`<... from source>\` hint with actual field names and realistic values.
|
|
266
|
-
|
|
267
|
-
${buildTestPatternGuidelines()}
|
|
268
|
-
|
|
269
|
-
${buildTestExamples()}
|
|
270
|
-
|
|
271
|
-
**Cascade vs referential integrity:** Keep only the scenario that matches the actual FK delete policy in source (CASCADE → cascade-delete; RESTRICT/no cascade → delete-blocked). Remove the other silently — do NOT create a "Removed Recommendations" or "Not Applicable" section.
|
|
272
|
-
|
|
273
|
-
**Unique constraints:** If the storage backend is Redis, in-memory, or schema-less with no explicit \`UNIQUE\` index, discard unique-constraint scenarios entirely and replace them with a different high-value test. Do NOT list removed scenarios.
|
|
274
|
-
|
|
275
|
-
**NEVER create a "Removed Recommendations", "Not Applicable", or similar section.** If a scenario is inapplicable, silently replace it with an equivalent-priority scenario from the supplement tiers. The output must contain ONLY the ${topN} recommendations.
|
|
276
|
-
|
|
277
|
-
${buildTestQualityCriteria()}
|
|
278
|
-
|
|
279
|
-
**5-dimension rubric — use to assign priority for supplement items:**
|
|
280
|
-
| Dimension | What to assess |
|
|
281
|
-
| Production Safety | Guards a critical boundary (auth, unique constraint, cascade delete, data integrity, breaking migration)? → HIGH |
|
|
282
|
-
| Bug-Finding Potential | Targets a known failure mode (race condition, data consistency, state transition, cascade effect)? → HIGH |
|
|
283
|
-
| User Journey Relevance | Reflects how real users interact (from traces, business flows, critical paths)? → HIGH or MEDIUM |
|
|
284
|
-
| Coverage Gap | Addresses an area with zero existing test coverage? → bump up one tier |
|
|
285
|
-
| Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
|
|
286
|
-
|
|
287
|
-
**Per-recommendation format (apply to ALL items):**
|
|
288
|
-
- Title and one-sentence description of what it validates (business rule, not just "tests the endpoint")
|
|
289
|
-
- Steps with concrete field names and realistic values derived from source code
|
|
290
|
-
- Ready-to-use tool call — replace all \`<...>\` placeholders with real values before presenting
|
|
291
|
-
- "From source" note — the specific production risk or business rule this prevents
|
|
292
|
-
|
|
293
|
-
**MANDATORY: Every pre-ranked item listed above MUST appear in your output — do not drop or skip any.**
|
|
294
|
-
|
|
295
|
-
${sections}
|
|
296
|
-
${repoSupplementNote}
|
|
297
|
-
|
|
298
|
-
**Test type mix — MANDATORY:**
|
|
299
|
-
${isFrontendOnlyProject
|
|
300
|
-
? `This is a frontend repo. Focus on E2E and UI tests only — E2E covers the full browser-to-backend flow (highest coverage), UI exercises frontend components that call backend APIs. Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.`
|
|
301
|
-
: isFrontendProject
|
|
302
|
-
? `This is a full-stack repo. Coverage ranking: E2E (full browser-to-backend flow) > UI (frontend exercises backend APIs) > Integration (backend chains) > Contract (single endpoint). Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`), in addition to backend integration and contract tests.`
|
|
303
|
-
: `Focus on integration and contract tests for all API endpoints.`}
|
|
304
|
-
**No smoke tests. No fuzz tests.**
|
|
305
|
-
|
|
306
|
-
**You MUST present EXACTLY ${topN} recommendations. Do NOT execute any tests. Do NOT produce fewer than ${topN}.**`;
|
|
307
|
-
}
|
|
308
|
-
// ── PR / branch-diff mode: execution plan ────────────────────────────────
|
|
309
148
|
return `## Execution Plan
|
|
310
149
|
Seed: ${seed} | Endpoints: ${endpointCount} | Budget: ${generateItems.length} generate + ${Math.max(topN - generateItems.length, 0)} additional = ${topN} total
|
|
311
150
|
|
|
@@ -531,7 +370,7 @@ ${detailBlocks}
|
|
|
531
370
|
const errorA = a.scenario.steps.some(s => s.interactionType === "error" || s.interactionType === "edge-case") ? 1 : 0;
|
|
532
371
|
const errorB = b.scenario.steps.some(s => s.interactionType === "error" || s.interactionType === "edge-case") ? 1 : 0;
|
|
533
372
|
if (errorB !== errorA)
|
|
534
|
-
return
|
|
373
|
+
return errorB - errorA;
|
|
535
374
|
// Use locale-independent comparison to avoid runtime-locale non-determinism
|
|
536
375
|
const nameA = a.scenario.scenarioName;
|
|
537
376
|
const nameB = b.scenario.scenarioName;
|
|
@@ -588,10 +427,7 @@ Do not churn recommendations without cause.
|
|
|
588
427
|
`;
|
|
589
428
|
}
|
|
590
429
|
else if (scored.length > 0) {
|
|
591
|
-
|
|
592
|
-
const isFrontendProject = projectType === "full-stack" || projectType === "frontend";
|
|
593
|
-
const isFrontendOnlyProject = projectType === "frontend";
|
|
594
|
-
mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, isDiffScope, isFrontendProject, isFrontendOnlyProject);
|
|
430
|
+
mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces);
|
|
595
431
|
}
|
|
596
432
|
else {
|
|
597
433
|
mainSection = `
|
|
@@ -665,12 +501,13 @@ and adjust the test approach if needed.
|
|
|
665
501
|
historyBody += `
|
|
666
502
|
### Previously Recommended (not generated)
|
|
667
503
|
${recLines}
|
|
668
|
-
**Stability rule**:
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
504
|
+
**Stability rule**: If a previously recommended test still applies to the current code
|
|
505
|
+
(the endpoint exists, the business logic hasn't changed), carry it forward in your
|
|
506
|
+
additionalRecommendations — match by scenarioName (for multi-step scenarios) or by
|
|
507
|
+
endpoint (for single-endpoint tests). Re-derive category and priority from the test
|
|
508
|
+
content. Do NOT drop a previous recommendation unless the underlying code was removed
|
|
509
|
+
or the test is now covered by a generated test.
|
|
510
|
+
Only add NEW recommendations for code paths introduced in the latest commit.
|
|
674
511
|
`;
|
|
675
512
|
}
|
|
676
513
|
prHistorySection = `
|
|
@@ -678,11 +515,8 @@ GENERATE items are always executed regardless of prior recommendations — do no
|
|
|
678
515
|
Tests from prior bot runs are still in the working tree — the maintenance pipeline
|
|
679
516
|
(Task 2) keeps them up to date. Use the history below to **avoid duplicating** existing
|
|
680
517
|
coverage and to fill gaps:
|
|
681
|
-
- **
|
|
682
|
-
|
|
683
|
-
handles deduplication at the file level.
|
|
684
|
-
- Tests listed under "Previously Generated Tests" are maintained automatically by Task 2 —
|
|
685
|
-
do NOT include them in additionalRecommendations.
|
|
518
|
+
- **Do NOT re-recommend** tests listed under "Previously Generated Tests" — they already
|
|
519
|
+
exist and are maintained automatically.
|
|
686
520
|
- **Carry forward** previously recommended-but-not-generated tests unchanged in
|
|
687
521
|
additionalRecommendations if they still apply. Promote the highest-priority ones
|
|
688
522
|
into generation slots if capacity allows.
|
|
@@ -202,15 +202,14 @@ describe("buildRecommendationPrompt — PR History section", () => {
|
|
|
202
202
|
expect(prompt).toContain("Promote the highest-priority ones");
|
|
203
203
|
expect(prompt).toContain("into generation slots if capacity allows");
|
|
204
204
|
});
|
|
205
|
-
it("
|
|
205
|
+
it("includes do-not-re-recommend instruction for implemented tests", () => {
|
|
206
206
|
const ctx = makePRContext({
|
|
207
207
|
previousRecommendations: [
|
|
208
208
|
{ testType: "contract", endpoint: "GET /api/items", status: "implemented", commentId: "1" },
|
|
209
209
|
],
|
|
210
210
|
});
|
|
211
211
|
const prompt = buildRecommendationPrompt(minimalAnalysis(), "current_branch_diff", 10, ctx);
|
|
212
|
-
expect(prompt).toContain("
|
|
213
|
-
expect(prompt).not.toContain("Do NOT re-recommend");
|
|
212
|
+
expect(prompt).toContain("Do NOT re-recommend");
|
|
214
213
|
expect(prompt).toContain("Previously Generated Tests");
|
|
215
214
|
});
|
|
216
215
|
it("de-duplicates multi-step scenario entries to one line per scenario", () => {
|
|
@@ -282,31 +281,29 @@ function minimalScenario(overrides = {}) {
|
|
|
282
281
|
};
|
|
283
282
|
}
|
|
284
283
|
describe("buildRecommendationPrompt — Stability and supplement section", () => {
|
|
285
|
-
|
|
286
|
-
// Full-repo mode is presentation-only; there is no previous-run state to carry forward.
|
|
287
|
-
it("includes Recommendation Stability section in output when scenarios exist (PR mode)", () => {
|
|
284
|
+
it("includes Recommendation Stability section in output when scenarios exist", () => {
|
|
288
285
|
const analysis = minimalAnalysis({
|
|
289
286
|
businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
|
|
290
287
|
});
|
|
291
|
-
const prompt = buildRecommendationPrompt(analysis, "
|
|
288
|
+
const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
|
|
292
289
|
expect(prompt).toContain("## Recommendation Stability");
|
|
293
290
|
});
|
|
294
|
-
it("stability section uses scenarioName/endpoint matching strategy
|
|
291
|
+
it("stability section uses scenarioName/endpoint matching strategy", () => {
|
|
295
292
|
const analysis = minimalAnalysis({
|
|
296
293
|
businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
|
|
297
294
|
});
|
|
298
|
-
const prompt = buildRecommendationPrompt(analysis, "
|
|
295
|
+
const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
|
|
299
296
|
const stabilityStart = prompt.indexOf("## Recommendation Stability");
|
|
300
297
|
const stabilityBlock = prompt.slice(stabilityStart, stabilityStart + 500);
|
|
301
298
|
expect(stabilityBlock).toContain("scenarioName");
|
|
302
299
|
expect(stabilityBlock).toContain("endpoint");
|
|
303
300
|
expect(stabilityBlock).toContain("Re-derive category and priority");
|
|
304
301
|
});
|
|
305
|
-
it("stability section specifies when to drop a recommendation
|
|
302
|
+
it("stability section specifies when to drop a recommendation", () => {
|
|
306
303
|
const analysis = minimalAnalysis({
|
|
307
304
|
businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
|
|
308
305
|
});
|
|
309
|
-
const prompt = buildRecommendationPrompt(analysis, "
|
|
306
|
+
const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
|
|
310
307
|
expect(prompt).toContain("target endpoint was removed");
|
|
311
308
|
expect(prompt).toContain("business logic changed");
|
|
312
309
|
expect(prompt).toContain("covered by a generated test");
|
|
@@ -324,12 +321,12 @@ describe("buildRecommendationPrompt — Stability and supplement section", () =>
|
|
|
324
321
|
it("MAX_TESTS_TO_GENERATE is 3", () => {
|
|
325
322
|
expect(MAX_TESTS_TO_GENERATE).toBe(3);
|
|
326
323
|
});
|
|
327
|
-
it("uses MAX_CRITICAL_TESTS in category-aware selection rules
|
|
324
|
+
it("uses MAX_CRITICAL_TESTS in category-aware selection rules", () => {
|
|
328
325
|
const analysis = minimalAnalysis({
|
|
329
326
|
businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
|
|
330
327
|
});
|
|
331
|
-
|
|
332
|
-
|
|
328
|
+
const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
|
|
329
|
+
// The critical-category minimum line references MAX_CRITICAL_TESTS (= 3)
|
|
333
330
|
expect(prompt).toContain("GENERATE items MUST be from HIGH-priority categories");
|
|
334
331
|
});
|
|
335
332
|
});
|
|
@@ -372,569 +369,3 @@ describe("PATH_PARAM_UUID_GUIDANCE — no hardcoded UUID anchor", () => {
|
|
|
372
369
|
expect(prompt).not.toMatch(UUID_V4_REGEX);
|
|
373
370
|
});
|
|
374
371
|
});
|
|
375
|
-
// ---------------------------------------------------------------------------
|
|
376
|
-
// Regression tests — PR #110 quality baseline
|
|
377
|
-
//
|
|
378
|
-
// Guard against regressions in recommendation quality. These tests assert that
|
|
379
|
-
// the key signals that made PR #110's recommendations excellent are present in
|
|
380
|
-
// both full_repo and branch_diff (PR) modes.
|
|
381
|
-
// Baseline: https://github.com/letsramp/demoshop-fullstack/pull/110
|
|
382
|
-
// ---------------------------------------------------------------------------
|
|
383
|
-
function mockDiffScenario(overrides = {}) {
|
|
384
|
-
return {
|
|
385
|
-
scenarioName: "orders-update-with-discount",
|
|
386
|
-
description: "PUT /api/v1/orders/{order_id} with discount_percent — verifies total_amount formula",
|
|
387
|
-
category: "business_rule",
|
|
388
|
-
priority: "high",
|
|
389
|
-
steps: [
|
|
390
|
-
{ order: 1, method: "POST", path: "/api/v1/products", expectedStatusCode: 201, description: "Create product", interactionType: "success" },
|
|
391
|
-
{ order: 2, method: "POST", path: "/api/v1/orders", expectedStatusCode: 201, description: "Create order", interactionType: "success", chainsFrom: { sourceField: "id", sourceStep: 1, sourceLocation: "body", targetParam: "product_id", targetLocation: "body" } },
|
|
392
|
-
{ order: 3, method: "PUT", path: "/api/v1/orders/{order_id}", expectedStatusCode: 200, description: "Apply discount", interactionType: "success", chainsFrom: { sourceField: "order_id", sourceStep: 2, sourceLocation: "body", targetParam: "order_id", targetLocation: "path" } },
|
|
393
|
-
],
|
|
394
|
-
chainingKeys: ["id", "order_id"],
|
|
395
|
-
requiresAuth: true,
|
|
396
|
-
estimatedComplexity: "moderate",
|
|
397
|
-
testType: "integration",
|
|
398
|
-
...overrides,
|
|
399
|
-
};
|
|
400
|
-
}
|
|
401
|
-
function analysisWithScenario(scope) {
|
|
402
|
-
const base = minimalAnalysis({
|
|
403
|
-
businessContext: {
|
|
404
|
-
mainPurpose: "E-commerce demo",
|
|
405
|
-
userFlows: [],
|
|
406
|
-
dataFlows: [],
|
|
407
|
-
integrationPatterns: [],
|
|
408
|
-
draftedScenarios: [mockDiffScenario()],
|
|
409
|
-
},
|
|
410
|
-
});
|
|
411
|
-
if (scope === "current_branch_diff") {
|
|
412
|
-
return {
|
|
413
|
-
...base,
|
|
414
|
-
branchDiffContext: {
|
|
415
|
-
currentBranch: "shiny/edit-order",
|
|
416
|
-
baseBranch: "main",
|
|
417
|
-
changedFiles: ["backend/app/routers/orders.py"],
|
|
418
|
-
newEndpoints: [{
|
|
419
|
-
path: "/api/v1/orders/{order_id}",
|
|
420
|
-
methods: [{ method: "PUT", sourceFile: "orders.py", interactionCount: 3 }],
|
|
421
|
-
}],
|
|
422
|
-
modifiedEndpoints: [],
|
|
423
|
-
affectedServices: ["orders"],
|
|
424
|
-
},
|
|
425
|
-
};
|
|
426
|
-
}
|
|
427
|
-
return base;
|
|
428
|
-
}
|
|
429
|
-
describe("PR #110 quality baseline — full_repo mode", () => {
|
|
430
|
-
let prompt;
|
|
431
|
-
beforeAll(() => { prompt = buildRecommendationPrompt(analysisWithScenario("full_repo"), "full_repo", 20); });
|
|
432
|
-
it("source enrichment targets each endpoint's route handler, not 'changed files'", () => {
|
|
433
|
-
expect(prompt).toContain("Source-Code Enrichment");
|
|
434
|
-
expect(prompt).toContain("route handler");
|
|
435
|
-
expect(prompt).not.toContain("Read the source code for ALL changed files");
|
|
436
|
-
});
|
|
437
|
-
it("includes test pattern guidelines for quality anchoring", () => {
|
|
438
|
-
expect(prompt).toContain("Test Pattern Guidelines");
|
|
439
|
-
});
|
|
440
|
-
it("includes concrete impressive/deprioritise examples", () => {
|
|
441
|
-
expect(prompt).toContain("Impressive (these catch prod bugs)");
|
|
442
|
-
expect(prompt).toContain("Deprioritise");
|
|
443
|
-
});
|
|
444
|
-
it("supplement ordering puts edge cases before cross-resource (Tier 1 before Tier 3)", () => {
|
|
445
|
-
const tier1Idx = prompt.indexOf("Tier 1");
|
|
446
|
-
const tier3Idx = prompt.indexOf("Tier 3");
|
|
447
|
-
expect(tier1Idx).toBeGreaterThan(-1);
|
|
448
|
-
expect(tier3Idx).toBeGreaterThan(-1);
|
|
449
|
-
expect(tier1Idx).toBeLessThan(tier3Idx);
|
|
450
|
-
});
|
|
451
|
-
it("supplement Tier 1 calls out boundary values and invalid IDs explicitly", () => {
|
|
452
|
-
expect(prompt).toMatch(/Tier 1.*boundary values/s);
|
|
453
|
-
expect(prompt).toMatch(/Tier 1.*invalid.*non-existent IDs/s);
|
|
454
|
-
});
|
|
455
|
-
it("includes 5-dimension quality rubric", () => {
|
|
456
|
-
expect(prompt).toContain("Production Safety");
|
|
457
|
-
expect(prompt).toContain("Bug-Finding Potential");
|
|
458
|
-
expect(prompt).toContain("Coverage Gap");
|
|
459
|
-
});
|
|
460
|
-
it("includes per-recommendation format instruction", () => {
|
|
461
|
-
// Full-repo mode hides category/priority from user output — check for format label and key fields
|
|
462
|
-
expect(prompt).toContain("Per-recommendation format");
|
|
463
|
-
expect(prompt).toContain("tool call");
|
|
464
|
-
expect(prompt).toContain("From source");
|
|
465
|
-
});
|
|
466
|
-
it("includes unique-constraint storage gating for Redis", () => {
|
|
467
|
-
expect(prompt).toContain("Unique constraints");
|
|
468
|
-
expect(prompt).toContain("Redis");
|
|
469
|
-
});
|
|
470
|
-
});
|
|
471
|
-
// ---------------------------------------------------------------------------
|
|
472
|
-
// Tests — full_repo output format and execution guardrails
|
|
473
|
-
//
|
|
474
|
-
// Guard that full_repo mode:
|
|
475
|
-
// - never emits execution/GENERATE language
|
|
476
|
-
// - groups items by test type with section headers
|
|
477
|
-
// - hides category/priority labels from user-facing rendered items
|
|
478
|
-
// - emits "Do NOT execute any tests"
|
|
479
|
-
// - renders pre-ranked item names
|
|
480
|
-
// - includes cascade guidance
|
|
481
|
-
// - scopes Tier 1 supplement to "list" (not "GENERATE set")
|
|
482
|
-
// ---------------------------------------------------------------------------
|
|
483
|
-
function fullRepoAnalysisWithScenarios(overrides = {}, scenarios = []) {
|
|
484
|
-
return minimalAnalysis({
|
|
485
|
-
businessContext: {
|
|
486
|
-
mainPurpose: "E-commerce API",
|
|
487
|
-
userFlows: [],
|
|
488
|
-
dataFlows: [],
|
|
489
|
-
integrationPatterns: [],
|
|
490
|
-
draftedScenarios: scenarios.length > 0 ? scenarios : [mockDiffScenario()],
|
|
491
|
-
},
|
|
492
|
-
...overrides,
|
|
493
|
-
});
|
|
494
|
-
}
|
|
495
|
-
function makeContractScenario() {
|
|
496
|
-
return {
|
|
497
|
-
scenarioName: "create-product-contract",
|
|
498
|
-
description: "POST /api/v1/products auth boundary",
|
|
499
|
-
category: "security_boundary",
|
|
500
|
-
priority: "high",
|
|
501
|
-
steps: [{ order: 1, method: "POST", path: "/api/v1/products", expectedStatusCode: 201, description: "Create product", interactionType: "success" }],
|
|
502
|
-
chainingKeys: [],
|
|
503
|
-
requiresAuth: true,
|
|
504
|
-
estimatedComplexity: "simple",
|
|
505
|
-
testType: "contract",
|
|
506
|
-
};
|
|
507
|
-
}
|
|
508
|
-
describe("full_repo mode — output format and execution guardrails", () => {
|
|
509
|
-
let prompt;
|
|
510
|
-
beforeAll(() => {
|
|
511
|
-
prompt = buildRecommendationPrompt(fullRepoAnalysisWithScenarios({}, [mockDiffScenario(), makeContractScenario()]), "full_repo", 10);
|
|
512
|
-
});
|
|
513
|
-
it("does NOT contain GENERATE execution language", () => {
|
|
514
|
-
expect(prompt).not.toContain("### GENERATE");
|
|
515
|
-
expect(prompt).not.toContain("execute these in order");
|
|
516
|
-
expect(prompt).not.toContain("one retry on failure then skip");
|
|
517
|
-
});
|
|
518
|
-
it("does NOT contain the PR-mode ADDITIONAL section header", () => {
|
|
519
|
-
// The '### ADDITIONAL (list in additionalRecommendations...)' header is a PR-mode structural
|
|
520
|
-
// concept; it must not appear in the full_repo grouped output.
|
|
521
|
-
expect(prompt).not.toContain("### ADDITIONAL (list in additionalRecommendations");
|
|
522
|
-
});
|
|
523
|
-
it("contains explicit 'Do NOT execute any tests' instruction", () => {
|
|
524
|
-
expect(prompt).toContain("Do NOT execute any tests");
|
|
525
|
-
});
|
|
526
|
-
it("contains 'Repo mode' header or preamble", () => {
|
|
527
|
-
expect(prompt).toContain("Repo mode");
|
|
528
|
-
});
|
|
529
|
-
it("groups items by test type — Integration section header present", () => {
|
|
530
|
-
expect(prompt).toMatch(/### (Integration|Contract)/);
|
|
531
|
-
});
|
|
532
|
-
it("E2E section appears before Integration section (E2E ranked highest coverage)", () => {
|
|
533
|
-
const e2eIdx = prompt.indexOf("### E2E");
|
|
534
|
-
const integrationIdx = prompt.indexOf("### Integration");
|
|
535
|
-
// If E2E section exists, it must appear before Integration
|
|
536
|
-
if (e2eIdx !== -1 && integrationIdx !== -1) {
|
|
537
|
-
expect(e2eIdx).toBeLessThan(integrationIdx);
|
|
538
|
-
}
|
|
539
|
-
// At minimum, E2E appears before Contract
|
|
540
|
-
const contractIdx = prompt.indexOf("### Contract");
|
|
541
|
-
if (e2eIdx !== -1 && contractIdx !== -1) {
|
|
542
|
-
expect(e2eIdx).toBeLessThan(contractIdx);
|
|
543
|
-
}
|
|
544
|
-
});
|
|
545
|
-
it("UI section appears before Integration and Contract sections", () => {
|
|
546
|
-
const uiIdx = prompt.indexOf("### UI");
|
|
547
|
-
const integrationIdx = prompt.indexOf("### Integration");
|
|
548
|
-
const contractIdx = prompt.indexOf("### Contract");
|
|
549
|
-
if (uiIdx !== -1 && integrationIdx !== -1) {
|
|
550
|
-
expect(uiIdx).toBeLessThan(integrationIdx);
|
|
551
|
-
}
|
|
552
|
-
if (uiIdx !== -1 && contractIdx !== -1) {
|
|
553
|
-
expect(uiIdx).toBeLessThan(contractIdx);
|
|
554
|
-
}
|
|
555
|
-
});
|
|
556
|
-
it("prompt forbids the LLM from creating a 'Removed Recommendations' section", () => {
|
|
557
|
-
// The prompt must contain the 'NEVER create' instruction so the LLM doesn't add such a section
|
|
558
|
-
expect(prompt).toContain("NEVER create a");
|
|
559
|
-
// The prompt must NOT have an actual section heading titled 'Removed Recommendations'
|
|
560
|
-
// (it may contain the phrase inside the NEVER instruction itself, which is expected)
|
|
561
|
-
expect(prompt).not.toMatch(/^##+ Removed Recommendations/m);
|
|
562
|
-
expect(prompt).not.toMatch(/^##+ Not Applicable/m);
|
|
563
|
-
});
|
|
564
|
-
it("rendered item does NOT contain 'priority=' label visible to user", () => {
|
|
565
|
-
// priority= is a PR-mode label; must not appear in rendered sections
|
|
566
|
-
expect(prompt).not.toMatch(/priority=(HIGH|MEDIUM|LOW|CRITICAL)/);
|
|
567
|
-
});
|
|
568
|
-
it("rendered item does NOT contain pipe-delimited category label", () => {
|
|
569
|
-
// | category | pattern used in PR-mode GENERATE blocks
|
|
570
|
-
expect(prompt).not.toMatch(/\| (security_boundary|business_rule|data_integrity|crud|workflow) \|/);
|
|
571
|
-
});
|
|
572
|
-
it("renders the pre-ranked scenario name in the output", () => {
|
|
573
|
-
expect(prompt).toContain("orders-update-with-discount");
|
|
574
|
-
});
|
|
575
|
-
it("includes cascade vs referential integrity guidance", () => {
|
|
576
|
-
expect(prompt).toContain("Cascade vs referential integrity");
|
|
577
|
-
});
|
|
578
|
-
it("supplement Tier 1 is scoped to 'list' (not 'GENERATE set') in full_repo", () => {
|
|
579
|
-
// In full_repo there is no GENERATE set — supplement references the pre-ranked list
|
|
580
|
-
expect(prompt).toMatch(/Tier 1.*list/s);
|
|
581
|
-
expect(prompt).not.toMatch(/Tier 1.*GENERATE set/s);
|
|
582
|
-
});
|
|
583
|
-
it("supplement note references 5-dimension rubric for priority assignment", () => {
|
|
584
|
-
expect(prompt).toContain("5-dimension rubric");
|
|
585
|
-
});
|
|
586
|
-
it("cascade guidance instructs silent removal — no 'Removed Recommendations' section", () => {
|
|
587
|
-
// The cascade guidance must say to remove silently, not to list removed items
|
|
588
|
-
expect(prompt).toContain("silently");
|
|
589
|
-
expect(prompt).toContain("Do NOT list removed scenarios");
|
|
590
|
-
});
|
|
591
|
-
});
|
|
592
|
-
// ---------------------------------------------------------------------------
|
|
593
|
-
// Tests — full_repo mode: full-stack vs backend-only test mix
|
|
594
|
-
// ---------------------------------------------------------------------------
|
|
595
|
-
describe("full_repo mode — full-stack repo test mix", () => {
|
|
596
|
-
function fullStackAnalysis() {
|
|
597
|
-
return fullRepoAnalysisWithScenarios({
|
|
598
|
-
projectClassification: {
|
|
599
|
-
projectType: "full-stack",
|
|
600
|
-
primaryLanguage: "TypeScript",
|
|
601
|
-
primaryFramework: "Next.js",
|
|
602
|
-
deploymentPattern: "full-stack",
|
|
603
|
-
},
|
|
604
|
-
});
|
|
605
|
-
}
|
|
606
|
-
function backendOnlyAnalysis() {
|
|
607
|
-
return fullRepoAnalysisWithScenarios({
|
|
608
|
-
projectClassification: {
|
|
609
|
-
projectType: "rest-api",
|
|
610
|
-
primaryLanguage: "Python",
|
|
611
|
-
primaryFramework: "FastAPI",
|
|
612
|
-
deploymentPattern: "traditional",
|
|
613
|
-
},
|
|
614
|
-
});
|
|
615
|
-
}
|
|
616
|
-
// topN=10 → 15% × 10 = 1.5 → round → 2 for both E2E and UI
|
|
617
|
-
it("full-stack repo mandates percentage-based UI slots (topN=10 → ≥2)", () => {
|
|
618
|
-
const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 10);
|
|
619
|
-
expect(prompt).toContain("skyramp_ui_test_generation");
|
|
620
|
-
expect(prompt).toMatch(/at least 2 UI test/);
|
|
621
|
-
});
|
|
622
|
-
it("full-stack repo mandates percentage-based E2E slots (topN=10 → ≥2)", () => {
|
|
623
|
-
const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 10);
|
|
624
|
-
expect(prompt).toContain("skyramp_e2e_test_generation");
|
|
625
|
-
expect(prompt).toMatch(/at least 2 E2E test/);
|
|
626
|
-
});
|
|
627
|
-
// topN=20 → 15% × 20 = 3 for both E2E and UI (scales up vs fixed ≥1/≥2)
|
|
628
|
-
it("full-stack repo scales to ≥3 E2E and ≥3 UI at topN=20", () => {
|
|
629
|
-
const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 20);
|
|
630
|
-
expect(prompt).toMatch(/at least 3 E2E test/);
|
|
631
|
-
expect(prompt).toMatch(/at least 3 UI test/);
|
|
632
|
-
});
|
|
633
|
-
// topN=5 → 15% × 5 = 0.75 → round → 1, floor at 1
|
|
634
|
-
it("full-stack repo floors at ≥1 E2E and ≥1 UI for small topN=5", () => {
|
|
635
|
-
const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 5);
|
|
636
|
-
expect(prompt).toMatch(/at least 1 E2E test/);
|
|
637
|
-
expect(prompt).toMatch(/at least 1 UI test/);
|
|
638
|
-
});
|
|
639
|
-
it("full-stack repo explicitly excludes smoke and fuzz tests", () => {
|
|
640
|
-
const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 10);
|
|
641
|
-
expect(prompt).toContain("No smoke tests");
|
|
642
|
-
expect(prompt).toContain("No fuzz tests");
|
|
643
|
-
});
|
|
644
|
-
it("backend-only (rest-api) repo does NOT mandate UI/E2E tests", () => {
|
|
645
|
-
const prompt = buildRecommendationPrompt(backendOnlyAnalysis(), "full_repo", 10);
|
|
646
|
-
// Tool names appear in generic buildToolWorkflows docs — check for the mandate text instead
|
|
647
|
-
expect(prompt).not.toMatch(/at least \d+ (UI|E2E) test/);
|
|
648
|
-
expect(prompt).not.toContain("supplement MUST include");
|
|
649
|
-
expect(prompt).not.toContain("full-stack repo");
|
|
650
|
-
});
|
|
651
|
-
it("backend-only repo focuses on integration and contract tests", () => {
|
|
652
|
-
const prompt = buildRecommendationPrompt(backendOnlyAnalysis(), "full_repo", 10);
|
|
653
|
-
expect(prompt).toContain("integration and contract tests");
|
|
654
|
-
});
|
|
655
|
-
it("backend-only repo still excludes smoke and fuzz tests", () => {
|
|
656
|
-
const prompt = buildRecommendationPrompt(backendOnlyAnalysis(), "full_repo", 10);
|
|
657
|
-
expect(prompt).toContain("No smoke tests");
|
|
658
|
-
expect(prompt).toContain("No fuzz tests");
|
|
659
|
-
});
|
|
660
|
-
it("'frontend' project type focuses on UI/E2E only — NOT backend tests", () => {
|
|
661
|
-
const frontendAnalysis = fullRepoAnalysisWithScenarios({
|
|
662
|
-
projectClassification: {
|
|
663
|
-
projectType: "frontend",
|
|
664
|
-
primaryLanguage: "TypeScript",
|
|
665
|
-
primaryFramework: "React",
|
|
666
|
-
deploymentPattern: "traditional",
|
|
667
|
-
},
|
|
668
|
-
});
|
|
669
|
-
const prompt = buildRecommendationPrompt(frontendAnalysis, "full_repo", 10);
|
|
670
|
-
// topN=10 → 15% × 10 = 1.5 → round → 2 for both
|
|
671
|
-
expect(prompt).toMatch(/at least 2 UI test/);
|
|
672
|
-
expect(prompt).toMatch(/at least 2 E2E test/);
|
|
673
|
-
// Should NOT say "in addition to backend integration and contract tests"
|
|
674
|
-
expect(prompt).not.toContain("in addition to backend integration and contract tests");
|
|
675
|
-
// Should explicitly say no integration/contract
|
|
676
|
-
expect(prompt).toContain("Do NOT add integration or contract tests");
|
|
677
|
-
});
|
|
678
|
-
it("'frontend' project type says 'frontend repo' not 'full-stack repo'", () => {
|
|
679
|
-
const frontendAnalysis = fullRepoAnalysisWithScenarios({
|
|
680
|
-
projectClassification: {
|
|
681
|
-
projectType: "frontend",
|
|
682
|
-
primaryLanguage: "TypeScript",
|
|
683
|
-
primaryFramework: "React",
|
|
684
|
-
deploymentPattern: "traditional",
|
|
685
|
-
},
|
|
686
|
-
});
|
|
687
|
-
const prompt = buildRecommendationPrompt(frontendAnalysis, "full_repo", 10);
|
|
688
|
-
expect(prompt).toContain("frontend repo");
|
|
689
|
-
expect(prompt).not.toContain("full-stack repo");
|
|
690
|
-
});
|
|
691
|
-
it("'full-stack' project type includes BOTH backend and frontend tests", () => {
|
|
692
|
-
const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 10);
|
|
693
|
-
expect(prompt).toContain("full-stack repo");
|
|
694
|
-
expect(prompt).toContain("in addition to backend integration and contract tests");
|
|
695
|
-
});
|
|
696
|
-
it("full-stack repo explains E2E > UI > Integration > Contract coverage ranking", () => {
|
|
697
|
-
const prompt = buildRecommendationPrompt(fullStackAnalysis(), "full_repo", 10);
|
|
698
|
-
expect(prompt).toContain("Coverage ranking");
|
|
699
|
-
expect(prompt).toContain("E2E");
|
|
700
|
-
expect(prompt).toContain("UI");
|
|
701
|
-
});
|
|
702
|
-
// Critical: scenarioDrafting.ts NEVER generates UI or E2E testType —
|
|
703
|
-
// they only come from the LLM supplement. The supplement note MUST
|
|
704
|
-
// explicitly tell the LLM to add UI/E2E for full-stack repos, otherwise
|
|
705
|
-
// the LLM fills the supplement with backend-only tiers (edge cases, CRUD)
|
|
706
|
-
// and never produces UI/E2E recommendations (PR #110 regression risk).
|
|
707
|
-
it("full-stack supplement note explicitly mandates UI and E2E with percentage-based counts (PR #110 regression guard)", () => {
|
|
708
|
-
// topN=20, 15% → 3 E2E + 3 UI mandated in the supplement note
|
|
709
|
-
const analysis = fullRepoAnalysisWithScenarios({
|
|
710
|
-
projectClassification: {
|
|
711
|
-
projectType: "full-stack",
|
|
712
|
-
primaryLanguage: "TypeScript",
|
|
713
|
-
primaryFramework: "Next.js",
|
|
714
|
-
deploymentPattern: "full-stack",
|
|
715
|
-
},
|
|
716
|
-
});
|
|
717
|
-
const prompt = buildRecommendationPrompt(analysis, "full_repo", 20);
|
|
718
|
-
// Tool names must appear in supplement (not just test-mix footer)
|
|
719
|
-
const requiredIdx = prompt.indexOf("REQUIRED — You MUST add");
|
|
720
|
-
const e2eIdx = prompt.indexOf("skyramp_e2e_test_generation");
|
|
721
|
-
expect(requiredIdx).toBeGreaterThan(-1);
|
|
722
|
-
expect(e2eIdx).toBeGreaterThan(-1);
|
|
723
|
-
expect(e2eIdx).toBeGreaterThan(requiredIdx); // inside supplement note
|
|
724
|
-
// Percentage-based count: topN=20 → 3
|
|
725
|
-
expect(prompt).toMatch(/at least 3 E2E test/);
|
|
726
|
-
expect(prompt).toMatch(/at least 3 UI test/);
|
|
727
|
-
});
|
|
728
|
-
it("backend-only repo supplement note does NOT add UI/E2E mandate", () => {
|
|
729
|
-
const analysis = fullRepoAnalysisWithScenarios({
|
|
730
|
-
projectClassification: {
|
|
731
|
-
projectType: "rest-api",
|
|
732
|
-
primaryLanguage: "Python",
|
|
733
|
-
primaryFramework: "FastAPI",
|
|
734
|
-
deploymentPattern: "traditional",
|
|
735
|
-
},
|
|
736
|
-
});
|
|
737
|
-
const prompt = buildRecommendationPrompt(analysis, "full_repo", 20);
|
|
738
|
-
const requiredIdx = prompt.indexOf("REQUIRED — You MUST add");
|
|
739
|
-
if (requiredIdx === -1)
|
|
740
|
-
return; // no supplement needed
|
|
741
|
-
const supplementBlock = prompt.slice(requiredIdx, requiredIdx + 800);
|
|
742
|
-
// Backend-only repos should NOT mandate UI/E2E in the supplement tiers
|
|
743
|
-
expect(supplementBlock).not.toContain("full-stack repo, the supplement MUST include");
|
|
744
|
-
});
|
|
745
|
-
});
|
|
746
|
-
// ---------------------------------------------------------------------------
|
|
747
|
-
// Tests — full_repo mode: PR mode must NOT be affected by these changes
|
|
748
|
-
// ---------------------------------------------------------------------------
|
|
749
|
-
describe("full_repo mode — PR mode unchanged by full_repo changes", () => {
|
|
750
|
-
let prPrompt;
|
|
751
|
-
beforeAll(() => {
|
|
752
|
-
prPrompt = buildRecommendationPrompt(analysisWithScenario("current_branch_diff"), "current_branch_diff", 10);
|
|
753
|
-
});
|
|
754
|
-
it("PR mode still contains GENERATE execution language", () => {
|
|
755
|
-
expect(prPrompt).toContain("### GENERATE");
|
|
756
|
-
});
|
|
757
|
-
it("PR mode still shows priority= labels on GENERATE items", () => {
|
|
758
|
-
expect(prPrompt).toMatch(/priority=(HIGH|MEDIUM|LOW|CRITICAL)/);
|
|
759
|
-
});
|
|
760
|
-
it("PR mode does not show 'Do NOT execute any tests'", () => {
|
|
761
|
-
expect(prPrompt).not.toContain("Do NOT execute any tests");
|
|
762
|
-
});
|
|
763
|
-
it("PR mode does not show 'Repo mode' preamble", () => {
|
|
764
|
-
expect(prPrompt).not.toContain("Repo mode — no tests are executed");
|
|
765
|
-
});
|
|
766
|
-
});
|
|
767
|
-
describe("PR #110 quality baseline — branch_diff (PR) mode", () => {
|
|
768
|
-
let prompt;
|
|
769
|
-
beforeAll(() => { prompt = buildRecommendationPrompt(analysisWithScenario("current_branch_diff"), "current_branch_diff", 20); });
|
|
770
|
-
it("source enrichment references changed files (not 'each endpoint')", () => {
|
|
771
|
-
expect(prompt).toContain("Source-Code Enrichment");
|
|
772
|
-
expect(prompt).toContain("changed files");
|
|
773
|
-
expect(prompt).not.toContain("For each endpoint listed in the Repository Context above, read the route handler");
|
|
774
|
-
});
|
|
775
|
-
it("supplement Tier 1 scoped to GENERATE set", () => {
|
|
776
|
-
expect(prompt).toMatch(/Tier 1.*GENERATE set/s);
|
|
777
|
-
});
|
|
778
|
-
it("supplement ordering puts edge cases before cross-resource", () => {
|
|
779
|
-
const tier1Idx = prompt.indexOf("Tier 1");
|
|
780
|
-
const tier3Idx = prompt.indexOf("Tier 3");
|
|
781
|
-
expect(tier1Idx).toBeGreaterThan(-1);
|
|
782
|
-
expect(tier3Idx).toBeGreaterThan(-1);
|
|
783
|
-
expect(tier1Idx).toBeLessThan(tier3Idx);
|
|
784
|
-
});
|
|
785
|
-
it("includes cascade vs referential integrity guidance", () => {
|
|
786
|
-
expect(prompt).toContain("Cascade vs referential integrity");
|
|
787
|
-
});
|
|
788
|
-
it("includes per-recommendation format requirements", () => {
|
|
789
|
-
expect(prompt).toContain("Per-recommendation format");
|
|
790
|
-
});
|
|
791
|
-
it("GENERATE block present for the business_rule scenario", () => {
|
|
792
|
-
expect(prompt).toContain("GENERATE");
|
|
793
|
-
expect(prompt).toContain("orders-update-with-discount");
|
|
794
|
-
});
|
|
795
|
-
});
|
|
796
|
-
// ---------------------------------------------------------------------------
|
|
797
|
-
// Regression tests — v3 gap fixes
|
|
798
|
-
// ---------------------------------------------------------------------------
|
|
799
|
-
describe("Gap 1 — happy-path ranking: success scenarios ranked before error/edge-case scenarios", () => {
|
|
800
|
-
function makeScenarioByInteraction(name, interactionType) {
|
|
801
|
-
return mockDiffScenario({
|
|
802
|
-
scenarioName: name,
|
|
803
|
-
steps: [
|
|
804
|
-
{ order: 1, method: "POST", path: "/api/items", expectedStatusCode: interactionType === "success" ? 201 : 404, description: "step", interactionType },
|
|
805
|
-
{ order: 2, method: "GET", path: "/api/items/{id}", expectedStatusCode: interactionType === "success" ? 200 : 404, description: "verify", interactionType },
|
|
806
|
-
{ order: 3, method: "DELETE", path: "/api/items/{id}", expectedStatusCode: interactionType === "success" ? 204 : 404, description: "cleanup", interactionType },
|
|
807
|
-
],
|
|
808
|
-
});
|
|
809
|
-
}
|
|
810
|
-
it("happy-path scenario ranked before error-path scenario in GENERATE block", () => {
|
|
811
|
-
const analysis = {
|
|
812
|
-
...analysisWithScenario("current_branch_diff"),
|
|
813
|
-
businessContext: {
|
|
814
|
-
mainPurpose: "Test",
|
|
815
|
-
userFlows: [], dataFlows: [], integrationPatterns: [],
|
|
816
|
-
draftedScenarios: [
|
|
817
|
-
makeScenarioByInteraction("error-path-scenario", "error"),
|
|
818
|
-
makeScenarioByInteraction("happy-path-scenario", "success"),
|
|
819
|
-
],
|
|
820
|
-
},
|
|
821
|
-
};
|
|
822
|
-
const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 5);
|
|
823
|
-
const happyIdx = prompt.indexOf("happy-path-scenario");
|
|
824
|
-
const errorIdx = prompt.indexOf("error-path-scenario");
|
|
825
|
-
expect(happyIdx).toBeGreaterThan(-1);
|
|
826
|
-
expect(errorIdx).toBeGreaterThan(-1);
|
|
827
|
-
// Happy path should appear first (lower index = earlier in the output)
|
|
828
|
-
expect(happyIdx).toBeLessThan(errorIdx);
|
|
829
|
-
});
|
|
830
|
-
});
|
|
831
|
-
describe("Gap 2 — E2E ADDITIONAL slot gated on hasTraces", () => {
|
|
832
|
-
function makeMixedPRAnalysis(hasTraceFiles) {
|
|
833
|
-
// Needs draftedScenarios so scored.length > 0 and buildExecutionPlan is reached
|
|
834
|
-
const base = analysisWithScenario("current_branch_diff");
|
|
835
|
-
return {
|
|
836
|
-
...base,
|
|
837
|
-
artifacts: {
|
|
838
|
-
openApiSpecs: [],
|
|
839
|
-
playwrightRecordings: [],
|
|
840
|
-
traceFiles: hasTraceFiles ? [{ path: "/repo/tests/trace.json", format: "skyramp" }] : [],
|
|
841
|
-
notFound: [],
|
|
842
|
-
},
|
|
843
|
-
branchDiffContext: {
|
|
844
|
-
currentBranch: "test",
|
|
845
|
-
baseBranch: "main",
|
|
846
|
-
// frontend/components/.tsx triggers hasFrontendChanges; newEndpoints makes it a mixed PR (not UI-only)
|
|
847
|
-
changedFiles: ["frontend/components/App.tsx", "backend/routers/orders.py"],
|
|
848
|
-
newEndpoints: [{ path: "/api/v1/orders/{order_id}", methods: [{ method: "PUT", sourceFile: "orders.py", interactionCount: 3 }] }],
|
|
849
|
-
modifiedEndpoints: [],
|
|
850
|
-
affectedServices: ["orders"],
|
|
851
|
-
},
|
|
852
|
-
};
|
|
853
|
-
}
|
|
854
|
-
it("E2E [ADDITIONAL] slot present when hasTraces=true and frontend+API changes exist", () => {
|
|
855
|
-
const prompt = buildRecommendationPrompt(makeMixedPRAnalysis(true), "current_branch_diff", 10);
|
|
856
|
-
expect(prompt).toMatch(/\[ADDITIONAL\].*E2E/s);
|
|
857
|
-
});
|
|
858
|
-
it("E2E [ADDITIONAL] slot absent when hasTraces=false and frontend+API changes exist", () => {
|
|
859
|
-
const prompt = buildRecommendationPrompt(makeMixedPRAnalysis(false), "current_branch_diff", 10);
|
|
860
|
-
// UI slot should still be present, E2E slot should not
|
|
861
|
-
expect(prompt).toMatch(/\[ADDITIONAL\].*UI/s);
|
|
862
|
-
// [ADDITIONAL] E2E label must not appear (tool docs contain "E2E" but not as [ADDITIONAL] label)
|
|
863
|
-
expect(prompt).not.toContain("[ADDITIONAL] | E2E |");
|
|
864
|
-
});
|
|
865
|
-
});
|
|
866
|
-
describe("Gap 4 — PR history does NOT suppress GENERATE items on 2nd+ run", () => {
|
|
867
|
-
it("prompt contains GENERATE-unaffected instruction when prior history exists", () => {
|
|
868
|
-
const ctx = makePRContext({
|
|
869
|
-
previousRecommendations: [
|
|
870
|
-
{ testType: "integration", endpoint: "POST /api/v1/orders", scenarioName: "orders-update-with-discount", status: "implemented", commentId: "1" },
|
|
871
|
-
],
|
|
872
|
-
});
|
|
873
|
-
const prompt = buildRecommendationPrompt(analysisWithScenario("current_branch_diff"), "current_branch_diff", 5, ctx);
|
|
874
|
-
expect(prompt).toContain("GENERATE section is unaffected by prior history");
|
|
875
|
-
});
|
|
876
|
-
it("prompt does NOT contain old suppression text 'Do NOT re-recommend'", () => {
|
|
877
|
-
const ctx = makePRContext({
|
|
878
|
-
previousRecommendations: [
|
|
879
|
-
{ testType: "integration", endpoint: "POST /api/v1/orders", status: "implemented", commentId: "1" },
|
|
880
|
-
],
|
|
881
|
-
});
|
|
882
|
-
const prompt = buildRecommendationPrompt(analysisWithScenario("current_branch_diff"), "current_branch_diff", 5, ctx);
|
|
883
|
-
expect(prompt).not.toContain("Do NOT re-recommend");
|
|
884
|
-
});
|
|
885
|
-
});
|
|
886
|
-
describe("renderItem — correct tool for E2E and UI testTypes in full_repo mode", () => {
|
|
887
|
-
function makeTypedScenario(testType) {
|
|
888
|
-
return mockDiffScenario({
|
|
889
|
-
scenarioName: `${testType}-scenario`,
|
|
890
|
-
testType,
|
|
891
|
-
steps: [
|
|
892
|
-
{ order: 1, method: "GET", path: "/api/items", expectedStatusCode: 200, description: "list items", interactionType: "success" },
|
|
893
|
-
{ order: 2, method: "POST", path: "/api/items", expectedStatusCode: 201, description: "create item", interactionType: "success" },
|
|
894
|
-
],
|
|
895
|
-
});
|
|
896
|
-
}
|
|
897
|
-
it("integration scenario uses skyramp_integration_test_generation in full_repo", () => {
|
|
898
|
-
const analysis = minimalAnalysis({
|
|
899
|
-
businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [makeTypedScenario("integration")] },
|
|
900
|
-
});
|
|
901
|
-
const prompt = buildRecommendationPrompt(analysis, "full_repo", 5);
|
|
902
|
-
expect(prompt).toContain("skyramp_integration_test_generation");
|
|
903
|
-
});
|
|
904
|
-
it("e2e scenario uses skyramp_e2e_test_generation and omits scenario step calls in full_repo", () => {
|
|
905
|
-
const analysis = minimalAnalysis({
|
|
906
|
-
businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [makeTypedScenario("e2e")] },
|
|
907
|
-
});
|
|
908
|
-
const prompt = buildRecommendationPrompt(analysis, "full_repo", 5);
|
|
909
|
-
// Extract recommendation content only (before Tool Workflows docs which list all tools)
|
|
910
|
-
const toolWorkflowsIdx = prompt.indexOf("## How to Generate Tests");
|
|
911
|
-
const mainContent = toolWorkflowsIdx > 0 ? prompt.slice(0, toolWorkflowsIdx) : prompt;
|
|
912
|
-
expect(mainContent).toContain("skyramp_e2e_test_generation");
|
|
913
|
-
expect(mainContent).not.toContain("skyramp_integration_test_generation");
|
|
914
|
-
// E2E does not use per-step scenario pipeline
|
|
915
|
-
expect(mainContent).not.toContain("skyramp_scenario_test_generation");
|
|
916
|
-
});
|
|
917
|
-
it("ui scenario uses skyramp_ui_test_generation and omits scenario step calls in full_repo", () => {
|
|
918
|
-
const analysis = minimalAnalysis({
|
|
919
|
-
businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [makeTypedScenario("ui")] },
|
|
920
|
-
});
|
|
921
|
-
const prompt = buildRecommendationPrompt(analysis, "full_repo", 5);
|
|
922
|
-
// Extract recommendation content only (before Tool Workflows docs which list all tools)
|
|
923
|
-
const toolWorkflowsIdx = prompt.indexOf("## How to Generate Tests");
|
|
924
|
-
const mainContent = toolWorkflowsIdx > 0 ? prompt.slice(0, toolWorkflowsIdx) : prompt;
|
|
925
|
-
expect(mainContent).toContain("skyramp_ui_test_generation");
|
|
926
|
-
expect(mainContent).not.toContain("skyramp_integration_test_generation");
|
|
927
|
-
// UI does not use per-step scenario pipeline
|
|
928
|
-
expect(mainContent).not.toContain("skyramp_scenario_test_generation");
|
|
929
|
-
});
|
|
930
|
-
it("integration scenario still emits per-step skyramp_scenario_test_generation calls in full_repo", () => {
|
|
931
|
-
const analysis = minimalAnalysis({
|
|
932
|
-
businessContext: { mainPurpose: "Test", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [makeTypedScenario("integration")] },
|
|
933
|
-
});
|
|
934
|
-
const prompt = buildRecommendationPrompt(analysis, "full_repo", 5);
|
|
935
|
-
const toolWorkflowsIdx = prompt.indexOf("## How to Generate Tests");
|
|
936
|
-
const mainContent = toolWorkflowsIdx > 0 ? prompt.slice(0, toolWorkflowsIdx) : prompt;
|
|
937
|
-
expect(mainContent).toContain("skyramp_scenario_test_generation");
|
|
938
|
-
expect(mainContent).toContain("skyramp_integration_test_generation");
|
|
939
|
-
});
|
|
940
|
-
});
|
|
@@ -120,7 +120,6 @@ Generate a net-new test. Use a unique descriptive filename to avoid overwriting
|
|
|
120
120
|
**How to generate each type (for ADD and REGENERATE):**
|
|
121
121
|
- **Integration**: call \`skyramp_scenario_test_generation\` per step (sequentially), then \`skyramp_integration_test_generation\` with the scenario file.
|
|
122
122
|
Scenario JSON goes in the same \`outputDir\` (e.g. \`tests/scenario_<name>.json\`), not \`.skyramp/\`.
|
|
123
|
-
**Required fields (MANDATORY before generating any scenario step):** For every POST/PUT/PATCH step — including prerequisite/setup steps (e.g. create a product before creating an order) — read the route handler source code or OpenAPI schema to identify ALL required request body fields. Include every required field with a realistic value. Do NOT omit fields just because they are not the focus of the test.
|
|
124
123
|
- **Contract**: call \`skyramp_contract_test_generation\` with \`endpointURL\`, \`method\`, and \`requestData\` for POST/PUT/PATCH.
|
|
125
124
|
Pass \`apiSchema\` if an OpenAPI spec exists.
|
|
126
125
|
For internal/microservice APIs: add \`providerMode: true\` to verify implementation matches the contract.
|
|
@@ -191,7 +190,7 @@ Call \`skyramp_submit_report\` with \`summaryOutputFile\`: "${summaryOutputFile}
|
|
|
191
190
|
VERIFY: note that the test was verified as unaffected by the diff — no file changes made.
|
|
192
191
|
Do NOT include files that were newly created in this run (those go in \`newTestsCreated\`).
|
|
193
192
|
|
|
194
|
-
**additionalRecommendations** —
|
|
193
|
+
**additionalRecommendations** — items you could not act on (quota exceeded, missing traces, etc.):
|
|
195
194
|
\`testId\` (human-readable kebab-case, e.g. \`integration-products-orders-workflow\`), \`testType\`, \`category\`, \`scenarioName\`, \`priority\` (high/medium/low — used for sorting, not displayed), \`description\`, \`steps\`, \`reasoning\`
|
|
196
195
|
Keep each \`description\` to one sentence. Omit \`requestBody\` and \`responseBody\` from steps.
|
|
197
196
|
Include at most 3 steps per recommendation.
|
|
@@ -156,8 +156,6 @@ export async function parseTraceFile(filePath) {
|
|
|
156
156
|
return { entries, userFlows, format };
|
|
157
157
|
}
|
|
158
158
|
const SKIP_DIRS = new Set(["node_modules", ".git", "dist", "build", ".next", ".nuxt", "coverage", "__pycache__", ".venv", "venv"]);
|
|
159
|
-
/** Known test-artifact directories where testbot-generated traces are written. */
|
|
160
|
-
const TRACE_SCAN_DIRS = [".skyramp", "tests", "test", "e2e", "playwright"];
|
|
161
159
|
/**
|
|
162
160
|
* Recursively scan a directory for files matching a predicate, up to maxDepth levels.
|
|
163
161
|
*/
|
|
@@ -182,22 +180,6 @@ function scanDir(dir, predicate, maxDepth, results) {
|
|
|
182
180
|
}
|
|
183
181
|
}
|
|
184
182
|
}
|
|
185
|
-
/**
|
|
186
|
-
* Scan only known test-artifact directories for trace files.
|
|
187
|
-
* Root-level files are checked at depth 0; named test-artifact subdirs are scanned
|
|
188
|
-
* at full depth. This prevents picking up committed demo assets (e.g. frontend/public/traces/).
|
|
189
|
-
*/
|
|
190
|
-
function scanTraceArtifactDirs(repositoryPath, predicate, results) {
|
|
191
|
-
// Root-level files only (depth 0)
|
|
192
|
-
scanDir(repositoryPath, predicate, 0, results);
|
|
193
|
-
// Named test-artifact subdirectories (full depth)
|
|
194
|
-
for (const dir of TRACE_SCAN_DIRS) {
|
|
195
|
-
const full = path.join(repositoryPath, dir);
|
|
196
|
-
if (fs.existsSync(full)) {
|
|
197
|
-
scanDir(full, predicate, 5, results);
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
183
|
/**
|
|
202
184
|
* Discover trace JSON files in a repository path.
|
|
203
185
|
*/
|
|
@@ -209,12 +191,12 @@ export function discoverTraceFiles(repositoryPath) {
|
|
|
209
191
|
if (fs.existsSync(full))
|
|
210
192
|
found.push(full);
|
|
211
193
|
}
|
|
212
|
-
// Recursive scan
|
|
194
|
+
// Recursive scan: any *trace*.json|har, but exclude scenario files and test output files
|
|
213
195
|
const isTraceJson = (name) => /\.(json|har)$/i.test(name) &&
|
|
214
196
|
/trace/i.test(name) &&
|
|
215
197
|
!/^scenario_/i.test(name) &&
|
|
216
198
|
!/_test\.(json|har)$/i.test(name);
|
|
217
|
-
|
|
199
|
+
scanDir(repositoryPath, isTraceJson, 5, found);
|
|
218
200
|
// Deduplicate and sort for deterministic ordering
|
|
219
201
|
return [...new Set(found)].sort();
|
|
220
202
|
}
|
|
@@ -227,6 +209,6 @@ export function discoverPlaywrightZips(repositoryPath) {
|
|
|
227
209
|
const isPlaywrightZip = (name) => /\.zip$/i.test(name) && (/playwright/i.test(name) ||
|
|
228
210
|
/_trace\.zip$/i.test(name) ||
|
|
229
211
|
name.toLowerCase() === "trace.zip");
|
|
230
|
-
|
|
212
|
+
scanDir(repositoryPath, isPlaywrightZip, 5, found);
|
|
231
213
|
return [...new Set(found)].sort();
|
|
232
214
|
}
|
package/package.json
CHANGED
|
@@ -1,140 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Unit tests for trace-parser.ts — specifically the scanTraceArtifactDirs scoping
|
|
3
|
-
* introduced to prevent demo/fixture files (e.g. frontend/public/traces/) from being
|
|
4
|
-
* misidentified as testbot-generated traces.
|
|
5
|
-
*/
|
|
6
|
-
import * as fs from "fs";
|
|
7
|
-
import * as os from "os";
|
|
8
|
-
import * as path from "path";
|
|
9
|
-
import { discoverTraceFiles, discoverPlaywrightZips } from "./trace-parser.js";
|
|
10
|
-
// ---------------------------------------------------------------------------
|
|
11
|
-
// Helpers
|
|
12
|
-
// ---------------------------------------------------------------------------
|
|
13
|
-
function mkdirp(dir) {
|
|
14
|
-
fs.mkdirSync(dir, { recursive: true });
|
|
15
|
-
}
|
|
16
|
-
function touch(file) {
|
|
17
|
-
mkdirp(path.dirname(file));
|
|
18
|
-
fs.writeFileSync(file, "");
|
|
19
|
-
}
|
|
20
|
-
function withTempRepo(fn) {
|
|
21
|
-
const dir = fs.mkdtempSync(path.join(os.tmpdir(), "trace-parser-test-"));
|
|
22
|
-
try {
|
|
23
|
-
fn(dir);
|
|
24
|
-
}
|
|
25
|
-
finally {
|
|
26
|
-
fs.rmSync(dir, { recursive: true, force: true });
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
// ---------------------------------------------------------------------------
|
|
30
|
-
// discoverPlaywrightZips — scoping tests
|
|
31
|
-
// ---------------------------------------------------------------------------
|
|
32
|
-
describe("discoverPlaywrightZips — scanTraceArtifactDirs scoping", () => {
    // One row per test case, in execution order:
    //   title      - the test name
    //   segments   - path of the empty zip created beneath the temp repo root
    //   discovered - true when the result must contain that zip,
    //                false when the result must be an empty array
    const scenarios = [
        {
            title: "does NOT discover playwright zip in frontend/public/traces/ (demo fixture dir)",
            segments: ["frontend", "public", "traces", "ui_test_playwright.zip"],
            discovered: false,
        },
        {
            title: "discovers playwright zip in tests/ (test-artifact dir)",
            segments: ["tests", "ui_test_playwright.zip"],
            discovered: true,
        },
        {
            title: "discovers playwright zip in .skyramp/ (test-artifact dir)",
            segments: [".skyramp", "recording_playwright.zip"],
            discovered: true,
        },
        {
            title: "discovers playwright zip in e2e/ (test-artifact dir)",
            segments: ["e2e", "flow_playwright.zip"],
            discovered: true,
        },
        {
            title: "discovers playwright zip in playwright/ (test-artifact dir)",
            segments: ["playwright", "trace.zip"],
            discovered: true,
        },
        {
            title: "does NOT discover zip in src/ (not a test-artifact dir)",
            segments: ["src", "recordings", "ui_playwright.zip"],
            discovered: false,
        },
        {
            title: "does NOT discover zip in deeply nested non-test dir",
            segments: ["frontend", "src", "assets", "demo_playwright.zip"],
            discovered: false,
        },
    ];
    for (const { title, segments, discovered } of scenarios) {
        it(title, () => {
            withTempRepo((repoRoot) => {
                const zipPath = path.join(repoRoot, ...segments);
                touch(zipPath);
                const results = discoverPlaywrightZips(repoRoot);
                if (discovered) {
                    expect(results).toContain(zipPath);
                }
                else {
                    expect(results).toEqual([]);
                }
            });
        });
    }
});
|
|
80
|
-
// ---------------------------------------------------------------------------
|
|
81
|
-
// discoverTraceFiles — scoping tests
|
|
82
|
-
// ---------------------------------------------------------------------------
|
|
83
|
-
describe("discoverTraceFiles — scanTraceArtifactDirs scoping", () => {
    // Creates an empty file at `segments` inside a fresh temp repo, runs
    // discoverTraceFiles on that repo, and hands (file path, results) to `check`.
    const discoverWith = (segments, check) => {
        withTempRepo((repoRoot) => {
            const filePath = path.join(repoRoot, ...segments);
            touch(filePath);
            check(filePath, discoverTraceFiles(repoRoot));
        });
    };
    // The created file must be among the results.
    const expectDiscovered = (filePath, results) => {
        expect(results).toContain(filePath);
    };
    // The results must be an empty array.
    const expectNothing = (_filePath, results) => {
        expect(results).toEqual([]);
    };
    it("does NOT discover trace.json nested under frontend/public/traces/", () => {
        discoverWith(["frontend", "public", "traces", "backend_trace.json"], (_filePath, results) => {
            // fixed-name root candidates don't match "backend_trace.json", and scan won't reach frontend/
            expect(results.some((entry) => entry.includes("frontend"))).toBe(false);
        });
    });
    it("discovers trace.json in tests/ dir", () => {
        discoverWith(["tests", "backend_trace.json"], expectDiscovered);
    });
    it("discovers trace.json in .skyramp/ dir", () => {
        discoverWith([".skyramp", "skyramp_trace.json"], expectDiscovered);
    });
    it("discovers root-level trace.json", () => {
        discoverWith(["trace.json"], expectDiscovered);
    });
    it("discovers root-level skyramp_traces.json via fixed-name check", () => {
        discoverWith(["skyramp_traces.json"], expectDiscovered);
    });
    it("does NOT discover scenario_ json files (excluded by predicate)", () => {
        discoverWith(["tests", "scenario_orders_trace.json"], expectNothing);
    });
    it("does NOT discover _test.json files (excluded by predicate)", () => {
        discoverWith(["tests", "orders_trace_test.json"], expectNothing);
    });
    it("results are deduplicated when fixed-name and scan both find the same root file", () => {
        discoverWith(["trace.json"], (filePath, results) => {
            const matches = results.filter((entry) => entry === filePath);
            expect(matches).toHaveLength(1);
        });
    });
});
|