@skyramp/mcp 0.0.64-rc.6 → 0.0.64-rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,16 +44,7 @@ function computeTiebreakerSeed(endpoints, diffFiles) {
44
44
  return crypto.createHash("sha256").update(canonical).digest("hex").slice(0, 8);
45
45
  }
46
46
  // ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
47
- function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, isDiffScope = false, isFrontendProject = false, isFrontendOnlyProject = false) {
48
- // Full-repo mode only — percentage-based UI/E2E slot targets (15% each, floor 1).
49
- // Capped so E2E+UI together never exceed topN.
50
- // Referenced in supplementNote below, but the ternary conditions that use them
51
- // (`isFrontendProject && !isDiffScope`) are always false in PR/diff mode.
52
- const rawE2E = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
53
- const rawUI = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
54
- const slotsFloor = Math.floor(topN / 2);
55
- const minE2ESlots = Math.min(rawE2E, slotsFloor);
56
- const minUISlots = Math.min(rawUI, Math.max(0, topN - minE2ESlots));
47
+ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false) {
57
48
  const generateItems = scored.slice(0, Math.min(maxGen, scored.length));
58
49
  const additionalItems = scored.slice(maxGen, topN);
59
50
  const authRef = authHeaderValue
@@ -118,10 +109,9 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
118
109
  prereqNote);
119
110
  }
120
111
  }).join("\n\n");
121
- // Reserve slots for UI/E2E ADDITIONAL recommendations on mixed PRs.
122
- // E2E requires traces to generate — only reserve the slot when traces are available.
123
- // UI can be recommended without traces (agent can record inline).
124
- const needsE2ESlot = hasFrontendChanges && !isUIOnlyPR && hasTraces;
112
+ // For mixed PRs, always reserve slots for UI and E2E recommendations regardless of whether
113
+ // traces already exist — the user can record them later or the bot can record during the run.
114
+ const needsE2ESlot = hasFrontendChanges && !isUIOnlyPR;
125
115
  const needsUISlot = hasFrontendChanges && !isUIOnlyPR;
126
116
  const frontendSlots = (needsE2ESlot ? 1 : 0) + (needsUISlot ? 1 : 0);
127
117
  const backendAdditionalItems = frontendSlots > 0
@@ -153,159 +143,8 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
153
143
  })() : "";
154
144
  const supplementCount = topN - generateItems.length - backendAdditionalItems.length - frontendSlots;
155
145
  const supplementNote = supplementCount > 0
156
- ? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them in this priority order — exhaust each tier before moving to the next:\n\n**Tier 1:** Edge-case and error-path tests for endpoints already in the ${isDiffScope ? "GENERATE set" : "list"} — boundary values for numeric fields (e.g. 0%, 100%, >100% discount), invalid/non-existent IDs (→ 404), empty arrays where a minimum is required, missing required fields (→ 422), auth boundary (call without Authorization header → 403/401).\n\n**Tier 2:** Auth-boundary contract tests for any endpoint not yet covered.\n\n**Tier 3:** Cross-resource integration tests — ONLY when one resource's POST body contains the other's \`_id\` field. NEVER pair resources where neither POST body has the other's ID.\n\n**Tier 4:** CRUD lifecycle tests for any resource not yet covered.\n\nUse the same 5-dimension rubric to assign priority (HIGH/MEDIUM/LOW), testType, and category. For each supplement item, apply the same source-code enrichment from Step 1 — use real field names from the route handler, not generic placeholders.${isFrontendOnlyProject && !isDiffScope ? ` Since this is a frontend repo, the supplement MUST include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.` : isFrontendProject && !isDiffScope ? ` Since this is a full-stack repo, the supplement MUST include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\` — full browser-to-backend flow) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\` — component-level interaction flows). Add these before exhausting backend tiers.` : hasFrontendChanges && !isUIOnlyPR ? 
" Since this PR has frontend changes, at least 1 of these should be a UI or E2E test targeting the changed components." : ""} Do NOT produce fewer than ${topN} total.`
146
+ ? `\n**REQUIRED — You MUST add ${supplementCount} more to reach the total of ${topN}.** Draft them from endpoint interactions and source code patterns not yet covered. Use the same 5-dimension rubric and quality gate to assign priority (HIGH/MEDIUM/LOW), testType, and category.${hasFrontendChanges && !isUIOnlyPR ? " Since this PR has frontend changes, at least 1 of these should be a UI or E2E test targeting the changed components." : ""} Do NOT produce fewer than ${topN} total.`
157
147
  : "";
158
- // ── Full-repo mode: recommendations only, no execution ──────────────────
159
- if (!isDiffScope) {
160
- const toTitle = (name) => name.replace(/-/g, " ").replace(/\b\w/g, c => c.toUpperCase());
161
- // Coverage ranking (highest to lowest breadth):
162
- // E2E first: full browser-to-backend flow — exercises both frontend and backend.
163
- // UI second: frontend components call backend APIs — also exercises backend.
164
- // Integration third: backend API chains validated directly.
165
- // Contract last: single-endpoint boundary only.
166
- const TYPE_ORDER = ["e2e", "ui", "integration", "contract"];
167
- const TYPE_LABEL = {
168
- e2e: "E2E", ui: "UI", integration: "Integration", contract: "Contract",
169
- };
170
- // All scored items up to topN, already sorted by priority/novelty
171
- const allItems = scored.slice(0, topN);
172
- // Group by test type while preserving priority ordering within each group
173
- const byType = new Map();
174
- for (const t of TYPE_ORDER)
175
- byType.set(t, []);
176
- for (const item of allItems) {
177
- const t = item.scenario.testType ?? (item.scenario.steps.length === 1 ? "contract" : "integration");
178
- if (!byType.has(t))
179
- byType.set(t, []);
180
- byType.get(t).push(item);
181
- }
182
- const renderItem = (item, rank) => {
183
- const s = item.scenario;
184
- const testType = s.testType ?? (s.steps.length === 1 ? "contract" : "integration");
185
- const title = toTitle(s.scenarioName);
186
- if (testType === "contract") {
187
- const step = s.steps[0];
188
- const endpointURL = `${baseUrl}${step.path}`;
189
- const isBodyMethod = ["POST", "PUT", "PATCH"].includes(step.method);
190
- const dataParam = isBodyMethod
191
- ? `, requestData: <${step.method} ${step.path} required fields from source code>`
192
- : "";
193
- return [
194
- `**${rank}. ${title}**`,
195
- ` ${s.description}`,
196
- ` ${step.method} ${step.path} → ${step.expectedStatusCode}`,
197
- ` Tool: \`skyramp_contract_test_generation({ endpointURL: "${endpointURL}", method: "${step.method}"${authRef}${dataParam} })\``,
198
- ` From source: fill in requestData field names and the specific production boundary this validates`,
199
- ].join("\n");
200
- }
201
- else {
202
- const stepLines = s.steps.map(st => {
203
- const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
204
- const bodyHint = isBody ? ` — body: <${st.method} ${st.path} required fields from source>` : "";
205
- return ` ${st.order}. ${st.method} ${st.path} → ${st.expectedStatusCode}: ${st.description}${bodyHint}`;
206
- }).join("\n");
207
- const toolCalls = s.steps.map(st => {
208
- const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
209
- const dataParam = isBody
210
- ? `, requestBody: <${st.method} ${st.path} required fields from source>`
211
- : "";
212
- return ` skyramp_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${s.scenarioName}", baseURL: "${baseUrl}", method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${scenarioAuthRef}${dataParam} })`;
213
- }).join("\n");
214
- // E2E and UI use trace-based generation, not the scenario pipeline.
215
- // Only emit per-step skyramp_scenario_test_generation calls for integration type.
216
- const isTraceBased = testType === "e2e" || testType === "ui";
217
- const finalTool = testType === "e2e"
218
- ? `skyramp_e2e_test_generation({ playwrightZip: "<trace zip path>", traceFile: "<backend trace path>"${authHeaderOnlyRef} })`
219
- : testType === "ui"
220
- ? `skyramp_ui_test_generation({ playwrightZip: "<trace zip path>"${authHeaderOnlyRef} })`
221
- : `skyramp_integration_test_generation({ scenarioFile: "scenario_${s.scenarioName}.json"${authHeaderOnlyRef} })`;
222
- const toolCallsBlock = isTraceBased
223
- ? ` ${finalTool}`
224
- : `${toolCalls}\n ${finalTool}`;
225
- return [
226
- `**${rank}. ${title}**`,
227
- ` ${s.description}`,
228
- ` Steps:`,
229
- stepLines,
230
- ` Tool calls:`,
231
- toolCallsBlock,
232
- ` From source: fill in requestBody field values and assert all computed response fields`,
233
- ].join("\n");
234
- }
235
- };
236
- const sections = TYPE_ORDER
237
- .filter(t => (byType.get(t) ?? []).length > 0)
238
- .map(t => {
239
- const items = byType.get(t);
240
- const label = TYPE_LABEL[t];
241
- let globalRank = 0;
242
- for (const prev of TYPE_ORDER) {
243
- if (prev === t)
244
- break;
245
- globalRank += (byType.get(prev) ?? []).length;
246
- }
247
- const entries = items.map((item, i) => renderItem(item, globalRank + i + 1)).join("\n\n");
248
- return `### ${label} (${items.length})\n\n${entries}`;
249
- })
250
- .join("\n\n");
251
- const repoSupplementNote = supplementNote; // already built above with isDiffScope=false tier ordering
252
- return `## Test Recommendations (${topN} total)
253
-
254
- > **Repo mode — no tests are executed.** Use the tool calls below to generate any recommendation on demand.
255
- > Highest-value tests appear first within each type. Use the "From source" hint in each item to fill in field names and assertions before calling the tool.
256
-
257
- **Step 1 — Source-Code Enrichment (MANDATORY before presenting anything)**
258
- For each endpoint listed in the Repository Context above, read the route handler source code. Look for:
259
- - **All required request body fields** (names and types) for POST/PUT/PATCH — use in step descriptions and tool call params
260
- - **Computed/derived response fields** (e.g. \`total_amount\`, \`discount_percent\`) and their formulas — assert these
261
- - **Auth middleware** (HTTPBearer, \`Depends(get_current_user)\`, \`@UseGuards\`, \`jwt.verify\`) — set \`authHeader\`/\`authScheme\`; FastAPI HTTPBearer → **403** (not 401)
262
- - **Storage backend** — if Redis or schema-less, discard unique-constraint and cascade-delete scenarios
263
- - **Delete behavior** — hard-delete → 204; soft-delete/cancel → 200
264
-
265
- Do NOT present generic placeholders. Replace every \`<... from source>\` hint with actual field names and realistic values.
266
-
267
- ${buildTestPatternGuidelines()}
268
-
269
- ${buildTestExamples()}
270
-
271
- **Cascade vs referential integrity:** Keep only the scenario that matches the actual FK delete policy in source (CASCADE → cascade-delete; RESTRICT/no cascade → delete-blocked). Remove the other silently — do NOT create a "Removed Recommendations" or "Not Applicable" section.
272
-
273
- **Unique constraints:** If the storage backend is Redis, in-memory, or schema-less with no explicit \`UNIQUE\` index, discard unique-constraint scenarios entirely and replace them with a different high-value test. Do NOT list removed scenarios.
274
-
275
- **NEVER create a "Removed Recommendations", "Not Applicable", or similar section.** If a scenario is inapplicable, silently replace it with an equivalent-priority scenario from the supplement tiers. The output must contain ONLY the ${topN} recommendations.
276
-
277
- ${buildTestQualityCriteria()}
278
-
279
- **5-dimension rubric — use to assign priority for supplement items:**
280
- | Dimension | What to assess |
281
- | Production Safety | Guards a critical boundary (auth, unique constraint, cascade delete, data integrity, breaking migration)? → HIGH |
282
- | Bug-Finding Potential | Targets a known failure mode (race condition, data consistency, state transition, cascade effect)? → HIGH |
283
- | User Journey Relevance | Reflects how real users interact (from traces, business flows, critical paths)? → HIGH or MEDIUM |
284
- | Coverage Gap | Addresses an area with zero existing test coverage? → bump up one tier |
285
- | Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
286
-
287
- **Per-recommendation format (apply to ALL items):**
288
- - Title and one-sentence description of what it validates (business rule, not just "tests the endpoint")
289
- - Steps with concrete field names and realistic values derived from source code
290
- - Ready-to-use tool call — replace all \`<...>\` placeholders with real values before presenting
291
- - "From source" note — the specific production risk or business rule this prevents
292
-
293
- **MANDATORY: Every pre-ranked item listed above MUST appear in your output — do not drop or skip any.**
294
-
295
- ${sections}
296
- ${repoSupplementNote}
297
-
298
- **Test type mix — MANDATORY:**
299
- ${isFrontendOnlyProject
300
- ? `This is a frontend repo. Focus on E2E and UI tests only — E2E covers the full browser-to-backend flow (highest coverage), UI exercises frontend components that call backend APIs. Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.`
301
- : isFrontendProject
302
- ? `This is a full-stack repo. Coverage ranking: E2E (full browser-to-backend flow) > UI (frontend exercises backend APIs) > Integration (backend chains) > Contract (single endpoint). Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`), in addition to backend integration and contract tests.`
303
- : `Focus on integration and contract tests for all API endpoints.`}
304
- **No smoke tests. No fuzz tests.**
305
-
306
- **You MUST present EXACTLY ${topN} recommendations. Do NOT execute any tests. Do NOT produce fewer than ${topN}.**`;
307
- }
308
- // ── PR / branch-diff mode: execution plan ────────────────────────────────
309
148
  return `## Execution Plan
310
149
  Seed: ${seed} | Endpoints: ${endpointCount} | Budget: ${generateItems.length} generate + ${Math.max(topN - generateItems.length, 0)} additional = ${topN} total
311
150
 
@@ -531,7 +370,7 @@ ${detailBlocks}
531
370
  const errorA = a.scenario.steps.some(s => s.interactionType === "error" || s.interactionType === "edge-case") ? 1 : 0;
532
371
  const errorB = b.scenario.steps.some(s => s.interactionType === "error" || s.interactionType === "edge-case") ? 1 : 0;
533
372
  if (errorB !== errorA)
534
- return errorA - errorB;
373
+ return errorB - errorA;
535
374
  // Use locale-independent comparison to avoid runtime-locale non-determinism
536
375
  const nameA = a.scenario.scenarioName;
537
376
  const nameB = b.scenario.scenarioName;
@@ -588,10 +427,7 @@ Do not churn recommendations without cause.
588
427
  `;
589
428
  }
590
429
  else if (scored.length > 0) {
591
- const projectType = analysis.projectClassification.projectType;
592
- const isFrontendProject = projectType === "full-stack" || projectType === "frontend";
593
- const isFrontendOnlyProject = projectType === "frontend";
594
- mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, isDiffScope, isFrontendProject, isFrontendOnlyProject);
430
+ mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces);
595
431
  }
596
432
  else {
597
433
  mainSection = `
@@ -665,12 +501,13 @@ and adjust the test approach if needed.
665
501
  historyBody += `
666
502
  ### Previously Recommended (not generated)
667
503
  ${recLines}
668
- **Stability rule**: Carry forward previously recommended tests unchanged in
669
- additionalRecommendations if they still apply match by scenarioName (for multi-step
670
- scenarios) or by endpoint (for single-endpoint tests). Re-derive category and priority
671
- from the test content. Drop only if the underlying endpoint was removed, business logic
672
- changed, or the test is now covered by a generated test.
673
- GENERATE items are always executed regardless of prior recommendations — do not suppress them.
504
+ **Stability rule**: If a previously recommended test still applies to the current code
505
+ (the endpoint exists, the business logic hasn't changed), carry it forward in your
506
+ additionalRecommendations match by scenarioName (for multi-step scenarios) or by
507
+ endpoint (for single-endpoint tests). Re-derive category and priority from the test
508
+ content. Do NOT drop a previous recommendation unless the underlying code was removed
509
+ or the test is now covered by a generated test.
510
+ Only add NEW recommendations for code paths introduced in the latest commit.
674
511
  `;
675
512
  }
676
513
  prHistorySection = `
@@ -678,11 +515,8 @@ GENERATE items are always executed regardless of prior recommendations — do no
678
515
  Tests from prior bot runs are still in the working tree — the maintenance pipeline
679
516
  (Task 2) keeps them up to date. Use the history below to **avoid duplicating** existing
680
517
  coverage and to fill gaps:
681
- - **GENERATE section is unaffected by prior history** always execute ALL pre-ranked
682
- GENERATE items regardless of what was generated in prior runs. The execution pipeline
683
- handles deduplication at the file level.
684
- - Tests listed under "Previously Generated Tests" are maintained automatically by Task 2 —
685
- do NOT include them in additionalRecommendations.
518
+ - **Do NOT re-recommend** tests listed under "Previously Generated Tests" they already
519
+ exist and are maintained automatically.
686
520
  - **Carry forward** previously recommended-but-not-generated tests unchanged in
687
521
  additionalRecommendations if they still apply. Promote the highest-priority ones
688
522
  into generation slots if capacity allows.
@@ -202,15 +202,14 @@ describe("buildRecommendationPrompt — PR History section", () => {
202
202
  expect(prompt).toContain("Promote the highest-priority ones");
203
203
  expect(prompt).toContain("into generation slots if capacity allows");
204
204
  });
205
- it("instructs that GENERATE is unaffected by prior history for implemented tests (Gap 4)", () => {
205
+ it("includes do-not-re-recommend instruction for implemented tests", () => {
206
206
  const ctx = makePRContext({
207
207
  previousRecommendations: [
208
208
  { testType: "contract", endpoint: "GET /api/items", status: "implemented", commentId: "1" },
209
209
  ],
210
210
  });
211
211
  const prompt = buildRecommendationPrompt(minimalAnalysis(), "current_branch_diff", 10, ctx);
212
- expect(prompt).toContain("GENERATE section is unaffected by prior history");
213
- expect(prompt).not.toContain("Do NOT re-recommend");
212
+ expect(prompt).toContain("Do NOT re-recommend");
214
213
  expect(prompt).toContain("Previously Generated Tests");
215
214
  });
216
215
  it("de-duplicates multi-step scenario entries to one line per scenario", () => {
@@ -282,31 +281,29 @@ function minimalScenario(overrides = {}) {
282
281
  };
283
282
  }
284
283
  describe("buildRecommendationPrompt — Stability and supplement section", () => {
285
- // Recommendation Stability is a PR-mode (branch_diff) concept — carry-forward across bot runs.
286
- // Full-repo mode is presentation-only; there is no previous-run state to carry forward.
287
- it("includes Recommendation Stability section in output when scenarios exist (PR mode)", () => {
284
+ it("includes Recommendation Stability section in output when scenarios exist", () => {
288
285
  const analysis = minimalAnalysis({
289
286
  businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
290
287
  });
291
- const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10);
288
+ const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
292
289
  expect(prompt).toContain("## Recommendation Stability");
293
290
  });
294
- it("stability section uses scenarioName/endpoint matching strategy (PR mode)", () => {
291
+ it("stability section uses scenarioName/endpoint matching strategy", () => {
295
292
  const analysis = minimalAnalysis({
296
293
  businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
297
294
  });
298
- const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10);
295
+ const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
299
296
  const stabilityStart = prompt.indexOf("## Recommendation Stability");
300
297
  const stabilityBlock = prompt.slice(stabilityStart, stabilityStart + 500);
301
298
  expect(stabilityBlock).toContain("scenarioName");
302
299
  expect(stabilityBlock).toContain("endpoint");
303
300
  expect(stabilityBlock).toContain("Re-derive category and priority");
304
301
  });
305
- it("stability section specifies when to drop a recommendation (PR mode)", () => {
302
+ it("stability section specifies when to drop a recommendation", () => {
306
303
  const analysis = minimalAnalysis({
307
304
  businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
308
305
  });
309
- const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10);
306
+ const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
310
307
  expect(prompt).toContain("target endpoint was removed");
311
308
  expect(prompt).toContain("business logic changed");
312
309
  expect(prompt).toContain("covered by a generated test");
@@ -324,12 +321,12 @@ describe("buildRecommendationPrompt — Stability and supplement section", () =>
324
321
  it("MAX_TESTS_TO_GENERATE is 3", () => {
325
322
  expect(MAX_TESTS_TO_GENERATE).toBe(3);
326
323
  });
327
- it("uses MAX_CRITICAL_TESTS in category-aware selection rules (PR mode)", () => {
324
+ it("uses MAX_CRITICAL_TESTS in category-aware selection rules", () => {
328
325
  const analysis = minimalAnalysis({
329
326
  businessContext: { mainPurpose: "Test API", userFlows: [], dataFlows: [], integrationPatterns: [], draftedScenarios: [minimalScenario()] },
330
327
  });
331
- // MAX_CRITICAL_TESTS applies to PR mode (GENERATE items) — full_repo mode only presents, does not execute
332
- const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 10);
328
+ const prompt = buildRecommendationPrompt(analysis, "full_repo", 10);
329
+ // The critical-category minimum line references MAX_CRITICAL_TESTS (= 3)
333
330
  expect(prompt).toContain("GENERATE items MUST be from HIGH-priority categories");
334
331
  });
335
332
  });
@@ -372,569 +369,3 @@ describe("PATH_PARAM_UUID_GUIDANCE — no hardcoded UUID anchor", () => {
372
369
  expect(prompt).not.toMatch(UUID_V4_REGEX);
373
370
  });
374
371
  });
375
- // ---------------------------------------------------------------------------
376
- // Regression tests — PR #110 quality baseline
377
- //
378
- // Guard against regressions in recommendation quality. These tests assert that
379
- // the key signals that made PR #110's recommendations excellent are present in
380
- // both full_repo and branch_diff (PR) modes.
381
- // Baseline: https://github.com/letsramp/demoshop-fullstack/pull/110
382
- // ---------------------------------------------------------------------------
383
- function mockDiffScenario(overrides = {}) {
384
- return {
385
- scenarioName: "orders-update-with-discount",
386
- description: "PUT /api/v1/orders/{order_id} with discount_percent — verifies total_amount formula",
387
- category: "business_rule",
388
- priority: "high",
389
- steps: [
390
- { order: 1, method: "POST", path: "/api/v1/products", expectedStatusCode: 201, description: "Create product", interactionType: "success" },
391
- { order: 2, method: "POST", path: "/api/v1/orders", expectedStatusCode: 201, description: "Create order", interactionType: "success", chainsFrom: { sourceField: "id", sourceStep: 1, sourceLocation: "body", targetParam: "product_id", targetLocation: "body" } },
392
- { order: 3, method: "PUT", path: "/api/v1/orders/{order_id}", expectedStatusCode: 200, description: "Apply discount", interactionType: "success", chainsFrom: { sourceField: "order_id", sourceStep: 2, sourceLocation: "body", targetParam: "order_id", targetLocation: "path" } },
393
- ],
394
- chainingKeys: ["id", "order_id"],
395
- requiresAuth: true,
396
- estimatedComplexity: "moderate",
397
- testType: "integration",
398
- ...overrides,
399
- };
400
- }
401
- function analysisWithScenario(scope) {
402
- const base = minimalAnalysis({
403
- businessContext: {
404
- mainPurpose: "E-commerce demo",
405
- userFlows: [],
406
- dataFlows: [],
407
- integrationPatterns: [],
408
- draftedScenarios: [mockDiffScenario()],
409
- },
410
- });
411
- if (scope === "current_branch_diff") {
412
- return {
413
- ...base,
414
- branchDiffContext: {
415
- currentBranch: "shiny/edit-order",
416
- baseBranch: "main",
417
- changedFiles: ["backend/app/routers/orders.py"],
418
- newEndpoints: [{
419
- path: "/api/v1/orders/{order_id}",
420
- methods: [{ method: "PUT", sourceFile: "orders.py", interactionCount: 3 }],
421
- }],
422
- modifiedEndpoints: [],
423
- affectedServices: ["orders"],
424
- },
425
- };
426
- }
427
- return base;
428
- }
429
- describe("PR #110 quality baseline — full_repo mode", () => {
430
- let prompt;
431
- beforeAll(() => { prompt = buildRecommendationPrompt(analysisWithScenario("full_repo"), "full_repo", 20); });
432
- it("source enrichment targets each endpoint's route handler, not 'changed files'", () => {
433
- expect(prompt).toContain("Source-Code Enrichment");
434
- expect(prompt).toContain("route handler");
435
- expect(prompt).not.toContain("Read the source code for ALL changed files");
436
- });
437
- it("includes test pattern guidelines for quality anchoring", () => {
438
- expect(prompt).toContain("Test Pattern Guidelines");
439
- });
440
- it("includes concrete impressive/deprioritise examples", () => {
441
- expect(prompt).toContain("Impressive (these catch prod bugs)");
442
- expect(prompt).toContain("Deprioritise");
443
- });
444
- it("supplement ordering puts edge cases before cross-resource (Tier 1 before Tier 3)", () => {
445
- const tier1Idx = prompt.indexOf("Tier 1");
446
- const tier3Idx = prompt.indexOf("Tier 3");
447
- expect(tier1Idx).toBeGreaterThan(-1);
448
- expect(tier3Idx).toBeGreaterThan(-1);
449
- expect(tier1Idx).toBeLessThan(tier3Idx);
450
- });
451
- it("supplement Tier 1 calls out boundary values and invalid IDs explicitly", () => {
452
- expect(prompt).toMatch(/Tier 1.*boundary values/s);
453
- expect(prompt).toMatch(/Tier 1.*invalid.*non-existent IDs/s);
454
- });
455
- it("includes 5-dimension quality rubric", () => {
456
- expect(prompt).toContain("Production Safety");
457
- expect(prompt).toContain("Bug-Finding Potential");
458
- expect(prompt).toContain("Coverage Gap");
459
- });
460
- it("includes per-recommendation format instruction", () => {
461
- // Full-repo mode hides category/priority from user output — check for format label and key fields
462
- expect(prompt).toContain("Per-recommendation format");
463
- expect(prompt).toContain("tool call");
464
- expect(prompt).toContain("From source");
465
- });
466
- it("includes unique-constraint storage gating for Redis", () => {
467
- expect(prompt).toContain("Unique constraints");
468
- expect(prompt).toContain("Redis");
469
- });
470
- });
471
- // ---------------------------------------------------------------------------
472
- // Tests — full_repo output format and execution guardrails
473
- //
474
- // Guard that full_repo mode:
475
- // - never emits execution/GENERATE language
476
- // - groups items by test type with section headers
477
- // - hides category/priority labels from user-facing rendered items
478
- // - emits "Do NOT execute any tests"
479
- // - renders pre-ranked item names
480
- // - includes cascade guidance
481
- // - scopes Tier 1 supplement to "list" (not "GENERATE set")
482
- // ---------------------------------------------------------------------------
483
- function fullRepoAnalysisWithScenarios(overrides = {}, scenarios = []) {
484
- return minimalAnalysis({
485
- businessContext: {
486
- mainPurpose: "E-commerce API",
487
- userFlows: [],
488
- dataFlows: [],
489
- integrationPatterns: [],
490
- draftedScenarios: scenarios.length > 0 ? scenarios : [mockDiffScenario()],
491
- },
492
- ...overrides,
493
- });
494
- }
495
- function makeContractScenario() {
496
- return {
497
- scenarioName: "create-product-contract",
498
- description: "POST /api/v1/products auth boundary",
499
- category: "security_boundary",
500
- priority: "high",
501
- steps: [{ order: 1, method: "POST", path: "/api/v1/products", expectedStatusCode: 201, description: "Create product", interactionType: "success" }],
502
- chainingKeys: [],
503
- requiresAuth: true,
504
- estimatedComplexity: "simple",
505
- testType: "contract",
506
- };
507
- }
508
// Full-repo mode renders a grouped, recommendation-only prompt. These tests pin
// the phrases that must (and must not) appear in it.
describe("full_repo mode — output format and execution guardrails", () => {
  let prompt;

  beforeAll(() => {
    const scenarios = [mockDiffScenario(), makeContractScenario()];
    prompt = buildRecommendationPrompt(fullRepoAnalysisWithScenarios({}, scenarios), "full_repo", 10);
  });

  // Asserts that `earlierHeader` precedes `laterHeader`, but only when both are
  // present; if either header is missing the check is skipped.
  const expectOrderedWhenBothPresent = (earlierHeader, laterHeader) => {
    const earlierIdx = prompt.indexOf(earlierHeader);
    const laterIdx = prompt.indexOf(laterHeader);
    if (earlierIdx === -1 || laterIdx === -1) {
      return;
    }
    expect(earlierIdx).toBeLessThan(laterIdx);
  };

  it("does NOT contain GENERATE execution language", () => {
    const executionPhrases = ["### GENERATE", "execute these in order", "one retry on failure then skip"];
    for (const phrase of executionPhrases) {
      expect(prompt).not.toContain(phrase);
    }
  });

  it("does NOT contain the PR-mode ADDITIONAL section header", () => {
    // The '### ADDITIONAL (list in additionalRecommendations...)' header belongs to
    // the PR-mode layout and must not leak into the grouped full_repo output.
    const prModeHeader = "### ADDITIONAL (list in additionalRecommendations";
    expect(prompt).not.toContain(prModeHeader);
  });

  it("contains explicit 'Do NOT execute any tests' instruction", () => {
    expect(prompt).toContain("Do NOT execute any tests");
  });

  it("contains 'Repo mode' header or preamble", () => {
    expect(prompt).toContain("Repo mode");
  });

  it("groups items by test type — Integration section header present", () => {
    // Either an Integration or a Contract header satisfies this check.
    const typeHeader = /### (Integration|Contract)/;
    expect(prompt).toMatch(typeHeader);
  });

  it("E2E section appears before Integration section (E2E ranked highest coverage)", () => {
    // If an E2E section exists, it must come before Integration...
    expectOrderedWhenBothPresent("### E2E", "### Integration");
    // ...and, at minimum, before Contract.
    expectOrderedWhenBothPresent("### E2E", "### Contract");
  });

  it("UI section appears before Integration and Contract sections", () => {
    expectOrderedWhenBothPresent("### UI", "### Integration");
    expectOrderedWhenBothPresent("### UI", "### Contract");
  });

  it("prompt forbids the LLM from creating a 'Removed Recommendations' section", () => {
    // The 'NEVER create' instruction must be present so the LLM does not add such a section.
    expect(prompt).toContain("NEVER create a");
    // The phrase may appear inside that instruction, but never as an actual heading.
    const forbiddenHeadings = [/^##+ Removed Recommendations/m, /^##+ Not Applicable/m];
    for (const heading of forbiddenHeadings) {
      expect(prompt).not.toMatch(heading);
    }
  });

  it("rendered item does NOT contain 'priority=' label visible to user", () => {
    // priority= is a PR-mode label; it must not appear in rendered sections.
    const priorityLabel = /priority=(HIGH|MEDIUM|LOW|CRITICAL)/;
    expect(prompt).not.toMatch(priorityLabel);
  });

  it("rendered item does NOT contain pipe-delimited category label", () => {
    // The "| category |" pattern is used in PR-mode GENERATE blocks.
    const pipedCategory = /\| (security_boundary|business_rule|data_integrity|crud|workflow) \|/;
    expect(prompt).not.toMatch(pipedCategory);
  });

  it("renders the pre-ranked scenario name in the output", () => {
    expect(prompt).toContain("orders-update-with-discount");
  });

  it("includes cascade vs referential integrity guidance", () => {
    expect(prompt).toContain("Cascade vs referential integrity");
  });

  it("supplement Tier 1 is scoped to 'list' (not 'GENERATE set') in full_repo", () => {
    // full_repo has no GENERATE set, so the supplement refers to the pre-ranked list.
    expect(prompt).toMatch(/Tier 1.*list/s);
    expect(prompt).not.toMatch(/Tier 1.*GENERATE set/s);
  });

  it("supplement note references 5-dimension rubric for priority assignment", () => {
    expect(prompt).toContain("5-dimension rubric");
  });

  it("cascade guidance instructs silent removal — no 'Removed Recommendations' section", () => {
    // The guidance must say to remove silently rather than list removed items.
    const requiredPhrases = ["silently", "Do NOT list removed scenarios"];
    for (const phrase of requiredPhrases) {
      expect(prompt).toContain(phrase);
    }
  });
});
592
- // ---------------------------------------------------------------------------
593
- // Tests — full_repo mode: full-stack vs backend-only test mix
594
- // ---------------------------------------------------------------------------
595
// Checks how the full_repo prompt varies with the project classification:
// full-stack and frontend repos get UI/E2E mandates, backend-only repos do not.
describe("full_repo mode — full-stack repo test mix", () => {
  // Builds a full_repo analysis whose only override is the project classification.
  const analysisFor = (projectType, primaryLanguage, primaryFramework, deploymentPattern) =>
    fullRepoAnalysisWithScenarios({
      projectClassification: {
        projectType,
        primaryLanguage,
        primaryFramework,
        deploymentPattern,
      },
    });
  const fullStackAnalysis = () => analysisFor("full-stack", "TypeScript", "Next.js", "full-stack");
  const backendOnlyAnalysis = () => analysisFor("rest-api", "Python", "FastAPI", "traditional");
  const frontendAnalysis = () => analysisFor("frontend", "TypeScript", "React", "traditional");
  const fullRepoPrompt = (analysis, topN) => buildRecommendationPrompt(analysis, "full_repo", topN);

  // topN=10 → 15% × 10 = 1.5 → round → 2 for both E2E and UI
  it("full-stack repo mandates percentage-based UI slots (topN=10 → ≥2)", () => {
    const prompt = fullRepoPrompt(fullStackAnalysis(), 10);
    expect(prompt).toContain("skyramp_ui_test_generation");
    expect(prompt).toMatch(/at least 2 UI test/);
  });

  it("full-stack repo mandates percentage-based E2E slots (topN=10 → ≥2)", () => {
    const prompt = fullRepoPrompt(fullStackAnalysis(), 10);
    expect(prompt).toContain("skyramp_e2e_test_generation");
    expect(prompt).toMatch(/at least 2 E2E test/);
  });

  // topN=20 → 15% × 20 = 3 for both E2E and UI (scales up vs fixed ≥1/≥2)
  it("full-stack repo scales to ≥3 E2E and ≥3 UI at topN=20", () => {
    const prompt = fullRepoPrompt(fullStackAnalysis(), 20);
    expect(prompt).toMatch(/at least 3 E2E test/);
    expect(prompt).toMatch(/at least 3 UI test/);
  });

  // topN=5 → 15% × 5 = 0.75 → round → 1, floor at 1
  it("full-stack repo floors at ≥1 E2E and ≥1 UI for small topN=5", () => {
    const prompt = fullRepoPrompt(fullStackAnalysis(), 5);
    expect(prompt).toMatch(/at least 1 E2E test/);
    expect(prompt).toMatch(/at least 1 UI test/);
  });

  it("full-stack repo explicitly excludes smoke and fuzz tests", () => {
    const prompt = fullRepoPrompt(fullStackAnalysis(), 10);
    expect(prompt).toContain("No smoke tests");
    expect(prompt).toContain("No fuzz tests");
  });

  it("backend-only (rest-api) repo does NOT mandate UI/E2E tests", () => {
    const prompt = fullRepoPrompt(backendOnlyAnalysis(), 10);
    // Tool names also appear in the generic tool-workflow docs, so the mandate
    // wording is checked instead of the tool names.
    expect(prompt).not.toMatch(/at least \d+ (UI|E2E) test/);
    expect(prompt).not.toContain("supplement MUST include");
    expect(prompt).not.toContain("full-stack repo");
  });

  it("backend-only repo focuses on integration and contract tests", () => {
    const prompt = fullRepoPrompt(backendOnlyAnalysis(), 10);
    expect(prompt).toContain("integration and contract tests");
  });

  it("backend-only repo still excludes smoke and fuzz tests", () => {
    const prompt = fullRepoPrompt(backendOnlyAnalysis(), 10);
    expect(prompt).toContain("No smoke tests");
    expect(prompt).toContain("No fuzz tests");
  });

  it("'frontend' project type focuses on UI/E2E only — NOT backend tests", () => {
    const prompt = fullRepoPrompt(frontendAnalysis(), 10);
    // topN=10 → 15% × 10 = 1.5 → round → 2 for both
    expect(prompt).toMatch(/at least 2 UI test/);
    expect(prompt).toMatch(/at least 2 E2E test/);
    // Must NOT say "in addition to backend integration and contract tests"
    expect(prompt).not.toContain("in addition to backend integration and contract tests");
    // Must explicitly rule out integration/contract tests
    expect(prompt).toContain("Do NOT add integration or contract tests");
  });

  it("'frontend' project type says 'frontend repo' not 'full-stack repo'", () => {
    const prompt = fullRepoPrompt(frontendAnalysis(), 10);
    expect(prompt).toContain("frontend repo");
    expect(prompt).not.toContain("full-stack repo");
  });

  it("'full-stack' project type includes BOTH backend and frontend tests", () => {
    const prompt = fullRepoPrompt(fullStackAnalysis(), 10);
    expect(prompt).toContain("full-stack repo");
    expect(prompt).toContain("in addition to backend integration and contract tests");
  });

  it("full-stack repo explains E2E > UI > Integration > Contract coverage ranking", () => {
    const prompt = fullRepoPrompt(fullStackAnalysis(), 10);
    for (const phrase of ["Coverage ranking", "E2E", "UI"]) {
      expect(prompt).toContain(phrase);
    }
  });

  // Regression guard (PR #110). Per the original author's note, scenario drafting
  // does not emit UI or E2E test types, so they can only come from the LLM
  // supplement; the supplement note therefore has to mandate them explicitly
  // for full-stack repos.
  it("full-stack supplement note explicitly mandates UI and E2E with percentage-based counts (PR #110 regression guard)", () => {
    // topN=20, 15% → 3 E2E + 3 UI mandated in the supplement note
    const prompt = fullRepoPrompt(fullStackAnalysis(), 20);
    // Tool names must appear in the supplement (not just the test-mix footer)
    const requiredIdx = prompt.indexOf("REQUIRED — You MUST add");
    const e2eIdx = prompt.indexOf("skyramp_e2e_test_generation");
    expect(requiredIdx).toBeGreaterThan(-1);
    expect(e2eIdx).toBeGreaterThan(-1);
    expect(e2eIdx).toBeGreaterThan(requiredIdx); // inside supplement note
    // Percentage-based count: topN=20 → 3
    expect(prompt).toMatch(/at least 3 E2E test/);
    expect(prompt).toMatch(/at least 3 UI test/);
  });

  it("backend-only repo supplement note does NOT add UI/E2E mandate", () => {
    const prompt = fullRepoPrompt(backendOnlyAnalysis(), 20);
    const requiredIdx = prompt.indexOf("REQUIRED — You MUST add");
    // When no supplement was rendered there is nothing to inspect.
    if (requiredIdx !== -1) {
      const supplementBlock = prompt.slice(requiredIdx, requiredIdx + 800);
      // Backend-only repos should NOT mandate UI/E2E in the supplement tiers
      expect(supplementBlock).not.toContain("full-stack repo, the supplement MUST include");
    }
  });
});
746
- // ---------------------------------------------------------------------------
747
- // Tests — full_repo mode: PR mode must NOT be affected by these changes
748
- // ---------------------------------------------------------------------------
749
// Guards against full_repo-specific wording leaking into the PR (branch diff) prompt.
describe("full_repo mode — PR mode unchanged by full_repo changes", () => {
  const PR_SCOPE = "current_branch_diff";
  let prPrompt;

  beforeAll(() => {
    const prAnalysis = analysisWithScenario(PR_SCOPE);
    prPrompt = buildRecommendationPrompt(prAnalysis, PR_SCOPE, 10);
  });

  it("PR mode still contains GENERATE execution language", () => {
    expect(prPrompt).toContain("### GENERATE");
  });

  it("PR mode still shows priority= labels on GENERATE items", () => {
    const priorityLabel = /priority=(HIGH|MEDIUM|LOW|CRITICAL)/;
    expect(prPrompt).toMatch(priorityLabel);
  });

  it("PR mode does not show 'Do NOT execute any tests'", () => {
    expect(prPrompt).not.toContain("Do NOT execute any tests");
  });

  it("PR mode does not show 'Repo mode' preamble", () => {
    expect(prPrompt).not.toContain("Repo mode — no tests are executed");
  });
});
767
// Baseline wording checks for the PR (branch diff) prompt rendered with topN=20.
describe("PR #110 quality baseline — branch_diff (PR) mode", () => {
  const PR_SCOPE = "current_branch_diff";
  let prompt;

  beforeAll(() => {
    const prAnalysis = analysisWithScenario(PR_SCOPE);
    prompt = buildRecommendationPrompt(prAnalysis, PR_SCOPE, 20);
  });

  it("source enrichment references changed files (not 'each endpoint')", () => {
    for (const phrase of ["Source-Code Enrichment", "changed files"]) {
      expect(prompt).toContain(phrase);
    }
    expect(prompt).not.toContain("For each endpoint listed in the Repository Context above, read the route handler");
  });

  it("supplement Tier 1 scoped to GENERATE set", () => {
    expect(prompt).toMatch(/Tier 1.*GENERATE set/s);
  });

  it("supplement ordering puts edge cases before cross-resource", () => {
    const [tier1Idx, tier3Idx] = ["Tier 1", "Tier 3"].map((label) => prompt.indexOf(label));
    expect(tier1Idx).toBeGreaterThan(-1);
    expect(tier3Idx).toBeGreaterThan(-1);
    expect(tier1Idx).toBeLessThan(tier3Idx);
  });

  it("includes cascade vs referential integrity guidance", () => {
    expect(prompt).toContain("Cascade vs referential integrity");
  });

  it("includes per-recommendation format requirements", () => {
    expect(prompt).toContain("Per-recommendation format");
  });

  it("GENERATE block present for the business_rule scenario", () => {
    for (const phrase of ["GENERATE", "orders-update-with-discount"]) {
      expect(prompt).toContain(phrase);
    }
  });
});
796
- // ---------------------------------------------------------------------------
797
- // Regression tests — v3 gap fixes
798
- // ---------------------------------------------------------------------------
799
describe("Gap 1 — happy-path ranking: success scenarios ranked before error/edge-case scenarios", () => {
  // Builds a three-step create / verify / cleanup scenario. Every step carries the
  // given interaction type; non-"success" types expect 404 on every step.
  function makeScenarioByInteraction(name, interactionType) {
    const isSuccess = interactionType === "success";
    const statusFor = (successCode) => (isSuccess ? successCode : 404);
    const steps = [
      { order: 1, method: "POST", path: "/api/items", expectedStatusCode: statusFor(201), description: "step", interactionType },
      { order: 2, method: "GET", path: "/api/items/{id}", expectedStatusCode: statusFor(200), description: "verify", interactionType },
      { order: 3, method: "DELETE", path: "/api/items/{id}", expectedStatusCode: statusFor(204), description: "cleanup", interactionType },
    ];
    return mockDiffScenario({ scenarioName: name, steps });
  }

  it("happy-path scenario ranked before error-path scenario in GENERATE block", () => {
    const baseAnalysis = analysisWithScenario("current_branch_diff");
    // The error-path scenario is deliberately listed first so that only ranking
    // can move the happy path ahead of it.
    const draftedScenarios = [
      makeScenarioByInteraction("error-path-scenario", "error"),
      makeScenarioByInteraction("happy-path-scenario", "success"),
    ];
    const analysis = {
      ...baseAnalysis,
      businessContext: {
        mainPurpose: "Test",
        userFlows: [],
        dataFlows: [],
        integrationPatterns: [],
        draftedScenarios,
      },
    };
    const prompt = buildRecommendationPrompt(analysis, "current_branch_diff", 5);
    const happyIdx = prompt.indexOf("happy-path-scenario");
    const errorIdx = prompt.indexOf("error-path-scenario");
    expect(happyIdx).toBeGreaterThan(-1);
    expect(errorIdx).toBeGreaterThan(-1);
    // A lower index means the name appears earlier in the output.
    expect(happyIdx).toBeLessThan(errorIdx);
  });
});
831
describe("Gap 2 — E2E ADDITIONAL slot gated on hasTraces", () => {
  // Builds a mixed (frontend + API) PR analysis, optionally with one trace file.
  function makeMixedPRAnalysis(hasTraceFiles) {
    // Drafted scenarios are needed so the scored list is non-empty and the
    // execution plan gets built.
    const base = analysisWithScenario("current_branch_diff");
    const traceFiles = hasTraceFiles
      ? [{ path: "/repo/tests/trace.json", format: "skyramp" }]
      : [];
    const artifacts = {
      openApiSpecs: [],
      playwrightRecordings: [],
      traceFiles,
      notFound: [],
    };
    const branchDiffContext = {
      currentBranch: "test",
      baseBranch: "main",
      // The .tsx file under frontend/components/ marks the PR as having frontend
      // changes; the new endpoint makes it a mixed PR rather than a UI-only one.
      changedFiles: ["frontend/components/App.tsx", "backend/routers/orders.py"],
      newEndpoints: [{ path: "/api/v1/orders/{order_id}", methods: [{ method: "PUT", sourceFile: "orders.py", interactionCount: 3 }] }],
      modifiedEndpoints: [],
      affectedServices: ["orders"],
    };
    return { ...base, artifacts, branchDiffContext };
  }

  const mixedPRPrompt = (hasTraceFiles) =>
    buildRecommendationPrompt(makeMixedPRAnalysis(hasTraceFiles), "current_branch_diff", 10);

  it("E2E [ADDITIONAL] slot present when hasTraces=true and frontend+API changes exist", () => {
    const prompt = mixedPRPrompt(true);
    expect(prompt).toMatch(/\[ADDITIONAL\].*E2E/s);
  });

  it("E2E [ADDITIONAL] slot absent when hasTraces=false and frontend+API changes exist", () => {
    const prompt = mixedPRPrompt(false);
    // The UI slot should still be present; only the E2E slot goes away.
    expect(prompt).toMatch(/\[ADDITIONAL\].*UI/s);
    // Tool docs mention "E2E", so look for the exact [ADDITIONAL] label instead.
    expect(prompt).not.toContain("[ADDITIONAL] | E2E |");
  });
});
866
describe("Gap 4 — PR history does NOT suppress GENERATE items on 2nd+ run", () => {
  // Renders the PR-mode prompt (topN=5) with a single prior recommendation in the PR context.
  const promptWithHistory = (previousRecommendation) => {
    const ctx = makePRContext({ previousRecommendations: [previousRecommendation] });
    return buildRecommendationPrompt(analysisWithScenario("current_branch_diff"), "current_branch_diff", 5, ctx);
  };

  it("prompt contains GENERATE-unaffected instruction when prior history exists", () => {
    const prompt = promptWithHistory({
      testType: "integration",
      endpoint: "POST /api/v1/orders",
      scenarioName: "orders-update-with-discount",
      status: "implemented",
      commentId: "1",
    });
    expect(prompt).toContain("GENERATE section is unaffected by prior history");
  });

  it("prompt does NOT contain old suppression text 'Do NOT re-recommend'", () => {
    const prompt = promptWithHistory({
      testType: "integration",
      endpoint: "POST /api/v1/orders",
      status: "implemented",
      commentId: "1",
    });
    expect(prompt).not.toContain("Do NOT re-recommend");
  });
});
886
describe("renderItem — correct tool for E2E and UI testTypes in full_repo mode", () => {
  // Two-step (list, then create) scenario tagged with the requested test type.
  function makeTypedScenario(testType) {
    const steps = [
      { order: 1, method: "GET", path: "/api/items", expectedStatusCode: 200, description: "list items", interactionType: "success" },
      { order: 2, method: "POST", path: "/api/items", expectedStatusCode: 201, description: "create item", interactionType: "success" },
    ];
    return mockDiffScenario({ scenarioName: `${testType}-scenario`, testType, steps });
  }

  // Full full_repo prompt (topN=5) for an analysis holding one scenario of `testType`.
  const promptForType = (testType) => {
    const analysis = minimalAnalysis({
      businessContext: {
        mainPurpose: "Test",
        userFlows: [],
        dataFlows: [],
        integrationPatterns: [],
        draftedScenarios: [makeTypedScenario(testType)],
      },
    });
    return buildRecommendationPrompt(analysis, "full_repo", 5);
  };

  // Recommendation content only: everything before the tool-workflow docs, which
  // list every tool. The whole prompt is used when that heading is not found
  // at a positive index.
  const recommendationContentFor = (testType) => {
    const prompt = promptForType(testType);
    const toolWorkflowsIdx = prompt.indexOf("## How to Generate Tests");
    if (toolWorkflowsIdx > 0) {
      return prompt.slice(0, toolWorkflowsIdx);
    }
    return prompt;
  };

  it("integration scenario uses skyramp_integration_test_generation in full_repo", () => {
    const prompt = promptForType("integration");
    expect(prompt).toContain("skyramp_integration_test_generation");
  });

  it("e2e scenario uses skyramp_e2e_test_generation and omits scenario step calls in full_repo", () => {
    const mainContent = recommendationContentFor("e2e");
    expect(mainContent).toContain("skyramp_e2e_test_generation");
    expect(mainContent).not.toContain("skyramp_integration_test_generation");
    // E2E does not use the per-step scenario pipeline
    expect(mainContent).not.toContain("skyramp_scenario_test_generation");
  });

  it("ui scenario uses skyramp_ui_test_generation and omits scenario step calls in full_repo", () => {
    const mainContent = recommendationContentFor("ui");
    expect(mainContent).toContain("skyramp_ui_test_generation");
    expect(mainContent).not.toContain("skyramp_integration_test_generation");
    // UI does not use the per-step scenario pipeline
    expect(mainContent).not.toContain("skyramp_scenario_test_generation");
  });

  it("integration scenario still emits per-step skyramp_scenario_test_generation calls in full_repo", () => {
    const mainContent = recommendationContentFor("integration");
    expect(mainContent).toContain("skyramp_scenario_test_generation");
    expect(mainContent).toContain("skyramp_integration_test_generation");
  });
});
@@ -120,7 +120,6 @@ Generate a net-new test. Use a unique descriptive filename to avoid overwriting
120
120
  **How to generate each type (for ADD and REGENERATE):**
121
121
  - **Integration**: call \`skyramp_scenario_test_generation\` per step (sequentially), then \`skyramp_integration_test_generation\` with the scenario file.
122
122
  Scenario JSON goes in the same \`outputDir\` (e.g. \`tests/scenario_<name>.json\`), not \`.skyramp/\`.
123
- **Required fields (MANDATORY before generating any scenario step):** For every POST/PUT/PATCH step — including prerequisite/setup steps (e.g. create a product before creating an order) — read the route handler source code or OpenAPI schema to identify ALL required request body fields. Include every required field with a realistic value. Do NOT omit fields just because they are not the focus of the test.
124
123
  - **Contract**: call \`skyramp_contract_test_generation\` with \`endpointURL\`, \`method\`, and \`requestData\` for POST/PUT/PATCH.
125
124
  Pass \`apiSchema\` if an OpenAPI spec exists.
126
125
  For internal/microservice APIs: add \`providerMode: true\` to verify implementation matches the contract.
@@ -191,7 +190,7 @@ Call \`skyramp_submit_report\` with \`summaryOutputFile\`: "${summaryOutputFile}
191
190
  VERIFY: note that the test was verified as unaffected by the diff — no file changes made.
192
191
  Do NOT include files that were newly created in this run (those go in \`newTestsCreated\`).
193
192
 
194
- **additionalRecommendations** — remaining recommendations from the ranked list (MUST contain EXACTLY ${maxRecommendations - maxGenerate} items):
193
+ **additionalRecommendations** — items you could not act on (quota exceeded, missing traces, etc.):
195
194
  \`testId\` (human-readable kebab-case, e.g. \`integration-products-orders-workflow\`), \`testType\`, \`category\`, \`scenarioName\`, \`priority\` (high/medium/low — used for sorting, not displayed), \`description\`, \`steps\`, \`reasoning\`
196
195
  Keep each \`description\` to one sentence. Omit \`requestBody\` and \`responseBody\` from steps.
197
196
  Include at most 3 steps per recommendation.
@@ -156,8 +156,6 @@ export async function parseTraceFile(filePath) {
156
156
  return { entries, userFlows, format };
157
157
  }
158
158
  const SKIP_DIRS = new Set(["node_modules", ".git", "dist", "build", ".next", ".nuxt", "coverage", "__pycache__", ".venv", "venv"]);
159
- /** Known test-artifact directories where testbot-generated traces are written. */
160
- const TRACE_SCAN_DIRS = [".skyramp", "tests", "test", "e2e", "playwright"];
161
159
  /**
162
160
  * Recursively scan a directory for files matching a predicate, up to maxDepth levels.
163
161
  */
@@ -182,22 +180,6 @@ function scanDir(dir, predicate, maxDepth, results) {
182
180
  }
183
181
  }
184
182
  }
185
/**
 * Collect files matching `predicate` from the repository root and from the
 * known test-artifact directories only.
 *
 * The root itself is scanned with maxDepth 0; each directory named in
 * TRACE_SCAN_DIRS that exists under the root is scanned with maxDepth 5.
 * Other subtrees are never visited, which keeps committed demo assets
 * (e.g. frontend/public/traces/) out of the results.
 *
 * @param {string} repositoryPath - Repository root to scan.
 * @param {(name: string) => boolean} predicate - File-name filter handed to scanDir.
 * @param {string[]} results - Accumulator passed through to scanDir.
 */
function scanTraceArtifactDirs(repositoryPath, predicate, results) {
  // Root directory first.
  scanDir(repositoryPath, predicate, 0, results);
  // Then each named test-artifact subdirectory that is present.
  for (const artifactDirName of TRACE_SCAN_DIRS) {
    const artifactDir = path.join(repositoryPath, artifactDirName);
    if (!fs.existsSync(artifactDir)) {
      continue;
    }
    scanDir(artifactDir, predicate, 5, results);
  }
}
201
183
  /**
202
184
  * Discover trace JSON files in a repository path.
203
185
  */
@@ -209,12 +191,12 @@ export function discoverTraceFiles(repositoryPath) {
209
191
  if (fs.existsSync(full))
210
192
  found.push(full);
211
193
  }
212
- // Recursive scan scoped to test-artifact dirs: any *trace*.json|har, excluding scenario/test output files
194
+ // Recursive scan: any *trace*.json|har, but exclude scenario files and test output files
213
195
  const isTraceJson = (name) => /\.(json|har)$/i.test(name) &&
214
196
  /trace/i.test(name) &&
215
197
  !/^scenario_/i.test(name) &&
216
198
  !/_test\.(json|har)$/i.test(name);
217
- scanTraceArtifactDirs(repositoryPath, isTraceJson, found);
199
+ scanDir(repositoryPath, isTraceJson, 5, found);
218
200
  // Deduplicate and sort for deterministic ordering
219
201
  return [...new Set(found)].sort();
220
202
  }
@@ -227,6 +209,6 @@ export function discoverPlaywrightZips(repositoryPath) {
227
209
  const isPlaywrightZip = (name) => /\.zip$/i.test(name) && (/playwright/i.test(name) ||
228
210
  /_trace\.zip$/i.test(name) ||
229
211
  name.toLowerCase() === "trace.zip");
230
- scanTraceArtifactDirs(repositoryPath, isPlaywrightZip, found);
212
+ scanDir(repositoryPath, isPlaywrightZip, 5, found);
231
213
  return [...new Set(found)].sort();
232
214
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@skyramp/mcp",
3
- "version": "0.0.64-rc.6",
3
+ "version": "0.0.64-rc.7",
4
4
  "main": "build/index.js",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,140 +0,0 @@
1
- /**
2
- * Unit tests for trace-parser.ts — specifically the scanTraceArtifactDirs scoping
3
- * introduced to prevent demo/fixture files (e.g. frontend/public/traces/) from being
4
- * misidentified as testbot-generated traces.
5
- */
6
- import * as fs from "fs";
7
- import * as os from "os";
8
- import * as path from "path";
9
- import { discoverTraceFiles, discoverPlaywrightZips } from "./trace-parser.js";
10
- // ---------------------------------------------------------------------------
11
- // Helpers
12
- // ---------------------------------------------------------------------------
13
/**
 * Create `dir` together with any missing parent directories.
 * Does nothing when the directory already exists.
 *
 * @param {string} dir - Directory path to create.
 */
function mkdirp(dir) {
  const options = { recursive: true };
  fs.mkdirSync(dir, options);
}
16
/**
 * Write an empty file at `file`, creating missing parent directories first.
 * An existing file at that path is overwritten with empty content.
 *
 * @param {string} file - Path of the file to create.
 */
function touch(file) {
  const parentDir = path.dirname(file);
  fs.mkdirSync(parentDir, { recursive: true });
  fs.writeFileSync(file, "");
}
20
/**
 * Run `fn` against a freshly created temporary directory and always remove
 * that directory afterwards.
 *
 * Synchronous callbacks are cleaned up as soon as they return or throw. When
 * `fn` returns a thenable, cleanup waits until it settles, so an async callback
 * does not lose its directory mid-run.
 *
 * @param {(dir: string) => unknown} fn - Callback receiving the temporary directory path.
 * @returns {unknown} Whatever `fn` returned; for async callbacks, a promise that
 *   settles the same way after cleanup has run.
 * @throws Rethrows any error thrown by `fn`, after cleanup.
 */
function withTempRepo(fn) {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), "trace-parser-test-"));
  const cleanup = () => fs.rmSync(dir, { recursive: true, force: true });

  let outcome;
  try {
    outcome = fn(dir);
  } catch (err) {
    cleanup();
    throw err;
  }

  const isThenable =
    outcome !== null &&
    (typeof outcome === "object" || typeof outcome === "function") &&
    typeof outcome.then === "function";
  if (isThenable) {
    // Defer removal until the async work has finished, whether it succeeds or fails.
    return Promise.resolve(outcome).finally(cleanup);
  }

  cleanup();
  return outcome;
}
29
- // ---------------------------------------------------------------------------
30
- // discoverPlaywrightZips — scoping tests
31
- // ---------------------------------------------------------------------------
32
// Each case drops one zip into a throwaway repo and checks whether
// discoverPlaywrightZips reports it. Only zips under the known test-artifact
// directories are expected to be found.
describe("discoverPlaywrightZips — scanTraceArtifactDirs scoping", () => {
  const cases = [
    {
      title: "does NOT discover playwright zip in frontend/public/traces/ (demo fixture dir)",
      segments: ["frontend", "public", "traces", "ui_test_playwright.zip"],
      discovered: false,
    },
    {
      title: "discovers playwright zip in tests/ (test-artifact dir)",
      segments: ["tests", "ui_test_playwright.zip"],
      discovered: true,
    },
    {
      title: "discovers playwright zip in .skyramp/ (test-artifact dir)",
      segments: [".skyramp", "recording_playwright.zip"],
      discovered: true,
    },
    {
      title: "discovers playwright zip in e2e/ (test-artifact dir)",
      segments: ["e2e", "flow_playwright.zip"],
      discovered: true,
    },
    {
      title: "discovers playwright zip in playwright/ (test-artifact dir)",
      segments: ["playwright", "trace.zip"],
      discovered: true,
    },
    {
      title: "does NOT discover zip in src/ (not a test-artifact dir)",
      segments: ["src", "recordings", "ui_playwright.zip"],
      discovered: false,
    },
    {
      title: "does NOT discover zip in deeply nested non-test dir",
      segments: ["frontend", "src", "assets", "demo_playwright.zip"],
      discovered: false,
    },
  ];

  for (const { title, segments, discovered } of cases) {
    it(title, () => {
      withTempRepo((repo) => {
        const zip = path.join(repo, ...segments);
        touch(zip);
        const found = discoverPlaywrightZips(repo);
        if (discovered) {
          expect(found).toContain(zip);
        } else {
          expect(found).toEqual([]);
        }
      });
    });
  }
});
80
- // ---------------------------------------------------------------------------
81
- // discoverTraceFiles — scoping tests
82
- // ---------------------------------------------------------------------------
83
// Each case creates one file in a throwaway repo and checks whether
// discoverTraceFiles reports it.
describe("discoverTraceFiles — scanTraceArtifactDirs scoping", () => {
  // Creates an empty file at the given path segments under `repo` and returns its full path.
  const placeFile = (repo, ...segments) => {
    const fullPath = path.join(repo, ...segments);
    touch(fullPath);
    return fullPath;
  };

  it("does NOT discover trace.json nested under frontend/public/traces/", () => {
    withTempRepo((repo) => {
      placeFile(repo, "frontend", "public", "traces", "backend_trace.json");
      const found = discoverTraceFiles(repo);
      // "backend_trace.json" is not one of the fixed root-level names, and the
      // scoped scan is not expected to descend into frontend/.
      const anyUnderFrontend = found.some((entry) => entry.includes("frontend"));
      expect(anyUnderFrontend).toBe(false);
    });
  });

  it("discovers trace.json in tests/ dir", () => {
    withTempRepo((repo) => {
      const traceFile = placeFile(repo, "tests", "backend_trace.json");
      expect(discoverTraceFiles(repo)).toContain(traceFile);
    });
  });

  it("discovers trace.json in .skyramp/ dir", () => {
    withTempRepo((repo) => {
      const traceFile = placeFile(repo, ".skyramp", "skyramp_trace.json");
      expect(discoverTraceFiles(repo)).toContain(traceFile);
    });
  });

  it("discovers root-level trace.json", () => {
    withTempRepo((repo) => {
      const traceFile = placeFile(repo, "trace.json");
      expect(discoverTraceFiles(repo)).toContain(traceFile);
    });
  });

  it("discovers root-level skyramp_traces.json via fixed-name check", () => {
    withTempRepo((repo) => {
      const traceFile = placeFile(repo, "skyramp_traces.json");
      expect(discoverTraceFiles(repo)).toContain(traceFile);
    });
  });

  it("does NOT discover scenario_ json files (excluded by predicate)", () => {
    withTempRepo((repo) => {
      placeFile(repo, "tests", "scenario_orders_trace.json");
      expect(discoverTraceFiles(repo)).toEqual([]);
    });
  });

  it("does NOT discover _test.json files (excluded by predicate)", () => {
    withTempRepo((repo) => {
      placeFile(repo, "tests", "orders_trace_test.json");
      expect(discoverTraceFiles(repo)).toEqual([]);
    });
  });

  it("results are deduplicated when fixed-name and scan both find the same root file", () => {
    withTempRepo((repo) => {
      const traceFile = placeFile(repo, "trace.json");
      const occurrences = discoverTraceFiles(repo).filter((entry) => entry === traceFile);
      expect(occurrences).toHaveLength(1);
    });
  });
});