@skyramp/mcp 0.1.2 → 0.1.4
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/build/prompts/test-maintenance/driftAnalysisSections.js +2 -2
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +26 -21
- package/build/prompts/test-recommendation/recommendationSections.js +42 -10
- package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +2 -5
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +114 -157
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +250 -18
- package/build/prompts/testbot/testbot-prompts.js +17 -9
- package/build/services/ScenarioGenerationService.js +2 -1
- package/build/services/TestDiscoveryService.js +22 -7
- package/build/services/TestDiscoveryService.test.js +44 -0
- package/build/tools/generate-tests/generateBatchScenarioRestTool.js +3 -4
- package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +9 -0
- package/build/tools/submitReportTool.js +4 -3
- package/build/tools/submitReportTool.test.js +16 -2
- package/build/tools/test-management/analyzeChangesTool.js +264 -140
- package/build/tools/test-management/analyzeChangesTool.test.js +3 -1
- package/build/tools/test-management/analyzeTestHealthTool.js +5 -0
- package/build/types/RepositoryAnalysis.js +8 -0
- package/build/types/TestRecommendation.js +2 -0
- package/build/utils/branchDiff.js +24 -8
- package/build/utils/featureFlags.js +25 -0
- package/build/utils/httpDefaults.js +12 -0
- package/build/utils/repoScanner.js +16 -2
- package/build/utils/routeParsers.js +79 -79
- package/build/utils/routeParsers.test.js +192 -66
- package/build/utils/scenarioDrafting.js +116 -497
- package/build/utils/scenarioDrafting.test.js +260 -480
- package/package.json +1 -1
|
@@ -4,8 +4,9 @@ import { WorkspaceAuthType, getDefaultAuthHeader, AUTH_MIDDLEWARE_PATTERNS_STR }
|
|
|
4
4
|
import { logger } from "../../utils/logger.js";
|
|
5
5
|
import { extractResourceFromPath } from "../../utils/routeParsers.js";
|
|
6
6
|
import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria, buildFewShotExamples, buildVerificationChecklist, buildGenerationRules, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
|
|
7
|
-
import { CATEGORY_PRIORITY, TEST_CATEGORIES } from "../../types/TestRecommendation.js";
|
|
7
|
+
import { CATEGORY_PRIORITY, PRIORITY_TIER_ORDER, TEST_CATEGORIES } from "../../types/TestRecommendation.js";
|
|
8
8
|
import { buildScopeAssessmentSection, isFrontendFile } from "./scopeAssessment.js";
|
|
9
|
+
import { resolveServiceDetailsRef } from "../../utils/featureFlags.js";
|
|
9
10
|
function formatTestLocations(locs) {
|
|
10
11
|
const entries = Object.entries(locs || {});
|
|
11
12
|
if (entries.length === 0)
|
|
@@ -26,8 +27,8 @@ function formatTestLocations(locs) {
|
|
|
26
27
|
// Categories map to HIGH / MEDIUM / LOW tiers.
|
|
27
28
|
// Within a tier, novelty (new > modified > existing) breaks ties,
|
|
28
29
|
// then cross-resource, step count, and finally the deterministic SHA-256 seed.
|
|
29
|
-
//
|
|
30
|
-
const PRIORITY_ORDER =
|
|
30
|
+
// Single source of truth for priority ordering — imported from types.
|
|
31
|
+
const PRIORITY_ORDER = PRIORITY_TIER_ORDER;
|
|
31
32
|
const NOVELTY_ORDER = { new: 3, modified: 2, existing: 1 };
|
|
32
33
|
function classifyNovelty(scenario, diffContext) {
|
|
33
34
|
if (!diffContext)
|
|
@@ -35,9 +36,10 @@ function classifyNovelty(scenario, diffContext) {
|
|
|
35
36
|
const paths = scenario.steps.map(s => s.path);
|
|
36
37
|
const newPaths = new Set((diffContext.newEndpoints || []).map(ep => ep.path));
|
|
37
38
|
const modPaths = new Set((diffContext.modifiedEndpoints || []).map(ep => ep.path));
|
|
39
|
+
const removedPaths = new Set((diffContext.removedEndpoints || []).map(ep => ep.path));
|
|
38
40
|
if (paths.some(p => newPaths.has(p)))
|
|
39
41
|
return "new";
|
|
40
|
-
if (paths.some(p => modPaths.has(p)))
|
|
42
|
+
if (paths.some(p => modPaths.has(p) || removedPaths.has(p)))
|
|
41
43
|
return "modified";
|
|
42
44
|
return "existing";
|
|
43
45
|
}
|
|
@@ -132,12 +134,6 @@ function buildExternalCoverageSet(testLocations) {
|
|
|
132
134
|
}
|
|
133
135
|
// ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
|
|
134
136
|
function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject = false, isFrontendOnlyProject = false, externalCoverage = new Set()) {
|
|
135
|
-
// Full-repo mode only — percentage-based UI/E2E slot targets (15% each, floor 1).
|
|
136
|
-
const rawE2E = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
|
|
137
|
-
const rawUI = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
|
|
138
|
-
const slotsFloor = Math.floor(topN / 2);
|
|
139
|
-
const minE2ESlots = Math.min(rawE2E, slotsFloor);
|
|
140
|
-
const minUISlots = Math.min(rawUI, Math.max(0, topN - minE2ESlots));
|
|
141
137
|
const authRef = authHeaderValue
|
|
142
138
|
? `, authHeader: "${authHeaderValue}"${authSchemeSnippet}`
|
|
143
139
|
: `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
|
|
@@ -166,11 +162,9 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
166
162
|
return true;
|
|
167
163
|
})
|
|
168
164
|
: scored;
|
|
169
|
-
//
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
: topN;
|
|
173
|
-
const allItems = scoredFiltered.slice(0, backendSlotCount);
|
|
165
|
+
// All backend slots — UI/E2E split is determined by the LLM's Budget Plan
|
|
166
|
+
// (via buildScopeAssessmentSection), not by hardcoded percentage allocation.
|
|
167
|
+
const allItems = scoredFiltered.slice(0, topN);
|
|
174
168
|
const byType = new Map();
|
|
175
169
|
for (const t of TYPE_ORDER)
|
|
176
170
|
byType.set(t, []);
|
|
@@ -194,7 +188,7 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
194
188
|
return [
|
|
195
189
|
`**${rank}. ${title}**`,
|
|
196
190
|
` ${s.description}`,
|
|
197
|
-
` ${step.method} ${step.path} \u2192 ${step.expectedStatusCode}`,
|
|
191
|
+
` ${step.method} ${step.path}${step.expectedStatusCode ? ` \u2192 ${step.expectedStatusCode}` : ""}`,
|
|
198
192
|
` Tool: \`skyramp_contract_test_generation({ endpointURL: "${endpointURL}", method: "${step.method}"${authRef}${dataParam} })\``,
|
|
199
193
|
` From source: fill in requestData field names and the specific production boundary this validates`,
|
|
200
194
|
].join("\n");
|
|
@@ -203,7 +197,7 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
203
197
|
const stepLines = s.steps.map(st => {
|
|
204
198
|
const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
|
|
205
199
|
const bodyHint = isBody ? ` \u2014 body: <${st.method} ${st.path} required fields from source>` : "";
|
|
206
|
-
return ` ${st.order}. ${st.method} ${st.path} \u2192 ${st.expectedStatusCode}: ${st.description}${bodyHint}`;
|
|
200
|
+
return ` ${st.order}. ${st.method} ${st.path}${st.expectedStatusCode ? ` \u2192 ${st.expectedStatusCode}` : ""}: ${st.description}${bodyHint}`;
|
|
207
201
|
}).join("\n");
|
|
208
202
|
const isTraceBased = testType === "e2e" || testType === "ui";
|
|
209
203
|
let toolCallsBlock;
|
|
@@ -249,7 +243,7 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
249
243
|
dataParam = `, requestBody: <${st.method} ${st.path} required fields from source code>`;
|
|
250
244
|
}
|
|
251
245
|
}
|
|
252
|
-
return ` { method: "${st.method}", path: "${st.path}"
|
|
246
|
+
return ` { method: "${st.method}", path: "${st.path}"${st.expectedStatusCode ? `, statusCode: ${st.expectedStatusCode}` : ""}${dataParam} }`;
|
|
253
247
|
}).join(",\n");
|
|
254
248
|
toolCallsBlock = [
|
|
255
249
|
` skyramp_batch_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${destinationHost}", baseURL: "${baseUrl}"${scenarioAuthRef}, steps: [\n${batchSteps}\n ] })`,
|
|
@@ -281,55 +275,11 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
281
275
|
const entries = items.map((item, i) => renderItem(item, globalRank + i + 1)).join("\n\n");
|
|
282
276
|
return `### ${label} (${items.length})\n\n${entries}`;
|
|
283
277
|
});
|
|
284
|
-
|
|
285
|
-
const e2eSectionParts = [];
|
|
286
|
-
const uiSectionParts = [];
|
|
287
|
-
if (isFrontendProject) {
|
|
288
|
-
for (let i = 0; i < minE2ESlots; i++) {
|
|
289
|
-
const rank = i + 1;
|
|
290
|
-
e2eSectionParts.push(`**${rank}. E2E User Journey ${i + 1}**\n` +
|
|
291
|
-
` End-to-end test covering a complete user journey through the frontend and backend.\n` +
|
|
292
|
-
` To generate: record a browser trace, then call the generation tool.\n` +
|
|
293
|
-
` browser_navigate({ url: "${baseUrl}" }) \u2192 exercise key user flow \u2192 skyramp_export_zip({ outputPath: "<repo>/.skyramp/e2e_journey_${i + 1}.zip" })\n` +
|
|
294
|
-
` Tool: \`skyramp_e2e_test_generation({ playwrightInput: "<repo>/.skyramp/e2e_journey_${i + 1}.zip"${authHeaderOnlyRef} })\`\n` +
|
|
295
|
-
` From source: read frontend components and their API calls to identify the highest-value user journey`);
|
|
296
|
-
}
|
|
297
|
-
for (let i = 0; i < minUISlots; i++) {
|
|
298
|
-
const rank = minE2ESlots + i + 1;
|
|
299
|
-
uiSectionParts.push(`**${rank}. UI Component Test ${i + 1}**\n` +
|
|
300
|
-
` Test key UI component interactions and state changes.\n` +
|
|
301
|
-
` To generate: record a browser trace, then call the generation tool.\n` +
|
|
302
|
-
` browser_navigate({ url: "${baseUrl}" }) \u2192 interact with UI components \u2192 skyramp_export_zip({ outputPath: "<repo>/.skyramp/ui_component_${i + 1}.zip" })\n` +
|
|
303
|
-
` Tool: \`skyramp_ui_test_generation({ playwrightInput: "<repo>/.skyramp/ui_component_${i + 1}.zip"${authHeaderOnlyRef} })\`\n` +
|
|
304
|
-
` From source: read frontend component files to identify interactions, form submissions, and state transitions`);
|
|
305
|
-
}
|
|
306
|
-
// Offset backend section ranks by the number of E2E + UI placeholders
|
|
307
|
-
const offset = minE2ESlots + minUISlots;
|
|
308
|
-
backendSections.forEach((_, idx) => {
|
|
309
|
-
const t = TYPE_ORDER.filter(t => (byType.get(t) ?? []).length > 0)[idx];
|
|
310
|
-
if (!t)
|
|
311
|
-
return;
|
|
312
|
-
const items = byType.get(t);
|
|
313
|
-
const label = TYPE_LABEL[t];
|
|
314
|
-
let globalRank = offset;
|
|
315
|
-
for (const prev of TYPE_ORDER) {
|
|
316
|
-
if (prev === t)
|
|
317
|
-
break;
|
|
318
|
-
globalRank += (byType.get(prev) ?? []).length;
|
|
319
|
-
}
|
|
320
|
-
backendSections[idx] = `### ${label} (${items.length})\n\n${items.map((item, i) => renderItem(item, globalRank + i + 1)).join("\n\n")}`;
|
|
321
|
-
});
|
|
322
|
-
}
|
|
323
|
-
const allSections = [
|
|
324
|
-
...(e2eSectionParts.length > 0 ? [`### E2E (${e2eSectionParts.length})\n\n${e2eSectionParts.join("\n\n")}`] : []),
|
|
325
|
-
...(uiSectionParts.length > 0 ? [`### UI (${uiSectionParts.length})\n\n${uiSectionParts.join("\n\n")}`] : []),
|
|
326
|
-
...backendSections,
|
|
327
|
-
];
|
|
328
|
-
const sections = allSections.join("\n\n");
|
|
278
|
+
const sections = backendSections.join("\n\n");
|
|
329
279
|
const frontendTierNote = isFrontendOnlyProject
|
|
330
|
-
? `\n\n**Frontend repo:**
|
|
280
|
+
? `\n\n**Frontend repo:** add E2E and UI tests only — no integration or contract tests. The number of each is determined by the UI/E2E percentage you committed to in your Budget Plan above.`
|
|
331
281
|
: isFrontendProject
|
|
332
|
-
? `\n\n**Full-stack repo:**
|
|
282
|
+
? `\n\n**Full-stack repo:** add E2E and UI tests alongside backend tests. Fill your Budget Plan's UI/E2E percentage first, then use remaining slots for backend tests (Tiers 1-4 above).`
|
|
333
283
|
: "";
|
|
334
284
|
const repoSupplementNote = supplementCount > 0
|
|
335
285
|
? `
|
|
@@ -356,9 +306,9 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
356
306
|
</supplement_guidance>`
|
|
357
307
|
: "";
|
|
358
308
|
const typeMixText = isFrontendOnlyProject
|
|
359
|
-
? `This is a frontend repo. Focus on E2E and UI tests only.
|
|
309
|
+
? `This is a frontend repo. Focus on E2E and UI tests only. Do NOT add integration or contract tests. Split between E2E and UI based on the percentage in your Budget Plan above.`
|
|
360
310
|
: isFrontendProject
|
|
361
|
-
? `This is a full-stack repo. Coverage ranking: E2E > UI > Integration > Contract.
|
|
311
|
+
? `This is a full-stack repo. Coverage ranking: E2E > UI > Integration > Contract. Use \`skyramp_e2e_test_generation\` for E2E and \`skyramp_ui_test_generation\` for UI tests. Split between frontend and backend tests based on the percentage in your Budget Plan above.`
|
|
362
312
|
: `Focus on integration and contract tests for all API endpoints.`;
|
|
363
313
|
return `## Test Recommendations — ${topN} total (grouped by test type)
|
|
364
314
|
|
|
@@ -387,7 +337,7 @@ Before filling in tool call parameters for each item, use the analysis data alre
|
|
|
387
337
|
- Computed/derived response fields and their formulas — assert exact values; read source for formula details not captured in the analysis
|
|
388
338
|
- Auth middleware — set authHeader/authScheme from the repository context above; FastAPI HTTPBearer → 403 not 401
|
|
389
339
|
- Storage backend — if Redis or schema-less, discard unique-constraint and cascade-delete scenarios
|
|
390
|
-
- Delete behavior — hard-delete
|
|
340
|
+
- Delete behavior — read the route handler to determine actual response code (hard-delete may use 204, soft-delete/cancel may use 200)
|
|
391
341
|
|
|
392
342
|
${buildTestQualityCriteria()}
|
|
393
343
|
|
|
@@ -401,16 +351,6 @@ ${buildTestQualityCriteria()}
|
|
|
401
351
|
</enrichment_notes>`;
|
|
402
352
|
}
|
|
403
353
|
function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, externalCoverage = new Set(), relevantExternalTestPaths = []) {
|
|
404
|
-
const frontendUrl = "<frontend_url>";
|
|
405
|
-
// Slot allocation:
|
|
406
|
-
// - UI-only PR: all GENERATE slots are UI placeholders (no pre-ranked backend scenarios)
|
|
407
|
-
// - Mixed PR: last GENERATE slot is a UI placeholder; remaining slots are backend
|
|
408
|
-
// - Backend-only PR: all GENERATE slots are backend scenarios
|
|
409
|
-
const backendGenerateCount = isUIOnlyPR
|
|
410
|
-
? 0
|
|
411
|
-
: hasFrontendChanges
|
|
412
|
-
? Math.max(0, maxGen - 1)
|
|
413
|
-
: maxGen;
|
|
414
354
|
// Filter out scenarios whose primary method + resource + test type is already covered by external tests.
|
|
415
355
|
// Method-aware: an external test covering GET /orders won't block PUT /orders scenarios.
|
|
416
356
|
// This is the programmatic complement to the prompt-level Step 0 dedup instructions.
|
|
@@ -424,8 +364,10 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
424
364
|
return true;
|
|
425
365
|
})
|
|
426
366
|
: scored;
|
|
427
|
-
|
|
428
|
-
|
|
367
|
+
// All pre-ranked backend scenarios go into GENERATE slots (up to maxGen).
|
|
368
|
+
// UI/E2E split is determined by the LLM's Budget Plan — not hardcoded here.
|
|
369
|
+
const generateItems = scoredAfterExternalDedup.slice(0, Math.min(maxGen, scoredAfterExternalDedup.length));
|
|
370
|
+
const rawAdditionalItems = scoredAfterExternalDedup.slice(maxGen, topN);
|
|
429
371
|
// Filter additional items whose primary resource + test type already appear in GENERATE
|
|
430
372
|
const generatedCoverage = new Set(generateItems.map(item => scenarioCoverageKey(item.scenario)));
|
|
431
373
|
const additionalItems = rawAdditionalItems.filter(item => !generatedCoverage.has(scenarioCoverageKey(item.scenario)));
|
|
@@ -438,47 +380,10 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
438
380
|
: authHeaderValue
|
|
439
381
|
? `, authHeader: "${authHeaderValue}"`
|
|
440
382
|
: `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
|
|
441
|
-
// UI-only:
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
const zipPath = `<repositoryPath>/.skyramp/ui_test_${rank}_trace.zip`;
|
|
446
|
-
return hasTraces
|
|
447
|
-
? (`**#${rank} — GENERATE** | ui | workflow | new\n` +
|
|
448
|
-
`Scenario: ui-test-from-trace-${rank} (rename from the actual changed component/flow)\n` +
|
|
449
|
-
`Validates: UI interactions for a changed frontend component or flow.\n\n` +
|
|
450
|
-
`**Tool**: \`skyramp_ui_test_generation({ playwrightInput: "<discovered_trace_file_path>", outputDir: "<frontend service testDirectory from workspace.yml e.g. frontend/tests>" })\``)
|
|
451
|
-
: (`**#${rank} — GENERATE** | ui | workflow | new\n` +
|
|
452
|
-
`Scenario: ui-test-for-changed-component-${rank} (rename from the actual changed component/flow)\n` +
|
|
453
|
-
`Validates: UI interactions for changed frontend component/flow ${rank}.\n\n` +
|
|
454
|
-
`**Tool workflow:**\n` +
|
|
455
|
-
` 1. \`browser_navigate({ url: "${frontendUrl}" })\`\n` +
|
|
456
|
-
` 2. Interact with the changed component (read the diff to identify which component changed and what interactions it supports)\n` +
|
|
457
|
-
` 3. \`browser_snapshot()\` after each key interaction\n` +
|
|
458
|
-
` 4. \`skyramp_export_zip({ outputPath: "${zipPath}" })\` — absolute path\n` +
|
|
459
|
-
` 5. \`skyramp_ui_test_generation({ playwrightInput: "${zipPath}", outputDir: "<frontend service testDirectory from workspace.yml e.g. frontend/tests>" })\`\n\n` +
|
|
460
|
-
`Each item must target a distinct changed component or user flow.`);
|
|
461
|
-
}).join("\n\n")
|
|
462
|
-
: "";
|
|
463
|
-
// Mixed PR: reserve the last GENERATE slot for a UI test for the changed frontend components.
|
|
464
|
-
// Guard: skip when maxGen=0 (caller explicitly requested no generation)
|
|
465
|
-
const uiRank = generateItems.length + 1;
|
|
466
|
-
const uiPlaceholderBlock = (hasFrontendChanges && !isUIOnlyPR && maxGen > 0)
|
|
467
|
-
? hasTraces
|
|
468
|
-
? (`**#${uiRank} — GENERATE** | ui | workflow | new\n` +
|
|
469
|
-
`Scenario: ui-test-for-changed-components (rename from the actual changed component/flow)\n` +
|
|
470
|
-
`Validates: UI interactions for the changed frontend components in this PR.\n\n` +
|
|
471
|
-
`**Tool**: \`skyramp_ui_test_generation({ playwrightInput: "<discovered_trace_file_path>", outputDir: "<frontend service testDirectory from workspace.yml e.g. frontend/tests>" })\``)
|
|
472
|
-
: (`**#${uiRank} — GENERATE** | ui | workflow | new\n` +
|
|
473
|
-
`Scenario: ui-test-for-changed-components (rename from the actual changed component/flow)\n` +
|
|
474
|
-
`Validates: UI interactions for the changed frontend components in this PR.\n\n` +
|
|
475
|
-
`**Tool workflow:**\n` +
|
|
476
|
-
` 1. \`browser_navigate({ url: "${frontendUrl}" })\`\n` +
|
|
477
|
-
` 2. Interact with the changed component (read the diff to identify which component changed and what interactions it supports)\n` +
|
|
478
|
-
` 3. \`browser_snapshot()\` after each key interaction\n` +
|
|
479
|
-
` 4. \`skyramp_export_zip({ outputPath: "<repositoryPath>/.skyramp/ui_mixed_pr_trace.zip" })\` — absolute path\n` +
|
|
480
|
-
` 5. \`skyramp_ui_test_generation({ playwrightInput: "<repositoryPath>/.skyramp/ui_mixed_pr_trace.zip", outputDir: "<frontend service testDirectory from workspace.yml e.g. frontend/tests>" })\`\n\n` +
|
|
481
|
-
`Derive scenario name and steps from the actual changed frontend files.`)
|
|
383
|
+
// UI-only PR: provide guidance template for the LLM to derive UI tests from changed files.
|
|
384
|
+
// The LLM's Budget Plan (100% UI for UI-only PRs) determines how many to generate.
|
|
385
|
+
const uiOnlyGenerateGuidance = isUIOnlyPR
|
|
386
|
+
? `**UI-only PR — derive ${maxGen} UI tests from changed frontend files.**\nEach test must target a distinct changed component or user flow. Use \`skyramp_ui_test_generation\` to generate each test.`
|
|
482
387
|
: "";
|
|
483
388
|
const generateBlocks = generateItems.map((item, i) => {
|
|
484
389
|
const rank = i + 1;
|
|
@@ -495,7 +400,7 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
495
400
|
? `\n authHeader: "${authHeaderValue}"${authSchemeSnippet}`
|
|
496
401
|
: `\n authHeader: <resolve from workspace or OpenAPI securitySchemes>; authScheme: <if Authorization>`;
|
|
497
402
|
return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | ${item.novelty}\n` +
|
|
498
|
-
`${step.method} ${step.path} → ${step.expectedStatusCode}\n` +
|
|
403
|
+
`${step.method} ${step.path}${step.expectedStatusCode ? ` → ${step.expectedStatusCode}` : ""}\n` +
|
|
499
404
|
`Validates: ${s.description}\n\n` +
|
|
500
405
|
`**Context for generation**:\n` +
|
|
501
406
|
` Endpoint URL: ${endpointURL}${requestBodyData}${authContext}\n\n` +
|
|
@@ -518,7 +423,7 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
518
423
|
const bodyData = st.requestBody && Object.keys(st.requestBody).length > 0
|
|
519
424
|
? ` [use requestBody: ${JSON.stringify(st.requestBody)} — pass as JSON string in tool call]`
|
|
520
425
|
: "";
|
|
521
|
-
return ` ${st.order}. ${st.method} ${st.path} → ${st.expectedStatusCode}: ${st.description}${chains}${bodyHint}${bodyData}${responseHint}`;
|
|
426
|
+
return ` ${st.order}. ${st.method} ${st.path}${st.expectedStatusCode ? ` → ${st.expectedStatusCode}` : ""}: ${st.description}${chains}${bodyHint}${bodyData}${responseHint}`;
|
|
522
427
|
}).join("\n");
|
|
523
428
|
let destinationHost = "localhost";
|
|
524
429
|
try {
|
|
@@ -529,9 +434,7 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
529
434
|
const authContext = authHeaderValue
|
|
530
435
|
? `authHeader: "${authHeaderValue}"${authSchemeSnippet}`
|
|
531
436
|
: "authHeader: <resolve from workspace or OpenAPI securitySchemes>; authScheme: <if Authorization>";
|
|
532
|
-
const prereqNote = s.
|
|
533
|
-
? `\n**Prerequisite discovery**: Check for FK fields (product_id, user_id, order_id) in the endpoint's request body. If found, prepend a step to create that prerequisite resource first, then chain its \`id\` into the dependent step using template variable syntax.`
|
|
534
|
-
: "";
|
|
437
|
+
const prereqNote = `\n**Prerequisite discovery**: Check for Foreign Key fields (product_id, user_id, order_id) in the endpoint's request body. If found, prepend a step to create that prerequisite resource first, then chain its primary key field into the dependent step using template variable syntax. Check the actual field name from the response body (\`id\`, \`uuid\`, \`_id\`, etc.), response header (\`Location\`), or cookie — do not assume \`id\`.`;
|
|
535
438
|
const bugLine = s.bugCatchingTarget
|
|
536
439
|
? `**Bug to catch**: ${s.bugCatchingTarget}\n`
|
|
537
440
|
: "";
|
|
@@ -560,17 +463,16 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
560
463
|
const s = item.scenario;
|
|
561
464
|
const testType = s.testType ?? (s.steps.length === 1 ? "contract" : "integration");
|
|
562
465
|
const target = s.steps.length === 1
|
|
563
|
-
? `${s.steps[0].method} ${s.steps[0].path} → ${s.steps[0].expectedStatusCode}`
|
|
466
|
+
? `${s.steps[0].method} ${s.steps[0].path}${s.steps[0].expectedStatusCode ? ` → ${s.steps[0].expectedStatusCode}` : ""}`
|
|
564
467
|
: `Scenario: ${s.scenarioName} (${s.steps.map(st => `${st.method} ${st.path}`).join(" → ")})`;
|
|
565
468
|
return `#${rank} [ADDITIONAL] | ${testType} | ${s.category} | ${item.novelty}\n ${target}\n Validates: ${s.description}`;
|
|
566
469
|
}).join("\n\n");
|
|
567
|
-
// UI/E2E guidance — the LLM adds
|
|
568
|
-
//
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
**UI/E2E tests (add per your Budget Plan):** If your Budget Plan requires UI/E2E items beyond what is already in your GENERATE list, append an [ADDITIONAL] entry for each. If a UI test already occupies a GENERATE slot above, that slot satisfies your UI/E2E generate count — do NOT add it again to ADDITIONAL. Tool workflow for each new item:
|
|
470
|
+
// UI/E2E guidance — the LLM adds UI/E2E items as its Budget Plan dictates.
|
|
471
|
+
// Only rendered for non-UI-only PRs (UI-only PRs have dedicated guidance above).
|
|
472
|
+
const uiGuidance = (!isUIOnlyPR && hasFrontendChanges) ? `
|
|
473
|
+
**UI/E2E tests (add per your Budget Plan):** If your Budget Plan allocates UI/E2E slots, add them here. Tool workflow for each new item:
|
|
572
474
|
- **E2E**: ${hasTraces ? "Use discovered trace/recording files with `skyramp_e2e_test_generation`." : "Add to additionalRecommendations with a note that both a backend API trace (`skyramp_start_trace_collection` / `skyramp_stop_trace_collection`) and a browser Playwright recording must be collected in a live environment first. Do NOT attempt `skyramp_e2e_test_generation` without both traces present."}
|
|
573
|
-
- **UI**: ${hasTraces ? "Use an existing Playwright `.zip` trace with `skyramp_ui_test_generation`." :
|
|
475
|
+
- **UI**: ${hasTraces ? "Use an existing Playwright `.zip` trace with `skyramp_ui_test_generation`." : `Record a trace using \`browser_navigate\` + \`browser_snapshot\` + \`skyramp_export_zip\`, then call \`skyramp_ui_test_generation({ playwrightInput: "<zip_path>", outputDir: "<frontend_test_directory>" })\`. Resolve \`<frontend_test_directory>\` from ${resolveServiceDetailsRef().frontendTestDirRef}.`}
|
|
574
476
|
Derive scenario names and steps from the actual changed frontend files. If your Budget Plan calls for 0% UI/E2E, omit this entirely.` : "";
|
|
575
477
|
const supplementNote = `\n**If your Budget Plan total exceeds the pre-ranked items listed above:** draft additional tests from source-code enrichment (Step 1). For each new or changed endpoint, identify boundary or variation scenarios — formula parameters, search/filter constraints, required field validation. Only after exhausting PR-specific scenarios, add generic patterns (auth boundary → 401, non-existent ID → 404). Do NOT supplement with tests whose endpoint + test type match a GENERATE item.`;
|
|
576
478
|
// ── PR / branch-diff mode: execution plan ────────────────────────────────
|
|
@@ -597,7 +499,7 @@ ${externalTestFilesList}For every GENERATE item below, check its endpoint path a
|
|
|
597
499
|
|
|
598
500
|
**Step 1 — Source-Code Enrichment (before executing anything)**
|
|
599
501
|
Read the source code for ALL changed files. Before generating each recommendation, quote the relevant source code in a <source_evidence> block — include the route handler signature, request body schema fields, response shape, and any computed field formulas. Use these quotes to derive tool call parameters. Look for:
|
|
600
|
-
- **Auth middleware** — check for known signals (${AUTH_MIDDLEWARE_PATTERNS_STR}). If any match, override \`authHeader\` and \`authScheme\` even if
|
|
502
|
+
- **Auth middleware** — check for known signals (${AUTH_MIDDLEWARE_PATTERNS_STR}). If any match, override \`authHeader\` and \`authScheme\` even if ${resolveServiceDetailsRef().authSourceRef} says authType: none. **If no known signal matches but the diff shows security-adjacent code** (decorators like \`@requiresRole\`/\`@Protected\`, function names like \`validateToken\`/\`checkPermission\`/\`verifyHMAC\`, or imports from auth/security packages), read the relevant source file to determine the actual auth scheme before proceeding. Auth handling for \`skyramp_integration_test_generation\` with \`scenarioFile\` is covered in the Tool Workflows section below.
|
|
601
503
|
- Business rules and formulas (e.g. total_cost = compute * rate + memory * rate)
|
|
602
504
|
- State transitions and domain constraints (e.g. budget cannot drop below current spend)
|
|
603
505
|
- Validation logic (field constraints, cross-field dependencies)
|
|
@@ -632,7 +534,7 @@ If these conditions are not met, add it to ADDITIONAL only — do NOT displace a
|
|
|
632
534
|
When a qualifying candidate is inserted: place it HIGH before MEDIUM before LOW; within the same priority, source-code-derived candidates go BEFORE structural ones. Re-number ranks after insertion. The top ${maxGen} ranked items become GENERATE candidates.
|
|
633
535
|
|
|
634
536
|
**Source-code validation gates (apply during Step 1):**
|
|
635
|
-
- **Cascade vs referential integrity**: If both a cascade-delete and a delete-blocked scenario appear for the same resource pair, keep only the one matching the source
|
|
537
|
+
- **Cascade vs referential integrity**: If both a cascade-delete and a delete-blocked scenario appear for the same resource pair, keep only the one matching the source Foreign Key delete policy (ON DELETE CASCADE / cascade=True / onDelete: 'CASCADE' → keep cascade-delete; RESTRICT/PROTECT/no annotation → keep delete-blocked). Remove the inapplicable variant.
|
|
636
538
|
- **Unique constraints**: Unique-constraint scenarios (duplicate POST → 409) are pre-drafted for all resources. Confirm enforcement before keeping: SQL UNIQUE index, Mongoose unique: true, Prisma @unique, or explicit duplicate-check code. If the backend is Redis, schema-less, or has no explicit constraint in the changed files, move to ADDITIONAL with a note — do NOT generate.
|
|
637
539
|
|
|
638
540
|
**Step 2 — Diversity check (using enriched knowledge from Step 1)**
|
|
@@ -648,7 +550,7 @@ For each pair of GENERATE items, ask: same HTTP method + path + step sequence +
|
|
|
648
550
|
Same step sequence with only payload differences (e.g. 10% vs 5% discount both returning 200) = same code path = duplicate. Different scenario names do not make duplicate tests distinct.
|
|
649
551
|
|
|
650
552
|
**Step 3 — Execute merged plan in rank order**
|
|
651
|
-
Replace any scenario that pairs unrelated resources with one reflecting actual
|
|
553
|
+
Replace any scenario that pairs unrelated resources with one reflecting actual Foreign Key relationships in the codebase.
|
|
652
554
|
Use the field names and values from the \`<source_evidence>\` blocks you quoted in Step 1 to fill all tool call parameters. Prefer reusing Step 1 evidence when it already resolves a placeholder, but if a placeholder cannot be replaced with concrete values from files already read, you may read the specific schema, model, or handler file needed to resolve it. Assert response field values, not just status codes.
|
|
653
555
|
|
|
654
556
|
${buildTestQualityCriteria()}
|
|
@@ -664,10 +566,10 @@ ${buildGenerationRules(isUIOnlyPR)}
|
|
|
664
566
|
### GENERATE (process these EXACTLY as listed, in order — after completing Steps 0–2 above; if Step 0 converts an item to UPDATE, backfill the ADD slot from ADDITIONAL following the priority order in Step 0)
|
|
665
567
|
|
|
666
568
|
${isUIOnlyPR
|
|
667
|
-
? (
|
|
668
|
-
: (
|
|
569
|
+
? (uiOnlyGenerateGuidance || " (no UI generate items — derive scenarios from changed frontend files)")
|
|
570
|
+
: (generateBlocks || " (no pre-ranked generate items — draft your own based on endpoint analysis)")}
|
|
669
571
|
|
|
670
|
-
**
|
|
572
|
+
**VERIFICATION CHECK**: Before proceeding, verify your generate list covers the same endpoints and test types as the items above. Add genuinely new scenarios to ADDITIONAL instead. One retry on failure then skip to next item.
|
|
671
573
|
|
|
672
574
|
### ADDITIONAL (list in additionalRecommendations in this order after Step 1 insertion)
|
|
673
575
|
|
|
@@ -702,10 +604,17 @@ export function buildRecommendationPrompt(analysis, analysisScope = AnalysisScop
|
|
|
702
604
|
const hasFrontendChanges = isDiffScope && diffContext
|
|
703
605
|
? filteredChangedFiles.some(f => isFrontendFile(f))
|
|
704
606
|
: false;
|
|
607
|
+
// Backend changes detected if:
|
|
608
|
+
// 1. Endpoints directly matched from changed files (new/modified/removed), OR
|
|
609
|
+
// 2. Changed files are in backend service/model/middleware directories (affectedServices non-empty)
|
|
610
|
+
// but couldn't be mapped to specific endpoints (service-layer changes like services/items.ts)
|
|
705
611
|
const hasApiChanges = isDiffScope && diffContext
|
|
706
|
-
? (diffContext.newEndpoints.length > 0 || diffContext.modifiedEndpoints.length > 0)
|
|
612
|
+
? (diffContext.newEndpoints.length > 0 || diffContext.modifiedEndpoints.length > 0 || (diffContext.removedEndpoints?.length ?? 0) > 0)
|
|
707
613
|
: false;
|
|
708
|
-
const
|
|
614
|
+
const hasBackendServiceChanges = isDiffScope && diffContext
|
|
615
|
+
? (diffContext.affectedServices.length > 0 && filteredChangedFiles.some(f => !isFrontendFile(f) && /\.(ts|js|py|java|go|rb|rs|cs)$/.test(f)))
|
|
616
|
+
: false;
|
|
617
|
+
const isUIOnlyPR = hasFrontendChanges && !hasApiChanges && !hasBackendServiceChanges;
|
|
709
618
|
const hasTraces = (analysis.artifacts?.traceFiles?.length ?? 0) > 0 ||
|
|
710
619
|
(analysis.artifacts?.playwrightRecordings?.length ?? 0) > 0;
|
|
711
620
|
// ── Mode preamble ──
|
|
@@ -719,9 +628,46 @@ Output should be concise and immediately actionable.`
|
|
|
719
628
|
: `You are in **Repo mode**. Comprehensive test strategy across all endpoints.`;
|
|
720
629
|
// ── Endpoint listing ──
|
|
721
630
|
const allEndpoints = analysis.apiEndpoints.endpoints;
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
631
|
+
// In PR mode, identify which endpoints were changed so we can partition the listing.
|
|
632
|
+
const changedEndpointKeys = new Set();
|
|
633
|
+
if (isDiffScope && diffContext) {
|
|
634
|
+
for (const ep of [...(diffContext.newEndpoints || []), ...(diffContext.modifiedEndpoints || []), ...(diffContext.removedEndpoints || [])]) {
|
|
635
|
+
for (const m of (ep.methods ?? [])) {
|
|
636
|
+
changedEndpointKeys.add(`${m.method} ${ep.path}`);
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
}
|
|
640
|
+
// Renders one endpoint method as a single listing line of the form
// " METHOD path[ [auth]] (N interactions)". A missing (null/undefined)
// `interactions` list is counted as zero interactions.
const fmtEndpoint = (m, ep) => {
    const authTag = m.authRequired ? " [auth]" : "";
    const interactionCount = (m.interactions ?? []).length;
    return ` ${m.method} ${ep.path}${authTag} (${interactionCount} interactions)`;
};
|
|
641
|
+
let endpointLines;
|
|
642
|
+
if (isDiffScope && changedEndpointKeys.size > 0) {
|
|
643
|
+
const changedLines = [];
|
|
644
|
+
const otherLines = [];
|
|
645
|
+
for (const ep of allEndpoints) {
|
|
646
|
+
for (const m of (ep.methods ?? [])) {
|
|
647
|
+
const line = fmtEndpoint(m, ep);
|
|
648
|
+
if (changedEndpointKeys.has(`${m.method} ${ep.path}`)) {
|
|
649
|
+
changedLines.push(line);
|
|
650
|
+
}
|
|
651
|
+
else {
|
|
652
|
+
otherLines.push(line);
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
// Removed endpoints no longer exist in allEndpoints (current catalog), so they
|
|
657
|
+
// would be silently absent from changedLines. Append them explicitly with a
|
|
658
|
+
// [removed] marker so the LLM knows to generate verify-404/deprecation tests.
|
|
659
|
+
for (const ep of (diffContext?.removedEndpoints || [])) {
|
|
660
|
+
for (const m of (ep.methods ?? [])) {
|
|
661
|
+
changedLines.push(` ${m.method} ${ep.path} [removed]`);
|
|
662
|
+
}
|
|
663
|
+
}
|
|
664
|
+
endpointLines = `**Changed in this PR:**\n${changedLines.join("\n") || " none"}\n\n**Other endpoints (reference only — do not prioritize for testing):**\n${otherLines.join("\n") || " none"}`;
|
|
665
|
+
}
|
|
666
|
+
else {
|
|
667
|
+
endpointLines = allEndpoints
|
|
668
|
+
.flatMap((ep) => (ep.methods ?? []).map((m) => fmtEndpoint(m, ep)))
|
|
669
|
+
.join("\n");
|
|
670
|
+
}
|
|
725
671
|
const authMethod = analysis.authentication.method || "unknown";
|
|
726
672
|
const authTypeValue = workspaceAuthType ?? "";
|
|
727
673
|
let authHeaderValue;
|
|
@@ -803,9 +749,13 @@ New endpoints:
|
|
|
803
749
|
${fmtEps(diffContext.newEndpoints, (m) => `${m.sourceFile}, ${m.interactionCount} interactions`)}
|
|
804
750
|
Modified endpoints:
|
|
805
751
|
${fmtEps(diffContext.modifiedEndpoints, (m) => `${m.sourceFile}, ${m.changeType}`)}
|
|
752
|
+
Removed endpoints:
|
|
753
|
+
${fmtEps(diffContext.removedEndpoints ?? [], (m) => `${m.sourceFile}, removed`)}
|
|
806
754
|
Affected services: ${diffContext.affectedServices.join(", ") || "N/A"}
|
|
807
755
|
|
|
808
756
|
Focus on tests that validate these changes and how they interact with existing resources.
|
|
757
|
+
For removed endpoints: verify they now return 404 or the appropriate deprecation status code.
|
|
758
|
+
Allocate your test budget to endpoints listed under "Changed in this PR". Use other endpoints only as setup steps (e.g. creating a resource before testing its deletion).
|
|
809
759
|
`;
|
|
810
760
|
}
|
|
811
761
|
// ── Interactions ──
|
|
@@ -864,17 +814,11 @@ ${detailBlocks}
|
|
|
864
814
|
const na = NOVELTY_ORDER[a.novelty], nb = NOVELTY_ORDER[b.novelty];
|
|
865
815
|
if (nb !== na)
|
|
866
816
|
return nb - na;
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
return b.scenario.steps.length - a.scenario.steps.length;
|
|
873
|
-
const errorA = a.scenario.steps.some(s => s.interactionType === "error" || s.interactionType === "edge-case") ? 1 : 0;
|
|
874
|
-
const errorB = b.scenario.steps.some(s => s.interactionType === "error" || s.interactionType === "edge-case") ? 1 : 0;
|
|
875
|
-
if (errorB !== errorA)
|
|
876
|
-
return errorB - errorA;
|
|
877
|
-
// Use locale-independent comparison to avoid runtime-locale non-determinism
|
|
817
|
+
// Deterministic tiebreaker: when priority and novelty are equal, sort by
|
|
818
|
+
// scenario name then seeded hash. This ensures identical inputs always produce
|
|
819
|
+
// the same ordering regardless of runtime locale or JS sort stability — important
|
|
820
|
+
// because the LLM receives a ranked list and would otherwise produce inconsistent
|
|
821
|
+
// recommendations across runs for equally-ranked scenarios.
|
|
878
822
|
const nameA = a.scenario.scenarioName;
|
|
879
823
|
const nameB = b.scenario.scenarioName;
|
|
880
824
|
if (nameA < nameB)
|
|
@@ -909,12 +853,23 @@ ${detailBlocks}
|
|
|
909
853
|
mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, externalCoverage, analysis.existingTests.relevantExternalTestPaths ?? []);
|
|
910
854
|
}
|
|
911
855
|
else {
|
|
856
|
+
// Endpoint discovery hint: when backend service files changed but no endpoints were
|
|
857
|
+
// directly matched, guide the LLM to trace from service files → controllers → routes.
|
|
858
|
+
const endpointDiscoveryHint = hasBackendServiceChanges && diffContext
|
|
859
|
+
? `
|
|
860
|
+
**Endpoint Discovery Required** — the diff modifies backend service files (affected services: ${diffContext.affectedServices.join(", ")}) that don't directly define routes. You MUST:
|
|
861
|
+
1. Read the Routing entry-point files listed above
|
|
862
|
+
2. Trace which controllers/routers import the affected services
|
|
863
|
+
3. Identify the specific HTTP endpoints those controllers register
|
|
864
|
+
4. Use discovered endpoints as your GENERATE targets (contract + integration tests)
|
|
865
|
+
Do NOT default to UI-only tests — this PR has backend logic changes that require API-level testing.`
|
|
866
|
+
: "";
|
|
912
867
|
mainSection = `
|
|
913
868
|
## Draft Your Execution Plan
|
|
914
869
|
|
|
915
|
-
No pre-drafted scenarios available
|
|
870
|
+
No pre-drafted scenarios available.${endpointDiscoveryHint}
|
|
916
871
|
|
|
917
|
-
${buildScopeAssessmentSection(topN, maxGen)}
|
|
872
|
+
${buildScopeAssessmentSection(topN, maxGen, isUIOnlyPR)}
|
|
918
873
|
|
|
919
874
|
Draft tests from the endpoint interactions and source code above, following the same tool pipeline described in Tool Workflows below. Prioritize critical categories: security_boundary > data_integrity > business_rule > workflow > crud.
|
|
920
875
|
|
|
@@ -922,6 +877,8 @@ For each test: pick the highest-impact endpoint(s), draft a realistic scenario w
|
|
|
922
877
|
|
|
923
878
|
**Honor your Budget Plan: produce exactly the total you committed to (GENERATE + ADDITIONAL). No fewer, no padding with low-value tests.**
|
|
924
879
|
|
|
880
|
+
**Coverage breadth enforcement:** Your GENERATE items must span DIFFERENT HTTP methods or endpoints from your Coverage Reasoning surfaces. If you identified 5+ testable surfaces but all GENERATE items target the same method + path (e.g. all POST /permissions), you are violating diversity. Spread GENERATE slots across distinct surfaces; put remaining surfaces in ADDITIONAL recommendations.
|
|
881
|
+
|
|
925
882
|
## Recommendation Stability
|
|
926
883
|
- **Carry forward** previous additionalRecommendations that still apply — match by scenarioName (multi-step) or endpoint (single-endpoint). Re-derive category and priority from test content.
|
|
927
884
|
- **Only drop** a previous recommendation if its target endpoint was removed, its business logic changed, or it is now covered by a generated test.
|