@skyramp/mcp 0.1.4 → 0.1.6
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/build/index.js +6 -5
- package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js +11 -7
- package/build/prompts/personas.js +2 -1
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +2 -1
- package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +28 -0
- package/build/prompts/test-maintenance/driftAnalysisSections.js +2 -2
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +74 -16
- package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -0
- package/build/prompts/test-recommendation/recommendationSections.js +13 -43
- package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +19 -0
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +158 -70
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +24 -117
- package/build/prompts/testbot/testbot-prompts.js +12 -18
- package/build/prompts/testbot/testbot-prompts.test.js +2 -2
- package/build/resources/analysisResources.js +1 -0
- package/build/tools/code-refactor/enhanceAssertionsTool.js +2 -1
- package/build/tools/generate-tests/generateBatchScenarioRestTool.js +127 -4
- package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +205 -18
- package/build/tools/generate-tests/generateContractRestTool.js +19 -19
- package/build/tools/generate-tests/generateIntegrationRestTool.js +9 -2
- package/build/tools/generate-tests/generateUIRestTool.js +23 -8
- package/build/tools/test-management/analyzeChangesTool.js +222 -11
- package/build/tools/test-management/analyzeChangesTool.test.js +233 -1
- package/build/types/TestRecommendation.js +0 -2
- package/build/utils/featureFlags.js +4 -22
- package/build/utils/featureFlags.test.js +81 -0
- package/build/utils/httpDefaults.js +6 -1
- package/build/utils/httpDefaults.test.js +21 -0
- package/build/utils/scenarioDrafting.js +511 -100
- package/build/utils/scenarioDrafting.test.js +545 -259
- package/build/utils/telemetry.js +2 -1
- package/build/utils/utils.js +23 -0
- package/package.json +1 -1
|
@@ -4,9 +4,11 @@ import { WorkspaceAuthType, getDefaultAuthHeader, AUTH_MIDDLEWARE_PATTERNS_STR }
|
|
|
4
4
|
import { logger } from "../../utils/logger.js";
|
|
5
5
|
import { extractResourceFromPath } from "../../utils/routeParsers.js";
|
|
6
6
|
import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria, buildFewShotExamples, buildVerificationChecklist, buildGenerationRules, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
|
|
7
|
-
import { CATEGORY_PRIORITY,
|
|
7
|
+
import { CATEGORY_PRIORITY, TEST_CATEGORIES } from "../../types/TestRecommendation.js";
|
|
8
8
|
import { buildScopeAssessmentSection, isFrontendFile } from "./scopeAssessment.js";
|
|
9
|
-
import { resolveServiceDetailsRef } from "../../utils/
|
|
9
|
+
import { resolveServiceDetailsRef } from "../../utils/utils.js";
|
|
10
|
+
// Cached at module-load — flag is process-wide and cannot change per call.
|
|
11
|
+
const SERVICE_REFS = resolveServiceDetailsRef();
|
|
10
12
|
function formatTestLocations(locs) {
|
|
11
13
|
const entries = Object.entries(locs || {});
|
|
12
14
|
if (entries.length === 0)
|
|
@@ -27,8 +29,8 @@ function formatTestLocations(locs) {
|
|
|
27
29
|
// Categories map to HIGH / MEDIUM / LOW tiers.
|
|
28
30
|
// Within a tier, novelty (new > modified > existing) breaks ties,
|
|
29
31
|
// then cross-resource, step count, and finally the deterministic SHA-256 seed.
|
|
30
|
-
//
|
|
31
|
-
const PRIORITY_ORDER =
|
|
32
|
+
// CATEGORY_PRIORITY and PriorityTier imported from ../../types/TestRecommendation.js
|
|
33
|
+
const PRIORITY_ORDER = { CRITICAL: 4, HIGH: 3, MEDIUM: 2, LOW: 1 };
|
|
32
34
|
const NOVELTY_ORDER = { new: 3, modified: 2, existing: 1 };
|
|
33
35
|
function classifyNovelty(scenario, diffContext) {
|
|
34
36
|
if (!diffContext)
|
|
@@ -134,6 +136,12 @@ function buildExternalCoverageSet(testLocations) {
|
|
|
134
136
|
}
|
|
135
137
|
// ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
|
|
136
138
|
function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject = false, isFrontendOnlyProject = false, externalCoverage = new Set()) {
|
|
139
|
+
// Full-repo mode only — percentage-based UI/E2E slot targets (15% each, floor 1).
|
|
140
|
+
const rawE2E = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
|
|
141
|
+
const rawUI = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
|
|
142
|
+
const slotsFloor = Math.floor(topN / 2);
|
|
143
|
+
const minE2ESlots = Math.min(rawE2E, slotsFloor);
|
|
144
|
+
const minUISlots = Math.min(rawUI, Math.max(0, topN - minE2ESlots));
|
|
137
145
|
const authRef = authHeaderValue
|
|
138
146
|
? `, authHeader: "${authHeaderValue}"${authSchemeSnippet}`
|
|
139
147
|
: `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
|
|
@@ -162,9 +170,11 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
162
170
|
return true;
|
|
163
171
|
})
|
|
164
172
|
: scored;
|
|
165
|
-
//
|
|
166
|
-
|
|
167
|
-
|
|
173
|
+
// For full-stack repos, carve out E2E and UI slots before filling with backend tests.
|
|
174
|
+
const backendSlotCount = isFrontendProject
|
|
175
|
+
? Math.max(0, topN - minE2ESlots - minUISlots)
|
|
176
|
+
: topN;
|
|
177
|
+
const allItems = scoredFiltered.slice(0, backendSlotCount);
|
|
168
178
|
const byType = new Map();
|
|
169
179
|
for (const t of TYPE_ORDER)
|
|
170
180
|
byType.set(t, []);
|
|
@@ -188,7 +198,7 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
188
198
|
return [
|
|
189
199
|
`**${rank}. ${title}**`,
|
|
190
200
|
` ${s.description}`,
|
|
191
|
-
` ${step.method} ${step.path}
|
|
201
|
+
` ${step.method} ${step.path} \u2192 ${step.expectedStatusCode}`,
|
|
192
202
|
` Tool: \`skyramp_contract_test_generation({ endpointURL: "${endpointURL}", method: "${step.method}"${authRef}${dataParam} })\``,
|
|
193
203
|
` From source: fill in requestData field names and the specific production boundary this validates`,
|
|
194
204
|
].join("\n");
|
|
@@ -197,7 +207,7 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
197
207
|
const stepLines = s.steps.map(st => {
|
|
198
208
|
const isBody = ["POST", "PUT", "PATCH"].includes(st.method);
|
|
199
209
|
const bodyHint = isBody ? ` \u2014 body: <${st.method} ${st.path} required fields from source>` : "";
|
|
200
|
-
return ` ${st.order}. ${st.method} ${st.path}
|
|
210
|
+
return ` ${st.order}. ${st.method} ${st.path} \u2192 ${st.expectedStatusCode}: ${st.description}${bodyHint}`;
|
|
201
211
|
}).join("\n");
|
|
202
212
|
const isTraceBased = testType === "e2e" || testType === "ui";
|
|
203
213
|
let toolCallsBlock;
|
|
@@ -243,7 +253,7 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
243
253
|
dataParam = `, requestBody: <${st.method} ${st.path} required fields from source code>`;
|
|
244
254
|
}
|
|
245
255
|
}
|
|
246
|
-
return ` { method: "${st.method}", path: "${st.path}"
|
|
256
|
+
return ` { method: "${st.method}", path: "${st.path}", statusCode: ${st.expectedStatusCode}${dataParam} }`;
|
|
247
257
|
}).join(",\n");
|
|
248
258
|
toolCallsBlock = [
|
|
249
259
|
` skyramp_batch_scenario_test_generation({ scenarioName: "${s.scenarioName}", destination: "${destinationHost}", baseURL: "${baseUrl}"${scenarioAuthRef}, steps: [\n${batchSteps}\n ] })`,
|
|
@@ -275,11 +285,55 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
275
285
|
const entries = items.map((item, i) => renderItem(item, globalRank + i + 1)).join("\n\n");
|
|
276
286
|
return `### ${label} (${items.length})\n\n${entries}`;
|
|
277
287
|
});
|
|
278
|
-
|
|
288
|
+
// Pre-allocate E2E and UI placeholder sections for full-stack repos.
|
|
289
|
+
const e2eSectionParts = [];
|
|
290
|
+
const uiSectionParts = [];
|
|
291
|
+
if (isFrontendProject) {
|
|
292
|
+
for (let i = 0; i < minE2ESlots; i++) {
|
|
293
|
+
const rank = i + 1;
|
|
294
|
+
e2eSectionParts.push(`**${rank}. E2E User Journey ${i + 1}**\n` +
|
|
295
|
+
` End-to-end test covering a complete user journey through the frontend and backend.\n` +
|
|
296
|
+
` To generate: record a browser trace, then call the generation tool.\n` +
|
|
297
|
+
` browser_navigate({ url: "${baseUrl}" }) \u2192 exercise key user flow \u2192 skyramp_export_zip({ outputPath: "<repo>/.skyramp/e2e_journey_${i + 1}.zip" })\n` +
|
|
298
|
+
` Tool: \`skyramp_e2e_test_generation({ playwrightInput: "<repo>/.skyramp/e2e_journey_${i + 1}.zip"${authHeaderOnlyRef} })\`\n` +
|
|
299
|
+
` From source: read frontend components and their API calls to identify the highest-value user journey`);
|
|
300
|
+
}
|
|
301
|
+
for (let i = 0; i < minUISlots; i++) {
|
|
302
|
+
const rank = minE2ESlots + i + 1;
|
|
303
|
+
uiSectionParts.push(`**${rank}. UI Component Test ${i + 1}**\n` +
|
|
304
|
+
` Test key UI component interactions and state changes.\n` +
|
|
305
|
+
` To generate: record a browser trace, then call the generation tool.\n` +
|
|
306
|
+
` browser_navigate({ url: "${baseUrl}" }) \u2192 interact with UI components \u2192 skyramp_export_zip({ outputPath: "<repo>/.skyramp/ui_component_${i + 1}.zip" })\n` +
|
|
307
|
+
` Tool: \`skyramp_ui_test_generation({ playwrightInput: "<repo>/.skyramp/ui_component_${i + 1}.zip"${authHeaderOnlyRef} })\`\n` +
|
|
308
|
+
` From source: read frontend component files to identify interactions, form submissions, and state transitions`);
|
|
309
|
+
}
|
|
310
|
+
// Offset backend section ranks by the number of E2E + UI placeholders
|
|
311
|
+
const offset = minE2ESlots + minUISlots;
|
|
312
|
+
backendSections.forEach((_, idx) => {
|
|
313
|
+
const t = TYPE_ORDER.filter(t => (byType.get(t) ?? []).length > 0)[idx];
|
|
314
|
+
if (!t)
|
|
315
|
+
return;
|
|
316
|
+
const items = byType.get(t);
|
|
317
|
+
const label = TYPE_LABEL[t];
|
|
318
|
+
let globalRank = offset;
|
|
319
|
+
for (const prev of TYPE_ORDER) {
|
|
320
|
+
if (prev === t)
|
|
321
|
+
break;
|
|
322
|
+
globalRank += (byType.get(prev) ?? []).length;
|
|
323
|
+
}
|
|
324
|
+
backendSections[idx] = `### ${label} (${items.length})\n\n${items.map((item, i) => renderItem(item, globalRank + i + 1)).join("\n\n")}`;
|
|
325
|
+
});
|
|
326
|
+
}
|
|
327
|
+
const allSections = [
|
|
328
|
+
...(e2eSectionParts.length > 0 ? [`### E2E (${e2eSectionParts.length})\n\n${e2eSectionParts.join("\n\n")}`] : []),
|
|
329
|
+
...(uiSectionParts.length > 0 ? [`### UI (${uiSectionParts.length})\n\n${uiSectionParts.join("\n\n")}`] : []),
|
|
330
|
+
...backendSections,
|
|
331
|
+
];
|
|
332
|
+
const sections = allSections.join("\n\n");
|
|
279
333
|
const frontendTierNote = isFrontendOnlyProject
|
|
280
|
-
? `\n\n**Frontend repo:**
|
|
334
|
+
? `\n\n**Frontend repo:** supplement MUST include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.`
|
|
281
335
|
: isFrontendProject
|
|
282
|
-
? `\n\n**Full-stack repo:**
|
|
336
|
+
? `\n\n**Full-stack repo:** supplement MUST include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Add these before exhausting backend tiers.`
|
|
283
337
|
: "";
|
|
284
338
|
const repoSupplementNote = supplementCount > 0
|
|
285
339
|
? `
|
|
@@ -306,9 +360,9 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
306
360
|
</supplement_guidance>`
|
|
307
361
|
: "";
|
|
308
362
|
const typeMixText = isFrontendOnlyProject
|
|
309
|
-
? `This is a frontend repo. Focus on E2E and UI tests only.
|
|
363
|
+
? `This is a frontend repo. Focus on E2E and UI tests only. Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`). Do NOT add integration or contract tests.`
|
|
310
364
|
: isFrontendProject
|
|
311
|
-
? `This is a full-stack repo. Coverage ranking: E2E > UI > Integration > Contract.
|
|
365
|
+
? `This is a full-stack repo. Coverage ranking: E2E > UI > Integration > Contract. Include at least ${minE2ESlots} E2E test${minE2ESlots > 1 ? "s" : ""} (\`skyramp_e2e_test_generation\`) and at least ${minUISlots} UI test${minUISlots > 1 ? "s" : ""} (\`skyramp_ui_test_generation\`), in addition to backend integration and contract tests.`
|
|
312
366
|
: `Focus on integration and contract tests for all API endpoints.`;
|
|
313
367
|
return `## Test Recommendations — ${topN} total (grouped by test type)
|
|
314
368
|
|
|
@@ -337,7 +391,7 @@ Before filling in tool call parameters for each item, use the analysis data alre
|
|
|
337
391
|
- Computed/derived response fields and their formulas — assert exact values; read source for formula details not captured in the analysis
|
|
338
392
|
- Auth middleware — set authHeader/authScheme from the repository context above; FastAPI HTTPBearer → 403 not 401
|
|
339
393
|
- Storage backend — if Redis or schema-less, discard unique-constraint and cascade-delete scenarios
|
|
340
|
-
- Delete behavior —
|
|
394
|
+
- Delete behavior — hard-delete → 204; soft-delete/cancel → 200
|
|
341
395
|
|
|
342
396
|
${buildTestQualityCriteria()}
|
|
343
397
|
|
|
@@ -351,6 +405,16 @@ ${buildTestQualityCriteria()}
|
|
|
351
405
|
</enrichment_notes>`;
|
|
352
406
|
}
|
|
353
407
|
function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, externalCoverage = new Set(), relevantExternalTestPaths = []) {
|
|
408
|
+
const frontendUrl = "<frontend_url>";
|
|
409
|
+
// Slot allocation:
|
|
410
|
+
// - UI-only PR: all GENERATE slots are UI placeholders (no pre-ranked backend scenarios)
|
|
411
|
+
// - Mixed PR: last GENERATE slot is a UI placeholder; remaining slots are backend
|
|
412
|
+
// - Backend-only PR: all GENERATE slots are backend scenarios
|
|
413
|
+
const backendGenerateCount = isUIOnlyPR
|
|
414
|
+
? 0
|
|
415
|
+
: hasFrontendChanges
|
|
416
|
+
? Math.max(0, maxGen - 1)
|
|
417
|
+
: maxGen;
|
|
354
418
|
// Filter out scenarios whose primary method + resource + test type is already covered by external tests.
|
|
355
419
|
// Method-aware: an external test covering GET /orders won't block PUT /orders scenarios.
|
|
356
420
|
// This is the programmatic complement to the prompt-level Step 0 dedup instructions.
|
|
@@ -364,10 +428,8 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
364
428
|
return true;
|
|
365
429
|
})
|
|
366
430
|
: scored;
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
const generateItems = scoredAfterExternalDedup.slice(0, Math.min(maxGen, scoredAfterExternalDedup.length));
|
|
370
|
-
const rawAdditionalItems = scoredAfterExternalDedup.slice(maxGen, topN);
|
|
431
|
+
const generateItems = scoredAfterExternalDedup.slice(0, Math.min(backendGenerateCount, scoredAfterExternalDedup.length));
|
|
432
|
+
const rawAdditionalItems = scoredAfterExternalDedup.slice(backendGenerateCount, topN);
|
|
371
433
|
// Filter additional items whose primary resource + test type already appear in GENERATE
|
|
372
434
|
const generatedCoverage = new Set(generateItems.map(item => scenarioCoverageKey(item.scenario)));
|
|
373
435
|
const additionalItems = rawAdditionalItems.filter(item => !generatedCoverage.has(scenarioCoverageKey(item.scenario)));
|
|
@@ -380,10 +442,47 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
380
442
|
: authHeaderValue
|
|
381
443
|
? `, authHeader: "${authHeaderValue}"`
|
|
382
444
|
: `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
|
|
383
|
-
// UI-only
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
445
|
+
// UI-only: all GENERATE slots are UI test placeholders (one per changed component/flow)
|
|
446
|
+
const uiGenerateBlocks = isUIOnlyPR
|
|
447
|
+
? Array.from({ length: maxGen }, (_, i) => {
|
|
448
|
+
const rank = i + 1;
|
|
449
|
+
const zipPath = `<repositoryPath>/.skyramp/ui_test_${rank}_trace.zip`;
|
|
450
|
+
return hasTraces
|
|
451
|
+
? (`**#${rank} — GENERATE** | ui | workflow | new\n` +
|
|
452
|
+
`Scenario: ui-test-from-trace-${rank} (rename from the actual changed component/flow)\n` +
|
|
453
|
+
`Validates: UI interactions for a changed frontend component or flow.\n\n` +
|
|
454
|
+
`**Tool**: \`skyramp_ui_test_generation({ playwrightInput: "<discovered_trace_file_path>", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}`)
|
|
455
|
+
: (`**#${rank} — GENERATE** | ui | workflow | new\n` +
|
|
456
|
+
`Scenario: ui-test-for-changed-component-${rank} (rename from the actual changed component/flow)\n` +
|
|
457
|
+
`Validates: UI interactions for changed frontend component/flow ${rank}.\n\n` +
|
|
458
|
+
`**Tool workflow:**\n` +
|
|
459
|
+
` 1. \`browser_navigate({ url: "${frontendUrl}" })\`\n` +
|
|
460
|
+
` 2. Interact with the changed component (read the diff to identify which component changed and what interactions it supports)\n` +
|
|
461
|
+
` 3. \`browser_snapshot()\` after each key interaction\n` +
|
|
462
|
+
` 4. \`skyramp_export_zip({ outputPath: "${zipPath}" })\` — absolute path\n` +
|
|
463
|
+
` 5. \`skyramp_ui_test_generation({ playwrightInput: "${zipPath}", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}\n\n` +
|
|
464
|
+
`Each item must target a distinct changed component or user flow.`);
|
|
465
|
+
}).join("\n\n")
|
|
466
|
+
: "";
|
|
467
|
+
// Mixed PR: reserve the last GENERATE slot for a UI test for the changed frontend components.
|
|
468
|
+
// Guard: skip when maxGen=0 (caller explicitly requested no generation)
|
|
469
|
+
const uiRank = generateItems.length + 1;
|
|
470
|
+
const uiPlaceholderBlock = (hasFrontendChanges && !isUIOnlyPR && maxGen > 0)
|
|
471
|
+
? hasTraces
|
|
472
|
+
? (`**#${uiRank} — GENERATE** | ui | workflow | new\n` +
|
|
473
|
+
`Scenario: ui-test-for-changed-components (rename from the actual changed component/flow)\n` +
|
|
474
|
+
`Validates: UI interactions for the changed frontend components in this PR.\n\n` +
|
|
475
|
+
`**Tool**: \`skyramp_ui_test_generation({ playwrightInput: "<discovered_trace_file_path>", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}`)
|
|
476
|
+
: (`**#${uiRank} — GENERATE** | ui | workflow | new\n` +
|
|
477
|
+
`Scenario: ui-test-for-changed-components (rename from the actual changed component/flow)\n` +
|
|
478
|
+
`Validates: UI interactions for the changed frontend components in this PR.\n\n` +
|
|
479
|
+
`**Tool workflow:**\n` +
|
|
480
|
+
` 1. \`browser_navigate({ url: "${frontendUrl}" })\`\n` +
|
|
481
|
+
` 2. Interact with the changed component (read the diff to identify which component changed and what interactions it supports)\n` +
|
|
482
|
+
` 3. \`browser_snapshot()\` after each key interaction\n` +
|
|
483
|
+
` 4. \`skyramp_export_zip({ outputPath: "<repositoryPath>/.skyramp/ui_mixed_pr_trace.zip" })\` — absolute path\n` +
|
|
484
|
+
` 5. \`skyramp_ui_test_generation({ playwrightInput: "<repositoryPath>/.skyramp/ui_mixed_pr_trace.zip", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}\n\n` +
|
|
485
|
+
`Derive scenario name and steps from the actual changed frontend files.`)
|
|
387
486
|
: "";
|
|
388
487
|
const generateBlocks = generateItems.map((item, i) => {
|
|
389
488
|
const rank = i + 1;
|
|
@@ -400,7 +499,7 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
400
499
|
? `\n authHeader: "${authHeaderValue}"${authSchemeSnippet}`
|
|
401
500
|
: `\n authHeader: <resolve from workspace or OpenAPI securitySchemes>; authScheme: <if Authorization>`;
|
|
402
501
|
return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | ${item.novelty}\n` +
|
|
403
|
-
`${step.method} ${step.path}
|
|
502
|
+
`${step.method} ${step.path} → ${step.expectedStatusCode}\n` +
|
|
404
503
|
`Validates: ${s.description}\n\n` +
|
|
405
504
|
`**Context for generation**:\n` +
|
|
406
505
|
` Endpoint URL: ${endpointURL}${requestBodyData}${authContext}\n\n` +
|
|
@@ -423,7 +522,7 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
423
522
|
const bodyData = st.requestBody && Object.keys(st.requestBody).length > 0
|
|
424
523
|
? ` [use requestBody: ${JSON.stringify(st.requestBody)} — pass as JSON string in tool call]`
|
|
425
524
|
: "";
|
|
426
|
-
return ` ${st.order}. ${st.method} ${st.path}
|
|
525
|
+
return ` ${st.order}. ${st.method} ${st.path} → ${st.expectedStatusCode}: ${st.description}${chains}${bodyHint}${bodyData}${responseHint}`;
|
|
427
526
|
}).join("\n");
|
|
428
527
|
let destinationHost = "localhost";
|
|
429
528
|
try {
|
|
@@ -434,7 +533,9 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
434
533
|
const authContext = authHeaderValue
|
|
435
534
|
? `authHeader: "${authHeaderValue}"${authSchemeSnippet}`
|
|
436
535
|
: "authHeader: <resolve from workspace or OpenAPI securitySchemes>; authScheme: <if Authorization>";
|
|
437
|
-
const prereqNote =
|
|
536
|
+
const prereqNote = s.category === "new_endpoint"
|
|
537
|
+
? `\n**Prerequisite discovery**: Check for FK fields (product_id, user_id, order_id) in the endpoint's request body. If found, prepend a step to create that prerequisite resource first, then chain its primary key field into the dependent step using template variable syntax. Check the actual field name from the response body (\`id\`, \`uuid\`, \`_id\`, etc.), response header (\`Location\`), or cookie — do not assume \`id\`.`
|
|
538
|
+
: "";
|
|
438
539
|
const bugLine = s.bugCatchingTarget
|
|
439
540
|
? `**Bug to catch**: ${s.bugCatchingTarget}\n`
|
|
440
541
|
: "";
|
|
@@ -463,16 +564,17 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
463
564
|
const s = item.scenario;
|
|
464
565
|
const testType = s.testType ?? (s.steps.length === 1 ? "contract" : "integration");
|
|
465
566
|
const target = s.steps.length === 1
|
|
466
|
-
? `${s.steps[0].method} ${s.steps[0].path}
|
|
567
|
+
? `${s.steps[0].method} ${s.steps[0].path} → ${s.steps[0].expectedStatusCode}`
|
|
467
568
|
: `Scenario: ${s.scenarioName} (${s.steps.map(st => `${st.method} ${st.path}`).join(" → ")})`;
|
|
468
569
|
return `#${rank} [ADDITIONAL] | ${testType} | ${s.category} | ${item.novelty}\n ${target}\n Validates: ${s.description}`;
|
|
469
570
|
}).join("\n\n");
|
|
470
|
-
// UI/E2E guidance — the LLM adds
|
|
471
|
-
//
|
|
472
|
-
|
|
473
|
-
|
|
571
|
+
// UI/E2E guidance — the LLM adds as many as its Budget Plan calls for.
|
|
572
|
+
// Note: if a UI test already occupies a GENERATE slot (uiPlaceholderBlock), that slot
|
|
573
|
+
// satisfies the UI generate count — do not add it again in ADDITIONAL.
|
|
574
|
+
const uiGuidance = !isUIOnlyPR ? `
|
|
575
|
+
**UI/E2E tests (add per your Budget Plan):** If your Budget Plan requires UI/E2E items beyond what is already in your GENERATE list, append an [ADDITIONAL] entry for each. If a UI test already occupies a GENERATE slot above, that slot satisfies your UI/E2E generate count — do NOT add it again to ADDITIONAL. Tool workflow for each new item:
|
|
474
576
|
- **E2E**: ${hasTraces ? "Use discovered trace/recording files with `skyramp_e2e_test_generation`." : "Add to additionalRecommendations with a note that both a backend API trace (`skyramp_start_trace_collection` / `skyramp_stop_trace_collection`) and a browser Playwright recording must be collected in a live environment first. Do NOT attempt `skyramp_e2e_test_generation` without both traces present."}
|
|
475
|
-
- **UI**: ${hasTraces ? "Use an existing Playwright `.zip` trace with `skyramp_ui_test_generation`." : `Record a trace using \`browser_navigate\` + \`browser_snapshot\` + \`skyramp_export_zip\`, then call \`skyramp_ui_test_generation({ playwrightInput: "<zip_path>", outputDir: "<
|
|
577
|
+
- **UI**: ${hasTraces ? "Use an existing Playwright `.zip` trace with `skyramp_ui_test_generation`." : `Record a trace using \`browser_navigate\` + \`browser_snapshot\` + \`skyramp_export_zip\`, then call \`skyramp_ui_test_generation({ playwrightInput: "<zip_path>", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}.`}
|
|
476
578
|
Derive scenario names and steps from the actual changed frontend files. If your Budget Plan calls for 0% UI/E2E, omit this entirely.` : "";
|
|
477
579
|
const supplementNote = `\n**If your Budget Plan total exceeds the pre-ranked items listed above:** draft additional tests from source-code enrichment (Step 1). For each new or changed endpoint, identify boundary or variation scenarios — formula parameters, search/filter constraints, required field validation. Only after exhausting PR-specific scenarios, add generic patterns (auth boundary → 401, non-existent ID → 404). Do NOT supplement with tests whose endpoint + test type match a GENERATE item.`;
|
|
478
580
|
// ── PR / branch-diff mode: execution plan ────────────────────────────────
|
|
@@ -499,7 +601,7 @@ ${externalTestFilesList}For every GENERATE item below, check its endpoint path a
|
|
|
499
601
|
|
|
500
602
|
**Step 1 — Source-Code Enrichment (before executing anything)**
|
|
501
603
|
Read the source code for ALL changed files. Before generating each recommendation, quote the relevant source code in a <source_evidence> block — include the route handler signature, request body schema fields, response shape, and any computed field formulas. Use these quotes to derive tool call parameters. Look for:
|
|
502
|
-
- **Auth middleware** — check for known signals (${AUTH_MIDDLEWARE_PATTERNS_STR}). If any match, override \`authHeader\` and \`authScheme\` even if
|
|
604
|
+
- **Auth middleware** — check for known signals (${AUTH_MIDDLEWARE_PATTERNS_STR}). If any match, override \`authHeader\` and \`authScheme\` even if workspace.yml says authType: none. **If no known signal matches but the diff shows security-adjacent code** (decorators like \`@requiresRole\`/\`@Protected\`, function names like \`validateToken\`/\`checkPermission\`/\`verifyHMAC\`, or imports from auth/security packages), read the relevant source file to determine the actual auth scheme before proceeding. Auth handling for \`skyramp_integration_test_generation\` with \`scenarioFile\` is covered in the Tool Workflows section below.
|
|
503
605
|
- Business rules and formulas (e.g. total_cost = compute * rate + memory * rate)
|
|
504
606
|
- State transitions and domain constraints (e.g. budget cannot drop below current spend)
|
|
505
607
|
- Validation logic (field constraints, cross-field dependencies)
|
|
@@ -534,7 +636,7 @@ If these conditions are not met, add it to ADDITIONAL only — do NOT displace a
|
|
|
534
636
|
When a qualifying candidate is inserted: place it HIGH before MEDIUM before LOW; within the same priority, source-code-derived candidates go BEFORE structural ones. Re-number ranks after insertion. The top ${maxGen} ranked items become GENERATE candidates.
|
|
535
637
|
|
|
536
638
|
**Source-code validation gates (apply during Step 1):**
|
|
537
|
-
- **Cascade vs referential integrity**: If both a cascade-delete and a delete-blocked scenario appear for the same resource pair, keep only the one matching the source
|
|
639
|
+
- **Cascade vs referential integrity**: If both a cascade-delete and a delete-blocked scenario appear for the same resource pair, keep only the one matching the source FK delete policy (ON DELETE CASCADE / cascade=True / onDelete: 'CASCADE' → keep cascade-delete; RESTRICT/PROTECT/no annotation → keep delete-blocked). Remove the inapplicable variant.
|
|
538
640
|
- **Unique constraints**: Unique-constraint scenarios (duplicate POST → 409) are pre-drafted for all resources. Confirm enforcement before keeping: SQL UNIQUE index, Mongoose unique: true, Prisma @unique, or explicit duplicate-check code. If the backend is Redis, schema-less, or has no explicit constraint in the changed files, move to ADDITIONAL with a note — do NOT generate.
|
|
539
641
|
|
|
540
642
|
**Step 2 — Diversity check (using enriched knowledge from Step 1)**
|
|
@@ -550,7 +652,7 @@ For each pair of GENERATE items, ask: same HTTP method + path + step sequence +
|
|
|
550
652
|
Same step sequence with only payload differences (e.g. 10% vs 5% discount both returning 200) = same code path = duplicate. Different scenario names do not make duplicate tests distinct.
|
|
551
653
|
|
|
552
654
|
**Step 3 — Execute merged plan in rank order**
|
|
553
|
-
Replace any scenario that pairs unrelated resources with one reflecting actual
|
|
655
|
+
Replace any scenario that pairs unrelated resources with one reflecting actual FK relationships in the codebase.
|
|
554
656
|
Use the field names and values from the \`<source_evidence>\` blocks you quoted in Step 1 to fill all tool call parameters. Prefer reusing Step 1 evidence when it already resolves a placeholder, but if a placeholder cannot be replaced with concrete values from files already read, you may read the specific schema, model, or handler file needed to resolve it. Assert response field values, not just status codes.
|
|
555
657
|
|
|
556
658
|
${buildTestQualityCriteria()}
|
|
@@ -566,10 +668,10 @@ ${buildGenerationRules(isUIOnlyPR)}
|
|
|
566
668
|
### GENERATE (process these EXACTLY as listed, in order — after completing Steps 0–2 above; if Step 0 converts an item to UPDATE, backfill the ADD slot from ADDITIONAL following the priority order in Step 0)
|
|
567
669
|
|
|
568
670
|
${isUIOnlyPR
|
|
569
|
-
? (
|
|
570
|
-
: (generateBlocks || " (no pre-ranked generate items — draft your own based on endpoint analysis)")}
|
|
671
|
+
? (uiGenerateBlocks || " (no UI generate items — derive scenarios from changed frontend files)")
|
|
672
|
+
: ([generateBlocks, uiPlaceholderBlock].filter(Boolean).join("\n\n") || " (no pre-ranked generate items — draft your own based on endpoint analysis)")}
|
|
571
673
|
|
|
572
|
-
**
|
|
674
|
+
**COMPLIANCE CHECK**: Before proceeding, verify your generate list matches the items above. If you plan to generate a scenario with a different name than what is listed (e.g. you want to generate "order-update-discount-calculation" but the plan says "orders-patch-add-items-recalculate"), STOP — use the plan's scenario name and steps. Add your alternative to ADDITIONAL instead. One retry on failure then skip to next item.
|
|
573
675
|
|
|
574
676
|
### ADDITIONAL (list in additionalRecommendations in this order after Step 1 insertion)
|
|
575
677
|
|
|
@@ -604,17 +706,10 @@ export function buildRecommendationPrompt(analysis, analysisScope = AnalysisScop
|
|
|
604
706
|
const hasFrontendChanges = isDiffScope && diffContext
|
|
605
707
|
? filteredChangedFiles.some(f => isFrontendFile(f))
|
|
606
708
|
: false;
|
|
607
|
-
// Backend changes detected if:
|
|
608
|
-
// 1. Endpoints directly matched from changed files (new/modified/removed), OR
|
|
609
|
-
// 2. Changed files are in backend service/model/middleware directories (affectedServices non-empty)
|
|
610
|
-
// but couldn't be mapped to specific endpoints (service-layer changes like services/items.ts)
|
|
611
709
|
const hasApiChanges = isDiffScope && diffContext
|
|
612
710
|
? (diffContext.newEndpoints.length > 0 || diffContext.modifiedEndpoints.length > 0 || (diffContext.removedEndpoints?.length ?? 0) > 0)
|
|
613
711
|
: false;
|
|
614
|
-
const
|
|
615
|
-
? (diffContext.affectedServices.length > 0 && filteredChangedFiles.some(f => !isFrontendFile(f) && /\.(ts|js|py|java|go|rb|rs|cs)$/.test(f)))
|
|
616
|
-
: false;
|
|
617
|
-
const isUIOnlyPR = hasFrontendChanges && !hasApiChanges && !hasBackendServiceChanges;
|
|
712
|
+
const isUIOnlyPR = hasFrontendChanges && !hasApiChanges;
|
|
618
713
|
const hasTraces = (analysis.artifacts?.traceFiles?.length ?? 0) > 0 ||
|
|
619
714
|
(analysis.artifacts?.playwrightRecordings?.length ?? 0) > 0;
|
|
620
715
|
// ── Mode preamble ──
|
|
@@ -661,7 +756,7 @@ Output should be concise and immediately actionable.`
|
|
|
661
756
|
changedLines.push(` ${m.method} ${ep.path} [removed]`);
|
|
662
757
|
}
|
|
663
758
|
}
|
|
664
|
-
endpointLines = `**
|
|
759
|
+
endpointLines = `**Likely changed in this PR (from static file→endpoint mapping — verify against diff in Step 2):**\n${changedLines.join("\n") || " none"}\n\n**Other endpoints (reference only):**\n${otherLines.join("\n") || " none"}`;
|
|
665
760
|
}
|
|
666
761
|
else {
|
|
667
762
|
endpointLines = allEndpoints
|
|
@@ -734,7 +829,7 @@ Framework: ${analysis.projectClassification.primaryFramework} (${analysis.projec
|
|
|
734
829
|
Project type: ${analysis.projectClassification.projectType}
|
|
735
830
|
Auth: ${authMethod} (header: ${authHeaderValue}${authTypeValue ? `, type: ${authTypeValue}` : ""})
|
|
736
831
|
Base URL: ${analysis.apiEndpoints.baseUrl}
|
|
737
|
-
|
|
832
|
+
Candidate endpoints from static scan — unverified, confirm paths against spec or source before use (${analysis.apiEndpoints.totalCount}):
|
|
738
833
|
${endpointLines}${testFingerprint}
|
|
739
834
|
`.trim();
|
|
740
835
|
// ── Branch diff ──
|
|
@@ -755,7 +850,7 @@ Affected services: ${diffContext.affectedServices.join(", ") || "N/A"}
|
|
|
755
850
|
|
|
756
851
|
Focus on tests that validate these changes and how they interact with existing resources.
|
|
757
852
|
For removed endpoints: verify they now return 404 or the appropriate deprecation status code.
|
|
758
|
-
Allocate your test budget to endpoints listed under "
|
|
853
|
+
Allocate your test budget to endpoints listed under "Likely changed in this PR". Use other endpoints only as setup steps (e.g. creating a resource before testing its deletion).
|
|
759
854
|
`;
|
|
760
855
|
}
|
|
761
856
|
// ── Interactions ──
|
|
@@ -814,11 +909,17 @@ ${detailBlocks}
|
|
|
814
909
|
const na = NOVELTY_ORDER[a.novelty], nb = NOVELTY_ORDER[b.novelty];
|
|
815
910
|
if (nb !== na)
|
|
816
911
|
return nb - na;
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
912
|
+
const crossA = a.scenario.steps.length > 2 ? 1 : 0;
|
|
913
|
+
const crossB = b.scenario.steps.length > 2 ? 1 : 0;
|
|
914
|
+
if (crossB !== crossA)
|
|
915
|
+
return crossB - crossA;
|
|
916
|
+
if (b.scenario.steps.length !== a.scenario.steps.length)
|
|
917
|
+
return b.scenario.steps.length - a.scenario.steps.length;
|
|
918
|
+
const errorA = a.scenario.steps.some(s => s.interactionType === "error" || s.interactionType === "edge-case") ? 1 : 0;
|
|
919
|
+
const errorB = b.scenario.steps.some(s => s.interactionType === "error" || s.interactionType === "edge-case") ? 1 : 0;
|
|
920
|
+
if (errorB !== errorA)
|
|
921
|
+
return errorB - errorA;
|
|
922
|
+
// Use locale-independent comparison to avoid runtime-locale non-determinism
|
|
822
923
|
const nameA = a.scenario.scenarioName;
|
|
823
924
|
const nameB = b.scenario.scenarioName;
|
|
824
925
|
if (nameA < nameB)
|
|
@@ -853,23 +954,12 @@ ${detailBlocks}
|
|
|
853
954
|
mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, externalCoverage, analysis.existingTests.relevantExternalTestPaths ?? []);
|
|
854
955
|
}
|
|
855
956
|
else {
|
|
856
|
-
// Endpoint discovery hint: when backend service files changed but no endpoints were
|
|
857
|
-
// directly matched, guide the LLM to trace from service files → controllers → routes.
|
|
858
|
-
const endpointDiscoveryHint = hasBackendServiceChanges && diffContext
|
|
859
|
-
? `
|
|
860
|
-
**Endpoint Discovery Required** — the diff modifies backend service files (affected services: ${diffContext.affectedServices.join(", ")}) that don't directly define routes. You MUST:
|
|
861
|
-
1. Read the Routing entry-point files listed above
|
|
862
|
-
2. Trace which controllers/routers import the affected services
|
|
863
|
-
3. Identify the specific HTTP endpoints those controllers register
|
|
864
|
-
4. Use discovered endpoints as your GENERATE targets (contract + integration tests)
|
|
865
|
-
Do NOT default to UI-only tests — this PR has backend logic changes that require API-level testing.`
|
|
866
|
-
: "";
|
|
867
957
|
mainSection = `
|
|
868
958
|
## Draft Your Execution Plan
|
|
869
959
|
|
|
870
|
-
No pre-drafted scenarios available
|
|
960
|
+
No pre-drafted scenarios available.
|
|
871
961
|
|
|
872
|
-
${buildScopeAssessmentSection(topN, maxGen
|
|
962
|
+
${buildScopeAssessmentSection(topN, maxGen)}
|
|
873
963
|
|
|
874
964
|
Draft tests from the endpoint interactions and source code above, following the same tool pipeline described in Tool Workflows below. Prioritize critical categories: security_boundary > data_integrity > business_rule > workflow > crud.
|
|
875
965
|
|
|
@@ -877,8 +967,6 @@ For each test: pick the highest-impact endpoint(s), draft a realistic scenario w
|
|
|
877
967
|
|
|
878
968
|
**Honor your Budget Plan: produce exactly the total you committed to (GENERATE + ADDITIONAL). No fewer, no padding with low-value tests.**
|
|
879
969
|
|
|
880
|
-
**Coverage breadth enforcement:** Your GENERATE items must span DIFFERENT HTTP methods or endpoints from your Coverage Reasoning surfaces. If you identified 5+ testable surfaces but all GENERATE items target the same method + path (e.g. all POST /permissions), you are violating diversity. Spread GENERATE slots across distinct surfaces; put remaining surfaces in ADDITIONAL recommendations.
|
|
881
|
-
|
|
882
970
|
## Recommendation Stability
|
|
883
971
|
- **Carry forward** previous additionalRecommendations that still apply — match by scenarioName (multi-step) or endpoint (single-endpoint). Re-derive category and priority from test content.
|
|
884
972
|
- **Only drop** a previous recommendation if its target endpoint was removed, its business logic changed, or it is now covered by a generated test.
|