@skyramp/mcp 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +6 -5
- package/build/prompts/initialize-workspace/initializeWorkspacePrompt.js +150 -149
- package/build/prompts/personas.js +2 -1
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +2 -1
- package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +28 -0
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +72 -14
- package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -0
- package/build/prompts/test-recommendation/diffExecutionPlan.js +290 -0
- package/build/prompts/test-recommendation/fullRepoCatalog.js +271 -0
- package/build/prompts/test-recommendation/recommendationSections.js +4 -2
- package/build/prompts/test-recommendation/recommendationShared.js +68 -0
- package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +20 -4
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +11 -640
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +6 -6
- package/build/prompts/testbot/testbot-prompts.js +19 -7
- package/build/prompts/testbot/testbot-prompts.test.js +22 -5
- package/build/resources/analysisResources.js +1 -0
- package/build/services/ScenarioGenerationService.js +5 -1
- package/build/services/TestGenerationService.js +3 -0
- package/build/tools/code-refactor/codeReuseTool.js +3 -0
- package/build/tools/code-refactor/enhanceAssertionsTool.js +5 -1
- package/build/tools/code-refactor/modularizationTool.js +3 -0
- package/build/tools/generate-tests/generateBatchScenarioRestTool.js +123 -1
- package/build/tools/generate-tests/generateBatchScenarioRestTool.test.js +205 -9
- package/build/tools/generate-tests/generateContractRestTool.js +19 -19
- package/build/tools/generate-tests/generateIntegrationRestTool.js +9 -2
- package/build/tools/generate-tests/generateUIRestTool.js +23 -8
- package/build/tools/test-management/analyzeChangesTool.js +218 -2
- package/build/tools/test-management/analyzeChangesTool.test.js +233 -1
- package/build/tools/workspace/initializeWorkspaceTool.js +1 -1
- package/build/utils/docker.test.js +1 -1
- package/build/utils/featureFlags.js +7 -0
- package/build/utils/featureFlags.test.js +81 -0
- package/build/utils/gitStaging.js +18 -0
- package/build/utils/gitStaging.test.js +87 -0
- package/build/utils/httpDefaults.js +17 -0
- package/build/utils/httpDefaults.test.js +21 -0
- package/build/utils/scenarioDrafting.js +37 -15
- package/build/utils/scenarioDrafting.test.js +66 -0
- package/build/utils/telemetry.js +2 -1
- package/build/utils/utils.js +23 -0
- package/build/utils/versions.js +1 -1
- package/node_modules/playwright/lib/mcp/browser/context.js +2 -0
- package/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +2 -2
- package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +17 -26
- package/package.json +2 -2
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
// Swap the "@skyramp/skyramp" package for a stub that exposes only
// WorkspaceConfigManager.create as a jest mock function.
// NOTE(review): presumably this lets the module under test be imported
// without loading the real SDK — confirm against analysisOutputPrompt.js.
jest.mock("@skyramp/skyramp", () => {
    const createStub = jest.fn();
    return {
        WorkspaceConfigManager: { create: createStub },
    };
});
|
|
4
|
+
import { buildAnalysisOutputText } from "./analysisOutputPrompt.js";
|
|
5
|
+
import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
// Minimal fixture factory
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
/**
 * Builds the parameter object passed to buildAnalysisOutputText in these tests.
 *
 * Starts from a fixed set of defaults (branch-diff scope, empty endpoint and
 * diff lists) and lays `overrides` on top. The merge is shallow: supplying
 * `parsedDiff` in `overrides` replaces the whole default diff object.
 *
 * @param {object} [overrides={}] - Top-level keys that replace the defaults.
 * @returns {object} A fresh parameter object on every call.
 */
function baseParams(overrides = {}) {
    const emptyDiff = {
        changedFiles: [],
        newEndpoints: [],
        modifiedEndpoints: [],
    };
    const defaults = {
        sessionId: "test-session-id",
        repositoryPath: "/repo",
        analysisScope: AnalysisScope.CurrentBranchDiff,
        scannedEndpoints: [],
        wsBaseUrl: "http://localhost:3000",
        wsAuthHeader: "Authorization",
        wsAuthType: "",
        wsSchemaPath: "",
        routerMountContext: [],
        parsedDiff: emptyDiff,
    };
    // Later spread wins, so any key present in `overrides` shadows its default.
    return { ...defaults, ...overrides };
}
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// Step 2.3 caller-tracing block
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
describe("buildAnalysisOutputText — unmatchedFiles / Step 2.3 caller-tracing", () => {
    // Markers that several cases look for (or assert the absence of).
    const STEP_23 = "Step 2.3";
    const STEP_23_TITLE = "Trace callers of changed non-route files";
    const STEP_25_TITLE = "Step 2.5: Identify critical patterns";

    // Renders the prompt text for the default fixture plus the given overrides.
    const render = (overrides) => buildAnalysisOutputText(baseParams(overrides));

    it("includes Step 2.3 block when unmatchedFiles is non-empty and scope is CurrentBranchDiff", () => {
        const text = render({
            unmatchedFiles: [
                "server/src/main/java/helpers/DataUtils.java",
                "server/src/main/java/helpers/MustacheHelper.java",
            ],
        });
        expect(text).toContain("### Step 2.3: Trace callers of changed non-route files");
        expect(text).toContain("DataUtils.java");
        expect(text).toContain("MustacheHelper.java");
        expect(text).toContain("/execute");
    });

    it("lists each unmatched file as a bullet in the Step 2.3 block", () => {
        const text = render({
            unmatchedFiles: ["src/services/OrderService.ts", "src/utils/pricingHelper.ts"],
        });
        expect(text).toContain("- `src/services/OrderService.ts`");
        expect(text).toContain("- `src/utils/pricingHelper.ts`");
    });

    it("omits Step 2.3 block when unmatchedFiles is empty", () => {
        const text = render({ unmatchedFiles: [] });
        expect(text).not.toContain(STEP_23);
        expect(text).not.toContain(STEP_23_TITLE);
    });

    it("omits Step 2.3 block when unmatchedFiles is undefined", () => {
        const text = render({ unmatchedFiles: undefined });
        expect(text).not.toContain(STEP_23);
    });

    it("omits Step 2.3 block when scope is full_repo even if unmatchedFiles is non-empty", () => {
        const text = render({
            analysisScope: AnalysisScope.FullRepo,
            unmatchedFiles: ["src/services/SomeService.ts"],
        });
        expect(text).not.toContain(STEP_23);
    });

    it("Step 2.3 appears before Step 2.5 in the output", () => {
        const text = render({ unmatchedFiles: ["src/utils/helper.ts"] });
        const step23Index = text.indexOf(STEP_23);
        const step25Index = text.indexOf("Step 2.5");
        // Both sections must exist before their relative order means anything.
        expect(step23Index).toBeGreaterThan(-1);
        expect(step25Index).toBeGreaterThan(-1);
        expect(step23Index).toBeLessThan(step25Index);
    });

    it("Step 2.5 critical-patterns block is always present regardless of unmatchedFiles", () => {
        const textWithUnmatched = render({ unmatchedFiles: ["src/utils/foo.ts"] });
        const textWithoutUnmatched = render({ unmatchedFiles: [] });
        expect(textWithUnmatched).toContain(STEP_25_TITLE);
        expect(textWithoutUnmatched).toContain(STEP_25_TITLE);
    });

    it("omits Step 2.3 block when unmatchedFiles contains only frontend component files (UI-only PR)", () => {
        // Only .tsx / .jsx / .vue / .svelte component files are unmatched here;
        // the case asserts that no caller-tracing section is emitted for them.
        const text = render({
            unmatchedFiles: [
                "src/components/Button.tsx",
                "src/pages/Dashboard.jsx",
                "src/views/UserProfile.vue",
                "src/routes/Settings.svelte",
            ],
        });
        expect(text).not.toContain(STEP_23);
        expect(text).not.toContain(STEP_23_TITLE);
    });

    it("omits Step 2.3 block when unmatchedFiles contains only non-code files (docs/config)", () => {
        // Docs and config files only; the case asserts Step 2.3 is not emitted.
        const text = render({
            unmatchedFiles: [
                "README.md",
                "package.json",
                "docker-compose.yml",
                ".github/workflows/ci.yml",
            ],
        });
        expect(text).not.toContain(STEP_23);
        expect(text).not.toContain(STEP_23_TITLE);
    });

    it("emits Step 2.3 for backend code files but excludes frontend/non-code siblings", () => {
        // Mixed input: a Java helper, a React component and a config file.
        // Only the Java helper is expected to show up in the output.
        const text = render({
            unmatchedFiles: [
                "server/helpers/DataUtils.java",
                "client/components/ActionButton.tsx",
                "package.json",
            ],
        });
        expect(text).toContain(STEP_23);
        expect(text).toContain("DataUtils.java");
        expect(text).not.toContain("ActionButton.tsx");
        expect(text).not.toContain("package.json");
    });

    it("omits Step 2.3 when unmatchedFiles contains .ts/.js frontend files but isUIOnly is true", () => {
        // The same two .ts files are supplied as parsedDiff.changedFiles and as
        // unmatchedFiles; the case asserts Step 2.3 is suppressed for this input.
        // NOTE(review): the title attributes this to an isUIOnly guard derived
        // from changedFiles — that logic lives in analysisOutputPrompt.js.
        const text = render({
            parsedDiff: {
                changedFiles: ["src/services/auth.service.ts", "src/hooks/useAuth.ts"],
                newEndpoints: [],
                modifiedEndpoints: [],
            },
            unmatchedFiles: ["src/services/auth.service.ts", "src/hooks/useAuth.ts"],
        });
        expect(text).not.toContain(STEP_23);
        expect(text).not.toContain(STEP_23_TITLE);
    });
});
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
import { AUTH_MIDDLEWARE_PATTERNS_STR } from "../../utils/workspaceAuth.js";
|
|
2
|
+
import { resolveServiceDetailsRef } from "../../utils/utils.js";
|
|
3
|
+
import { logger } from "../../utils/logger.js";
|
|
4
|
+
import { TEST_CATEGORIES } from "../../types/TestRecommendation.js";
|
|
5
|
+
import { buildScopeAssessmentSection } from "./scopeAssessment.js";
|
|
6
|
+
import { buildTestPatternGuidelines, buildTestQualityCriteria, buildGenerationRules, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
|
|
7
|
+
import { externalDedupKey, scenarioCoverageKey } from "./recommendationShared.js";
|
|
8
|
+
// Resolved once when this module is loaded; buildExecutionPlan interpolates
// SERVICE_REFS.frontendTestDirRef into the UI test-generation instructions it emits.
const SERVICE_REFS = resolveServiceDetailsRef();
|
|
9
|
+
export function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, externalCoverage = new Set(), relevantExternalTestPaths = []) {
|
|
10
|
+
const frontendUrl = "<frontend_url>";
|
|
11
|
+
// Slot allocation:
|
|
12
|
+
// - UI-only PR: all GENERATE slots are UI placeholders (no pre-ranked backend scenarios)
|
|
13
|
+
// - Mixed PR: last GENERATE slot is a UI placeholder; remaining slots are backend
|
|
14
|
+
// - Backend-only PR: all GENERATE slots are backend scenarios
|
|
15
|
+
const backendGenerateCount = isUIOnlyPR
|
|
16
|
+
? 0
|
|
17
|
+
: hasFrontendChanges
|
|
18
|
+
? Math.max(0, maxGen - 1)
|
|
19
|
+
: maxGen;
|
|
20
|
+
// Filter out scenarios whose primary method + resource + test type is already covered by external tests.
|
|
21
|
+
// Method-aware: an external test covering GET /orders won't block PUT /orders scenarios.
|
|
22
|
+
// This is the programmatic complement to the prompt-level Step 0 dedup instructions.
|
|
23
|
+
const scoredAfterExternalDedup = externalCoverage.size > 0
|
|
24
|
+
? scored.filter(item => {
|
|
25
|
+
const key = externalDedupKey(item.scenario);
|
|
26
|
+
if (externalCoverage.has(key)) {
|
|
27
|
+
logger.info(`External dedup: skipping "${item.scenario.scenarioName}" (${key}) — covered by external test`);
|
|
28
|
+
return false;
|
|
29
|
+
}
|
|
30
|
+
return true;
|
|
31
|
+
})
|
|
32
|
+
: scored;
|
|
33
|
+
const generateItems = scoredAfterExternalDedup.slice(0, Math.min(backendGenerateCount, scoredAfterExternalDedup.length));
|
|
34
|
+
const rawAdditionalItems = scoredAfterExternalDedup.slice(backendGenerateCount, topN);
|
|
35
|
+
// Filter additional items whose primary resource + test type already appear in GENERATE
|
|
36
|
+
const generatedCoverage = new Set(generateItems.map(item => scenarioCoverageKey(item.scenario)));
|
|
37
|
+
const additionalItems = rawAdditionalItems.filter(item => !generatedCoverage.has(scenarioCoverageKey(item.scenario)));
|
|
38
|
+
const hasWorkspaceAuthType = !!authTypeValue && authTypeValue !== "none";
|
|
39
|
+
// For skyramp_integration_test_generation with scenarioFile:
|
|
40
|
+
// - If workspace has authType set: omit auth entirely — workspace handles Bearer prefix.
|
|
41
|
+
// - If no authType: pass authHeader only (no authScheme).
|
|
42
|
+
const authHeaderOnlyRef = hasWorkspaceAuthType
|
|
43
|
+
? ""
|
|
44
|
+
: authHeaderValue
|
|
45
|
+
? `, authHeader: "${authHeaderValue}"`
|
|
46
|
+
: `, authHeader: <check OpenAPI securitySchemes or auth middleware; "" if confirmed unauthenticated>`;
|
|
47
|
+
// UI-only: all GENERATE slots are UI test placeholders (one per changed component/flow)
|
|
48
|
+
const uiGenerateBlocks = isUIOnlyPR
|
|
49
|
+
? Array.from({ length: maxGen }, (_, i) => {
|
|
50
|
+
const rank = i + 1;
|
|
51
|
+
const zipPath = `<repositoryPath>/.skyramp/ui_test_${rank}_trace.zip`;
|
|
52
|
+
return hasTraces
|
|
53
|
+
? (`**#${rank} — GENERATE** | ui | workflow | new\n` +
|
|
54
|
+
`Scenario: ui-test-from-trace-${rank} (rename from the actual changed component/flow)\n` +
|
|
55
|
+
`Validates: UI interactions for a changed frontend component or flow.\n\n` +
|
|
56
|
+
`**Tool**: \`skyramp_ui_test_generation({ playwrightInput: "<discovered_trace_file_path>", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}`)
|
|
57
|
+
: (`**#${rank} — GENERATE** | ui | workflow | new\n` +
|
|
58
|
+
`Scenario: ui-test-for-changed-component-${rank} (rename from the actual changed component/flow)\n` +
|
|
59
|
+
`Validates: UI interactions for changed frontend component/flow ${rank}.\n\n` +
|
|
60
|
+
`**Tool workflow:**\n` +
|
|
61
|
+
` 1. \`browser_navigate({ url: "${frontendUrl}" })\`\n` +
|
|
62
|
+
` 2. Interact with the changed component (read the diff to identify which component changed and what interactions it supports)\n` +
|
|
63
|
+
` 3. \`browser_snapshot()\` after each key interaction\n` +
|
|
64
|
+
` 4. \`skyramp_export_zip({ outputPath: "${zipPath}" })\` — absolute path\n` +
|
|
65
|
+
` 5. \`skyramp_ui_test_generation({ playwrightInput: "${zipPath}", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}\n\n` +
|
|
66
|
+
`Each item must target a distinct changed component or user flow.`);
|
|
67
|
+
}).join("\n\n")
|
|
68
|
+
: "";
|
|
69
|
+
// Mixed PR: reserve the last GENERATE slot for a UI test for the changed frontend components.
|
|
70
|
+
// Guard: skip when maxGen=0 (caller explicitly requested no generation)
|
|
71
|
+
const uiRank = generateItems.length + 1;
|
|
72
|
+
const uiPlaceholderBlock = (hasFrontendChanges && !isUIOnlyPR && maxGen > 0)
|
|
73
|
+
? hasTraces
|
|
74
|
+
? (`**#${uiRank} — GENERATE** | ui | workflow | new\n` +
|
|
75
|
+
`Scenario: ui-test-for-changed-components (rename from the actual changed component/flow)\n` +
|
|
76
|
+
`Validates: UI interactions for the changed frontend components in this PR.\n\n` +
|
|
77
|
+
`**Tool**: \`skyramp_ui_test_generation({ playwrightInput: "<discovered_trace_file_path>", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}`)
|
|
78
|
+
: (`**#${uiRank} — GENERATE** | ui | workflow | new\n` +
|
|
79
|
+
`Scenario: ui-test-for-changed-components (rename from the actual changed component/flow)\n` +
|
|
80
|
+
`Validates: UI interactions for the changed frontend components in this PR.\n\n` +
|
|
81
|
+
`**Tool workflow:**\n` +
|
|
82
|
+
` 1. \`browser_navigate({ url: "${frontendUrl}" })\`\n` +
|
|
83
|
+
` 2. Interact with the changed component (read the diff to identify which component changed and what interactions it supports)\n` +
|
|
84
|
+
` 3. \`browser_snapshot()\` after each key interaction\n` +
|
|
85
|
+
` 4. \`skyramp_export_zip({ outputPath: "<repositoryPath>/.skyramp/ui_mixed_pr_trace.zip" })\` — absolute path\n` +
|
|
86
|
+
` 5. \`skyramp_ui_test_generation({ playwrightInput: "<repositoryPath>/.skyramp/ui_mixed_pr_trace.zip", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}\n\n` +
|
|
87
|
+
`Derive scenario name and steps from the actual changed frontend files.`)
|
|
88
|
+
: "";
|
|
89
|
+
const generateBlocks = generateItems.map((item, i) => {
|
|
90
|
+
const rank = i + 1;
|
|
91
|
+
const s = item.scenario;
|
|
92
|
+
const testType = s.testType ?? (s.steps.length === 1 ? "contract" : "integration");
|
|
93
|
+
if (testType === "contract") {
|
|
94
|
+
const step = s.steps[0];
|
|
95
|
+
const endpointURL = `${baseUrl}${step.path}`;
|
|
96
|
+
const isBodyMethod = ["POST", "PUT", "PATCH"].includes(step.method);
|
|
97
|
+
const requestBodyData = step.requestBody && Object.keys(step.requestBody).length > 0
|
|
98
|
+
? `\n Request body: ${JSON.stringify(step.requestBody)} (pass as JSON string in tool call, NOT as object)`
|
|
99
|
+
: (isBodyMethod ? `\n Request body: <derive from source code schemas>` : "");
|
|
100
|
+
const authContext = authHeaderValue
|
|
101
|
+
? `\n authHeader: "${authHeaderValue}"${authSchemeSnippet}`
|
|
102
|
+
: `\n authHeader: <resolve from workspace or OpenAPI securitySchemes>; authScheme: <if Authorization>`;
|
|
103
|
+
return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | ${item.novelty}\n` +
|
|
104
|
+
`${step.method} ${step.path} → ${step.expectedStatusCode}\n` +
|
|
105
|
+
`Validates: ${s.description}\n\n` +
|
|
106
|
+
`**Context for generation**:\n` +
|
|
107
|
+
` Endpoint URL: ${endpointURL}${requestBodyData}${authContext}\n\n` +
|
|
108
|
+
`**Tool**: skyramp_contract_test_generation (see tool description for parameter structure)`);
|
|
109
|
+
}
|
|
110
|
+
else {
|
|
111
|
+
// integration / e2e / ui — multi-step scenario pipeline
|
|
112
|
+
const stepLines = s.steps.map((st) => {
|
|
113
|
+
const chains = st.chainsFrom
|
|
114
|
+
? ` (chains: ${Array.isArray(st.chainsFrom)
|
|
115
|
+
? st.chainsFrom.map(c => `${c.sourceField} from step ${c.sourceStep}`).join(", ")
|
|
116
|
+
: `${st.chainsFrom.sourceField} from step ${st.chainsFrom.sourceStep}`})`
|
|
117
|
+
: "";
|
|
118
|
+
const bodyHint = st.bodyMustInclude?.length
|
|
119
|
+
? ` [required fields: ${st.bodyMustInclude.join(", ")}]`
|
|
120
|
+
: "";
|
|
121
|
+
const responseHint = st.expectedResponseFields?.length
|
|
122
|
+
? ` [assert: ${st.expectedResponseFields.join(", ")}]`
|
|
123
|
+
: "";
|
|
124
|
+
const bodyData = st.requestBody && Object.keys(st.requestBody).length > 0
|
|
125
|
+
? ` [use requestBody: ${JSON.stringify(st.requestBody)} — pass as JSON string in tool call]`
|
|
126
|
+
: "";
|
|
127
|
+
return ` ${st.order}. ${st.method} ${st.path} → ${st.expectedStatusCode}: ${st.description}${chains}${bodyHint}${bodyData}${responseHint}`;
|
|
128
|
+
}).join("\n");
|
|
129
|
+
let destinationHost = "localhost";
|
|
130
|
+
try {
|
|
131
|
+
const parsed = new URL(baseUrl);
|
|
132
|
+
destinationHost = parsed.hostname;
|
|
133
|
+
}
|
|
134
|
+
catch { /* use localhost as fallback */ }
|
|
135
|
+
const authContext = authHeaderValue
|
|
136
|
+
? `authHeader: "${authHeaderValue}"${authSchemeSnippet}`
|
|
137
|
+
: "authHeader: <resolve from workspace or OpenAPI securitySchemes>; authScheme: <if Authorization>";
|
|
138
|
+
const prereqNote = s.category === "new_endpoint"
|
|
139
|
+
? `\n**Prerequisite discovery**: Check for FK fields (product_id, user_id, order_id) in the endpoint's request body. If found, prepend a step to create that prerequisite resource first, then chain its primary key field into the dependent step using template variable syntax. Check the actual field name from the response body (\`id\`, \`uuid\`, \`_id\`, etc.), response header (\`Location\`), or cookie — do not assume \`id\`.`
|
|
140
|
+
: "";
|
|
141
|
+
const bugLine = s.bugCatchingTarget
|
|
142
|
+
? `**Bug to catch**: ${s.bugCatchingTarget}\n`
|
|
143
|
+
: "";
|
|
144
|
+
const fromSource = s.source === "agent-enriched"
|
|
145
|
+
? "Auth: OpenAPI securitySchemes or auth middleware"
|
|
146
|
+
: "Request/response shapes: source code schemas; Auth: OpenAPI securitySchemes or auth middleware";
|
|
147
|
+
return (`**#${rank} — GENERATE** | ${testType} | ${s.category} | ${item.novelty}\n` +
|
|
148
|
+
`Scenario: ${s.scenarioName} (${s.steps.length} steps)\n` +
|
|
149
|
+
bugLine +
|
|
150
|
+
`${stepLines}\n\n` +
|
|
151
|
+
`**Context for generation**:\n` +
|
|
152
|
+
` - Destination: ${destinationHost}\n` +
|
|
153
|
+
` - Base URL: ${baseUrl}\n` +
|
|
154
|
+
` - ${authContext}\n` +
|
|
155
|
+
` - From source: ${fromSource}\n\n` +
|
|
156
|
+
`**Tool pipeline**:\n` +
|
|
157
|
+
` 1. skyramp_batch_scenario_test_generation (see tool description for parameter structure)\n` +
|
|
158
|
+
` 2. skyramp_integration_test_generation with returned scenarioFile${authHeaderOnlyRef ? ` and ${authHeaderOnlyRef.replace(/^,\s*/, '')}` : ""}\n` +
|
|
159
|
+
` **Note**: requestBody/responseBody must be JSON strings (e.g. "{\\"field\\":\\"value\\"}"), not objects.` +
|
|
160
|
+
prereqNote);
|
|
161
|
+
}
|
|
162
|
+
}).join("\n\n");
|
|
163
|
+
// Pre-ranked backend additional candidates — the LLM picks from these per its Budget Plan.
|
|
164
|
+
const additionalLines = additionalItems.map((item, i) => {
|
|
165
|
+
const rank = maxGen + i + 1;
|
|
166
|
+
const s = item.scenario;
|
|
167
|
+
const testType = s.testType ?? (s.steps.length === 1 ? "contract" : "integration");
|
|
168
|
+
const target = s.steps.length === 1
|
|
169
|
+
? `${s.steps[0].method} ${s.steps[0].path} → ${s.steps[0].expectedStatusCode}`
|
|
170
|
+
: `Scenario: ${s.scenarioName} (${s.steps.map(st => `${st.method} ${st.path}`).join(" → ")})`;
|
|
171
|
+
return `#${rank} [ADDITIONAL] | ${testType} | ${s.category} | ${item.novelty}\n ${target}\n Validates: ${s.description}`;
|
|
172
|
+
}).join("\n\n");
|
|
173
|
+
// UI/E2E guidance — the LLM adds as many as its Budget Plan calls for.
|
|
174
|
+
// Note: if a UI test already occupies a GENERATE slot (uiPlaceholderBlock), that slot
|
|
175
|
+
// satisfies the UI generate count — do not add it again in ADDITIONAL.
|
|
176
|
+
const uiGuidance = !isUIOnlyPR ? `
|
|
177
|
+
**UI/E2E tests (add per your Budget Plan):** If your Budget Plan requires UI/E2E items beyond what is already in your GENERATE list, append an [ADDITIONAL] entry for each. If a UI test already occupies a GENERATE slot above, that slot satisfies your UI/E2E generate count — do NOT add it again to ADDITIONAL. Tool workflow for each new item:
|
|
178
|
+
- **E2E**: ${hasTraces ? "Use discovered trace/recording files with `skyramp_e2e_test_generation`." : "Add to additionalRecommendations with a note that both a backend API trace (`skyramp_start_trace_collection` / `skyramp_stop_trace_collection`) and a browser Playwright recording must be collected in a live environment first. Do NOT attempt `skyramp_e2e_test_generation` without both traces present."}
|
|
179
|
+
- **UI**: ${hasTraces ? "Use an existing Playwright `.zip` trace with `skyramp_ui_test_generation`." : `Record a trace using \`browser_navigate\` + \`browser_snapshot\` + \`skyramp_export_zip\`, then call \`skyramp_ui_test_generation({ playwrightInput: "<zip_path>", outputDir: "<frontend_output_dir>" })\` — set \`outputDir\` to ${SERVICE_REFS.frontendTestDirRef}.`}
|
|
180
|
+
Derive scenario names and steps from the actual changed frontend files. If your Budget Plan calls for 0% UI/E2E, omit this entirely.` : "";
|
|
181
|
+
const supplementNote = `\n**If your Budget Plan total exceeds the pre-ranked items listed above:** draft additional tests from source-code enrichment (Step 1). For each new or changed endpoint, identify boundary or variation scenarios — formula parameters, search/filter constraints, required field validation. Only after exhausting PR-specific scenarios, add generic patterns (auth boundary → 401, non-existent ID → 404). Do NOT supplement with tests whose endpoint + test type match a GENERATE item.`;
|
|
182
|
+
// ── PR / branch-diff mode: execution plan ────────────────────────────────
|
|
183
|
+
const externalTestFilesList = relevantExternalTestPaths.length > 0
|
|
184
|
+
? `**Read these external test files first** (paths are relative to the \`repositoryPath\` you passed to \`skyramp_analyze_changes\` — prepend it to get the absolute path). Determine exactly which HTTP methods + paths each one covers. This is the definitive source of truth for external coverage:\n${relevantExternalTestPaths.map(p => `- \`${p}\``).join("\n")}\n\n`
|
|
185
|
+
: "";
|
|
186
|
+
return `## Execution Plan
|
|
187
|
+
Seed: ${seed} | Endpoints: ${endpointCount} | Max: ${maxGen} generate + up to ${Math.max(topN - maxGen, 0)} additional (your Budget Plan determines the exact count)
|
|
188
|
+
|
|
189
|
+
${buildScopeAssessmentSection(topN, maxGen, isUIOnlyPR)}
|
|
190
|
+
|
|
191
|
+
**Step 0 — External test coverage verification (before executing anything)**
|
|
192
|
+
${externalTestFilesList}For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
|
|
193
|
+
- **\`[external]\` tests**: If the endpoint is already covered by an \`[external]\` test of the same type → skip the resource entirely (do NOT create or update). Backfill from ADDITIONAL using the priority order below:
|
|
194
|
+
1. **BUG-CATCHING TESTS FIRST (CRITICAL)**: If source code analysis revealed a bug, logic error, or incorrect formula (e.g. discount math adding instead of subtracting, off-by-one errors, missing validation), CREATE A TEST THAT EXPOSES IT. The test SHOULD FAIL — that's the point. Document the bug. Example: if discount formula is wrong, test with discount=20% and assert correct math. If no bug found, skip to #2.
|
|
195
|
+
2. **PR-endpoint edge cases**: Look for integration test candidates covering error paths, boundary values, or alternative scenarios for the SAME endpoints changed in the PR diff. If no suitable candidate exists in ADDITIONAL, derive one from your source-code enrichment findings.
|
|
196
|
+
3. **Same-resource other scenarios**: Other HTTP methods or flows on the same resource group touched by the PR.
|
|
197
|
+
4. **Cross-resource workflows involving the PR endpoint**: Integration scenarios that include the PR's changed endpoint as one of the steps.
|
|
198
|
+
5. **Unrelated endpoint coverage (last resort)**: Tests for endpoints with no connection to the PR diff, only when ALL options above have been exhausted.
|
|
199
|
+
**Avoid backfilling with a test for a completely unrelated resource (e.g. \`POST /reviews\` when the PR only changes \`/orders\`) if any PR-endpoint edge-case integration test is feasible.**
|
|
200
|
+
- **Contract tests (\`[skyramp]\`)**: If an existing \`[skyramp]\` contract test already covers that resource path → UPDATE the existing test file instead of creating a new one. A new test case is a new test even if the file already exists — count it toward \`newTestsCreated\`.
|
|
201
|
+
- **Integration/scenario tests**: Always generate as a new file via the scenario pipeline, even if an existing integration test covers the same resource. A new multi-step scenario is a distinct test. Count it toward \`newTestsCreated\`.
|
|
202
|
+
- **UI tests**: Always generate as a new file. Count toward \`newTestsCreated\`.
|
|
203
|
+
|
|
204
|
+
**Step 1 — Source-Code Enrichment (before executing anything)**
|
|
205
|
+
Read the source code for ALL changed files. Before generating each recommendation, quote the relevant source code in a <source_evidence> block — include the route handler signature, request body schema fields, response shape, and any computed field formulas. Use these quotes to derive tool call parameters. Look for:
|
|
206
|
+
- **Auth middleware** — check for known signals (${AUTH_MIDDLEWARE_PATTERNS_STR}). If any match, override \`authHeader\` and \`authScheme\` even if workspace.yml says authType: none. **If no known signal matches but the diff shows security-adjacent code** (decorators like \`@requiresRole\`/\`@Protected\`, function names like \`validateToken\`/\`checkPermission\`/\`verifyHMAC\`, or imports from auth/security packages), read the relevant source file to determine the actual auth scheme before proceeding. Auth handling for \`skyramp_integration_test_generation\` with \`scenarioFile\` is covered in the Tool Workflows section below.
|
|
207
|
+
- Business rules and formulas (e.g. total_cost = compute * rate + memory * rate)
|
|
208
|
+
- State transitions and domain constraints (e.g. budget cannot drop below current spend)
|
|
209
|
+
- Validation logic (field constraints, cross-field dependencies)
|
|
210
|
+
- Security boundaries not covered by the structural candidates below
|
|
211
|
+
|
|
212
|
+
For each one found, evaluate it against these 6 dimensions and assign priority:
|
|
213
|
+
| Dimension | What to assess |
|
|
214
|
+
| Production Safety | Guards a critical boundary (auth, unique constraint, cascade delete, data integrity, breaking migration)? → HIGH |
|
|
215
|
+
| Bug-Finding Potential | Targets a known failure mode (race condition, data consistency, state transition, cascade effect)? → HIGH |
|
|
216
|
+
| Mutation Side Effects | Does PUT/PATCH modify a collection of child items (line items, cart entries) and trigger recalculation (totals, counts, amounts)? → HIGH — this is the most common source of user-reported bugs |
|
|
217
|
+
| User Journey Relevance | Reflects how real users interact (from traces, business flows, critical paths)? → HIGH or MEDIUM |
|
|
218
|
+
| Coverage Gap | Addresses an area with zero existing test coverage? → bump up one tier |
|
|
219
|
+
| Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
|
|
220
|
+
|
|
221
|
+
Quality gate — ask all three questions:
|
|
222
|
+
1. "Would this test prevent a production incident?" → YES = HIGH priority regardless of other dimensions
|
|
223
|
+
2. "Does this test exercise a real workflow or catch a real bug?" → YES = at least MEDIUM
|
|
224
|
+
3. "Does this test cover a mutation that modifies child items and triggers total/amount recalculation?" → YES = HIGH priority, and prefer it for GENERATE over simple single-field update tests for the same endpoint
|
|
225
|
+
|
|
226
|
+
Assign category: ${TEST_CATEGORIES.join(" | ")}
|
|
227
|
+
|
|
228
|
+
${buildTestPatternGuidelines()}
|
|
229
|
+
|
|
230
|
+
INSERT a source-code-derived candidate into the ranked list **only if ALL three conditions are met**:
|
|
231
|
+
1. Priority is HIGH (it guards a critical boundary or would prevent a production incident)
|
|
232
|
+
2. It is specific to THIS codebase — derived from a concrete business rule, formula, or constraint found in the changed files (not a general pattern that applies to any API)
|
|
233
|
+
3. It is not already covered by a structural candidate in the list below
|
|
234
|
+
|
|
235
|
+
If these conditions are not met, add it to ADDITIONAL only — do NOT displace a pre-ranked GENERATE item.
|
|
236
|
+
**CRITICAL-tier items (category: new_endpoint) should never be displaced** — they test the actual endpoints introduced in this PR and must always occupy GENERATE slots.
|
|
237
|
+
|
|
238
|
+
When a qualifying candidate is inserted: place it HIGH before MEDIUM before LOW; within the same priority, source-code-derived candidates go BEFORE structural ones. Re-number ranks after insertion. The top ${maxGen} ranked items become GENERATE candidates.
|
|
239
|
+
|
|
240
|
+
**Source-code validation gates (apply during Step 1):**
|
|
241
|
+
- **Cascade vs referential integrity**: If both a cascade-delete and a delete-blocked scenario appear for the same resource pair, keep only the one matching the source FK delete policy (ON DELETE CASCADE / cascade=True / onDelete: 'CASCADE' → keep cascade-delete; RESTRICT/PROTECT/no annotation → keep delete-blocked). Remove the inapplicable variant.
|
|
242
|
+
- **Unique constraints**: Unique-constraint scenarios (duplicate POST → 409) are pre-drafted for all resources. Confirm enforcement before keeping: SQL UNIQUE index, Mongoose unique: true, Prisma @unique, or explicit duplicate-check code. If the backend is Redis, schema-less, or has no explicit constraint in the changed files, move to ADDITIONAL with a note — do NOT generate.
|
|
243
|
+
|
|
244
|
+
**Step 2 — Diversity check (using enriched knowledge from Step 1)**
|
|
245
|
+
Each GENERATE item must exercise a **distinct code path** — not just different input values on the same path.
|
|
246
|
+
|
|
247
|
+
For each pair of GENERATE items, ask: same HTTP method + path + step sequence + expected status? → DUPLICATE. Keep the richer item; replace the other with a test from a different path below. Move the displaced item to ADDITIONAL.
|
|
248
|
+
|
|
249
|
+
**Good diversity — aim for this mix across GENERATE slots:**
|
|
250
|
+
- **Happy-path**: create prerequisites → call the new endpoint → verify computed fields and child collections
|
|
251
|
+
- **Error-path**: trigger a distinct error status (404 for non-existent resource, 422 for invalid input, 400 for malformed request — whichever the source code handles)
|
|
252
|
+
- **State-variation**: same endpoint, different logic branch (empty array, remove instead of add, boundary value that triggers a guard)
|
|
253
|
+
|
|
254
|
+
Same step sequence with only payload differences (e.g. 10% vs 5% discount both returning 200) = same code path = duplicate. Different scenario names do not make duplicate tests distinct.
|
|
255
|
+
|
|
256
|
+
**Step 3 — Execute merged plan in rank order**
|
|
257
|
+
Replace any scenario that pairs unrelated resources with one reflecting actual FK relationships in the codebase.
|
|
258
|
+
Use the field names and values from the \`<source_evidence>\` blocks you quoted in Step 1 to fill all tool call parameters. Prefer reusing Step 1 evidence when it already resolves a placeholder, but if a placeholder cannot be replaced with concrete values from files already read, you may read the specific schema, model, or handler file needed to resolve it. Assert response field values, not just status codes.
|
|
259
|
+
|
|
260
|
+
${buildTestQualityCriteria()}
|
|
261
|
+
|
|
262
|
+
${buildGenerationRules(isUIOnlyPR)}
|
|
263
|
+
|
|
264
|
+
**ADDITIONAL recommendations** are submitted via \`skyramp_submit_report\`. Refer to its schema for required fields. Only include recommendations that add distinct coverage beyond what was generated.
|
|
265
|
+
|
|
266
|
+
**Never mark a recommendation "blocked":** No OpenAPI spec → use source code for shapes. No traces → provide \`skyramp_start_trace_collection\` instructions. No backend trace → use the scenario pipeline.
|
|
267
|
+
|
|
268
|
+
**Critical-category minimum:** At least ${Math.min(MAX_CRITICAL_TESTS, maxGen)} of the ${maxGen} GENERATE items should be from HIGH-priority categories (security_boundary, business_rule, data_integrity, breaking_change). The pre-ranked plan below already prioritises this — only override if source-code enrichment reveals a higher-value candidate.
|
|
269
|
+
|
|
270
|
+
### GENERATE (process these EXACTLY as listed, in order — after completing Steps 0–2 above; if Step 0 converts an item to UPDATE, backfill the ADD slot from ADDITIONAL following the priority order in Step 0)
|
|
271
|
+
|
|
272
|
+
${isUIOnlyPR
|
|
273
|
+
? (uiGenerateBlocks || " (no UI generate items — derive scenarios from changed frontend files)")
|
|
274
|
+
: ([generateBlocks, uiPlaceholderBlock].filter(Boolean).join("\n\n") || " (no pre-ranked generate items — draft your own based on endpoint analysis)")}
|
|
275
|
+
|
|
276
|
+
**COMPLIANCE CHECK**: Before proceeding, verify your generate list matches the items above. If you plan to generate a scenario with a different name than what is listed (e.g. you want to generate "order-update-discount-calculation" but the plan says "orders-patch-add-items-recalculate"), STOP — use the plan's scenario name and steps. Add your alternative to ADDITIONAL instead. One retry on failure then skip to next item.
|
|
277
|
+
|
|
278
|
+
### ADDITIONAL (list in additionalRecommendations in this order after Step 1 insertion)
|
|
279
|
+
|
|
280
|
+
${additionalLines || " (none pre-ranked)"}
|
|
281
|
+
${uiGuidance}
|
|
282
|
+
${supplementNote}
|
|
283
|
+
|
|
284
|
+
**Honor your Budget Plan: produce exactly the total you committed to (GENERATE + ADDITIONAL). No fewer, no padding with low-value tests.**
|
|
285
|
+
|
|
286
|
+
## Recommendation Stability
|
|
287
|
+
- **Carry forward** previous additionalRecommendations that still apply — match by scenarioName (multi-step) or endpoint (single-endpoint). Re-derive category and priority from test content.
|
|
288
|
+
- **Only drop** a previous recommendation if its target endpoint was removed, its business logic changed, or it is now covered by a generated test.
|
|
289
|
+
- **Only add** new recommendations for code paths introduced since the last run.`;
|
|
290
|
+
}
|