@skyramp/mcp 0.1.0-rc.4 → 0.1.0-rc.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +2 -1
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +18 -2
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +85 -26
- package/build/prompts/test-recommendation/recommendationSections.js +3 -2
- package/build/prompts/test-recommendation/registerRecommendTestsPrompt.js +5 -1
- package/build/prompts/test-recommendation/scopeAssessment.js +76 -0
- package/build/prompts/test-recommendation/scopeAssessment.test.js +76 -0
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +92 -137
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +157 -38
- package/build/prompts/testbot/testbot-prompts.js +14 -51
- package/build/prompts/testbot/testbot-prompts.test.js +6 -7
- package/build/resources/testbotResource.js +40 -0
- package/build/tools/test-management/analyzeChangesTool.js +24 -12
- package/build/tools/test-management/analyzeChangesTool.test.js +1 -0
- package/build/tools/test-management/analyzeTestHealthTool.js +4 -0
- package/build/utils/AnalysisStateManager.test.js +2 -1
- package/build/utils/branchDiff.js +61 -29
- package/build/utils/projectMetadata.js +13 -1
- package/build/utils/repoScanner.js +131 -293
- package/build/utils/routeParsers.js +137 -30
- package/build/utils/routeParsers.test.js +154 -1
- package/build/utils/skyrampMdContent.js +2 -2
- package/package.json +1 -1
package/build/index.js
CHANGED
|
@@ -23,7 +23,8 @@ import { registerCodeReuseTool } from "./tools/code-refactor/codeReuseTool.js";
|
|
|
23
23
|
import { registerBatchScenarioTestTool } from "./tools/generate-tests/generateBatchScenarioRestTool.js";
|
|
24
24
|
import { registerMockTool } from "./tools/generate-tests/generateMockRestTool.js";
|
|
25
25
|
import { registerAnalyzeChangesTool, registerAnalyzeTestHealthTool, registerActionsTool, } from "./tools/test-management/index.js";
|
|
26
|
-
import { registerTestbotPrompt
|
|
26
|
+
import { registerTestbotPrompt } from "./prompts/testbot/testbot-prompts.js";
|
|
27
|
+
import { registerTestbotResource } from "./resources/testbotResource.js";
|
|
27
28
|
import { registerSubmitReportTool } from "./tools/submitReportTool.js";
|
|
28
29
|
import { registerInitializeWorkspaceTool } from "./tools/workspace/initializeWorkspaceTool.js";
|
|
29
30
|
import { registerInitScanWorkspaceTool } from "./tools/workspace/initScanWorkspaceTool.js";
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { buildDriftScoringGuide, buildActionDecisionMatrix, buildBreakingChangePatterns, buildTestAssessmentGuidelines, buildAddRecommendationGuidelines, buildDriftOutputChecklist, buildUpdateExecutionRules, } from "./driftAnalysisSections.js";
|
|
2
2
|
export function buildDriftAnalysisPrompt(params) {
|
|
3
|
-
const { existingTests, parsedDiff, scannedEndpoints, repositoryPath, stateFile } = params;
|
|
3
|
+
const { existingTests, parsedDiff, scannedEndpoints, repositoryPath, stateFile, routerMountContext, candidateRouteFiles } = params;
|
|
4
4
|
const inlineMode = !stateFile;
|
|
5
5
|
// Detect new endpoints count from parsedDiff
|
|
6
6
|
let newEndpointCount = 0;
|
|
@@ -30,6 +30,7 @@ No existing Skyramp tests found in repository.
|
|
|
30
30
|
`;
|
|
31
31
|
const scannedSection = scannedEndpoints.length > 0
|
|
32
32
|
? `## Scanned Endpoints (${scannedEndpoints.length})
|
|
33
|
+
Note: paths below come from static analysis and may be incomplete for nested resources or unsupported frameworks. Use the Routing entry-point files section below to verify and reconstruct full paths.
|
|
33
34
|
${scannedEndpoints.map((ep) => {
|
|
34
35
|
let methods;
|
|
35
36
|
if (Array.isArray(ep.methods)) {
|
|
@@ -40,6 +41,19 @@ ${scannedEndpoints.map((ep) => {
|
|
|
40
41
|
}
|
|
41
42
|
return `- ${methods} ${ep.path}`;
|
|
42
43
|
}).join("\n")}
|
|
44
|
+
`
|
|
45
|
+
: "";
|
|
46
|
+
const mountSection = routerMountContext?.length
|
|
47
|
+
? `## Routing entry-point files
|
|
48
|
+
Read these to trace the full router/module hierarchy when verifying endpoint paths:
|
|
49
|
+
${routerMountContext.map(f => `- \`${f}\``).join("\n")}
|
|
50
|
+
`
|
|
51
|
+
: "";
|
|
52
|
+
const hasJavaFiles = candidateRouteFiles?.some(f => /\.(java|kt)$/.test(f)) ?? false;
|
|
53
|
+
const candidateFilesSection = candidateRouteFiles && candidateRouteFiles.length > 0
|
|
54
|
+
? `## Route Files (read these to find endpoints from any framework)
|
|
55
|
+
${candidateRouteFiles.map(f => `- ${f}`).join("\n")}
|
|
56
|
+
${hasJavaFiles ? "Note — Java Spring: full URL = class-level `@RequestMapping` prefix + method-level path. If the prefix is a constant reference (e.g. `@RequestMapping(Url.PAGE_URL)`), find the constant — same file, inner class, or a separate `Url.java` — and resolve it (including `+` concatenation)." : ""}
|
|
43
57
|
`
|
|
44
58
|
: "";
|
|
45
59
|
// In inline mode (testbot), skip the context header — existing tests and diff
|
|
@@ -54,7 +68,9 @@ ${scannedEndpoints.map((ep) => {
|
|
|
54
68
|
|
|
55
69
|
${diffSection}
|
|
56
70
|
${testListSection}
|
|
57
|
-
${scannedSection}
|
|
71
|
+
${scannedSection}
|
|
72
|
+
${mountSection}
|
|
73
|
+
${candidateFilesSection}`;
|
|
58
74
|
if (inlineMode) {
|
|
59
75
|
// Testbot inline mode: all maintenance logic lives here so the testbot
|
|
60
76
|
// prompt only orchestrates steps without duplicating rules.
|
|
@@ -1,4 +1,44 @@
|
|
|
1
1
|
import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
|
|
2
|
+
// File extensions that indicate a frontend-only file — no API route definitions.
|
|
3
|
+
// Uses extensions rather than path-segment patterns so backend files named
|
|
4
|
+
// e.g. Spring /services/Foo.java or Gin handlers/main.go are not misclassified.
|
|
5
|
+
// Keep this narrow: .json/.md can contain backend-affecting artifacts (OpenAPI specs,
|
|
6
|
+
// config, API docs) and would incorrectly classify a PR as UI-only.
|
|
7
|
+
const FRONTEND_EXT = /\.(tsx?|jsx?|vue|svelte|css|scss|less|html|svg)$/i;
|
|
8
|
+
/**
 * Builds the "Step 1.5" instruction block, which tells the LLM to read the
 * routing entry-point files and assemble an authoritative file→full-prefix
 * table before it places any endpoint URL in a tool call.
 *
 * @param {object} p - Prompt parameters.
 * @param {string[]} [p.routerMountContext] - Routing entry-point files; may be
 *   missing or empty when no router context was collected.
 * @param {*} [p.wsSchemaPath] - When truthy, the step is omitted.
 * @returns {string} The markdown for Step 1.5 (terminated by a blank line so
 *   the next heading can be appended directly), or "" when there is no router
 *   context or `wsSchemaPath` is set.
 */
function buildPathResolutionTableStep(p) {
    // Optional chaining: an absent routerMountContext means "no router context"
    // and must yield "" rather than throw on `.length`.
    const hasRouterContext = Boolean(p.routerMountContext?.length);
    if (!hasRouterContext || p.wsSchemaPath) {
        return "";
    }
    return `### Step 1.5: Build path resolution table
The **Routing entry-point files** section above lists the files to read.

**Read each of those files** and trace every router mount call to understand nesting — the pattern varies by framework but the structure is universal: a parent attaches a child router with an optional extra prefix segment. If a prefix is a variable (e.g. \`prefix=api_prefix\`), resolve the variable's value by reading the assignment or the config/settings file it comes from. Examples of what to look for (non-exhaustive):
- Python (FastAPI/Flask): \`parent.include_router(child, prefix="...")\`, \`app.register_blueprint(...)\`
- JS/TS (Express/Fastify/Hapi): \`app.use('/path', childRouter)\`, \`router.use('/path', sub)\`
- NestJS: \`@Module({ imports: [FeatureModule] })\` — trace the module import chain; each \`@Controller('prefix')\` contributes a segment
- Go (Gin/Echo/Chi): \`r.Group('/path')\`, \`r.Mount('/path', sub)\`
- Ruby (Rails): \`namespace\`, \`scope\`, \`resources ... do\`
- Django: \`path('prefix/', include(urls))\`

Chain all segments from the app root down through every intermediate mount to each leaf router file. Build a table:

| Source file | Full URL prefix |
|-------------|----------------|
| (leaf router file) | (fully chained prefix, e.g. /api/v1/products/{id}/reviews) |

**This table is authoritative.** Before placing any URL in a tool call, look up the source file. If the pre-built catalog shows a different path, use the table value.

`;
}
|
|
38
|
+
// Inline note added to any step where the LLM reads Java source files. Java Spring
|
|
39
|
+
// has no router-mounting file — each controller defines its own class-level prefix,
|
|
40
|
+
// and that prefix may reference a constant defined elsewhere.
|
|
41
|
+
const JAVA_SPRING_NOTE = `For Java Spring: full URL = class-level \`@RequestMapping\` prefix + method-level path. If the prefix is a constant reference (e.g. \`@RequestMapping(Url.PAGE_URL)\`), find the constant — same file, inner class, or a separate \`Url.java\` — and resolve it (including \`+\` concatenation).`;
|
|
2
42
|
function buildEnrichmentInstructions(p) {
|
|
3
43
|
const isDiffScope = p.analysisScope === AnalysisScope.CurrentBranchDiff;
|
|
4
44
|
const useHealthFlow = p.nextTool === "skyramp_analyze_test_health";
|
|
@@ -16,38 +56,59 @@ The ranked test recommendation catalog is pre-built and shown below (after the s
|
|
|
16
56
|
3. Do NOT call any Skyramp generation tools. The catalog shows ready-to-use tool calls that can be executed on demand.
|
|
17
57
|
|
|
18
58
|
**If** Steps 1–2 revealed additional scenarios the catalog does not cover (e.g. a computed formula or FK relationship that was missed), you may optionally call \`skyramp_recommend_tests\` with \`stateFile: "${p.stateFile ?? p.sessionId}"\` and \`enrichedScenarios\` to regenerate a more complete catalog — but only after presenting the current one.`;
|
|
59
|
+
const hasJavaFiles = p.candidateRouteFiles?.some(f => /\.(java|kt)$/.test(f)) ?? false;
|
|
60
|
+
const routeFilesSection = p.candidateRouteFiles && p.candidateRouteFiles.length > 0
|
|
61
|
+
? `\nRoute/controller files found by static scan (read these to discover endpoints — the regex-based catalog below may be incomplete for your framework):\n${p.candidateRouteFiles.map(f => `- ${f}`).join("\n")}\n`
|
|
62
|
+
: "";
|
|
63
|
+
const resolvePathsNote = p.routerMountContext.length
|
|
64
|
+
? `**Resolve nested paths** using your Step 1.5 table — a router in the table with prefix \`/api/v1/products/{product_id}/reviews\` means every endpoint in that file lives under that full path.`
|
|
65
|
+
: `**Resolve full paths** using the prefixes you identified in Step 1 (e.g. Java Spring class-level \`@RequestMapping\` prefix + method-level path).`;
|
|
19
66
|
return `## Your Task — Fill in and Present the Catalog (full repo)
|
|
20
67
|
|
|
21
68
|
### Step 1: Read key files
|
|
22
|
-
Read route/controller files and model/schema files (Pydantic models, Zod schemas, DTOs)
|
|
23
|
-
|
|
69
|
+
${routeFilesSection}Read the route/controller files above **and** model/schema files (Pydantic models, Zod schemas, DTOs) to find: required request body fields, computed response fields and formulas, auth middleware type, storage backend, and how sub-routers are mounted (cross-check against Router Mounting section above).
|
|
70
|
+
${hasJavaFiles ? JAVA_SPRING_NOTE : ""}
|
|
71
|
+
If the endpoint catalog below is missing endpoints visible in these files (e.g. from a framework the static scanner doesn't recognise), extract them now and include them in Step 3's \`enrichedScenarios\`.
|
|
24
72
|
|
|
25
|
-
### Step 2: Map cross-resource relationships and resolve endpoint paths
|
|
73
|
+
${buildPathResolutionTableStep(p)}### Step 2: Map cross-resource relationships and resolve endpoint paths
|
|
26
74
|
(Distinct from Step 1 — Step 1 reads individual schemas; Step 2 maps how endpoints relate to each other.)
|
|
27
75
|
For each endpoint: which POST creates resources consumed by other endpoints?
|
|
28
|
-
|
|
76
|
+
${resolvePathsNote}
|
|
29
77
|
For GET list endpoints: identify query params (\`limit\`, \`offset\`, \`order\`, \`orderBy\`) from framework annotations (FastAPI \`Query()\`, Express \`req.query\`, etc.).
|
|
30
78
|
|
|
31
79
|
${nextStep}`;
|
|
32
80
|
}
|
|
33
81
|
const changedFiles = p.parsedDiff?.changedFiles.join(", ") ?? "";
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
82
|
+
// Whether the regex pre-detected any API endpoints — used as a hint only.
|
|
83
|
+
// Step 2 always asks the LLM to extract endpoints from the diff so unknown
|
|
84
|
+
// frameworks (e.g. Spring class-level @RequestMapping, Django, Rails) are
|
|
85
|
+
// covered even when the static regex returns nothing.
|
|
86
|
+
const regexFoundEndpoints = p.parsedDiff && (p.parsedDiff.newEndpoints.length > 0 || p.parsedDiff.modifiedEndpoints.length > 0);
|
|
87
|
+
const diffFiles = p.parsedDiff?.changedFiles ?? [];
|
|
88
|
+
const isUIOnly = diffFiles.length > 0 &&
|
|
89
|
+
!regexFoundEndpoints &&
|
|
90
|
+
diffFiles.every(f => FRONTEND_EXT.test(f));
|
|
91
|
+
const diffHasJavaFiles = diffFiles.some(f => /\.(java|kt)$/.test(f));
|
|
92
|
+
const diffSection = p.diffContent
|
|
93
|
+
? `\n<diff>\n${p.diffContent}\n</diff>`
|
|
94
|
+
: "";
|
|
95
|
+
const step2 = isUIOnly
|
|
96
|
+
? `### Step 2: Identify consumed API endpoints
|
|
97
|
+
UI-only PR — read changed components to find API calls (fetch, axios, hooks).`
|
|
98
|
+
: p.diffContent
|
|
99
|
+
? `### Step 2: Extract new and modified API endpoints from the diff
|
|
100
|
+
Read the \`<diff>\` above and identify every new or modified API endpoint — route registrations, handler methods, controller annotations. Then use the **Router Mounting / Nesting** section above to reconstruct the full URL path for each endpoint by chaining all parent router prefixes down to the handler (e.g. a handler in a file with \`prefix="/reviews"\` that is mounted at \`/{product_id}\` under a router mounted at \`/api/v1/products\` → full path \`/api/v1/products/{product_id}/reviews\`).
|
|
101
|
+
${diffHasJavaFiles ? JAVA_SPRING_NOTE : ""}
|
|
102
|
+
For each endpoint found: note the HTTP method, full path, and source file.
|
|
103
|
+
${regexFoundEndpoints ? "The static analysis above pre-detected some endpoints — verify and augment with anything it missed." : "The static analysis did not detect endpoints for this framework — rely on the diff to extract them."}
|
|
42
104
|
**CRITICAL — Query params vs body:** For GET endpoints (especially search/filter/list),
|
|
43
105
|
identify which parameters are URL query params vs request body. Look at framework-specific
|
|
44
106
|
annotations (FastAPI \`Query()\`, Express \`req.query\`, Spring \`@RequestParam\`, etc.).
|
|
45
107
|
Pass these as \`queryParams\` (not \`requestBody\`) when generating scenarios.`
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
No API route changes detected — read changed files to identify affected endpoints.`;
|
|
108
|
+
: `### Step 2: Extract new and modified API endpoints from source files
|
|
109
|
+
No diff was available — read the changed source files listed above directly to identify new or modified API endpoints. Use the **Router Mounting / Nesting** section to reconstruct full paths.
|
|
110
|
+
${diffHasJavaFiles ? JAVA_SPRING_NOTE : ""}
|
|
111
|
+
For each endpoint found: note the HTTP method, full path, and source file.`;
|
|
51
112
|
const criticalPatternStep = `### Step 2.5: Identify critical patterns for test categorization
|
|
52
113
|
Look for these patterns in model/schema/handler files to inform test recommendations:
|
|
53
114
|
- **Unique constraints**: \`@unique\`, \`unique: true\`, unique indexes, \`.refine()\` uniqueness checks, \`UNIQUE\` in SQL migrations
|
|
@@ -88,10 +149,10 @@ Call \`skyramp_recommend_tests\` with:
|
|
|
88
149
|
- \`enrichedScenarios\`: (optional) JSON array of your Step 3 scenarios — see the tool's inputSchema for the exact shape. Your enriched scenarios override server-side ones with the same \`scenarioName\` and are prioritized in ranking. Omit if you drafted nothing in Step 3.`;
|
|
89
150
|
return `## Your Task — Enrich & Recommend (PR-scoped)
|
|
90
151
|
|
|
91
|
-
### Step 1: Read the changed files
|
|
92
|
-
${changedFiles}
|
|
152
|
+
### Step 1: Read the changed files and diff
|
|
153
|
+
${changedFiles}${diffSection}
|
|
93
154
|
|
|
94
|
-
${step2}
|
|
155
|
+
${buildPathResolutionTableStep(p)}${step2}
|
|
95
156
|
|
|
96
157
|
${criticalPatternStep}
|
|
97
158
|
|
|
@@ -103,13 +164,11 @@ export function buildAnalysisOutputText(p) {
|
|
|
103
164
|
// Branch diff, endpoint catalog, auth config, and OpenAPI spec are omitted here
|
|
104
165
|
// because they are already present in the recommendation prompt that is
|
|
105
166
|
// concatenated in the same tool response.
|
|
106
|
-
const routerSection = !p.wsSchemaPath && p.routerMountContext
|
|
167
|
+
const routerSection = !p.wsSchemaPath && p.routerMountContext.length
|
|
107
168
|
? `
|
|
108
|
-
##
|
|
109
|
-
|
|
110
|
-
${p.routerMountContext}
|
|
111
|
-
\`\`\`
|
|
112
|
-
Use this to resolve full URL paths for nested endpoints.`
|
|
169
|
+
## Routing entry-point files
|
|
170
|
+
Read these in Step 1.5 to trace the full router/module hierarchy:
|
|
171
|
+
${p.routerMountContext.map(f => `- \`${f}\``).join("\n")}`
|
|
113
172
|
: "";
|
|
114
173
|
const enrichment = buildEnrichmentInstructions(p);
|
|
115
174
|
return `# Repository Analysis
|
|
@@ -156,9 +156,10 @@ When no Playwright trace exists, use the Playwright browser tools (\`browser_nav
|
|
|
156
156
|
recommend a different test that adds new coverage.`;
|
|
157
157
|
}
|
|
158
158
|
export function buildVerificationChecklist(topN, maxGen) {
|
|
159
|
+
const minTotal = Math.min(maxGen + 1, topN);
|
|
159
160
|
return `<verification>
|
|
160
161
|
Before finalizing your output, verify:
|
|
161
|
-
1. **Count**: Total recommendation count equals
|
|
162
|
+
1. **Count**: Total recommendation count equals the total you stated in your Budget Plan (between ${minTotal} and ${topN}). Your GENERATE + ADDITIONAL counts must match the split you committed to. Not fewer than your stated Budget Plan total.
|
|
162
163
|
2. **Distinct paths**: Each GENERATE item targets a distinct code path — no two share the same HTTP method + endpoint + expected status.
|
|
163
164
|
3. **Auth parameters are consistent** across all tool calls (same authHeader and authScheme).
|
|
164
165
|
4. Every endpointURL includes both the base URL and the path (not just the base, e.g. \`http://host/api/v1/orders/{id}\`).
|
|
@@ -334,7 +335,7 @@ ${PATH_PARAM_UUID_GUIDANCE}
|
|
|
334
335
|
3. Interact using \`browser_click\`, \`browser_type\`, \`browser_fill_form\`, etc.
|
|
335
336
|
4. \`browser_snapshot\` after each interaction that changes the page
|
|
336
337
|
5. \`skyramp_export_zip\` with an **absolute** output path: \`<repositoryPath>/.skyramp/<test_name>_trace.zip\`
|
|
337
|
-
6. \`skyramp_ui_test_generation\` with \`playwrightInput\` = the **absolute** path of the exported zip
|
|
338
|
+
6. \`skyramp_ui_test_generation\` with \`playwrightInput\` = the **absolute** path of the exported zip, and \`outputDir\` = the **frontend** service's \`testDirectory\` from workspace.yml (e.g. \`frontend/tests\`). Do NOT use the backend service's testDirectory — UI tests must go in the frontend service's test directory.
|
|
338
339
|
|
|
339
340
|
Tips: For custom dropdowns (Radix, MUI): click combobox → snapshot → click option (NOT \`browser_select_option\`).
|
|
340
341
|
|
|
@@ -158,7 +158,11 @@ export function registerRecommendTestsPrompt(server) {
|
|
|
158
158
|
if (!fullAnalysis) {
|
|
159
159
|
throw new Error(`Analysis data for session not found in memory or on disk. Re-run skyramp_analyze_changes.`);
|
|
160
160
|
}
|
|
161
|
-
|
|
161
|
+
// Normalize legacy state files: before AnalysisScope enum normalization, state stored
|
|
162
|
+
// the user-facing param value "branch_diff". Map it explicitly so diff-mode detection
|
|
163
|
+
// works correctly on state created before this deployment (2-hour TTL window).
|
|
164
|
+
const rawScope = state.analysisScope;
|
|
165
|
+
const analysisScope = rawScope === "branch_diff" || rawScope === AnalysisScope.CurrentBranchDiff
|
|
162
166
|
? AnalysisScope.CurrentBranchDiff
|
|
163
167
|
: AnalysisScope.FullRepo;
|
|
164
168
|
const effectiveTopN = args.topN;
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { MAX_RECOMMENDATIONS, MAX_TESTS_TO_GENERATE } from "./recommendationSections.js";
|
|
2
|
+
// Extensions that frontend source, style, and server-rendered template files use.
// Case-insensitive, like the directory patterns below.
const FRONTEND_FILE_PATTERN = /\.(tsx?|jsx?|vue|svelte|css|scss|less|html?|erb|jsp|asp|jinja2?|twig)$/i;
// Directory names that mark backend/API code. Anchored with (^|/) so that a
// repo-relative path *starting* with such a directory (e.g. "api/...") is
// recognised as well as a nested one (e.g. "src/api/...").
const API_DIR_PATTERN = /(^|\/)(api|routes?|controllers?|routers?|handlers?|endpoints?|server)\//i;
// Directory names that mark frontend code.
const FRONTEND_DIR_PATTERN = /(^|\/)(components?|pages?|views?|layouts?|app|src\/app|frontend|client|public|styles?|templates?)\//i;
/**
 * Returns true if the file path is an unambiguously frontend file: it has a
 * frontend extension, lives in a frontend directory, and is not inside an
 * API/backend directory at any depth (including the path's first segment).
 *
 * @param {string} filePath - File path using "/" separators.
 * @returns {boolean} Whether the path is classified as a frontend file.
 */
export function isFrontendFile(filePath) {
    if (!FRONTEND_FILE_PATTERN.test(filePath)) {
        return false;
    }
    // A backend directory anywhere in the path wins over a frontend directory.
    if (API_DIR_PATTERN.test(filePath)) {
        return false;
    }
    return FRONTEND_DIR_PATTERN.test(filePath);
}
|
|
15
|
+
// ── LLM scope assessment ──────────────────────────────────────────────────────
|
|
16
|
+
/**
 * Builds the PR scope assessment section embedded as the first step in the
 * execution plan prompt.
 *
 * This replaces fixed formula-computed topN and uiFraction values. The LLM has
 * richer context than a file-count formula: it understands semantic complexity
 * (one auth change > ten CSS tweaks), can identify UI tests that are warranted
 * even on mostly-backend PRs (frontend logic bugs, form validation errors), and
 * can down-scale when the diff is trivial regardless of file count.
 *
 * The LLM is asked to state a concrete Budget Plan before proceeding, which the
 * rest of the prompt references to enforce count discipline.
 *
 * @param {number} [maxTotal=MAX_RECOMMENDATIONS] - Hard cap on total recommendations.
 * @param {number} [maxGenerate=MAX_TESTS_TO_GENERATE] - Number of generate slots;
 *   clamped to `maxTotal` so the baseline formula never shows more generate
 *   slots than the total or a negative "additional" count.
 * @param {boolean} [isUIOnly=false] - When true, Step D tells the LLM to budget
 *   100% UI/E2E instead of working out a UI/backend split.
 * @returns {string} Markdown for the scope assessment section.
 */
export function buildScopeAssessmentSection(maxTotal = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, isUIOnly = false) {
    // Generate slots cannot exceed the hard cap; without this clamp the baseline
    // would read e.g. "5 (generate) + -2 (min additional) = 3" for (3, 5).
    const generateCount = Math.min(maxGenerate, maxTotal);
    // Clamp minTotal to maxTotal so the range is never inverted (e.g. when maxGenerate === maxTotal).
    const minTotal = Math.min(generateCount + 1, maxTotal);
    const minAdditional = minTotal - generateCount; // 1 normally; 0 when maxTotal <= maxGenerate
    const baselineFormula = `${generateCount} (generate) + ${minAdditional} (min additional) = ${minTotal}`;
    const stepD = isUIOnly
        ? `**Step D — UI/E2E confirmation (frontend-only PR):**
This is a frontend-only PR — set **100% UI/E2E** in your Budget Plan.
Budget generate slots toward directly recording and generating UI tests; budget additional slots toward more UI/E2E flows derived from the changed components.`
        : `**Step D — Determine UI vs backend split:**
- Backend-only PR (0 frontend files changed or only CSS/copy): **0% UI/E2E** — focus on integration and contract tests
- Frontend-only PR: **100% UI/E2E** — all tests should be UI/E2E
- Mixed PR — non-UI slots are backend tests; start from file-count ratio for UI%, then apply judgment:
- Pure CSS/style changes inflate the frontend file count without adding test value → reduce UI%
- Frontend logic bugs (state management, calculation errors, form validation) in the diff → increase UI% even if few frontend files
- Frontend component calls a changed backend API → an E2E test covers both sides → count toward UI%
- Frontend files only in \`__tests__/\` or \`.stories.\` → exclude from the ratio`;
    return `### PR Scope Assessment — complete this first, before planning any recommendations

Read the Changed Files list and endpoint changes above, then work through the four steps below. This determines your **Budget Plan** (total count + backend/frontend split) for the rest of the prompt.

**Step A — Classify changed files:**
Count each type from the diff context (ignore generated test files, lock files, and build artifacts):
- **Frontend files**: .tsx / .jsx / .vue / .svelte / .html / .erb / .jsp / .asp in components/, pages/, views/, layouts/, app/, frontend/, templates/ directories
- **Backend files**: route handlers, controllers, services, models, API modules, middleware, config with business logic
- **Non-application** (exclude from test value): CSS-only, copy/string changes, README, CI config with no logic

**Step B — Assess semantic complexity (quality over quantity):**
Weigh changes by their test value, not file count:
- New API endpoint → HIGH test value: needs happy path + at least one error path (contributes ~2 to budget)
- Modified endpoint with formula / business logic change → HIGH: edge cases matter (contributes ~1–2)
- Auth middleware change → CRITICAL: flag for extra security tests regardless of file count
- Frontend state / validation / calculation logic → HIGH for UI tests even if zero backend endpoints changed
- CSS / copy / purely cosmetic changes → LOW: may not justify any new test

**Step C — Determine total recommendation count (${minTotal}–${maxTotal}):**
Start from the baseline formula: *${baselineFormula}*, then adjust:
- **Scale up** for: critical auth/data-integrity changes (+2), complex multi-step business workflows (+1 each), new endpoints with non-trivial validation (+1 each beyond the formula)
- **Scale down** for: style/copy-only changes (may reach minimum of ${minTotal}), already well-tested paths confirmed by existing test list, trivial CRUD with no validation
- **Hard cap**: ${maxTotal}

${stepD}

**State your Budget Plan now** (one line, before any recommendation):
\`Budget Plan: <total> total (<generate> generate + <additional> additional), <ui_pct>% UI/E2E\`

Use these exact numbers throughout the rest of the prompt.`;
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
jest.mock("@skyramp/skyramp", () => ({
    WorkspaceConfigManager: { create: jest.fn() },
}));
import { isFrontendFile, buildScopeAssessmentSection } from "./scopeAssessment.js";
// ---------------------------------------------------------------------------
// isFrontendFile — table-driven: [test title, input path, expected result]
// ---------------------------------------------------------------------------
describe("isFrontendFile", () => {
    const classificationCases = [
        ["returns true for a TSX component in components/", "src/components/Button.tsx", true],
        ["returns true for a JSX file in pages/", "src/pages/Home.jsx", true],
        ["returns true for a Vue file in views/", "src/views/Dashboard.vue", true],
        ["returns false for a TS route handler (API directory)", "src/routes/items.ts", false],
        ["returns false for a TS file in a backend service directory", "src/services/AuthService.ts", false],
        // FRONTEND_FILE_PATTERN uses tsx? which matches both .ts and .tsx
        ["returns true for a TS file in components/ (tsx? matches .ts)", "src/components/utils.ts", true],
        ["returns false for a backend TS file in api/ even with tsx extension", "src/api/handlers/UserHandler.tsx", false],
    ];
    it.each(classificationCases)("%s", (_title, filePath, expected) => {
        expect(isFrontendFile(filePath)).toBe(expected);
    });
});
// ---------------------------------------------------------------------------
// buildScopeAssessmentSection
// ---------------------------------------------------------------------------
describe("buildScopeAssessmentSection", () => {
    it("produces a valid range in Step C for normal inputs (topN > maxGenerate)", () => {
        // minTotal = min(3+1, 10) = 4, so the range reads "4–10"
        const rendered = buildScopeAssessmentSection(10, 3);
        expect(rendered).toContain("4–10");
        expect(rendered).toContain("3 (generate) + 1 (min additional) = 4");
    });
    it("clamps minTotal to maxTotal when maxTotal === maxGenerate (regression: no inverted range)", () => {
        // Edge case from review: topN=4, maxGenerate=4 → minTotal must not exceed maxTotal.
        // minTotal = min(4+1, 4) = 4 → "4–4" (never "5–4"); minAdditional = 4 - 4 = 0.
        const rendered = buildScopeAssessmentSection(4, 4);
        expect(rendered).toContain("4–4");
        expect(rendered).toContain("4 (generate) + 0 (min additional) = 4");
        expect(rendered).not.toMatch(/\b5–4\b/);
    });
    it("clamps minTotal to maxTotal when maxTotal < maxGenerate", () => {
        // Defensive: maxGenerate is clamped to topN upstream, but the guard applies here too.
        // minTotal = min(5+1, 3) = 3 → "3–3"
        const rendered = buildScopeAssessmentSection(3, 5);
        expect(rendered).toContain("3–3");
        expect(rendered).not.toMatch(/\b[6-9]–3\b/);
    });
    it("embeds UI/E2E confirmation step when isUIOnly=true", () => {
        const rendered = buildScopeAssessmentSection(10, 3, true);
        expect(rendered).toContain("frontend-only PR");
        expect(rendered).toContain("100% UI/E2E");
        expect(rendered).not.toContain("Step D — Determine UI vs backend split");
    });
    it("embeds UI vs backend split step when isUIOnly=false", () => {
        const rendered = buildScopeAssessmentSection(10, 3, false);
        expect(rendered).toContain("Step D — Determine UI vs backend split");
        expect(rendered).not.toContain("UI/E2E confirmation");
    });
    it("uses defaults matching MAX_RECOMMENDATIONS and MAX_TESTS_TO_GENERATE", () => {
        // Calling with no arguments must not throw and must render the Budget Plan prompt.
        const rendered = buildScopeAssessmentSection();
        expect(rendered.length).toBeGreaterThan(0);
        expect(rendered).toContain("Budget Plan");
    });
});
|