@skyramp/mcp 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +146 -27
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +202 -5
- package/build/prompts/testbot/testbot-prompts.js +10 -9
- package/build/services/TestDiscoveryService.js +417 -58
- package/build/services/TestDiscoveryService.test.js +361 -0
- package/build/tools/test-management/actionsTool.js +4 -1
- package/build/tools/test-management/analyzeChangesTool.js +76 -9
- package/build/tools/test-management/analyzeTestHealthTool.js +6 -2
- package/build/types/RepositoryAnalysis.js +1 -0
- package/build/types/TestAnalysis.js +6 -1
- package/build/utils/routeParsers.js +7 -0
- package/build/utils/routeParsers.test.js +29 -1
- package/package.json +1 -1
|
@@ -2,6 +2,7 @@ import * as crypto from "crypto";
|
|
|
2
2
|
import { AnalysisScope, isDiff, } from "../../types/RepositoryAnalysis.js";
|
|
3
3
|
import { WorkspaceAuthType, getDefaultAuthHeader, AUTH_MIDDLEWARE_PATTERNS_STR } from "../../utils/workspaceAuth.js";
|
|
4
4
|
import { logger } from "../../utils/logger.js";
|
|
5
|
+
import { extractResourceFromPath } from "../../utils/routeParsers.js";
|
|
5
6
|
import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildTestPatternGuidelines, buildTestQualityCriteria, buildFewShotExamples, buildVerificationChecklist, buildGenerationRules, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, MAX_CRITICAL_TESTS, } from "./recommendationSections.js";
|
|
6
7
|
import { CATEGORY_PRIORITY, TEST_CATEGORIES } from "../../types/TestRecommendation.js";
|
|
7
8
|
import { buildScopeAssessmentSection, isFrontendFile } from "./scopeAssessment.js";
|
|
@@ -10,15 +11,16 @@ function formatTestLocations(locs) {
|
|
|
10
11
|
if (entries.length === 0)
|
|
11
12
|
return "";
|
|
12
13
|
const rows = entries.map(([type, files]) => `| ${type.padEnd(12)} | ${files} |`).join("\n");
|
|
13
|
-
return ("\n**Existing Skyramp
|
|
14
|
+
return ("\n**Existing test coverage (Skyramp + external):**\n" +
|
|
14
15
|
"| Test type | File (covers: endpoints) |\n" +
|
|
15
16
|
"|--------------|---------------------------------------------------------|\n" +
|
|
16
17
|
rows + "\n\n" +
|
|
17
18
|
"**Deduplication rule (apply this table before generating anything):**\n" +
|
|
18
|
-
"-
|
|
19
|
-
"-
|
|
19
|
+
"- `[external]` tests: if a resource is covered by an `[external]` test, do NOT create a new test for the same HTTP method + resource + test type (e.g. an external integration test covering `POST /orders` blocks any new `POST` integration test on the `orders` resource). Do NOT attempt to UPDATE, REGENERATE, or DELETE external tests — they are user-maintained.\n" +
|
|
20
|
+
"- `[skyramp]` contract test: if the HTTP method + path already appears in a `[skyramp]` `covers:` entry of type `contract` → UPDATE that file, do NOT create a new one.\n" +
|
|
21
|
+
"- `[skyramp]` integration test: if the primary (last mutating) step's method + path already appears in a `[skyramp]` `covers:` entry of type `integration` → UPDATE, do NOT create a new one.\n" +
|
|
20
22
|
"- UI/E2E test: always create a new file — traces are distinct recordings.\n" +
|
|
21
|
-
"For contract and integration tests: if in doubt, prefer UPDATE over creating a duplicate.");
|
|
23
|
+
"For `[skyramp]` contract and integration tests: if in doubt, prefer UPDATE over creating a duplicate.");
|
|
22
24
|
}
|
|
23
25
|
// ── Priority-tier ordering (replaces numeric CATEGORY_WEIGHTS) ──
|
|
24
26
|
// Categories map to HIGH / MEDIUM / LOW tiers.
|
|
@@ -49,24 +51,87 @@ function computeTiebreakerSeed(endpoints, diffFiles) {
|
|
|
49
51
|
return crypto.createHash("sha256").update(canonical).digest("hex").slice(0, 8);
|
|
50
52
|
}
|
|
51
53
|
// ── Helpers ──
|
|
52
|
-
|
|
53
|
-
function
|
|
54
|
-
const segments = path.split("/").filter(Boolean);
|
|
55
|
-
const nonParam = segments.filter(s => !s.startsWith("{") && !SKIP_SEGMENTS_SET.has(s));
|
|
56
|
-
return nonParam[nonParam.length - 1] || "unknown";
|
|
57
|
-
}
|
|
58
|
-
function scenarioCoverageKey(scenario) {
|
|
54
|
+
/** Resolve the primary step and inferred test type for a scenario. */
|
|
55
|
+
function resolvePrimaryStep(scenario) {
|
|
59
56
|
const testType = scenario.testType ?? (scenario.steps.length === 1 ? "contract" : "integration");
|
|
60
57
|
const mutatingSteps = scenario.steps.filter(st => ["POST", "PUT", "PATCH", "DELETE"].includes(st.method));
|
|
61
58
|
// Use the last mutating step — earlier steps are typically prerequisite setup
|
|
62
59
|
// (e.g. POST /products before PATCH /orders), while the final mutation is the
|
|
63
60
|
// primary action under test.
|
|
64
61
|
const primaryStep = mutatingSteps[mutatingSteps.length - 1] ?? scenario.steps[scenario.steps.length - 1];
|
|
62
|
+
return { primaryStep, testType };
|
|
63
|
+
}
|
|
64
|
+
function scenarioCoverageKey(scenario) {
|
|
65
|
+
const { primaryStep, testType } = resolvePrimaryStep(scenario);
|
|
65
66
|
const resource = extractResourceFromPath(primaryStep?.path ?? "");
|
|
66
67
|
return `${resource}::${testType}`;
|
|
67
68
|
}
|
|
69
|
+
/**
|
|
70
|
+
* Method-aware coverage key for external test dedup.
|
|
71
|
+
* Unlike scenarioCoverageKey (resource::testType), this includes the HTTP method
|
|
72
|
+
* so that e.g. an external test covering "GET /orders" doesn't block generating
|
|
73
|
+
* a test for "PUT /orders" — a different operation on the same resource.
|
|
74
|
+
*/
|
|
75
|
+
function externalDedupKey(scenario) {
|
|
76
|
+
const { primaryStep, testType } = resolvePrimaryStep(scenario);
|
|
77
|
+
const method = primaryStep?.method ?? "GET";
|
|
78
|
+
const resource = extractResourceFromPath(primaryStep?.path ?? "");
|
|
79
|
+
return `${method}::${resource}::${testType}`;
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* Build a set of coverage keys from external (non-Skyramp) tests.
|
|
83
|
+
* Parses `testLocations` entries tagged with `[external]` to extract the
|
|
84
|
+
* method-aware `METHOD::resource::testType` keys they cover. This allows
|
|
85
|
+
* programmatic filtering of scenarios that duplicate external test coverage
|
|
86
|
+
* while preserving distinct operations on the same resource (for example,
|
|
87
|
+
* `GET::orders::integration` vs `PUT::orders::integration`) — complementing
|
|
88
|
+
* the prompt-level Step 0 dedup instructions with an algorithmic guarantee.
|
|
89
|
+
*
|
|
90
|
+
* Format of testLocations: Record<testType, "file1 [external] (covers: GET /api/v1/orders, POST /api/v1/orders), file2 (covers: ...)">
|
|
91
|
+
*/
|
|
92
|
+
function buildExternalCoverageSet(testLocations) {
|
|
93
|
+
const coverage = new Set();
|
|
94
|
+
let externalWithoutCoverage = 0;
|
|
95
|
+
for (const [testType, fileList] of Object.entries(testLocations)) {
|
|
96
|
+
// Count external files with no covers clause — these fall back to prompt-level dedup only
|
|
97
|
+
const externalCount = (fileList.match(/\[external\]/g) || []).length;
|
|
98
|
+
const coveredCount = (fileList.match(/\[external\]\s*\(covers:/g) || []).length;
|
|
99
|
+
externalWithoutCoverage += externalCount - coveredCount;
|
|
100
|
+
// Match all "[external] (covers: ...)" segments in the file list string.
|
|
101
|
+
// Each match captures the covers clause for one external test file.
|
|
102
|
+
for (const m of fileList.matchAll(/\[external\]\s*\(covers:\s*([^)]+)\)/g)) {
|
|
103
|
+
const endpoints = m[1].split(",").map(e => e.trim());
|
|
104
|
+
for (const ep of endpoints) {
|
|
105
|
+
// ep is "METHOD /path" e.g. "GET /api/v1/orders/{order_id}"
|
|
106
|
+
const spaceIdx = ep.indexOf(" ");
|
|
107
|
+
if (spaceIdx < 0)
|
|
108
|
+
continue;
|
|
109
|
+
const method = ep.slice(0, spaceIdx).toUpperCase();
|
|
110
|
+
const epPath = ep.slice(spaceIdx + 1);
|
|
111
|
+
const resource = extractResourceFromPath(epPath);
|
|
112
|
+
if (resource !== "unknown") {
|
|
113
|
+
// Method-aware key: "GET::orders::integration" — matches externalDedupKey() format.
|
|
114
|
+
// When testType is "unknown" (heuristic failed), emit keys for both integration and
|
|
115
|
+
// contract to avoid silent misses — conservative over-blocking is preferable.
|
|
116
|
+
if (testType === "unknown") {
|
|
117
|
+
coverage.add(`${method}::${resource}::integration`);
|
|
118
|
+
coverage.add(`${method}::${resource}::contract`);
|
|
119
|
+
}
|
|
120
|
+
else {
|
|
121
|
+
coverage.add(`${method}::${resource}::${testType}`);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
if (externalWithoutCoverage > 0) {
|
|
128
|
+
logger.info(`${externalWithoutCoverage} external test file(s) have no extractable endpoint coverage — ` +
|
|
129
|
+
`programmatic dedup skipped for these; Step 0 semantic check is the fallback.`);
|
|
130
|
+
}
|
|
131
|
+
return coverage;
|
|
132
|
+
}
|
|
68
133
|
// ── Execution Plan (replaces pre-ranked + scenarios + heuristic sections) ──
|
|
69
|
-
function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject = false, isFrontendOnlyProject = false) {
|
|
134
|
+
function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject = false, isFrontendOnlyProject = false, externalCoverage = new Set()) {
|
|
70
135
|
// Full-repo mode only — percentage-based UI/E2E slot targets (15% each, floor 1).
|
|
71
136
|
const rawE2E = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
|
|
72
137
|
const rawUI = isFrontendProject ? Math.max(1, Math.round(topN * 0.15)) : 0;
|
|
@@ -90,11 +155,22 @@ function buildFullRepoRecommendations(scored, topN, baseUrl, authHeaderValue, au
|
|
|
90
155
|
const TYPE_LABEL = {
|
|
91
156
|
e2e: "E2E", ui: "UI", integration: "Integration", contract: "Contract",
|
|
92
157
|
};
|
|
158
|
+
// Filter out scenarios already covered by external tests before slicing.
|
|
159
|
+
const scoredFiltered = externalCoverage.size > 0
|
|
160
|
+
? scored.filter(item => {
|
|
161
|
+
const key = externalDedupKey(item.scenario);
|
|
162
|
+
if (externalCoverage.has(key)) {
|
|
163
|
+
logger.info(`External dedup (full-repo): skipping "${item.scenario.scenarioName}" (${key})`);
|
|
164
|
+
return false;
|
|
165
|
+
}
|
|
166
|
+
return true;
|
|
167
|
+
})
|
|
168
|
+
: scored;
|
|
93
169
|
// For full-stack repos, carve out E2E and UI slots before filling with backend tests.
|
|
94
170
|
const backendSlotCount = isFrontendProject
|
|
95
171
|
? Math.max(0, topN - minE2ESlots - minUISlots)
|
|
96
172
|
: topN;
|
|
97
|
-
const allItems =
|
|
173
|
+
const allItems = scoredFiltered.slice(0, backendSlotCount);
|
|
98
174
|
const byType = new Map();
|
|
99
175
|
for (const t of TYPE_ORDER)
|
|
100
176
|
byType.set(t, []);
|
|
@@ -304,7 +380,7 @@ ${repoSupplementNote}
|
|
|
304
380
|
Cross-check every endpoint path against the Router Mounting / Nesting section in the analysis above. Sub-routers may be mounted at nested prefixes — e.g. a reviews router with \`@router.get("/")\` may actually be \`GET /api/v1/products/{product_id}/reviews\` if mounted under that prefix. Always use the fully-qualified nested path in tool calls, not the path as it appears in the route file alone.
|
|
305
381
|
|
|
306
382
|
**Existing test files (check before assigning output filenames):**
|
|
307
|
-
See the Existing Tests section above. If a recommendation's primary resource already has a test file listed there, prefer passing an explicit \`output\` filename (e.g. \`output: "orders_integration_test.py"\`) to update the existing file rather than creating a duplicate.
|
|
383
|
+
See the Existing Tests section above. If a recommendation's primary resource already has a \`[skyramp]\` test file listed there, prefer passing an explicit \`output\` filename (e.g. \`output: "orders_integration_test.py"\`) to update the existing file rather than creating a duplicate. Do NOT update \`[external]\` test files — they are user-maintained.
|
|
308
384
|
|
|
309
385
|
Before filling in tool call parameters for each item, use the analysis data already provided above (endpoint interactions, source context) first. Only read the route handler source code directly when the analysis data does not contain the specific value you need:
|
|
310
386
|
- Required request body fields (POST/PUT/PATCH) — use field names from the analysis interactions; read source only if they show \`{}\` or are missing
|
|
@@ -324,7 +400,7 @@ ${buildTestQualityCriteria()}
|
|
|
324
400
|
| Code Insight | Derived from actual implementation (spotted middleware pattern, N+1 risk, unique constraint)? → bump up one tier |
|
|
325
401
|
</enrichment_notes>`;
|
|
326
402
|
}
|
|
327
|
-
function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false) {
|
|
403
|
+
function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges = false, hasTraces = false, externalCoverage = new Set(), relevantExternalTestPaths = []) {
|
|
328
404
|
const frontendUrl = "<frontend_url>";
|
|
329
405
|
// Slot allocation:
|
|
330
406
|
// - UI-only PR: all GENERATE slots are UI placeholders (no pre-ranked backend scenarios)
|
|
@@ -335,8 +411,21 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
335
411
|
: hasFrontendChanges
|
|
336
412
|
? Math.max(0, maxGen - 1)
|
|
337
413
|
: maxGen;
|
|
338
|
-
|
|
339
|
-
|
|
414
|
+
// Filter out scenarios whose primary method + resource + test type is already covered by external tests.
|
|
415
|
+
// Method-aware: an external test covering GET /orders won't block PUT /orders scenarios.
|
|
416
|
+
// This is the programmatic complement to the prompt-level Step 0 dedup instructions.
|
|
417
|
+
const scoredAfterExternalDedup = externalCoverage.size > 0
|
|
418
|
+
? scored.filter(item => {
|
|
419
|
+
const key = externalDedupKey(item.scenario);
|
|
420
|
+
if (externalCoverage.has(key)) {
|
|
421
|
+
logger.info(`External dedup: skipping "${item.scenario.scenarioName}" (${key}) — covered by external test`);
|
|
422
|
+
return false;
|
|
423
|
+
}
|
|
424
|
+
return true;
|
|
425
|
+
})
|
|
426
|
+
: scored;
|
|
427
|
+
const generateItems = scoredAfterExternalDedup.slice(0, Math.min(backendGenerateCount, scoredAfterExternalDedup.length));
|
|
428
|
+
const rawAdditionalItems = scoredAfterExternalDedup.slice(backendGenerateCount, topN);
|
|
340
429
|
// Filter additional items whose primary resource + test type already appear in GENERATE
|
|
341
430
|
const generatedCoverage = new Set(generateItems.map(item => scenarioCoverageKey(item.scenario)));
|
|
342
431
|
const additionalItems = rawAdditionalItems.filter(item => !generatedCoverage.has(scenarioCoverageKey(item.scenario)));
|
|
@@ -485,20 +574,24 @@ function buildExecutionPlan(scored, maxGen, topN, baseUrl, authHeaderValue, auth
|
|
|
485
574
|
Derive scenario names and steps from the actual changed frontend files. If your Budget Plan calls for 0% UI/E2E, omit this entirely.` : "";
|
|
486
575
|
const supplementNote = `\n**If your Budget Plan total exceeds the pre-ranked items listed above:** draft additional tests from source-code enrichment (Step 1). For each new or changed endpoint, identify boundary or variation scenarios — formula parameters, search/filter constraints, required field validation. Only after exhausting PR-specific scenarios, add generic patterns (auth boundary → 401, non-existent ID → 404). Do NOT supplement with tests whose endpoint + test type match a GENERATE item.`;
|
|
487
576
|
// ── PR / branch-diff mode: execution plan ────────────────────────────────
|
|
577
|
+
const externalTestFilesList = relevantExternalTestPaths.length > 0
|
|
578
|
+
? `**Read these external test files first** (paths are relative to the \`repositoryPath\` you passed to \`skyramp_analyze_changes\` — prepend it to get the absolute path). Determine exactly which HTTP methods + paths each one covers. This is the definitive source of truth for external coverage:\n${relevantExternalTestPaths.map(p => `- \`${p}\``).join("\n")}\n\n`
|
|
579
|
+
: "";
|
|
488
580
|
return `## Execution Plan
|
|
489
581
|
Seed: ${seed} | Endpoints: ${endpointCount} | Max: ${maxGen} generate + up to ${Math.max(topN - maxGen, 0)} additional (your Budget Plan determines the exact count)
|
|
490
582
|
|
|
491
583
|
${buildScopeAssessmentSection(topN, maxGen, isUIOnlyPR)}
|
|
492
584
|
|
|
493
|
-
**Step 0 —
|
|
494
|
-
For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
|
|
495
|
-
-
|
|
585
|
+
**Step 0 — External test coverage verification (before executing anything)**
|
|
586
|
+
${externalTestFilesList}For every GENERATE item below, check its endpoint path and test type against the Existing Tests list (further down in the prompt).
|
|
587
|
+
- **\`[external]\` tests**: If the endpoint is already covered by an \`[external]\` test of the same type → skip the resource entirely (do NOT create or update). Backfill from ADDITIONAL using the priority order below:
|
|
496
588
|
1. **BUG-CATCHING TESTS FIRST (CRITICAL)**: If source code analysis revealed a bug, logic error, or incorrect formula (e.g. discount math adding instead of subtracting, off-by-one errors, missing validation), CREATE A TEST THAT EXPOSES IT. The test SHOULD FAIL — that's the point. Document the bug. Example: if discount formula is wrong, test with discount=20% and assert correct math. If no bug found, skip to #2.
|
|
497
589
|
2. **PR-endpoint edge cases**: Look for integration test candidates covering error paths, boundary values, or alternative scenarios for the SAME endpoints changed in the PR diff. If no suitable candidate exists in ADDITIONAL, derive one from your source-code enrichment findings.
|
|
498
590
|
3. **Same-resource other scenarios**: Other HTTP methods or flows on the same resource group touched by the PR.
|
|
499
591
|
4. **Cross-resource workflows involving the PR endpoint**: Integration scenarios that include the PR's changed endpoint as one of the steps.
|
|
500
|
-
5. **Unrelated endpoint coverage (last resort)**: Tests for endpoints with no connection to the PR diff, only when ALL options above have been exhausted
|
|
592
|
+
5. **Unrelated endpoint coverage (last resort)**: Tests for endpoints with no connection to the PR diff, only when ALL options above have been exhausted.
|
|
501
593
|
**Avoid backfilling with a test for a completely unrelated resource (e.g. \`POST /reviews\` when the PR only changes \`/orders\`) if any PR-endpoint edge-case integration test is feasible.**
|
|
594
|
+
- **Contract tests (\`[skyramp]\`)**: If an existing \`[skyramp]\` contract test already covers that resource path → UPDATE the existing test file instead of creating a new one. A new test case is a new test even if the file already exists — count it toward \`newTestsCreated\`.
|
|
502
595
|
- **Integration/scenario tests**: Always generate as a new file via the scenario pipeline, even if an existing integration test covers the same resource. A new multi-step scenario is a distinct test. Count it toward \`newTestsCreated\`.
|
|
503
596
|
- **UI tests**: Always generate as a new file. Count toward \`newTestsCreated\`.
|
|
504
597
|
|
|
@@ -589,6 +682,8 @@ ${supplementNote}
|
|
|
589
682
|
- **Only drop** a previous recommendation if its target endpoint was removed, its business logic changed, or it is now covered by a generated test.
|
|
590
683
|
- **Only add** new recommendations for code paths introduced since the last run.`;
|
|
591
684
|
}
|
|
685
|
+
// Exported for testing — these are the core dedup primitives.
|
|
686
|
+
export { buildExternalCoverageSet, externalDedupKey };
|
|
592
687
|
export function buildRecommendationPrompt(analysis, analysisScope = AnalysisScope.FullRepo, topN = MAX_RECOMMENDATIONS, prContext, workspaceAuthHeader, workspaceAuthType, workspaceAuthScheme, maxGenerateOverride, sessionId) {
|
|
593
688
|
const isDiffScope = isDiff(analysisScope);
|
|
594
689
|
const diffContext = analysis.branchDiffContext;
|
|
@@ -653,7 +748,7 @@ Output should be concise and immediately actionable.`
|
|
|
653
748
|
## Source Priority
|
|
654
749
|
When information conflicts, prefer: **Traces** (actual behavior) > **Code** (implemented behavior) > **Spec/Docs** (documented behavior).
|
|
655
750
|
`;
|
|
656
|
-
// Compact fingerprint of
|
|
751
|
+
// Compact fingerprint of tests already covering endpoints in this repo (Skyramp + external).
|
|
657
752
|
// Re-derived fresh each run from test files on disk — no separate persistence needed.
|
|
658
753
|
const testLocations = analysis.existingTests?.testLocations ?? {};
|
|
659
754
|
const testFingerprint = (() => {
|
|
@@ -663,13 +758,17 @@ When information conflicts, prefer: **Traces** (actual behavior) > **Code** (imp
|
|
|
663
758
|
// Each value is a comma-joined list of "file (covers: ep1, ep2)" entries — one per file.
|
|
664
759
|
// Count files by splitting on "), " boundaries (each entry ends with ")").
|
|
665
760
|
let totalFiles = 0;
|
|
761
|
+
let externalCount = 0;
|
|
666
762
|
const byType = new Map();
|
|
667
763
|
for (const [type, fileList] of entries) {
|
|
668
764
|
// Use matchAll to extract covers from ALL files of this type, not just the first.
|
|
669
765
|
const allEndpoints = [...fileList.matchAll(/covers:\s*([^)]+)/g)].map(m => m[1].trim());
|
|
670
|
-
// Count files: strip "(covers: ...)" clauses then split on ", " to count entries,
|
|
766
|
+
// Count files: strip "[external]", "[skyramp]", and "(covers: ...)" clauses then split on ", " to count entries,
|
|
671
767
|
// correctly handling both files-with-covers and files-without-covers (e.g. UI tests).
|
|
672
|
-
|
|
768
|
+
const strippedList = fileList.replace(/\s*\[(?:external|skyramp)\]/g, "").replace(/\s*\(covers:[^)]*\)/g, "");
|
|
769
|
+
totalFiles += strippedList.split(", ").filter(s => s.trim().length > 0).length;
|
|
770
|
+
// Count external files from [external] annotations
|
|
771
|
+
externalCount += (fileList.match(/\[external\]/g) || []).length;
|
|
673
772
|
if (!byType.has(type))
|
|
674
773
|
byType.set(type, []);
|
|
675
774
|
byType.get(type).push(...allEndpoints);
|
|
@@ -677,7 +776,11 @@ When information conflicts, prefer: **Traces** (actual behavior) > **Code** (imp
|
|
|
677
776
|
const lines = [...byType.entries()]
|
|
678
777
|
.filter(([, eps]) => eps.length > 0)
|
|
679
778
|
.map(([type, eps]) => ` ${type}: ${[...new Set(eps)].join(", ")}`);
|
|
680
|
-
|
|
779
|
+
const skyrampCount = totalFiles - externalCount;
|
|
780
|
+
const breakdown = externalCount > 0
|
|
781
|
+
? `${skyrampCount} Skyramp + ${externalCount} external`
|
|
782
|
+
: `${totalFiles} files`;
|
|
783
|
+
return `\nTests already covering endpoints in this repo (${breakdown}):\n${lines.join("\n")}\n(Use this to focus on coverage gaps. External tests block new recommendations but cannot be updated.)`;
|
|
681
784
|
})();
|
|
682
785
|
const repoContext = `
|
|
683
786
|
Repository: ${analysis.metadata.repositoryName}
|
|
@@ -789,10 +892,21 @@ ${detailBlocks}
|
|
|
789
892
|
const projectType = analysis.projectClassification.projectType;
|
|
790
893
|
const isFrontendProject = projectType === "full-stack" || projectType === "frontend";
|
|
791
894
|
const isFrontendOnlyProject = projectType === "frontend";
|
|
792
|
-
|
|
895
|
+
const externalCoverageFullRepo = buildExternalCoverageSet(testLocations);
|
|
896
|
+
if (externalCoverageFullRepo.size > 0) {
|
|
897
|
+
logger.info(`External test coverage keys (full-repo): ${[...externalCoverageFullRepo].join(", ")}`);
|
|
898
|
+
}
|
|
899
|
+
mainSection = buildFullRepoRecommendations(scored, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject, isFrontendOnlyProject, externalCoverageFullRepo);
|
|
793
900
|
}
|
|
794
901
|
else if (isDiffScope && (isUIOnlyPR || scored.length > 0)) {
|
|
795
|
-
|
|
902
|
+
// Build external coverage set for programmatic dedup — prevents recommending
|
|
903
|
+
// tests that duplicate existing non-Skyramp tests at the METHOD::resource::testType
|
|
904
|
+
// level, so different methods on the same resource (e.g. GET vs PUT) remain distinct.
|
|
905
|
+
const externalCoverage = buildExternalCoverageSet(testLocations);
|
|
906
|
+
if (externalCoverage.size > 0) {
|
|
907
|
+
logger.info(`External test coverage keys: ${[...externalCoverage].join(", ")}`);
|
|
908
|
+
}
|
|
909
|
+
mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, externalCoverage, analysis.existingTests.relevantExternalTestPaths ?? []);
|
|
796
910
|
}
|
|
797
911
|
else {
|
|
798
912
|
mainSection = `
|
|
@@ -923,6 +1037,11 @@ ${interactionSection}
|
|
|
923
1037
|
|
|
924
1038
|
<existing_tests>
|
|
925
1039
|
## Existing Tests
|
|
1040
|
+
|
|
1041
|
+
**Two categories of test files (identified by tag):**
|
|
1042
|
+
- \`[skyramp]\` — generated by Skyramp tools. You may UPDATE these when the covered endpoint changes.
|
|
1043
|
+
- \`[external]\` — user-written tests (pytest, jest, junit, etc.) maintained outside Skyramp. Treat as read-only: use them to determine existing coverage but NEVER update, regenerate, or delete them.
|
|
1044
|
+
|
|
926
1045
|
- Frameworks: ${analysis.existingTests.frameworks.join(", ") || "none"}
|
|
927
1046
|
${formatTestLocations(analysis.existingTests.testLocations)}
|
|
928
1047
|
</existing_tests>
|
|
@@ -2,7 +2,7 @@ jest.mock("@skyramp/skyramp", () => ({
|
|
|
2
2
|
WorkspaceConfigManager: { create: jest.fn() },
|
|
3
3
|
}));
|
|
4
4
|
import { TestType } from "../../types/TestTypes.js";
|
|
5
|
-
import { buildRecommendationPrompt } from "./test-recommendation-prompt.js";
|
|
5
|
+
import { buildRecommendationPrompt, buildExternalCoverageSet, externalDedupKey } from "./test-recommendation-prompt.js";
|
|
6
6
|
import { PATH_PARAM_UUID_GUIDANCE, MAX_TESTS_TO_GENERATE, buildTestQualityCriteria, buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildFewShotExamples, buildVerificationChecklist, } from "./recommendationSections.js";
|
|
7
7
|
import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
|
|
8
8
|
// ---------------------------------------------------------------------------
|
|
@@ -1123,7 +1123,7 @@ describe("buildRecommendationPrompt — reduced over-prompting", () => {
|
|
|
1123
1123
|
describe("buildRecommendationPrompt — testFingerprint", () => {
|
|
1124
1124
|
it("omits fingerprint when no existing test locations", () => {
|
|
1125
1125
|
const prompt = buildRecommendationPrompt(minimalAnalysis());
|
|
1126
|
-
expect(prompt).not.toContain("
|
|
1126
|
+
expect(prompt).not.toContain("Tests already covering endpoints in this repo");
|
|
1127
1127
|
});
|
|
1128
1128
|
it("includes fingerprint with file count and endpoints when testLocations present", () => {
|
|
1129
1129
|
const analysis = minimalAnalysis({
|
|
@@ -1138,7 +1138,7 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
|
|
|
1138
1138
|
},
|
|
1139
1139
|
});
|
|
1140
1140
|
const prompt = buildRecommendationPrompt(analysis);
|
|
1141
|
-
expect(prompt).toContain("
|
|
1141
|
+
expect(prompt).toContain("Tests already covering endpoints in this repo (2 files)");
|
|
1142
1142
|
expect(prompt).toContain("contract: GET /api/items, POST /api/items");
|
|
1143
1143
|
expect(prompt).toContain("integration: POST /api/orders");
|
|
1144
1144
|
});
|
|
@@ -1155,7 +1155,7 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
|
|
|
1155
1155
|
});
|
|
1156
1156
|
const prompt = buildRecommendationPrompt(analysis);
|
|
1157
1157
|
// File count should be 2, not 1
|
|
1158
|
-
expect(prompt).toContain("
|
|
1158
|
+
expect(prompt).toContain("Tests already covering endpoints in this repo (2 files)");
|
|
1159
1159
|
});
|
|
1160
1160
|
it("omits types with no endpoint coverage from fingerprint lines (no trailing 'ui: ' line)", () => {
|
|
1161
1161
|
const analysis = minimalAnalysis({
|
|
@@ -1170,9 +1170,206 @@ describe("buildRecommendationPrompt — testFingerprint", () => {
|
|
|
1170
1170
|
},
|
|
1171
1171
|
});
|
|
1172
1172
|
const prompt = buildRecommendationPrompt(analysis);
|
|
1173
|
-
expect(prompt).toContain("
|
|
1173
|
+
expect(prompt).toContain("Tests already covering endpoints in this repo (2 files)");
|
|
1174
1174
|
expect(prompt).toContain("integration: POST /api/orders");
|
|
1175
1175
|
// UI type has no endpoints — must not emit a blank "ui: " line
|
|
1176
1176
|
expect(prompt).not.toMatch(/^\s*ui:\s*$/m);
|
|
1177
1177
|
});
|
|
1178
|
+
it("distinguishes external tests from Skyramp tests in fingerprint", () => {
|
|
1179
|
+
const analysis = minimalAnalysis({
|
|
1180
|
+
existingTests: {
|
|
1181
|
+
frameworks: ["pytest"],
|
|
1182
|
+
coverage: { unit: 0, integration: 1, e2e: 0, ui: 0, load: 0, contract: 1, smoke: 0 },
|
|
1183
|
+
testLocations: {
|
|
1184
|
+
contract: "test_items_contract.py (covers: GET /api/items)",
|
|
1185
|
+
integration: "tests/test_api.py [external] (covers: POST /api/orders)",
|
|
1186
|
+
},
|
|
1187
|
+
hasCoverageReports: false,
|
|
1188
|
+
},
|
|
1189
|
+
});
|
|
1190
|
+
const prompt = buildRecommendationPrompt(analysis);
|
|
1191
|
+
expect(prompt).toContain("1 Skyramp + 1 external");
|
|
1192
|
+
expect(prompt).toContain("cannot be updated");
|
|
1193
|
+
});
|
|
1194
|
+
it("uses inclusive header for test coverage table", () => {
|
|
1195
|
+
const analysis = minimalAnalysis({
|
|
1196
|
+
existingTests: {
|
|
1197
|
+
frameworks: ["pytest"],
|
|
1198
|
+
coverage: { unit: 0, integration: 0, e2e: 0, ui: 0, load: 0, contract: 1, smoke: 0 },
|
|
1199
|
+
testLocations: {
|
|
1200
|
+
contract: "test_items_contract.py (covers: GET /api/items)",
|
|
1201
|
+
},
|
|
1202
|
+
hasCoverageReports: false,
|
|
1203
|
+
},
|
|
1204
|
+
});
|
|
1205
|
+
const prompt = buildRecommendationPrompt(analysis);
|
|
1206
|
+
expect(prompt).toContain("Existing test coverage (Skyramp + external)");
|
|
1207
|
+
expect(prompt).not.toContain("Existing Skyramp test coverage");
|
|
1208
|
+
});
|
|
1209
|
+
it("includes external test dedup rule that blocks CREATE", () => {
|
|
1210
|
+
const analysis = minimalAnalysis({
|
|
1211
|
+
existingTests: {
|
|
1212
|
+
frameworks: ["pytest"],
|
|
1213
|
+
coverage: { unit: 0, integration: 1, e2e: 0, ui: 0, load: 0, contract: 0, smoke: 0 },
|
|
1214
|
+
testLocations: {
|
|
1215
|
+
integration: "tests/test_api.py [external] (covers: POST /api/orders)",
|
|
1216
|
+
},
|
|
1217
|
+
hasCoverageReports: false,
|
|
1218
|
+
},
|
|
1219
|
+
});
|
|
1220
|
+
const prompt = buildRecommendationPrompt(analysis);
|
|
1221
|
+
expect(prompt).toContain("[external]");
|
|
1222
|
+
expect(prompt).toContain("do NOT create a new test");
|
|
1223
|
+
expect(prompt).toContain("Do NOT attempt to UPDATE, REGENERATE, or DELETE external tests");
|
|
1224
|
+
});
|
|
1225
|
+
});
|
|
1226
|
+
// ---------------------------------------------------------------------------
|
|
1227
|
+
// Tests — External test dedup primitives
|
|
1228
|
+
// ---------------------------------------------------------------------------
|
|
1229
|
+
// Exercises buildExternalCoverageSet: given a testLocations map
// (test type -> location string), it is expected to return a set of
// "METHOD::resource::testType" keys for entries tagged [external].
describe("buildExternalCoverageSet", () => {
  // Asserts that `covered` contains every key in `keys` and nothing else.
  const expectExactKeys = (covered, keys) => {
    for (const key of keys) {
      expect(covered.has(key)).toBe(true);
    }
    expect(covered.size).toBe(keys.length);
  };

  it("parses single external test with one endpoint", () => {
    const covered = buildExternalCoverageSet({
      integration: "tests/test_api.py [external] (covers: GET /api/v1/orders)",
    });
    expectExactKeys(covered, ["GET::orders::integration"]);
  });

  it("parses multiple endpoints in one covers clause", () => {
    const covered = buildExternalCoverageSet({
      integration: "tests/test_api.py [external] (covers: GET /api/v1/orders, POST /api/v1/orders, DELETE /api/v1/orders/{id})",
    });
    expectExactKeys(covered, [
      "GET::orders::integration",
      "POST::orders::integration",
      "DELETE::orders::integration",
    ]);
  });

  it("parses multiple external files in one test type", () => {
    const covered = buildExternalCoverageSet({
      integration: "tests/test_orders.py [external] (covers: GET /api/orders), tests/test_products.py [external] (covers: POST /api/products)",
    });
    expectExactKeys(covered, [
      "GET::orders::integration",
      "POST::products::integration",
    ]);
  });

  it("handles multiple test types", () => {
    const covered = buildExternalCoverageSet({
      integration: "tests/test_api.py [external] (covers: GET /api/orders)",
      contract: "tests/test_contract.py [external] (covers: GET /api/orders)",
    });
    // Same endpoint, but the test type is part of the key, so two entries.
    expectExactKeys(covered, [
      "GET::orders::integration",
      "GET::orders::contract",
    ]);
  });

  it("emits both integration and contract keys for unknown test type", () => {
    const covered = buildExternalCoverageSet({
      unknown: "tests/test_misc.py [external] (covers: GET /api/items)",
    });
    expectExactKeys(covered, [
      "GET::items::integration",
      "GET::items::contract",
    ]);
  });

  it("ignores Skyramp tests (no [external] tag)", () => {
    const covered = buildExternalCoverageSet({
      contract: "test_items_contract.py (covers: GET /api/items)",
    });
    expectExactKeys(covered, []);
  });

  it("ignores external tests without covers clause", () => {
    const covered = buildExternalCoverageSet({
      integration: "tests/test_api.py [external]",
    });
    expectExactKeys(covered, []);
  });

  it("returns empty set for empty testLocations", () => {
    const covered = buildExternalCoverageSet({});
    expectExactKeys(covered, []);
  });

  it("skips endpoints with unparseable paths", () => {
    const covered = buildExternalCoverageSet({
      integration: "tests/test_api.py [external] (covers: GET )",
    });
    // "GET " → method="GET", path="" → resource="unknown" → skipped
    expectExactKeys(covered, []);
  });

  it("strips path parameters from resource extraction", () => {
    const covered = buildExternalCoverageSet({
      integration: "tests/test_api.py [external] (covers: PUT /api/v1/orders/{order_id})",
    });
    // {order_id} is a path param → skipped, resource is "orders"
    expectExactKeys(covered, ["PUT::orders::integration"]);
  });

  it("normalizes method to uppercase", () => {
    const covered = buildExternalCoverageSet({
      integration: "tests/test_api.py [external] (covers: get /api/orders)",
    });
    // Only membership of the upper-cased key is checked here (no size check).
    expect(covered.has("GET::orders::integration")).toBe(true);
  });
});
|
|
1309
|
+
// Exercises externalDedupKey: given a scenario object, it is expected to
// return a "METHOD::resource::testType" key that can be compared against the
// keys produced for external test coverage.
describe("externalDedupKey", () => {
  it("builds key from single-step contract scenario", () => {
    // A lone step with no explicit testType is expected to yield a contract key.
    const scenario = {
      scenarioName: "get_orders",
      description: "Get orders",
      category: "crud",
      priority: "high",
      steps: [
        { order: 1, method: "GET", path: "/api/v1/orders", description: "list orders", interactionType: "success", expectedStatusCode: 200 },
      ],
      chainingKeys: [],
      requiresAuth: false,
      estimatedComplexity: "simple",
    };
    expect(externalDedupKey(scenario)).toBe("GET::orders::contract");
  });

  it("builds key from multi-step integration scenario using last mutating step", () => {
    const scenario = {
      scenarioName: "create_and_update_order",
      description: "Create then update order",
      category: "workflow",
      priority: "high",
      steps: [
        { order: 1, method: "POST", path: "/api/v1/orders", description: "create order", interactionType: "success", expectedStatusCode: 201 },
        { order: 2, method: "PUT", path: "/api/v1/orders/{order_id}", description: "update order", interactionType: "success", expectedStatusCode: 200 },
        { order: 3, method: "GET", path: "/api/v1/orders/{order_id}", description: "verify", interactionType: "success", expectedStatusCode: 200 },
      ],
      chainingKeys: [],
      requiresAuth: false,
      estimatedComplexity: "moderate",
    };
    // Last mutating step is PUT /orders/{order_id} → resource "orders"
    expect(externalDedupKey(scenario)).toBe("PUT::orders::integration");
  });

  it("falls back to last step when no mutating methods present", () => {
    const scenario = {
      scenarioName: "get_items",
      description: "List and get items",
      category: "crud",
      priority: "medium",
      steps: [
        { order: 1, method: "GET", path: "/api/v1/items", description: "list items", interactionType: "success", expectedStatusCode: 200 },
        { order: 2, method: "GET", path: "/api/v1/items/{id}", description: "get item", interactionType: "success", expectedStatusCode: 200 },
      ],
      chainingKeys: [],
      requiresAuth: false,
      estimatedComplexity: "simple",
    };
    // No mutating steps → falls back to last step → GET /items/{id} → resource "items"
    expect(externalDedupKey(scenario)).toBe("GET::items::integration");
  });

  it("uses explicit testType when provided", () => {
    const scenario = {
      scenarioName: "get_orders_contract",
      description: "Contract test for orders",
      category: "crud",
      priority: "high",
      steps: [
        { order: 1, method: "GET", path: "/api/v1/orders", description: "list orders", interactionType: "success", expectedStatusCode: 200 },
        // Steps are numbered sequentially like the other multi-step fixtures;
        // this step previously duplicated `order: 1`.
        { order: 2, method: "POST", path: "/api/v1/orders", description: "create order", interactionType: "success", expectedStatusCode: 201 },
      ],
      chainingKeys: [],
      requiresAuth: false,
      estimatedComplexity: "simple",
      // Explicit type: the expected key is "contract" even though the scenario has two steps.
      testType: "contract",
    };
    expect(externalDedupKey(scenario)).toBe("POST::orders::contract");
  });
});
|
|
@@ -87,17 +87,19 @@ ${task1Section}
|
|
|
87
87
|
|
|
88
88
|
## Task 2: Generate New Tests
|
|
89
89
|
|
|
90
|
-
${userPrompt ? "" : "Drift-based maintenance (Task 1) is complete. This step only processes the GENERATE list. Exception: if a GENERATE item targets a resource with an existing contract test, UPDATE that file
|
|
90
|
+
${userPrompt ? "Generate only the tests that the user requested from the Additional Recommendations. The rules below still apply." : "Drift-based maintenance (Task 1) is complete. This step only processes the GENERATE list. Exception: if a GENERATE item targets a resource with an existing `[skyramp]` contract test, UPDATE that test file (see covered-resource handling below) — a new test case added to an existing file counts toward the budget and is reported in `newTestsCreated`."}
|
|
91
91
|
|
|
92
92
|
- **MANDATORY — use the pre-ranked GENERATE list as-is**: The Execution Plan's GENERATE section governs ADD actions. You MUST generate exactly those scenarios in the exact order listed. Do NOT substitute, rename, or replace a GENERATE item. If parameter grounding uncovers a distinct bug-catching scenario not already in the GENERATE or ADDITIONAL list, generate it after all planned GENERATE items are complete and report it in \`newTestsCreated\` — this is an additional test driven by source-code analysis and does not count against the GENERATE budget.
|
|
93
93
|
- Scenario JSON files are always new files — always generate them for new methods. Every generated scenario JSON must have a corresponding new integration test generated from it via \`skyramp_integration_test_generation\`.
|
|
94
|
-
-
|
|
95
|
-
-
|
|
96
|
-
-
|
|
97
|
-
|
|
94
|
+
- Covered-resource handling (aligns with Execution Plan Step 0): When a GENERATE item targets a resource that already has an existing test file covering the same endpoint:
|
|
95
|
+
- If the existing test source is \`[external]\`, skip the resource entirely — the external test already provides coverage. Do NOT UPDATE, REGENERATE, or DELETE external tests.
|
|
96
|
+
- If the existing test is tagged \`[skyramp]\`, apply type-specific rules:
|
|
97
|
+
- Contract tests: UPDATE the existing Skyramp test file (add the new method's test cases). A new test case is a new test even if the file already exists — report in \`newTestsCreated\` and count toward the budget.
|
|
98
|
+
- Integration/scenario tests: Always generate as a new file via the scenario pipeline (\`skyramp_batch_scenario_test_generation\` → \`skyramp_integration_test_generation\`), even if an existing integration test covers the same resource. A new multi-step scenario (e.g. create → PATCH → verify recalculation) is a distinct test file. Report in \`newTestsCreated\` and count toward the budget.
|
|
99
|
+
- UI tests: Always generate as a new file. Report in \`newTestsCreated\`.
|
|
98
100
|
Keep advancing until you have created exactly ${maxGenerate} new test files OR exhausted all candidates.
|
|
99
|
-
-
|
|
100
|
-
-
|
|
101
|
+
- Example: If enrichment reveals that sending \`discount_value\` without \`discount_type\` silently orphans the value (a concrete bug), complete all planned GENERATE items first, then generate this discovered scenario as an extra test and report it in \`newTestsCreated\`.
|
|
102
|
+
- Total generated: Follow the "Budget: N generate" line in the Execution Plan. Process every GENERATE-tagged item in order. Backfill from ADDITIONAL candidates (highest-ranked first) until \`newTestsCreated\` reaches ${maxGenerate} or all candidates are exhausted.
|
|
101
103
|
- **UI test priority**: If the diff contains frontend/UI changes (e.g. \`.tsx\`, \`.jsx\`, \`.vue\`, \`.svelte\` files), you MUST attempt to generate at least one UI test. Use \`browser_navigate\` to the app's base URL — if the app responds, record a trace and generate the test.
|
|
102
104
|
**Skip only if one of these conditions is met:**
|
|
103
105
|
- **(a) App is unreachable** — \`browser_navigate\` fails or connection is refused.
|
|
@@ -252,10 +254,9 @@ This applies when the diff contains ONLY changes with no observable API or UI be
|
|
|
252
254
|
|
|
253
255
|
In these cases:
|
|
254
256
|
- \`newTestsCreated\` must be \`[]\`
|
|
255
|
-
-
|
|
257
|
+
- \`issuesFound\` must be \`[]\` — do NOT add a "No testable behavioral surface" entry; the business case already explains the abstention
|
|
256
258
|
- \`businessCaseAnalysis\` must be a one-sentence summary of what the PR actually does (do NOT leave it blank)
|
|
257
259
|
- \`additionalRecommendations\` must be \`[]\` — do NOT recommend tests for a no-surface PR
|
|
258
|
-
- A blank \`issuesFound\` when tests were intentionally skipped will lose report quality points
|
|
259
260
|
|
|
260
261
|
Otherwise: in \`newTestsCreated\`, you must have exactly ${maxGenerate} budget-counting new tests for the planned GENERATE items. Only new files (ADD) created for those planned GENERATE items count toward this ${maxGenerate} target — GENERATE items converted to UPDATE do not. You may also include at most one additional discovered-scenario file in \`newTestsCreated\` (the bug-catching test generated after all planned items); that extra test does **not** count against the ${maxGenerate} budget. If you have fewer than ${maxGenerate} budget-counting new tests, backfill from the remaining ADDITIONAL candidates before proceeding. Only proceed with fewer than ${maxGenerate} budget-counting new tests if all candidates failed after retry AND the fallback single-contract test also failed.
|
|
261
262
|
|