@skyramp/mcp 0.1.8 → 0.2.0-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +4 -2
- package/build/playwright/registerPlaywrightTools.js +12 -0
- package/build/playwright/traceRecordingPrompt.js +15 -0
- package/build/prompts/code-reuse.js +106 -7
- package/build/prompts/pom-aware-code-reuse.js +106 -7
- package/build/prompts/startTraceCollectionPrompts.js +37 -15
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +26 -31
- package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +40 -1
- package/build/prompts/test-maintenance/driftAnalysisSections.js +90 -86
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +286 -163
- package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -45
- package/build/prompts/test-recommendation/diffExecutionPlan.js +246 -117
- package/build/prompts/test-recommendation/promptPlan.js +290 -0
- package/build/prompts/test-recommendation/promptPlan.test.js +336 -0
- package/build/prompts/test-recommendation/recommendationSections.js +4 -3
- package/build/prompts/test-recommendation/recommendationShared.js +23 -1
- package/build/prompts/test-recommendation/scopeAssessment.js +65 -14
- package/build/prompts/test-recommendation/scopeAssessment.test.js +93 -2
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +36 -12
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +316 -1
- package/build/prompts/testbot/testbot-prompts.js +73 -13
- package/build/prompts/testbot/testbot-prompts.test.js +114 -1
- package/build/resources/testbotResource.js +1 -1
- package/build/services/ScenarioGenerationService.integration.test.js +158 -0
- package/build/services/ScenarioGenerationService.js +47 -4
- package/build/services/ScenarioGenerationService.test.js +158 -22
- package/build/services/TestExecutionService.js +73 -15
- package/build/services/TestExecutionService.test.js +105 -0
- package/build/services/TestGenerationService.js +11 -1
- package/build/tools/executeSkyrampTestTool.js +1 -10
- package/build/tools/generate-tests/generateBatchScenarioRestTool.js +16 -4
- package/build/tools/generate-tests/generateIntegrationRestTool.js +2 -0
- package/build/tools/generate-tests/generateUIRestTool.js +2 -0
- package/build/tools/test-management/actionsTool.js +152 -63
- package/build/tools/test-management/analyzeChangesTool.js +178 -64
- package/build/tools/test-management/analyzeChangesTool.test.js +103 -16
- package/build/tools/test-management/analyzeTestHealthTool.js +30 -81
- package/build/tools/test-management/index.js +1 -0
- package/build/tools/test-management/uiAnalyzeChangesTool.js +149 -0
- package/build/tools/test-management/uiAnalyzeChangesTool.test.js +100 -0
- package/build/tools/trace/resolveSaveStoragePath.js +16 -0
- package/build/tools/trace/resolveSaveStoragePath.test.js +17 -0
- package/build/tools/trace/resolveSessionPaths.js +39 -0
- package/build/tools/trace/resolveSessionPaths.test.js +103 -0
- package/build/tools/trace/sessionState.js +14 -0
- package/build/tools/trace/sessionState.test.js +17 -0
- package/build/tools/trace/startTraceCollectionTool.js +84 -14
- package/build/tools/trace/stopTraceCollectionTool.js +9 -2
- package/build/types/TestAnalysis.js +50 -0
- package/build/types/TestRecommendation.js +6 -58
- package/build/types/TestTypes.js +1 -1
- package/build/utils/AnalysisStateManager.js +22 -11
- package/build/utils/branchDiff.js +11 -2
- package/build/utils/docker.test.js +1 -1
- package/build/utils/gitStaging.js +52 -3
- package/build/utils/gitStaging.test.js +19 -1
- package/build/utils/repoScanner.js +18 -10
- package/build/utils/repoScanner.test.js +92 -0
- package/build/utils/routeParsers.js +180 -25
- package/build/utils/routeParsers.test.js +180 -1
- package/build/utils/scenarioDrafting.js +220 -17
- package/build/utils/scenarioDrafting.test.js +182 -9
- package/build/utils/sourceRouteExtractor.js +806 -0
- package/build/utils/sourceRouteExtractor.test.js +565 -0
- package/build/utils/uiPageEnumerator.js +319 -0
- package/build/utils/uiPageEnumerator.test.js +422 -0
- package/build/utils/utils.js +27 -0
- package/build/utils/versions.js +1 -1
- package/build/utils/workspaceAuth.js +33 -4
- package/node_modules/playwright/ThirdPartyNotices.txt +6 -6
- package/node_modules/playwright/lib/dom-analyzer/analyze.js +111 -0
- package/node_modules/playwright/lib/dom-analyzer/blueprint.js +1210 -0
- package/node_modules/playwright/lib/dom-analyzer/blueprint.test.js +396 -0
- package/node_modules/playwright/lib/dom-analyzer/blueprintCache.js +57 -0
- package/node_modules/playwright/lib/dom-analyzer/blueprintCache.test.js +57 -0
- package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.js +254 -0
- package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.test.js +304 -0
- package/node_modules/playwright/lib/dom-analyzer/crawler.js +384 -0
- package/node_modules/playwright/lib/dom-analyzer/curatedWidgets.js +73 -0
- package/node_modules/playwright/lib/dom-analyzer/dynamicId.js +43 -0
- package/node_modules/playwright/lib/dom-analyzer/dynamicId.test.js +85 -0
- package/node_modules/playwright/lib/dom-analyzer/fingerprint.js +90 -0
- package/node_modules/playwright/lib/dom-analyzer/fingerprint.test.js +231 -0
- package/node_modules/playwright/lib/dom-analyzer/fingerprintAblation.fixtures.js +145 -0
- package/node_modules/playwright/lib/dom-analyzer/fingerprintAblation.test.js +41 -0
- package/node_modules/playwright/lib/dom-analyzer/graph.js +36 -0
- package/node_modules/playwright/lib/dom-analyzer/liveFingerprints.js +43 -0
- package/node_modules/playwright/lib/dom-analyzer/logicalNameResolver.js +72 -0
- package/node_modules/playwright/lib/dom-analyzer/logicalNameResolver.test.js +182 -0
- package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.js +150 -0
- package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.test.js +470 -0
- package/node_modules/playwright/lib/dom-analyzer/sectionGrouper.js +169 -0
- package/node_modules/playwright/lib/dom-analyzer/sectionGrouper.test.js +269 -0
- package/node_modules/playwright/lib/dom-analyzer/serialization.js +75 -0
- package/node_modules/playwright/lib/dom-analyzer/slug.js +30 -0
- package/node_modules/playwright/lib/dom-analyzer/slug.test.js +84 -0
- package/node_modules/playwright/lib/dom-analyzer/widgetContract.js +127 -0
- package/node_modules/playwright/lib/dom-analyzer/widgetContract.test.js +212 -0
- package/node_modules/playwright/lib/mcp/browser/browserContextFactory.js +3 -1
- package/node_modules/playwright/lib/mcp/browser/config.js +1 -1
- package/node_modules/playwright/lib/mcp/browser/context.js +17 -1
- package/node_modules/playwright/lib/mcp/browser/tab.js +38 -0
- package/node_modules/playwright/lib/mcp/browser/tools/domAnalyzer.js +261 -0
- package/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +3 -3
- package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.js +146 -0
- package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.test.js +140 -0
- package/node_modules/playwright/lib/mcp/browser/tools/sitemap.js +226 -0
- package/node_modules/playwright/lib/mcp/browser/tools/snapshot.js +2 -2
- package/node_modules/playwright/lib/mcp/browser/tools/widgetContract.js +168 -0
- package/node_modules/playwright/lib/mcp/browser/tools.js +6 -0
- package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +52 -12
- package/node_modules/playwright/lib/mcp/test/skyRampExport.js +64 -13
- package/node_modules/playwright/package.json +1 -1
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.3.tgz +0 -0
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.4.tgz +0 -0
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.5.tgz +0 -0
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.6.tgz +0 -0
- package/package.json +3 -3
- package/build/services/TestHealthService.js +0 -694
- package/build/services/TestHealthService.test.js +0 -241
- package/build/types/TestDriftAnalysis.js +0 -1
- package/build/types/TestHealth.js +0 -4
|
@@ -27,6 +27,16 @@ export function externalDedupKey(scenario) {
|
|
|
27
27
|
const resource = extractResourceFromPath(primaryStep?.path ?? "");
|
|
28
28
|
return `${method}::${resource}::${testType}`;
|
|
29
29
|
}
|
|
30
|
+
/**
 * Reports whether a scenario is an attack-surface security-boundary scenario.
 *
 * A scenario matches when its `category` is "security_boundary" and either its
 * `isAttackSurfaceSecurityBoundary` flag is strictly `true` or its
 * `description` starts with "Attack-surface auth boundary:".
 *
 * @param {object} scenario - Scenario record; reads `category`,
 *   `isAttackSurfaceSecurityBoundary` and `description`.
 * @returns {boolean} `true` when the scenario matches, otherwise `false`.
 */
export function isAttackSurfaceSecurityBoundary(scenario) {
    if (scenario.category !== "security_boundary") {
        return false;
    }
    if (scenario.isAttackSurfaceSecurityBoundary === true) {
        return true;
    }
    // A scenario without a string description cannot carry the prefix; report
    // a non-match instead of throwing on `undefined.startsWith`.
    return typeof scenario.description === "string" &&
        scenario.description.startsWith("Attack-surface auth boundary:");
}
|
|
35
|
+
/**
 * Reports whether a scenario is an ordinary (non-attack-surface) auth-boundary
 * scenario.
 *
 * A scenario matches when its `category` is "security_boundary", its
 * `description` starts with "Auth boundary:", and
 * `isAttackSurfaceSecurityBoundary(scenario)` is false.
 *
 * @param {object} scenario - Scenario record; reads `category` and
 *   `description`.
 * @returns {boolean} `true` when the scenario matches, otherwise `false`.
 */
export function isOrdinaryDirectAuthBoundary(scenario) {
    if (scenario.category !== "security_boundary") {
        return false;
    }
    // Check the description first: a scenario without a string description can
    // never match, and returning here avoids calling `startsWith` on undefined.
    if (typeof scenario.description !== "string" ||
        !scenario.description.startsWith("Auth boundary:")) {
        return false;
    }
    return !isAttackSurfaceSecurityBoundary(scenario);
}
|
|
30
40
|
/**
|
|
31
41
|
* Build a set of coverage keys from external (non-Skyramp) tests.
|
|
32
42
|
* Parses `testLocations` entries tagged with `[external]` to extract the
|
|
@@ -62,7 +72,19 @@ export function buildExternalCoverageSet(testLocations) {
|
|
|
62
72
|
}
|
|
63
73
|
if (externalWithoutCoverage > 0) {
|
|
64
74
|
logger.info(`${externalWithoutCoverage} external test file(s) have no extractable endpoint coverage — ` +
|
|
65
|
-
`programmatic dedup skipped for these;
|
|
75
|
+
`programmatic dedup skipped for these; prompt-level semantic coverage check is the fallback.`);
|
|
66
76
|
}
|
|
67
77
|
return coverage;
|
|
68
78
|
}
|
|
79
|
+
// TestBot task and step labels, declared here so that prompt modules can share
// them without importing testbot-prompts.ts directly (which would create a
// circular dependency).

// Task identifiers.
export const TASK_UI_PRESCAN = "0";
export const TASK_ANALYZE_MAINTAIN = "1";
export const TASK_GENERATE = "2";
export const TASK_SUBMIT = "3";

// Step identifiers within Task 1.
export const TESTBOT_TASK1_STEP_ANALYZE = "1";
export const TESTBOT_TASK1_STEP_MAINTAIN = "2";
export const TESTBOT_TASK1_STEP_CODE_REVIEW = "3";
export const TESTBOT_TASK1_STEP_UI_GROUNDING = "4";

/**
 * Formats a task label, e.g. `taskRef("2")` yields "Task 2".
 *
 * @param {string} taskId - Task identifier to embed in the label.
 * @returns {string} The label "Task <taskId>".
 */
export const taskRef = (taskId) => {
    return `Task ${taskId}`;
};

/**
 * Formats a task-and-step label, e.g. `taskStepRef("1", "3")` yields
 * "Task 1 Step 3".
 *
 * @param {string} taskId - Task identifier to embed in the label.
 * @param {string} stepId - Step identifier to embed in the label.
 * @returns {string} The label "Task <taskId> Step <stepId>".
 */
export const taskStepRef = (taskId, stepId) => {
    const taskLabel = taskRef(taskId);
    return `${taskLabel} Step ${stepId}`;
};
|
|
@@ -25,25 +25,76 @@ export function isFrontendFile(filePath) {
|
|
|
25
25
|
return true;
|
|
26
26
|
return AMBIGUOUS_FRONTEND_PATTERN.test(filePath) && FRONTEND_DIR_PATTERN.test(filePath);
|
|
27
27
|
}
|
|
28
|
+
/**
 * Decides whether a path names a test artifact rather than UI source.
 *
 * A path is treated as a test file when any of the following holds:
 *   - its final segment ends in `_test`, `_smoke`, `_contract`, `_fuzz`,
 *     `_integration`, `_load`, `_e2e` or `_ui` followed by a dot and an
 *     extension (the Skyramp-generated naming, e.g. `cart_ui_test.ts`,
 *     `orders_integration_test.py`);
 *   - it ends in `scenario_<name>.json` (Skyramp scenario files);
 *   - it ends in `.spec.` or `.test.` followed by `ts`, `tsx`, `js` or `jsx`
 *     (conventional Vitest/Jest/Playwright naming);
 *   - it contains a `__tests__/` directory segment (Jest convention).
 *
 * Intended for callers that need to filter test files OUT of frontend-source
 * processing: `isFrontendFile` accepts a `.spec.ts` that lives under a frontend
 * directory, yet such a file is not UI source to ground recommendations in.
 *
 * @param {string} filePath - Path to classify; `/` is the only separator
 *   the patterns recognise.
 * @returns {boolean} `true` when the path matches one of the patterns above.
 */
export function isTestFile(filePath) {
    const testPathPatterns = [
        // Skyramp-generated suffix immediately before the extension.
        /(?:_test|_smoke|_contract|_fuzz|_integration|_load|_e2e|_ui)\.[^/]+$/,
        // Skyramp scenario JSON.
        /scenario_[^/]+\.json$/,
        // *.spec.* / *.test.* with a JS/TS extension.
        /\.(spec|test)\.(tsx?|jsx?)$/,
        // Anything below a __tests__ directory.
        /(?:^|\/)__tests__\//,
    ];
    return testPathPatterns.some((pattern) => pattern.test(filePath));
}
|
|
28
50
|
// ── LLM scope assessment ──────────────────────────────────────────────────────
|
|
29
51
|
/**
|
|
30
|
-
* Builds the PR scope assessment section
|
|
31
|
-
*
|
|
52
|
+
* Builds the PR scope assessment section.
|
|
53
|
+
*
|
|
54
|
+
* When `precomputedUIPct` is provided (0 = backend-only, 100 = UI-only) the server
|
|
55
|
+
* has already determined the split unambiguously — skip Steps A–C and emit one line.
|
|
32
56
|
*
|
|
33
|
-
*
|
|
34
|
-
*
|
|
35
|
-
* (one auth change > ten CSS tweaks), can identify UI tests that are warranted
|
|
36
|
-
* even on mostly-backend PRs (frontend logic bugs, form validation errors), and
|
|
37
|
-
* can down-scale when the diff is trivial regardless of file count.
|
|
57
|
+
* For mixed PRs (`precomputedUIPct` is undefined, `hasFrontendChanges` is true) skip
|
|
58
|
+
* Steps A–C but keep Step D so the LLM can apply judgment to determine the UI%.
|
|
38
59
|
*
|
|
39
|
-
*
|
|
40
|
-
* rest of the prompt references to enforce count discipline.
|
|
60
|
+
* Falls back to the full four-step assessment when no precomputed data is available.
|
|
41
61
|
*/
|
|
42
|
-
export function buildScopeAssessmentSection(maxTotal = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, isUIOnly = false
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
62
|
+
export function buildScopeAssessmentSection(maxTotal = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, isUIOnly = false,
|
|
63
|
+
/** Server-determined UI/E2E percentage. `undefined` = mixed PR or unknown. */
|
|
64
|
+
precomputedUIPct,
|
|
65
|
+
/** Whether the diff contains frontend files (true ↔ mixed PR when precomputedUIPct is undefined). */
|
|
66
|
+
hasFrontendChanges) {
|
|
67
|
+
const effectiveGenerate = Math.min(maxGenerate, maxTotal);
|
|
68
|
+
const additional = Math.max(0, maxTotal - effectiveGenerate);
|
|
69
|
+
// Unambiguous backend-only or UI-only: emit a single Budget Plan line — no LLM counting needed.
|
|
70
|
+
if (precomputedUIPct !== undefined) {
|
|
71
|
+
return `### PR Scope Assessment
|
|
72
|
+
Budget Plan: ${maxTotal} total (${effectiveGenerate} generate + ${additional} additional), ${precomputedUIPct}% UI/E2E
|
|
73
|
+
|
|
74
|
+
Use these exact numbers throughout the rest of the prompt.`;
|
|
75
|
+
}
|
|
76
|
+
// Mixed PR: server can pre-compute the total but not the UI/E2E split — keep Step D.
|
|
77
|
+
if (hasFrontendChanges) {
|
|
78
|
+
return `### PR Scope Assessment — determine UI% before planning recommendations
|
|
79
|
+
|
|
80
|
+
Budget Plan (total already determined): **${maxTotal} total (${effectiveGenerate} generate + ${additional} additional)**
|
|
81
|
+
|
|
82
|
+
**Step D — Determine UI vs backend split for the budget above:**
|
|
83
|
+
- Non-UI slots are backend tests; start from file-count ratio for UI%, then apply judgment:
|
|
84
|
+
- Pure CSS/style changes inflate the frontend file count without adding test value → reduce UI%
|
|
85
|
+
- Frontend logic bugs (state management, calculation errors, form validation) in the diff → increase UI% even if few frontend files
|
|
86
|
+
- Frontend component calls a changed backend API → an E2E test covers both sides → count toward UI%
|
|
87
|
+
- Frontend files only in \`__tests__/\` or \`.stories.\` → exclude from the ratio
|
|
88
|
+
|
|
89
|
+
**Append your UI% now** — update the Budget Plan to:
|
|
90
|
+
\`Budget Plan: ${maxTotal} total (${effectiveGenerate} generate + ${additional} additional), <ui_pct>% UI/E2E\`
|
|
91
|
+
|
|
92
|
+
Use these exact numbers throughout the rest of the prompt.`;
|
|
93
|
+
}
|
|
94
|
+
// Fallback (no diff context — full_repo else-branch or test): full four-step assessment.
|
|
95
|
+
const minTotal = Math.min(effectiveGenerate + 1, maxTotal);
|
|
96
|
+
const minAdditional = Math.max(0, minTotal - effectiveGenerate);
|
|
97
|
+
const baselineFormula = `${effectiveGenerate} (generate) + ${minAdditional} (min additional) = ${minTotal}`;
|
|
47
98
|
const stepD = isUIOnly
|
|
48
99
|
? `**Step D — UI/E2E confirmation (frontend-only PR):**
|
|
49
100
|
This is a frontend-only PR — set **100% UI/E2E** in your Budget Plan.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
jest.mock("@skyramp/skyramp", () => ({
|
|
2
2
|
WorkspaceConfigManager: { create: jest.fn() },
|
|
3
3
|
}));
|
|
4
|
-
import { isFrontendFile, buildScopeAssessmentSection } from "./scopeAssessment.js";
|
|
4
|
+
import { isFrontendFile, isTestFile, buildScopeAssessmentSection } from "./scopeAssessment.js";
|
|
5
5
|
// ---------------------------------------------------------------------------
|
|
6
6
|
// isFrontendFile
|
|
7
7
|
// ---------------------------------------------------------------------------
|
|
@@ -58,6 +58,40 @@ describe("isFrontendFile", () => {
|
|
|
58
58
|
});
|
|
59
59
|
});
|
|
60
60
|
// ---------------------------------------------------------------------------
|
|
61
|
+
// isTestFile
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
describe("isTestFile", () => {
    // Each row: [test title, expected result, paths checked in order].
    const cases = [
        ["matches Skyramp-generated test suffix patterns", true, [
            "backend/tests/orders_integration_test.py",
            "frontend/tests/cart_ui_test.ts",
            "backend/tests/users_contract_test.py",
            "backend/tests/load_test.py",
        ]],
        ["matches Skyramp scenario JSON files", true, [
            "tests/scenario_orders.json",
            "scenario_checkout.json",
        ]],
        ["matches conventional Vitest/Jest/Playwright spec naming", true, [
            "frontend/tests/cart_labels_baseline.spec.ts",
            "src/components/Button.test.tsx",
            "e2e/login.spec.js",
            "packages/foo/Bar.test.jsx",
        ]],
        ["matches files under __tests__/ directories", true, [
            "src/__tests__/utils.ts",
            "packages/foo/__tests__/Bar.tsx",
        ]],
        ["returns false for ordinary source files", false, [
            "frontend/src/pages/Cart.tsx",
            "frontend/src/components/Button.tsx",
            "backend/src/handlers/orders.py",
        ]],
        // Living under tests/ is not enough on its own; only the file-name
        // suffix is inspected.
        ["returns false for non-spec .ts files in tests/ directories that aren't Skyramp-generated", false, [
            "frontend/tests/helpers/setup.ts",
        ]],
    ];
    for (const [title, expected, paths] of cases) {
        it(title, () => {
            for (const candidate of paths) {
                expect(isTestFile(candidate)).toBe(expected);
            }
        });
    }
});
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
61
95
|
// buildScopeAssessmentSection
|
|
62
96
|
// ---------------------------------------------------------------------------
|
|
63
97
|
describe("buildScopeAssessmentSection", () => {
|
|
@@ -80,9 +114,12 @@ describe("buildScopeAssessmentSection", () => {
|
|
|
80
114
|
it("clamps minTotal to maxTotal when maxTotal < maxGenerate", () => {
|
|
81
115
|
// Defensive: maxGenerate clamped to topN upstream, but guard applies here too
|
|
82
116
|
const section = buildScopeAssessmentSection(3, 5);
|
|
83
|
-
// minTotal = min(
|
|
117
|
+
// effectiveGenerate = min(5, 3) = 3; minTotal = min(3+1, 3) = 3; range "3–3"
|
|
84
118
|
expect(section).toContain("3–3");
|
|
85
119
|
expect(section).not.toMatch(/\b[6-9]–3\b/);
|
|
120
|
+
// Must not show original maxGenerate (5) in the formula
|
|
121
|
+
expect(section).toContain("3 (generate)");
|
|
122
|
+
expect(section).not.toContain("5 (generate)");
|
|
86
123
|
});
|
|
87
124
|
it("embeds UI/E2E confirmation step when isUIOnly=true", () => {
|
|
88
125
|
const section = buildScopeAssessmentSection(10, 3, true);
|
|
@@ -101,4 +138,58 @@ describe("buildScopeAssessmentSection", () => {
|
|
|
101
138
|
expect(section.length).toBeGreaterThan(0);
|
|
102
139
|
expect(section).toContain("Budget Plan");
|
|
103
140
|
});
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
// New branches added in PR 453 (4a) — precomputedUIPct / hasFrontendChanges
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
it("emits a single Budget Plan line for backend-only PR (precomputedUIPct=0)", () => {
|
|
145
|
+
// isUIOnlyPR=false, hasFrontendChanges=false → precomputedUIPct=0
|
|
146
|
+
const section = buildScopeAssessmentSection(10, 3, false, 0, false);
|
|
147
|
+
expect(section).toContain("Budget Plan: 10 total (3 generate + 7 additional), 0% UI/E2E");
|
|
148
|
+
expect(section).toContain("Use these exact numbers throughout the rest of the prompt.");
|
|
149
|
+
// Must NOT include the step-based assessment
|
|
150
|
+
expect(section).not.toContain("Step A");
|
|
151
|
+
expect(section).not.toContain("Step B");
|
|
152
|
+
expect(section).not.toContain("Step C");
|
|
153
|
+
expect(section).not.toContain("Step D");
|
|
154
|
+
});
|
|
155
|
+
it("emits a single Budget Plan line for UI-only PR (precomputedUIPct=100)", () => {
|
|
156
|
+
const section = buildScopeAssessmentSection(10, 3, true, 100, false);
|
|
157
|
+
expect(section).toContain("Budget Plan: 10 total (3 generate + 7 additional), 100% UI/E2E");
|
|
158
|
+
expect(section).not.toContain("Step A");
|
|
159
|
+
expect(section).not.toContain("Step D");
|
|
160
|
+
});
|
|
161
|
+
it("emits Step D only for mixed PR (hasFrontendChanges=true, precomputedUIPct=undefined)", () => {
|
|
162
|
+
const section = buildScopeAssessmentSection(10, 3, false, undefined, true);
|
|
163
|
+
// Total is pre-determined — shows in the Budget Plan header
|
|
164
|
+
expect(section).toContain("Budget Plan (total already determined)");
|
|
165
|
+
expect(section).toContain("10 total (3 generate + 7 additional)");
|
|
166
|
+
// Step D is kept for UI/E2E split judgment
|
|
167
|
+
expect(section).toContain("Step D");
|
|
168
|
+
// The "Append your UI%" instruction must appear (comment #3222061560)
|
|
169
|
+
expect(section).toContain("Append your UI%");
|
|
170
|
+
// Steps A–C are skipped
|
|
171
|
+
expect(section).not.toContain("Step A");
|
|
172
|
+
expect(section).not.toContain("Step B");
|
|
173
|
+
expect(section).not.toContain("Step C");
|
|
174
|
+
});
|
|
175
|
+
it("clamps additional to 0 when maxGenerate equals maxTotal (precomputed path)", () => {
|
|
176
|
+
// maxGenerate=5, maxTotal=5 → additional must be 0, not negative
|
|
177
|
+
const section = buildScopeAssessmentSection(5, 5, false, 0, false);
|
|
178
|
+
expect(section).toContain("5 generate + 0 additional");
|
|
179
|
+
expect(section).not.toMatch(/\d+ generate \+ -\d+ additional/);
|
|
180
|
+
});
|
|
181
|
+
it("clamps additional to 0 when maxGenerate exceeds maxTotal (defensive, precomputed path)", () => {
|
|
182
|
+
// Should never happen in normal usage but guard must hold
|
|
183
|
+
const section = buildScopeAssessmentSection(3, 5, false, 0, false);
|
|
184
|
+
expect(section).toContain("3 generate + 0 additional");
|
|
185
|
+
expect(section).not.toMatch(/\+ -\d+ additional/);
|
|
186
|
+
// Must not show unclamped maxGenerate
|
|
187
|
+
expect(section).not.toContain("5 generate");
|
|
188
|
+
});
|
|
189
|
+
it("clamps generate in mixed PR path when maxGenerate exceeds maxTotal", () => {
|
|
190
|
+
const section = buildScopeAssessmentSection(3, 5, false, undefined, true);
|
|
191
|
+
expect(section).toContain("3 generate + 0 additional");
|
|
192
|
+
expect(section).not.toContain("5 generate");
|
|
193
|
+
expect(section).not.toMatch(/\+ -\d+ additional/);
|
|
194
|
+
});
|
|
104
195
|
});
|
|
@@ -5,9 +5,10 @@ import { logger } from "../../utils/logger.js";
|
|
|
5
5
|
import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildFewShotExamples, buildVerificationChecklist, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, } from "./recommendationSections.js";
|
|
6
6
|
import { CATEGORY_PRIORITY } from "../../types/TestRecommendation.js";
|
|
7
7
|
import { buildScopeAssessmentSection, isFrontendFile } from "./scopeAssessment.js";
|
|
8
|
-
import { buildExecutionPlan } from "./diffExecutionPlan.js";
|
|
8
|
+
import { buildExecutionPlan, EXEC_STEP_CODE_REVIEW } from "./diffExecutionPlan.js";
|
|
9
9
|
import { buildFullRepoRecommendations } from "./fullRepoCatalog.js";
|
|
10
|
-
import {
|
|
10
|
+
import { ANALYSIS_STEP_EXTRACT } from "./analysisOutputPrompt.js";
|
|
11
|
+
import { TASK_GENERATE, buildExternalCoverageSet, externalDedupKey, isAttackSurfaceSecurityBoundary, taskRef, } from "./recommendationShared.js";
|
|
11
12
|
// Re-export for backward compatibility (tests and external callers import these from this module)
|
|
12
13
|
export { buildExternalCoverageSet, externalDedupKey };
|
|
13
14
|
function formatTestLocations(locs) {
|
|
@@ -47,7 +48,9 @@ function classifyNovelty(scenario, diffContext) {
|
|
|
47
48
|
return "existing";
|
|
48
49
|
}
|
|
49
50
|
function prioritiseCandidate(scenario, diffContext) {
|
|
50
|
-
const priority =
|
|
51
|
+
const priority = isAttackSurfaceSecurityBoundary(scenario)
|
|
52
|
+
? "CRITICAL"
|
|
53
|
+
: CATEGORY_PRIORITY[scenario.category] ?? "LOW";
|
|
51
54
|
const novelty = classifyNovelty(scenario, diffContext);
|
|
52
55
|
return { priority, novelty };
|
|
53
56
|
}
|
|
@@ -89,6 +92,25 @@ ${isUIOnlyPR ? `\n**UI-only PR** — no backend changes. UI and E2E tests are mo
|
|
|
89
92
|
: ``}
|
|
90
93
|
Output should be concise and immediately actionable.`
|
|
91
94
|
: `You are in **Repo mode**. Comprehensive test strategy across all endpoints.`;
|
|
95
|
+
// ── UI rec authoring rules ──
|
|
96
|
+
// Anchors every UI recommendation regardless of whether the agent ran
|
|
97
|
+
// skyramp_ui_analyze_changes / browser_blueprint before this call. When the
|
|
98
|
+
// agent has prior captures in its own tool-result history, those serve as
|
|
99
|
+
// grounding; when it doesn't, recommendations fall back to source-grounded
|
|
100
|
+
// prose. Either way, inventing elements / leaking jargon / duplicating UI
|
|
101
|
+
// tests are wrong, so the rules fire unconditionally.
|
|
102
|
+
const uiRecRulesSection = `
|
|
103
|
+
|
|
104
|
+
<ui_recommendation_authoring_rules>
|
|
105
|
+
**Blueprints inform *how* you describe UI tests, not *which* tests to recommend.** The recommendation catalog derives from the same change-signals listed elsewhere in this prompt (new endpoints, schema/field changes, security boundaries, business-logic modifications, frontend route or component additions, layout additions, etc.) — refer to those signals for the source-of-truth list, not this section.
|
|
106
|
+
|
|
107
|
+
**Do not generate near-duplicate UI tests of the same surface;** one well-targeted UI test per surface is enough.
|
|
108
|
+
|
|
109
|
+
For UI recommendations you *do* emit, ground the \`reasoning\` field in elements you have actually observed via \`browser_blueprint\` calls earlier in this session. If a recommendation's target element is not in any blueprint you have observed, either rephrase the recommendation around an element that IS observed, or describe the test target in higher-level terms. Do not invent element names from the PR description, source diff, or component name. If you have not captured any blueprints yet (e.g. backend-only PR, or pre-flight skipped), UI recommendations fall back to source-grounded prose drawn from the diff alone — that is a legitimate outcome, not a reason to invent.
|
|
110
|
+
|
|
111
|
+
Write UI recommendation \`reasoning\` fields in **natural prose** that names elements as a human would describe them (e.g. "the Notifications heading", "the disabled Mark all as read button"). **Do NOT mention "blueprint", "captured blueprint", "DOM analyzer", or any other internal MCP terminology in the reasoning text.** The reader of the report is a developer reviewing test recommendations on a PR; they don't know what a blueprint is and shouldn't have to. Phrases like "the captured blueprint shows X" or "visible from the captured blueprint" leak builder internals — instead just describe what the test verifies in plain product terms ("the disabled mark-all-read button in the empty state"). Likewise do NOT use internal-identifier syntax like \`role=button, accessibleName='X', logicalName=...\`.
|
|
112
|
+
</ui_recommendation_authoring_rules>
|
|
113
|
+
`;
|
|
92
114
|
// ── Endpoint listing ──
|
|
93
115
|
const allEndpoints = analysis.apiEndpoints.endpoints;
|
|
94
116
|
// In PR mode, identify which endpoints were changed so we can partition the listing.
|
|
@@ -124,7 +146,7 @@ Output should be concise and immediately actionable.`
|
|
|
124
146
|
changedLines.push(` ${m.method} ${ep.path} [removed]`);
|
|
125
147
|
}
|
|
126
148
|
}
|
|
127
|
-
endpointLines = `**Likely changed in this PR (from static file→endpoint mapping — verify against diff in Step
|
|
149
|
+
endpointLines = `**Likely changed in this PR (from static file→endpoint mapping — verify against diff in Step ${ANALYSIS_STEP_EXTRACT}):**\n${changedLines.join("\n") || " none"}\n\n**Other endpoints (reference only):**\n${otherLines.join("\n") || " none"}`;
|
|
128
150
|
}
|
|
129
151
|
else {
|
|
130
152
|
endpointLines = allEndpoints
|
|
@@ -311,10 +333,11 @@ ${detailBlocks}
|
|
|
311
333
|
}
|
|
312
334
|
mainSection = buildFullRepoRecommendations(scored, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject, isFrontendOnlyProject, externalCoverageFullRepo);
|
|
313
335
|
}
|
|
314
|
-
else if (isDiffScope
|
|
315
|
-
//
|
|
316
|
-
//
|
|
317
|
-
//
|
|
336
|
+
else if (isDiffScope) {
|
|
337
|
+
// Always use the full execution plan in diff scope — even when scored.length === 0
|
|
338
|
+
// (no pre-ranked scenarios). The execution plan includes the dynamic Code Review step,
|
|
339
|
+
// bug-catching insertion, and the dynamic bug-coverage gate
|
|
340
|
+
// that are critical for catching seeded bugs in new endpoints.
|
|
318
341
|
const externalCoverage = buildExternalCoverageSet(testLocations);
|
|
319
342
|
if (externalCoverage.size > 0) {
|
|
320
343
|
logger.info(`External test coverage keys: ${[...externalCoverage].join(", ")}`);
|
|
@@ -322,6 +345,7 @@ ${detailBlocks}
|
|
|
322
345
|
mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, externalCoverage, analysis.existingTests.relevantExternalTestPaths ?? []);
|
|
323
346
|
}
|
|
324
347
|
else {
|
|
348
|
+
// Full-repo scope with no scored items — rare fallback
|
|
325
349
|
mainSection = `
|
|
326
350
|
## Draft Your Execution Plan
|
|
327
351
|
|
|
@@ -407,7 +431,7 @@ Only add NEW recommendations for code paths introduced in the latest commit.
|
|
|
407
431
|
prHistorySection = `
|
|
408
432
|
## PR History (PR #${prContext.prNumber})
|
|
409
433
|
Tests from prior bot runs are still in the working tree — the maintenance pipeline
|
|
410
|
-
(
|
|
434
|
+
(${taskRef(TASK_GENERATE)}) keeps them up to date. Use the history below to **avoid duplicating** existing
|
|
411
435
|
coverage and to fill gaps:
|
|
412
436
|
- **Do NOT re-recommend** tests listed under "Previously Generated Tests" — they already
|
|
413
437
|
exist and are maintained automatically.
|
|
@@ -419,7 +443,7 @@ coverage and to fill gaps:
|
|
|
419
443
|
- **Stability**: When the code diff between commits is small, the recommendation set
|
|
420
444
|
should be mostly stable. Do not churn recommendations without cause.
|
|
421
445
|
- If prior execution results show failures, note the issue but do not re-recommend
|
|
422
|
-
the test —
|
|
446
|
+
the test — ${taskRef(TASK_GENERATE)} handles fixes for existing tests.
|
|
423
447
|
${historyBody}`;
|
|
424
448
|
}
|
|
425
449
|
// ── Compose all sections ──
|
|
@@ -436,7 +460,7 @@ ${modePreamble}
|
|
|
436
460
|
Scope: ${scopeNote}
|
|
437
461
|
|
|
438
462
|
${sourcePriority}
|
|
439
|
-
|
|
463
|
+
${uiRecRulesSection}
|
|
440
464
|
<repository_context>
|
|
441
465
|
## Repository Context
|
|
442
466
|
|
|
@@ -485,7 +509,7 @@ ${isDiffScope
|
|
|
485
509
|
|
|
486
510
|
${mainSection}
|
|
487
511
|
|
|
488
|
-
${isDiffScope ? buildVerificationChecklist(topN, maxGen) : ""}
|
|
512
|
+
${isDiffScope ? buildVerificationChecklist(topN, maxGen, EXEC_STEP_CODE_REVIEW) : ""}
|
|
489
513
|
</instructions>
|
|
490
514
|
`;
|
|
491
515
|
}
|