@skyramp/mcp 0.1.8 → 0.2.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/build/index.js +4 -2
  2. package/build/playwright/registerPlaywrightTools.js +12 -0
  3. package/build/playwright/traceRecordingPrompt.js +15 -0
  4. package/build/prompts/code-reuse.js +106 -7
  5. package/build/prompts/pom-aware-code-reuse.js +106 -7
  6. package/build/prompts/startTraceCollectionPrompts.js +37 -15
  7. package/build/prompts/test-maintenance/drift-analysis-prompt.js +26 -31
  8. package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +40 -1
  9. package/build/prompts/test-maintenance/driftAnalysisSections.js +90 -86
  10. package/build/prompts/test-recommendation/analysisOutputPrompt.js +286 -163
  11. package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -45
  12. package/build/prompts/test-recommendation/diffExecutionPlan.js +246 -117
  13. package/build/prompts/test-recommendation/promptPlan.js +290 -0
  14. package/build/prompts/test-recommendation/promptPlan.test.js +336 -0
  15. package/build/prompts/test-recommendation/recommendationSections.js +4 -3
  16. package/build/prompts/test-recommendation/recommendationShared.js +23 -1
  17. package/build/prompts/test-recommendation/scopeAssessment.js +65 -14
  18. package/build/prompts/test-recommendation/scopeAssessment.test.js +93 -2
  19. package/build/prompts/test-recommendation/test-recommendation-prompt.js +36 -12
  20. package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +316 -1
  21. package/build/prompts/testbot/testbot-prompts.js +73 -13
  22. package/build/prompts/testbot/testbot-prompts.test.js +114 -1
  23. package/build/resources/testbotResource.js +1 -1
  24. package/build/services/ScenarioGenerationService.integration.test.js +158 -0
  25. package/build/services/ScenarioGenerationService.js +47 -4
  26. package/build/services/ScenarioGenerationService.test.js +158 -22
  27. package/build/services/TestExecutionService.js +73 -15
  28. package/build/services/TestExecutionService.test.js +105 -0
  29. package/build/services/TestGenerationService.js +11 -1
  30. package/build/tools/executeSkyrampTestTool.js +1 -10
  31. package/build/tools/generate-tests/generateBatchScenarioRestTool.js +16 -4
  32. package/build/tools/generate-tests/generateIntegrationRestTool.js +2 -0
  33. package/build/tools/generate-tests/generateUIRestTool.js +2 -0
  34. package/build/tools/test-management/actionsTool.js +152 -63
  35. package/build/tools/test-management/analyzeChangesTool.js +178 -64
  36. package/build/tools/test-management/analyzeChangesTool.test.js +103 -16
  37. package/build/tools/test-management/analyzeTestHealthTool.js +30 -81
  38. package/build/tools/test-management/index.js +1 -0
  39. package/build/tools/test-management/uiAnalyzeChangesTool.js +149 -0
  40. package/build/tools/test-management/uiAnalyzeChangesTool.test.js +100 -0
  41. package/build/tools/trace/resolveSaveStoragePath.js +16 -0
  42. package/build/tools/trace/resolveSaveStoragePath.test.js +17 -0
  43. package/build/tools/trace/resolveSessionPaths.js +39 -0
  44. package/build/tools/trace/resolveSessionPaths.test.js +103 -0
  45. package/build/tools/trace/sessionState.js +14 -0
  46. package/build/tools/trace/sessionState.test.js +17 -0
  47. package/build/tools/trace/startTraceCollectionTool.js +84 -14
  48. package/build/tools/trace/stopTraceCollectionTool.js +9 -2
  49. package/build/types/TestAnalysis.js +50 -0
  50. package/build/types/TestRecommendation.js +6 -58
  51. package/build/types/TestTypes.js +1 -1
  52. package/build/utils/AnalysisStateManager.js +22 -11
  53. package/build/utils/branchDiff.js +11 -2
  54. package/build/utils/docker.test.js +1 -1
  55. package/build/utils/gitStaging.js +52 -3
  56. package/build/utils/gitStaging.test.js +19 -1
  57. package/build/utils/repoScanner.js +18 -10
  58. package/build/utils/repoScanner.test.js +92 -0
  59. package/build/utils/routeParsers.js +180 -25
  60. package/build/utils/routeParsers.test.js +180 -1
  61. package/build/utils/scenarioDrafting.js +220 -17
  62. package/build/utils/scenarioDrafting.test.js +182 -9
  63. package/build/utils/sourceRouteExtractor.js +806 -0
  64. package/build/utils/sourceRouteExtractor.test.js +565 -0
  65. package/build/utils/uiPageEnumerator.js +319 -0
  66. package/build/utils/uiPageEnumerator.test.js +422 -0
  67. package/build/utils/utils.js +27 -0
  68. package/build/utils/versions.js +1 -1
  69. package/build/utils/workspaceAuth.js +33 -4
  70. package/node_modules/playwright/ThirdPartyNotices.txt +6 -6
  71. package/node_modules/playwright/lib/dom-analyzer/analyze.js +111 -0
  72. package/node_modules/playwright/lib/dom-analyzer/blueprint.js +1210 -0
  73. package/node_modules/playwright/lib/dom-analyzer/blueprint.test.js +396 -0
  74. package/node_modules/playwright/lib/dom-analyzer/blueprintCache.js +57 -0
  75. package/node_modules/playwright/lib/dom-analyzer/blueprintCache.test.js +57 -0
  76. package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.js +254 -0
  77. package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.test.js +304 -0
  78. package/node_modules/playwright/lib/dom-analyzer/crawler.js +384 -0
  79. package/node_modules/playwright/lib/dom-analyzer/curatedWidgets.js +73 -0
  80. package/node_modules/playwright/lib/dom-analyzer/dynamicId.js +43 -0
  81. package/node_modules/playwright/lib/dom-analyzer/dynamicId.test.js +85 -0
  82. package/node_modules/playwright/lib/dom-analyzer/fingerprint.js +90 -0
  83. package/node_modules/playwright/lib/dom-analyzer/fingerprint.test.js +231 -0
  84. package/node_modules/playwright/lib/dom-analyzer/fingerprintAblation.fixtures.js +145 -0
  85. package/node_modules/playwright/lib/dom-analyzer/fingerprintAblation.test.js +41 -0
  86. package/node_modules/playwright/lib/dom-analyzer/graph.js +36 -0
  87. package/node_modules/playwright/lib/dom-analyzer/liveFingerprints.js +43 -0
  88. package/node_modules/playwright/lib/dom-analyzer/logicalNameResolver.js +72 -0
  89. package/node_modules/playwright/lib/dom-analyzer/logicalNameResolver.test.js +182 -0
  90. package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.js +150 -0
  91. package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.test.js +470 -0
  92. package/node_modules/playwright/lib/dom-analyzer/sectionGrouper.js +169 -0
  93. package/node_modules/playwright/lib/dom-analyzer/sectionGrouper.test.js +269 -0
  94. package/node_modules/playwright/lib/dom-analyzer/serialization.js +75 -0
  95. package/node_modules/playwright/lib/dom-analyzer/slug.js +30 -0
  96. package/node_modules/playwright/lib/dom-analyzer/slug.test.js +84 -0
  97. package/node_modules/playwright/lib/dom-analyzer/widgetContract.js +127 -0
  98. package/node_modules/playwright/lib/dom-analyzer/widgetContract.test.js +212 -0
  99. package/node_modules/playwright/lib/mcp/browser/browserContextFactory.js +3 -1
  100. package/node_modules/playwright/lib/mcp/browser/config.js +1 -1
  101. package/node_modules/playwright/lib/mcp/browser/context.js +17 -1
  102. package/node_modules/playwright/lib/mcp/browser/tab.js +38 -0
  103. package/node_modules/playwright/lib/mcp/browser/tools/domAnalyzer.js +261 -0
  104. package/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +3 -3
  105. package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.js +146 -0
  106. package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.test.js +140 -0
  107. package/node_modules/playwright/lib/mcp/browser/tools/sitemap.js +226 -0
  108. package/node_modules/playwright/lib/mcp/browser/tools/snapshot.js +2 -2
  109. package/node_modules/playwright/lib/mcp/browser/tools/widgetContract.js +168 -0
  110. package/node_modules/playwright/lib/mcp/browser/tools.js +6 -0
  111. package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +52 -12
  112. package/node_modules/playwright/lib/mcp/test/skyRampExport.js +64 -13
  113. package/node_modules/playwright/package.json +1 -1
  114. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.3.tgz +0 -0
  115. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.4.tgz +0 -0
  116. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.5.tgz +0 -0
  117. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.6.tgz +0 -0
  118. package/package.json +3 -3
  119. package/build/services/TestHealthService.js +0 -694
  120. package/build/services/TestHealthService.test.js +0 -241
  121. package/build/types/TestDriftAnalysis.js +0 -1
  122. package/build/types/TestHealth.js +0 -4
@@ -27,6 +27,16 @@ export function externalDedupKey(scenario) {
27
27
  const resource = extractResourceFromPath(primaryStep?.path ?? "");
28
28
  return `${method}::${resource}::${testType}`;
29
29
  }
/**
 * Reports whether a scenario is an attack-surface security-boundary scenario.
 *
 * A scenario qualifies when its `category` is `"security_boundary"` and either
 * its `isAttackSurfaceSecurityBoundary` flag is strictly `true` or its
 * `description` starts with `"Attack-surface auth boundary:"`.
 *
 * @param {object} scenario - Scenario carrying `category` and, optionally,
 *   `isAttackSurfaceSecurityBoundary` and `description`.
 * @returns {boolean} `true` only when both conditions above hold.
 */
export function isAttackSurfaceSecurityBoundary(scenario) {
    if (scenario.category !== "security_boundary") {
        return false;
    }
    if (scenario.isAttackSurfaceSecurityBoundary === true) {
        return true;
    }
    // A scenario without a string description cannot carry the prefix; report
    // a non-match instead of throwing on `undefined.startsWith`.
    const { description } = scenario;
    return typeof description === "string" &&
        description.startsWith("Attack-surface auth boundary:");
}
/**
 * Reports whether a scenario is a plain auth-boundary scenario: its `category`
 * is `"security_boundary"`, its `description` starts with `"Auth boundary:"`,
 * and `isAttackSurfaceSecurityBoundary` does not claim it.
 *
 * @param {object} scenario - Scenario carrying `category` and, optionally,
 *   `description` and `isAttackSurfaceSecurityBoundary`.
 * @returns {boolean} `true` only when all three conditions hold.
 */
export function isOrdinaryDirectAuthBoundary(scenario) {
    if (scenario.category !== "security_boundary") {
        return false;
    }
    // Check the description first: a scenario without a string description can
    // never match, and bailing out here avoids calling `startsWith` on
    // `undefined`/`null`.
    const { description } = scenario;
    if (typeof description !== "string" || !description.startsWith("Auth boundary:")) {
        return false;
    }
    return !isAttackSurfaceSecurityBoundary(scenario);
}
30
40
  /**
31
41
  * Build a set of coverage keys from external (non-Skyramp) tests.
32
42
  * Parses `testLocations` entries tagged with `[external]` to extract the
@@ -62,7 +72,19 @@ export function buildExternalCoverageSet(testLocations) {
62
72
  }
63
73
  if (externalWithoutCoverage > 0) {
64
74
  logger.info(`${externalWithoutCoverage} external test file(s) have no extractable endpoint coverage — ` +
65
- `programmatic dedup skipped for these; Step 0 semantic check is the fallback.`);
75
+ `programmatic dedup skipped for these; prompt-level semantic coverage check is the fallback.`);
66
76
  }
67
77
  return coverage;
68
78
  }
// TestBot task identifiers and Task 1 step identifiers. They live in this
// shared module so prompt modules can use them without importing
// testbot-prompts.ts directly, which would create a circular dependency.
export const TASK_UI_PRESCAN = "0";
export const TASK_ANALYZE_MAINTAIN = "1";
export const TASK_GENERATE = "2";
export const TASK_SUBMIT = "3";

export const TESTBOT_TASK1_STEP_ANALYZE = "1";
export const TESTBOT_TASK1_STEP_MAINTAIN = "2";
export const TESTBOT_TASK1_STEP_CODE_REVIEW = "3";
export const TESTBOT_TASK1_STEP_UI_GROUNDING = "4";

/**
 * Formats a task label such as `Task 2`.
 *
 * @param {string} taskId - Task identifier to embed in the label.
 * @returns {string} The label `Task <taskId>`.
 */
export function taskRef(taskId) {
    return `Task ${taskId}`;
}

/**
 * Formats a task-and-step label such as `Task 1 Step 3`.
 *
 * @param {string} taskId - Task identifier to embed in the label.
 * @param {string} stepId - Step identifier to embed in the label.
 * @returns {string} The label `Task <taskId> Step <stepId>`.
 */
export function taskStepRef(taskId, stepId) {
    return `Task ${taskId} Step ${stepId}`;
}
@@ -25,25 +25,76 @@ export function isFrontendFile(filePath) {
25
25
  return true;
26
26
  return AMBIGUOUS_FRONTEND_PATTERN.test(filePath) && FRONTEND_DIR_PATTERN.test(filePath);
27
27
  }
/**
 * Returns true if the file path looks like a test file rather than UI source.
 *
 * Catches:
 * - Skyramp-generated test suffixes immediately before the extension
 *   (`_test`, `_smoke`, `_contract`, `_fuzz`, `_integration`, `_load`, `_e2e`,
 *   `_ui`). The extension itself is not restricted, so
 *   `orders_integration_test.py` matches as well as `cart_ui_test.ts`.
 * - Skyramp scenario files (`scenario_*.json`)
 * - Conventional Vitest/Jest/Playwright spec naming
 *   (`*.spec.*` / `*.test.*` with a `ts`, `tsx`, `js` or `jsx` extension,
 *   optionally prefixed with `c` or `m`, e.g. `.mjs`, `.cts`)
 * - Files inside `__tests__/` directories (Jest convention)
 *
 * Used by callers that want to filter test files OUT of frontend-source
 * processing — `isFrontendFile` returns true for `.spec.ts` under a
 * frontend directory because the rule is "tier-3 ambiguous + frontend dir,"
 * but those tests aren't UI source we'd want to ground recommendations in.
 *
 * @param {string} filePath - Path using `/` as the separator.
 * @returns {boolean} True when any of the patterns above matches.
 */
export function isTestFile(filePath) {
    const testFilePatterns = [
        // Skyramp-generated suffix right before the (unrestricted) extension.
        /(?:_test|_smoke|_contract|_fuzz|_integration|_load|_e2e|_ui)\.[^/]+$/,
        // Skyramp scenario JSON files.
        /scenario_[^/]+\.json$/,
        // *.spec.* / *.test.* including ESM/CJS-specific extensions (.mjs, .cts, ...).
        /\.(?:spec|test)\.[cm]?[jt]sx?$/,
        // Anything below a __tests__ directory, at the root or nested.
        /(?:^|\/)__tests__\//,
    ];
    return testFilePatterns.some((pattern) => pattern.test(filePath));
}
28
50
  // ── LLM scope assessment ──────────────────────────────────────────────────────
29
51
  /**
30
- * Builds the PR scope assessment section embedded as the first step in the
31
- * execution plan prompt.
52
+ * Builds the PR scope assessment section.
53
+ *
54
+ * When `precomputedUIPct` is provided (0 = backend-only, 100 = UI-only) the server
55
+ * has already determined the split unambiguously — skip Steps A–C and emit one line.
32
56
  *
33
- * This replaces fixed formula-computed topN and uiFraction values. The LLM has
34
- * richer context than a file-count formula: it understands semantic complexity
35
- * (one auth change > ten CSS tweaks), can identify UI tests that are warranted
36
- * even on mostly-backend PRs (frontend logic bugs, form validation errors), and
37
- * can down-scale when the diff is trivial regardless of file count.
57
+ * For mixed PRs (`precomputedUIPct` is undefined, `hasFrontendChanges` is true) skip
58
+ * Steps A–C but keep Step D so the LLM can apply judgment to determine the UI%.
38
59
  *
39
- * The LLM is asked to state a concrete Budget Plan before proceeding, which the
40
- * rest of the prompt references to enforce count discipline.
60
+ * Falls back to the full four-step assessment when no precomputed data is available.
41
61
  */
42
- export function buildScopeAssessmentSection(maxTotal = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, isUIOnly = false) {
43
- // Clamp minTotal to maxTotal so the range is never inverted (e.g. when maxGenerateOverride === topN).
44
- const minTotal = Math.min(maxGenerate + 1, maxTotal);
45
- const minAdditional = minTotal - maxGenerate; // 1 normally; 0 when maxTotal === maxGenerate
46
- const baselineFormula = `${maxGenerate} (generate) + ${minAdditional} (min additional) = ${minTotal}`;
62
+ export function buildScopeAssessmentSection(maxTotal = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, isUIOnly = false,
63
+ /** Server-determined UI/E2E percentage. `undefined` = mixed PR or unknown. */
64
+ precomputedUIPct,
65
+ /** Whether the diff contains frontend files (true mixed PR when precomputedUIPct is undefined). */
66
+ hasFrontendChanges) {
67
+ const effectiveGenerate = Math.min(maxGenerate, maxTotal);
68
+ const additional = Math.max(0, maxTotal - effectiveGenerate);
69
+ // Unambiguous backend-only or UI-only: emit a single Budget Plan line — no LLM counting needed.
70
+ if (precomputedUIPct !== undefined) {
71
+ return `### PR Scope Assessment
72
+ Budget Plan: ${maxTotal} total (${effectiveGenerate} generate + ${additional} additional), ${precomputedUIPct}% UI/E2E
73
+
74
+ Use these exact numbers throughout the rest of the prompt.`;
75
+ }
76
+ // Mixed PR: server can pre-compute the total but not the UI/E2E split — keep Step D.
77
+ if (hasFrontendChanges) {
78
+ return `### PR Scope Assessment — determine UI% before planning recommendations
79
+
80
+ Budget Plan (total already determined): **${maxTotal} total (${effectiveGenerate} generate + ${additional} additional)**
81
+
82
+ **Step D — Determine UI vs backend split for the budget above:**
83
+ - Non-UI slots are backend tests; start from file-count ratio for UI%, then apply judgment:
84
+ - Pure CSS/style changes inflate the frontend file count without adding test value → reduce UI%
85
+ - Frontend logic bugs (state management, calculation errors, form validation) in the diff → increase UI% even if few frontend files
86
+ - Frontend component calls a changed backend API → an E2E test covers both sides → count toward UI%
87
+ - Frontend files only in \`__tests__/\` or \`.stories.\` → exclude from the ratio
88
+
89
+ **Append your UI% now** — update the Budget Plan to:
90
+ \`Budget Plan: ${maxTotal} total (${effectiveGenerate} generate + ${additional} additional), <ui_pct>% UI/E2E\`
91
+
92
+ Use these exact numbers throughout the rest of the prompt.`;
93
+ }
94
+ // Fallback (no diff context — full_repo else-branch or test): full four-step assessment.
95
+ const minTotal = Math.min(effectiveGenerate + 1, maxTotal);
96
+ const minAdditional = Math.max(0, minTotal - effectiveGenerate);
97
+ const baselineFormula = `${effectiveGenerate} (generate) + ${minAdditional} (min additional) = ${minTotal}`;
47
98
  const stepD = isUIOnly
48
99
  ? `**Step D — UI/E2E confirmation (frontend-only PR):**
49
100
  This is a frontend-only PR — set **100% UI/E2E** in your Budget Plan.
@@ -1,7 +1,7 @@
1
1
  jest.mock("@skyramp/skyramp", () => ({
2
2
  WorkspaceConfigManager: { create: jest.fn() },
3
3
  }));
4
- import { isFrontendFile, buildScopeAssessmentSection } from "./scopeAssessment.js";
4
+ import { isFrontendFile, isTestFile, buildScopeAssessmentSection } from "./scopeAssessment.js";
5
5
  // ---------------------------------------------------------------------------
6
6
  // isFrontendFile
7
7
  // ---------------------------------------------------------------------------
@@ -58,6 +58,40 @@ describe("isFrontendFile", () => {
58
58
  });
59
59
  });
60
60
  // ---------------------------------------------------------------------------
61
+ // isTestFile
62
+ // ---------------------------------------------------------------------------
63
+ describe("isTestFile", () => {
64
+ it("matches Skyramp-generated test suffix patterns", () => {
65
+ expect(isTestFile("backend/tests/orders_integration_test.py")).toBe(true);
66
+ expect(isTestFile("frontend/tests/cart_ui_test.ts")).toBe(true);
67
+ expect(isTestFile("backend/tests/users_contract_test.py")).toBe(true);
68
+ expect(isTestFile("backend/tests/load_test.py")).toBe(true);
69
+ });
70
+ it("matches Skyramp scenario JSON files", () => {
71
+ expect(isTestFile("tests/scenario_orders.json")).toBe(true);
72
+ expect(isTestFile("scenario_checkout.json")).toBe(true);
73
+ });
74
+ it("matches conventional Vitest/Jest/Playwright spec naming", () => {
75
+ expect(isTestFile("frontend/tests/cart_labels_baseline.spec.ts")).toBe(true);
76
+ expect(isTestFile("src/components/Button.test.tsx")).toBe(true);
77
+ expect(isTestFile("e2e/login.spec.js")).toBe(true);
78
+ expect(isTestFile("packages/foo/Bar.test.jsx")).toBe(true);
79
+ });
80
+ it("matches files under __tests__/ directories", () => {
81
+ expect(isTestFile("src/__tests__/utils.ts")).toBe(true);
82
+ expect(isTestFile("packages/foo/__tests__/Bar.tsx")).toBe(true);
83
+ });
84
+ it("returns false for ordinary source files", () => {
85
+ expect(isTestFile("frontend/src/pages/Cart.tsx")).toBe(false);
86
+ expect(isTestFile("frontend/src/components/Button.tsx")).toBe(false);
87
+ expect(isTestFile("backend/src/handlers/orders.py")).toBe(false);
88
+ });
89
+ it("returns false for non-spec .ts files in tests/ directories that aren't Skyramp-generated", () => {
90
+ // Tests/ directory alone doesn't trigger isTestFile — only the suffix matters.
91
+ expect(isTestFile("frontend/tests/helpers/setup.ts")).toBe(false);
92
+ });
93
+ });
94
+ // ---------------------------------------------------------------------------
61
95
  // buildScopeAssessmentSection
62
96
  // ---------------------------------------------------------------------------
63
97
  describe("buildScopeAssessmentSection", () => {
@@ -80,9 +114,12 @@ describe("buildScopeAssessmentSection", () => {
80
114
  it("clamps minTotal to maxTotal when maxTotal < maxGenerate", () => {
81
115
  // Defensive: maxGenerate clamped to topN upstream, but guard applies here too
82
116
  const section = buildScopeAssessmentSection(3, 5);
83
- // minTotal = min(5+1, 3) = 3; range "3–3"
117
+ // effectiveGenerate = min(5, 3) = 3; minTotal = min(3+1, 3) = 3; range "3–3"
84
118
  expect(section).toContain("3–3");
85
119
  expect(section).not.toMatch(/\b[6-9]–3\b/);
120
+ // Must not show original maxGenerate (5) in the formula
121
+ expect(section).toContain("3 (generate)");
122
+ expect(section).not.toContain("5 (generate)");
86
123
  });
87
124
  it("embeds UI/E2E confirmation step when isUIOnly=true", () => {
88
125
  const section = buildScopeAssessmentSection(10, 3, true);
@@ -101,4 +138,58 @@ describe("buildScopeAssessmentSection", () => {
101
138
  expect(section.length).toBeGreaterThan(0);
102
139
  expect(section).toContain("Budget Plan");
103
140
  });
141
+ // ---------------------------------------------------------------------------
142
+ // New branches added in PR 453 (4a) — precomputedUIPct / hasFrontendChanges
143
+ // ---------------------------------------------------------------------------
144
+ it("emits a single Budget Plan line for backend-only PR (precomputedUIPct=0)", () => {
145
+ // isUIOnlyPR=false, hasFrontendChanges=false → precomputedUIPct=0
146
+ const section = buildScopeAssessmentSection(10, 3, false, 0, false);
147
+ expect(section).toContain("Budget Plan: 10 total (3 generate + 7 additional), 0% UI/E2E");
148
+ expect(section).toContain("Use these exact numbers throughout the rest of the prompt.");
149
+ // Must NOT include the step-based assessment
150
+ expect(section).not.toContain("Step A");
151
+ expect(section).not.toContain("Step B");
152
+ expect(section).not.toContain("Step C");
153
+ expect(section).not.toContain("Step D");
154
+ });
155
+ it("emits a single Budget Plan line for UI-only PR (precomputedUIPct=100)", () => {
156
+ const section = buildScopeAssessmentSection(10, 3, true, 100, false);
157
+ expect(section).toContain("Budget Plan: 10 total (3 generate + 7 additional), 100% UI/E2E");
158
+ expect(section).not.toContain("Step A");
159
+ expect(section).not.toContain("Step D");
160
+ });
161
+ it("emits Step D only for mixed PR (hasFrontendChanges=true, precomputedUIPct=undefined)", () => {
162
+ const section = buildScopeAssessmentSection(10, 3, false, undefined, true);
163
+ // Total is pre-determined — shows in the Budget Plan header
164
+ expect(section).toContain("Budget Plan (total already determined)");
165
+ expect(section).toContain("10 total (3 generate + 7 additional)");
166
+ // Step D is kept for UI/E2E split judgment
167
+ expect(section).toContain("Step D");
168
+ // The "Append your UI%" instruction must appear (comment #3222061560)
169
+ expect(section).toContain("Append your UI%");
170
+ // Steps A–C are skipped
171
+ expect(section).not.toContain("Step A");
172
+ expect(section).not.toContain("Step B");
173
+ expect(section).not.toContain("Step C");
174
+ });
175
+ it("clamps additional to 0 when maxGenerate equals maxTotal (precomputed path)", () => {
176
+ // maxGenerate=5, maxTotal=5 → additional must be 0, not negative
177
+ const section = buildScopeAssessmentSection(5, 5, false, 0, false);
178
+ expect(section).toContain("5 generate + 0 additional");
179
+ expect(section).not.toMatch(/\d+ generate \+ -\d+ additional/);
180
+ });
181
+ it("clamps additional to 0 when maxGenerate exceeds maxTotal (defensive, precomputed path)", () => {
182
+ // Should never happen in normal usage but guard must hold
183
+ const section = buildScopeAssessmentSection(3, 5, false, 0, false);
184
+ expect(section).toContain("3 generate + 0 additional");
185
+ expect(section).not.toMatch(/\+ -\d+ additional/);
186
+ // Must not show unclamped maxGenerate
187
+ expect(section).not.toContain("5 generate");
188
+ });
189
+ it("clamps generate in mixed PR path when maxGenerate exceeds maxTotal", () => {
190
+ const section = buildScopeAssessmentSection(3, 5, false, undefined, true);
191
+ expect(section).toContain("3 generate + 0 additional");
192
+ expect(section).not.toContain("5 generate");
193
+ expect(section).not.toMatch(/\+ -\d+ additional/);
194
+ });
104
195
  });
@@ -5,9 +5,10 @@ import { logger } from "../../utils/logger.js";
5
5
  import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildFewShotExamples, buildVerificationChecklist, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, } from "./recommendationSections.js";
6
6
  import { CATEGORY_PRIORITY } from "../../types/TestRecommendation.js";
7
7
  import { buildScopeAssessmentSection, isFrontendFile } from "./scopeAssessment.js";
8
- import { buildExecutionPlan } from "./diffExecutionPlan.js";
8
+ import { buildExecutionPlan, EXEC_STEP_CODE_REVIEW } from "./diffExecutionPlan.js";
9
9
  import { buildFullRepoRecommendations } from "./fullRepoCatalog.js";
10
- import { buildExternalCoverageSet, externalDedupKey, } from "./recommendationShared.js";
10
+ import { ANALYSIS_STEP_EXTRACT } from "./analysisOutputPrompt.js";
11
+ import { TASK_GENERATE, buildExternalCoverageSet, externalDedupKey, isAttackSurfaceSecurityBoundary, taskRef, } from "./recommendationShared.js";
11
12
  // Re-export for backward compatibility (tests and external callers import these from this module)
12
13
  export { buildExternalCoverageSet, externalDedupKey };
13
14
  function formatTestLocations(locs) {
@@ -47,7 +48,9 @@ function classifyNovelty(scenario, diffContext) {
47
48
  return "existing";
48
49
  }
49
50
  function prioritiseCandidate(scenario, diffContext) {
50
- const priority = CATEGORY_PRIORITY[scenario.category] ?? "LOW";
51
+ const priority = isAttackSurfaceSecurityBoundary(scenario)
52
+ ? "CRITICAL"
53
+ : CATEGORY_PRIORITY[scenario.category] ?? "LOW";
51
54
  const novelty = classifyNovelty(scenario, diffContext);
52
55
  return { priority, novelty };
53
56
  }
@@ -89,6 +92,25 @@ ${isUIOnlyPR ? `\n**UI-only PR** — no backend changes. UI and E2E tests are mo
89
92
  : ``}
90
93
  Output should be concise and immediately actionable.`
91
94
  : `You are in **Repo mode**. Comprehensive test strategy across all endpoints.`;
95
+ // ── UI rec authoring rules ──
96
+ // Anchors every UI recommendation regardless of whether the agent ran
97
+ // skyramp_ui_analyze_changes / browser_blueprint before this call. When the
98
+ // agent has prior captures in its own tool-result history, those serve as
99
+ // grounding; when it doesn't, recommendations fall back to source-grounded
100
+ // prose. Either way, inventing elements / leaking jargon / duplicating UI
101
+ // tests are wrong, so the rules fire unconditionally.
102
+ const uiRecRulesSection = `
103
+
104
+ <ui_recommendation_authoring_rules>
105
+ **Blueprints inform *how* you describe UI tests, not *which* tests to recommend.** The recommendation catalog derives from the same change-signals listed elsewhere in this prompt (new endpoints, schema/field changes, security boundaries, business-logic modifications, frontend route or component additions, layout additions, etc.) — refer to those signals for the source-of-truth list, not this section.
106
+
107
+ **Do not generate near-duplicate UI tests of the same surface;** one well-targeted UI test per surface is enough.
108
+
109
+ For UI recommendations you *do* emit, ground the \`reasoning\` field in elements you have actually observed via \`browser_blueprint\` calls earlier in this session. If a recommendation's target element is not in any blueprint you have observed, either rephrase the recommendation around an element that IS observed, or describe the test target in higher-level terms. Do not invent element names from the PR description, source diff, or component name. If you have not captured any blueprints yet (e.g. backend-only PR, or pre-flight skipped), UI recommendations fall back to source-grounded prose drawn from the diff alone — that is a legitimate outcome, not a reason to invent.
110
+
111
+ Write UI recommendation \`reasoning\` fields in **natural prose** that names elements as a human would describe them (e.g. "the Notifications heading", "the disabled Mark all as read button"). **Do NOT mention "blueprint", "captured blueprint", "DOM analyzer", or any other internal MCP terminology in the reasoning text.** The reader of the report is a developer reviewing test recommendations on a PR; they don't know what a blueprint is and shouldn't have to. Phrases like "the captured blueprint shows X" or "visible from the captured blueprint" leak builder internals — instead just describe what the test verifies in plain product terms ("the disabled mark-all-read button in the empty state"). Likewise do NOT use internal-identifier syntax like \`role=button, accessibleName='X', logicalName=...\`.
112
+ </ui_recommendation_authoring_rules>
113
+ `;
92
114
  // ── Endpoint listing ──
93
115
  const allEndpoints = analysis.apiEndpoints.endpoints;
94
116
  // In PR mode, identify which endpoints were changed so we can partition the listing.
@@ -124,7 +146,7 @@ Output should be concise and immediately actionable.`
124
146
  changedLines.push(` ${m.method} ${ep.path} [removed]`);
125
147
  }
126
148
  }
127
- endpointLines = `**Likely changed in this PR (from static file→endpoint mapping — verify against diff in Step 2):**\n${changedLines.join("\n") || " none"}\n\n**Other endpoints (reference only):**\n${otherLines.join("\n") || " none"}`;
149
+ endpointLines = `**Likely changed in this PR (from static file→endpoint mapping — verify against diff in Step ${ANALYSIS_STEP_EXTRACT}):**\n${changedLines.join("\n") || " none"}\n\n**Other endpoints (reference only):**\n${otherLines.join("\n") || " none"}`;
128
150
  }
129
151
  else {
130
152
  endpointLines = allEndpoints
@@ -311,10 +333,11 @@ ${detailBlocks}
311
333
  }
312
334
  mainSection = buildFullRepoRecommendations(scored, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject, isFrontendOnlyProject, externalCoverageFullRepo);
313
335
  }
314
- else if (isDiffScope && (isUIOnlyPR || scored.length > 0)) {
315
- // Build external coverage set for programmatic dedup — prevents recommending
316
- // tests that duplicate existing non-Skyramp tests at the METHOD::resource::testType
317
- // level, so different methods on the same resource (e.g. GET vs PUT) remain distinct.
336
+ else if (isDiffScope) {
337
+ // Always use the full execution plan in diff scope even when scored.length === 0
338
+ // (no pre-ranked scenarios). The execution plan includes the dynamic Code Review step,
339
+ // bug-catching insertion, and the dynamic bug-coverage gate
340
+ // that are critical for catching seeded bugs in new endpoints.
318
341
  const externalCoverage = buildExternalCoverageSet(testLocations);
319
342
  if (externalCoverage.size > 0) {
320
343
  logger.info(`External test coverage keys: ${[...externalCoverage].join(", ")}`);
@@ -322,6 +345,7 @@ ${detailBlocks}
322
345
  mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, externalCoverage, analysis.existingTests.relevantExternalTestPaths ?? []);
323
346
  }
324
347
  else {
348
+ // Full-repo scope with no scored items — rare fallback
325
349
  mainSection = `
326
350
  ## Draft Your Execution Plan
327
351
 
@@ -407,7 +431,7 @@ Only add NEW recommendations for code paths introduced in the latest commit.
407
431
  prHistorySection = `
408
432
  ## PR History (PR #${prContext.prNumber})
409
433
  Tests from prior bot runs are still in the working tree — the maintenance pipeline
410
- (Task 2) keeps them up to date. Use the history below to **avoid duplicating** existing
434
+ (${taskRef(TASK_GENERATE)}) keeps them up to date. Use the history below to **avoid duplicating** existing
411
435
  coverage and to fill gaps:
412
436
  - **Do NOT re-recommend** tests listed under "Previously Generated Tests" — they already
413
437
  exist and are maintained automatically.
@@ -419,7 +443,7 @@ coverage and to fill gaps:
419
443
  - **Stability**: When the code diff between commits is small, the recommendation set
420
444
  should be mostly stable. Do not churn recommendations without cause.
421
445
  - If prior execution results show failures, note the issue but do not re-recommend
422
- the test — Task 2 handles fixes for existing tests.
446
+ the test — ${taskRef(TASK_GENERATE)} handles fixes for existing tests.
423
447
  ${historyBody}`;
424
448
  }
425
449
  // ── Compose all sections ──
@@ -436,7 +460,7 @@ ${modePreamble}
436
460
  Scope: ${scopeNote}
437
461
 
438
462
  ${sourcePriority}
439
-
463
+ ${uiRecRulesSection}
440
464
  <repository_context>
441
465
  ## Repository Context
442
466
 
@@ -485,7 +509,7 @@ ${isDiffScope
485
509
 
486
510
  ${mainSection}
487
511
 
488
- ${isDiffScope ? buildVerificationChecklist(topN, maxGen) : ""}
512
+ ${isDiffScope ? buildVerificationChecklist(topN, maxGen, EXEC_STEP_CODE_REVIEW) : ""}
489
513
  </instructions>
490
514
  `;
491
515
  }