@skyramp/mcp 0.2.0-rc.1 → 0.2.0-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +4 -2
- package/build/prompts/code-reuse.js +106 -7
- package/build/prompts/pom-aware-code-reuse.js +106 -7
- package/build/prompts/startTraceCollectionPrompts.js +37 -15
- package/build/prompts/test-maintenance/drift-analysis-prompt.js +26 -31
- package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +40 -1
- package/build/prompts/test-maintenance/driftAnalysisSections.js +90 -86
- package/build/prompts/test-recommendation/analysisOutputPrompt.js +286 -163
- package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -45
- package/build/prompts/test-recommendation/diffExecutionPlan.js +215 -117
- package/build/prompts/test-recommendation/promptPlan.js +290 -0
- package/build/prompts/test-recommendation/promptPlan.test.js +336 -0
- package/build/prompts/test-recommendation/recommendationSections.js +3 -1
- package/build/prompts/test-recommendation/recommendationShared.js +23 -1
- package/build/prompts/test-recommendation/scopeAssessment.js +65 -14
- package/build/prompts/test-recommendation/scopeAssessment.test.js +93 -2
- package/build/prompts/test-recommendation/test-recommendation-prompt.js +36 -12
- package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +222 -1
- package/build/prompts/testbot/testbot-prompts.js +18 -62
- package/build/prompts/testbot/testbot-prompts.test.js +65 -31
- package/build/services/ScenarioGenerationService.js +11 -1
- package/build/services/TestExecutionService.js +73 -15
- package/build/services/TestExecutionService.test.js +105 -0
- package/build/services/TestGenerationService.js +11 -1
- package/build/tools/executeSkyrampTestTool.js +1 -10
- package/build/tools/test-management/actionsTool.js +152 -63
- package/build/tools/test-management/analyzeChangesTool.js +171 -63
- package/build/tools/test-management/analyzeChangesTool.test.js +103 -16
- package/build/tools/test-management/analyzeTestHealthTool.js +30 -81
- package/build/tools/test-management/index.js +1 -0
- package/build/tools/test-management/uiAnalyzeChangesTool.js +149 -0
- package/build/tools/test-management/uiAnalyzeChangesTool.test.js +100 -0
- package/build/tools/trace/resolveSaveStoragePath.js +16 -0
- package/build/tools/trace/resolveSaveStoragePath.test.js +17 -0
- package/build/tools/trace/resolveSessionPaths.js +39 -0
- package/build/tools/trace/resolveSessionPaths.test.js +103 -0
- package/build/tools/trace/sessionState.js +14 -0
- package/build/tools/trace/sessionState.test.js +17 -0
- package/build/tools/trace/startTraceCollectionTool.js +84 -14
- package/build/tools/trace/stopTraceCollectionTool.js +9 -2
- package/build/types/TestAnalysis.js +50 -0
- package/build/types/TestRecommendation.js +6 -58
- package/build/types/TestTypes.js +1 -1
- package/build/utils/AnalysisStateManager.js +22 -11
- package/build/utils/branchDiff.js +11 -2
- package/build/utils/docker.test.js +1 -1
- package/build/utils/gitStaging.js +52 -3
- package/build/utils/gitStaging.test.js +19 -1
- package/build/utils/repoScanner.js +18 -10
- package/build/utils/repoScanner.test.js +92 -0
- package/build/utils/routeParsers.js +168 -25
- package/build/utils/routeParsers.test.js +180 -1
- package/build/utils/scenarioDrafting.js +220 -17
- package/build/utils/scenarioDrafting.test.js +182 -9
- package/build/utils/sourceRouteExtractor.js +806 -0
- package/build/utils/sourceRouteExtractor.test.js +565 -0
- package/build/utils/uiPageEnumerator.js +319 -0
- package/build/utils/uiPageEnumerator.test.js +422 -0
- package/build/utils/utils.js +27 -0
- package/build/utils/versions.js +1 -1
- package/build/utils/workspaceAuth.js +33 -4
- package/node_modules/playwright/lib/dom-analyzer/blueprint.js +54 -5
- package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.js +4 -0
- package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.test.js +6 -0
- package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.js +150 -0
- package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.test.js +470 -0
- package/node_modules/playwright/lib/mcp/browser/tab.js +1 -1
- package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.js +21 -4
- package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.test.js +3 -0
- package/node_modules/playwright/package.json +1 -1
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.4.tgz +0 -0
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.5.tgz +0 -0
- package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.6.tgz +0 -0
- package/package.json +3 -3
- package/build/services/TestHealthService.js +0 -694
- package/build/services/TestHealthService.test.js +0 -241
- package/build/types/TestDriftAnalysis.js +0 -1
- package/build/types/TestHealth.js +0 -4
|
@@ -27,6 +27,16 @@ export function externalDedupKey(scenario) {
|
|
|
27
27
|
const resource = extractResourceFromPath(primaryStep?.path ?? "");
|
|
28
28
|
return `${method}::${resource}::${testType}`;
|
|
29
29
|
}
|
|
30
|
+
/**
 * Reports whether a scenario is an attack-surface security boundary.
 *
 * A scenario qualifies when its `category` is "security_boundary" and either
 * its `isAttackSurfaceSecurityBoundary` flag is strictly `true` or its
 * `description` starts with "Attack-surface auth boundary:".
 *
 * @param {object} scenario - Scenario exposing `category`, and optionally
 *   `isAttackSurfaceSecurityBoundary` and `description`.
 * @returns {boolean} `true` when the scenario is an attack-surface boundary.
 */
export function isAttackSurfaceSecurityBoundary(scenario) {
    if (scenario.category !== "security_boundary") {
        return false;
    }
    if (scenario.isAttackSurfaceSecurityBoundary === true) {
        return true;
    }
    // A missing or non-string description cannot carry the prefix; answer
    // "no" instead of throwing on `.startsWith`.
    return typeof scenario.description === "string" &&
        scenario.description.startsWith("Attack-surface auth boundary:");
}
|
|
35
|
+
/**
 * Reports whether a scenario is an ordinary (direct) auth-boundary check.
 *
 * A scenario qualifies when its `category` is "security_boundary", it is not
 * classified as attack-surface by `isAttackSurfaceSecurityBoundary`, and its
 * `description` starts with "Auth boundary:".
 *
 * @param {object} scenario - Scenario exposing `category` and optionally
 *   `description`.
 * @returns {boolean} `true` when the scenario is a direct auth boundary.
 */
export function isOrdinaryDirectAuthBoundary(scenario) {
    if (scenario.category !== "security_boundary") {
        return false;
    }
    if (isAttackSurfaceSecurityBoundary(scenario)) {
        return false;
    }
    // Check the type first so this prefix test never calls `.startsWith` on a
    // missing description.
    return typeof scenario.description === "string" &&
        scenario.description.startsWith("Auth boundary:");
}
|
|
30
40
|
/**
|
|
31
41
|
* Build a set of coverage keys from external (non-Skyramp) tests.
|
|
32
42
|
* Parses `testLocations` entries tagged with `[external]` to extract the
|
|
@@ -62,7 +72,19 @@ export function buildExternalCoverageSet(testLocations) {
|
|
|
62
72
|
}
|
|
63
73
|
if (externalWithoutCoverage > 0) {
|
|
64
74
|
logger.info(`${externalWithoutCoverage} external test file(s) have no extractable endpoint coverage — ` +
|
|
65
|
-
`programmatic dedup skipped for these;
|
|
75
|
+
`programmatic dedup skipped for these; prompt-level semantic coverage check is the fallback.`);
|
|
66
76
|
}
|
|
67
77
|
return coverage;
|
|
68
78
|
}
|
|
79
|
+
// Shared TestBot task and step labels used by prompt modules that cannot import
|
|
80
|
+
// testbot-prompts.ts directly without creating a circular dependency.
|
|
81
|
+
export const TASK_UI_PRESCAN = "0";
|
|
82
|
+
export const TASK_ANALYZE_MAINTAIN = "1";
|
|
83
|
+
export const TASK_GENERATE = "2";
|
|
84
|
+
export const TASK_SUBMIT = "3";
|
|
85
|
+
export const TESTBOT_TASK1_STEP_ANALYZE = "1";
|
|
86
|
+
export const TESTBOT_TASK1_STEP_MAINTAIN = "2";
|
|
87
|
+
export const TESTBOT_TASK1_STEP_CODE_REVIEW = "3";
|
|
88
|
+
export const TESTBOT_TASK1_STEP_UI_GROUNDING = "4";
|
|
89
|
+
/**
 * Builds the label used to refer to a TestBot task in prompt text.
 *
 * @param {string|number} taskId - Task identifier to embed in the label.
 * @returns {string} The label, e.g. "Task 2".
 */
export const taskRef = (taskId) => {
    const label = `Task ${taskId}`;
    return label;
};
|
|
90
|
+
/**
 * Builds the label used to refer to one step of a TestBot task in prompt text.
 *
 * @param {string|number} taskId - Task identifier to embed in the label.
 * @param {string|number} stepId - Step identifier within that task.
 * @returns {string} The label, e.g. "Task 1 Step 3".
 */
export const taskStepRef = (taskId, stepId) => {
    const label = `Task ${taskId} Step ${stepId}`;
    return label;
};
|
|
@@ -25,25 +25,76 @@ export function isFrontendFile(filePath) {
|
|
|
25
25
|
return true;
|
|
26
26
|
return AMBIGUOUS_FRONTEND_PATTERN.test(filePath) && FRONTEND_DIR_PATTERN.test(filePath);
|
|
27
27
|
}
|
|
28
|
+
/**
 * Returns true if the file path looks like a test file rather than UI source.
 *
 * Catches:
 *   - Skyramp-generated tests (`*_test.ts`, `*_smoke.ts`, `*_contract.ts`,
 *     `*_fuzz.ts`, `*_integration.ts`, `*_load.ts`, `*_e2e.ts`, `*_ui.ts`);
 *     any file extension is accepted after the suffix
 *   - Skyramp scenario files (`scenario_*.json`)
 *   - Conventional Vitest/Jest/Playwright spec naming
 *     (`*.spec.{ts,tsx,js,jsx}`, `*.test.{ts,tsx,js,jsx}`)
 *   - Files inside `__tests__/` directories (Jest convention)
 *
 * Used by callers that want to filter test files OUT of frontend-source
 * processing — `isFrontendFile` returns true for `.spec.ts` under a
 * frontend directory because the rule is "tier-3 ambiguous + frontend dir,"
 * but those tests aren't UI source we'd want to ground recommendations in.
 *
 * Backslash separators are converted to "/" before matching, so Windows-style
 * paths (e.g. `src\__tests__\utils.ts`) are classified the same way as their
 * POSIX equivalents.
 *
 * @param {string} filePath - Path of the file to classify.
 * @returns {boolean} `true` when the path matches one of the test-file patterns.
 */
export function isTestFile(filePath) {
    // Every pattern below assumes "/" as the separator. String() mirrors the
    // coercion RegExp.prototype.test applies to its argument.
    const normalizedPath = String(filePath).replace(/\\/g, "/");
    return (/(?:_test|_smoke|_contract|_fuzz|_integration|_load|_e2e|_ui)\.[^/]+$/.test(normalizedPath) ||
        /scenario_[^/]+\.json$/.test(normalizedPath) ||
        /\.(spec|test)\.(tsx?|jsx?)$/.test(normalizedPath) ||
        /(?:^|\/)__tests__\//.test(normalizedPath));
}
|
|
28
50
|
// ── LLM scope assessment ──────────────────────────────────────────────────────
|
|
29
51
|
/**
|
|
30
|
-
* Builds the PR scope assessment section
|
|
31
|
-
*
|
|
52
|
+
* Builds the PR scope assessment section.
|
|
53
|
+
*
|
|
54
|
+
* When `precomputedUIPct` is provided (0 = backend-only, 100 = UI-only) the server
|
|
55
|
+
* has already determined the split unambiguously — skip Steps A–C and emit one line.
|
|
32
56
|
*
|
|
33
|
-
*
|
|
34
|
-
*
|
|
35
|
-
* (one auth change > ten CSS tweaks), can identify UI tests that are warranted
|
|
36
|
-
* even on mostly-backend PRs (frontend logic bugs, form validation errors), and
|
|
37
|
-
* can down-scale when the diff is trivial regardless of file count.
|
|
57
|
+
* For mixed PRs (`precomputedUIPct` is undefined, `hasFrontendChanges` is true) skip
|
|
58
|
+
* Steps A–C but keep Step D so the LLM can apply judgment to determine the UI%.
|
|
38
59
|
*
|
|
39
|
-
*
|
|
40
|
-
* rest of the prompt references to enforce count discipline.
|
|
60
|
+
* Falls back to the full four-step assessment when no precomputed data is available.
|
|
41
61
|
*/
|
|
42
|
-
export function buildScopeAssessmentSection(maxTotal = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, isUIOnly = false
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
62
|
+
export function buildScopeAssessmentSection(maxTotal = MAX_RECOMMENDATIONS, maxGenerate = MAX_TESTS_TO_GENERATE, isUIOnly = false,
|
|
63
|
+
/** Server-determined UI/E2E percentage. `undefined` = mixed PR or unknown. */
|
|
64
|
+
precomputedUIPct,
|
|
65
|
+
/** Whether the diff contains frontend files (true ↔ mixed PR when precomputedUIPct is undefined). */
|
|
66
|
+
hasFrontendChanges) {
|
|
67
|
+
const effectiveGenerate = Math.min(maxGenerate, maxTotal);
|
|
68
|
+
const additional = Math.max(0, maxTotal - effectiveGenerate);
|
|
69
|
+
// Unambiguous backend-only or UI-only: emit a single Budget Plan line — no LLM counting needed.
|
|
70
|
+
if (precomputedUIPct !== undefined) {
|
|
71
|
+
return `### PR Scope Assessment
|
|
72
|
+
Budget Plan: ${maxTotal} total (${effectiveGenerate} generate + ${additional} additional), ${precomputedUIPct}% UI/E2E
|
|
73
|
+
|
|
74
|
+
Use these exact numbers throughout the rest of the prompt.`;
|
|
75
|
+
}
|
|
76
|
+
// Mixed PR: server can pre-compute the total but not the UI/E2E split — keep Step D.
|
|
77
|
+
if (hasFrontendChanges) {
|
|
78
|
+
return `### PR Scope Assessment — determine UI% before planning recommendations
|
|
79
|
+
|
|
80
|
+
Budget Plan (total already determined): **${maxTotal} total (${effectiveGenerate} generate + ${additional} additional)**
|
|
81
|
+
|
|
82
|
+
**Step D — Determine UI vs backend split for the budget above:**
|
|
83
|
+
- Non-UI slots are backend tests; start from file-count ratio for UI%, then apply judgment:
|
|
84
|
+
- Pure CSS/style changes inflate the frontend file count without adding test value → reduce UI%
|
|
85
|
+
- Frontend logic bugs (state management, calculation errors, form validation) in the diff → increase UI% even if few frontend files
|
|
86
|
+
- Frontend component calls a changed backend API → an E2E test covers both sides → count toward UI%
|
|
87
|
+
- Frontend files only in \`__tests__/\` or \`.stories.\` → exclude from the ratio
|
|
88
|
+
|
|
89
|
+
**Append your UI% now** — update the Budget Plan to:
|
|
90
|
+
\`Budget Plan: ${maxTotal} total (${effectiveGenerate} generate + ${additional} additional), <ui_pct>% UI/E2E\`
|
|
91
|
+
|
|
92
|
+
Use these exact numbers throughout the rest of the prompt.`;
|
|
93
|
+
}
|
|
94
|
+
// Fallback (no diff context — full_repo else-branch or test): full four-step assessment.
|
|
95
|
+
const minTotal = Math.min(effectiveGenerate + 1, maxTotal);
|
|
96
|
+
const minAdditional = Math.max(0, minTotal - effectiveGenerate);
|
|
97
|
+
const baselineFormula = `${effectiveGenerate} (generate) + ${minAdditional} (min additional) = ${minTotal}`;
|
|
47
98
|
const stepD = isUIOnly
|
|
48
99
|
? `**Step D — UI/E2E confirmation (frontend-only PR):**
|
|
49
100
|
This is a frontend-only PR — set **100% UI/E2E** in your Budget Plan.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
jest.mock("@skyramp/skyramp", () => ({
|
|
2
2
|
WorkspaceConfigManager: { create: jest.fn() },
|
|
3
3
|
}));
|
|
4
|
-
import { isFrontendFile, buildScopeAssessmentSection } from "./scopeAssessment.js";
|
|
4
|
+
import { isFrontendFile, isTestFile, buildScopeAssessmentSection } from "./scopeAssessment.js";
|
|
5
5
|
// ---------------------------------------------------------------------------
|
|
6
6
|
// isFrontendFile
|
|
7
7
|
// ---------------------------------------------------------------------------
|
|
@@ -58,6 +58,40 @@ describe("isFrontendFile", () => {
|
|
|
58
58
|
});
|
|
59
59
|
});
|
|
60
60
|
// ---------------------------------------------------------------------------
|
|
61
|
+
// isTestFile
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
describe("isTestFile", () => {
|
|
64
|
+
it("matches Skyramp-generated test suffix patterns", () => {
|
|
65
|
+
expect(isTestFile("backend/tests/orders_integration_test.py")).toBe(true);
|
|
66
|
+
expect(isTestFile("frontend/tests/cart_ui_test.ts")).toBe(true);
|
|
67
|
+
expect(isTestFile("backend/tests/users_contract_test.py")).toBe(true);
|
|
68
|
+
expect(isTestFile("backend/tests/load_test.py")).toBe(true);
|
|
69
|
+
});
|
|
70
|
+
it("matches Skyramp scenario JSON files", () => {
|
|
71
|
+
expect(isTestFile("tests/scenario_orders.json")).toBe(true);
|
|
72
|
+
expect(isTestFile("scenario_checkout.json")).toBe(true);
|
|
73
|
+
});
|
|
74
|
+
it("matches conventional Vitest/Jest/Playwright spec naming", () => {
|
|
75
|
+
expect(isTestFile("frontend/tests/cart_labels_baseline.spec.ts")).toBe(true);
|
|
76
|
+
expect(isTestFile("src/components/Button.test.tsx")).toBe(true);
|
|
77
|
+
expect(isTestFile("e2e/login.spec.js")).toBe(true);
|
|
78
|
+
expect(isTestFile("packages/foo/Bar.test.jsx")).toBe(true);
|
|
79
|
+
});
|
|
80
|
+
it("matches files under __tests__/ directories", () => {
|
|
81
|
+
expect(isTestFile("src/__tests__/utils.ts")).toBe(true);
|
|
82
|
+
expect(isTestFile("packages/foo/__tests__/Bar.tsx")).toBe(true);
|
|
83
|
+
});
|
|
84
|
+
it("returns false for ordinary source files", () => {
|
|
85
|
+
expect(isTestFile("frontend/src/pages/Cart.tsx")).toBe(false);
|
|
86
|
+
expect(isTestFile("frontend/src/components/Button.tsx")).toBe(false);
|
|
87
|
+
expect(isTestFile("backend/src/handlers/orders.py")).toBe(false);
|
|
88
|
+
});
|
|
89
|
+
it("returns false for non-spec .ts files in tests/ directories that aren't Skyramp-generated", () => {
|
|
90
|
+
// Tests/ directory alone doesn't trigger isTestFile — only the suffix matters.
|
|
91
|
+
expect(isTestFile("frontend/tests/helpers/setup.ts")).toBe(false);
|
|
92
|
+
});
|
|
93
|
+
});
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
61
95
|
// buildScopeAssessmentSection
|
|
62
96
|
// ---------------------------------------------------------------------------
|
|
63
97
|
describe("buildScopeAssessmentSection", () => {
|
|
@@ -80,9 +114,12 @@ describe("buildScopeAssessmentSection", () => {
|
|
|
80
114
|
it("clamps minTotal to maxTotal when maxTotal < maxGenerate", () => {
|
|
81
115
|
// Defensive: maxGenerate clamped to topN upstream, but guard applies here too
|
|
82
116
|
const section = buildScopeAssessmentSection(3, 5);
|
|
83
|
-
// minTotal = min(
|
|
117
|
+
// effectiveGenerate = min(5, 3) = 3; minTotal = min(3+1, 3) = 3; range "3–3"
|
|
84
118
|
expect(section).toContain("3–3");
|
|
85
119
|
expect(section).not.toMatch(/\b[6-9]–3\b/);
|
|
120
|
+
// Must not show original maxGenerate (5) in the formula
|
|
121
|
+
expect(section).toContain("3 (generate)");
|
|
122
|
+
expect(section).not.toContain("5 (generate)");
|
|
86
123
|
});
|
|
87
124
|
it("embeds UI/E2E confirmation step when isUIOnly=true", () => {
|
|
88
125
|
const section = buildScopeAssessmentSection(10, 3, true);
|
|
@@ -101,4 +138,58 @@ describe("buildScopeAssessmentSection", () => {
|
|
|
101
138
|
expect(section.length).toBeGreaterThan(0);
|
|
102
139
|
expect(section).toContain("Budget Plan");
|
|
103
140
|
});
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
// New branches added in PR 453 (4a) — precomputedUIPct / hasFrontendChanges
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
it("emits a single Budget Plan line for backend-only PR (precomputedUIPct=0)", () => {
|
|
145
|
+
// isUIOnlyPR=false, hasFrontendChanges=false → precomputedUIPct=0
|
|
146
|
+
const section = buildScopeAssessmentSection(10, 3, false, 0, false);
|
|
147
|
+
expect(section).toContain("Budget Plan: 10 total (3 generate + 7 additional), 0% UI/E2E");
|
|
148
|
+
expect(section).toContain("Use these exact numbers throughout the rest of the prompt.");
|
|
149
|
+
// Must NOT include the step-based assessment
|
|
150
|
+
expect(section).not.toContain("Step A");
|
|
151
|
+
expect(section).not.toContain("Step B");
|
|
152
|
+
expect(section).not.toContain("Step C");
|
|
153
|
+
expect(section).not.toContain("Step D");
|
|
154
|
+
});
|
|
155
|
+
it("emits a single Budget Plan line for UI-only PR (precomputedUIPct=100)", () => {
|
|
156
|
+
const section = buildScopeAssessmentSection(10, 3, true, 100, false);
|
|
157
|
+
expect(section).toContain("Budget Plan: 10 total (3 generate + 7 additional), 100% UI/E2E");
|
|
158
|
+
expect(section).not.toContain("Step A");
|
|
159
|
+
expect(section).not.toContain("Step D");
|
|
160
|
+
});
|
|
161
|
+
it("emits Step D only for mixed PR (hasFrontendChanges=true, precomputedUIPct=undefined)", () => {
|
|
162
|
+
const section = buildScopeAssessmentSection(10, 3, false, undefined, true);
|
|
163
|
+
// Total is pre-determined — shows in the Budget Plan header
|
|
164
|
+
expect(section).toContain("Budget Plan (total already determined)");
|
|
165
|
+
expect(section).toContain("10 total (3 generate + 7 additional)");
|
|
166
|
+
// Step D is kept for UI/E2E split judgment
|
|
167
|
+
expect(section).toContain("Step D");
|
|
168
|
+
// The "Append your UI%" instruction must appear (comment #3222061560)
|
|
169
|
+
expect(section).toContain("Append your UI%");
|
|
170
|
+
// Steps A–C are skipped
|
|
171
|
+
expect(section).not.toContain("Step A");
|
|
172
|
+
expect(section).not.toContain("Step B");
|
|
173
|
+
expect(section).not.toContain("Step C");
|
|
174
|
+
});
|
|
175
|
+
it("clamps additional to 0 when maxGenerate equals maxTotal (precomputed path)", () => {
|
|
176
|
+
// maxGenerate=5, maxTotal=5 → additional must be 0, not negative
|
|
177
|
+
const section = buildScopeAssessmentSection(5, 5, false, 0, false);
|
|
178
|
+
expect(section).toContain("5 generate + 0 additional");
|
|
179
|
+
expect(section).not.toMatch(/\d+ generate \+ -\d+ additional/);
|
|
180
|
+
});
|
|
181
|
+
it("clamps additional to 0 when maxGenerate exceeds maxTotal (defensive, precomputed path)", () => {
|
|
182
|
+
// Should never happen in normal usage but guard must hold
|
|
183
|
+
const section = buildScopeAssessmentSection(3, 5, false, 0, false);
|
|
184
|
+
expect(section).toContain("3 generate + 0 additional");
|
|
185
|
+
expect(section).not.toMatch(/\+ -\d+ additional/);
|
|
186
|
+
// Must not show unclamped maxGenerate
|
|
187
|
+
expect(section).not.toContain("5 generate");
|
|
188
|
+
});
|
|
189
|
+
it("clamps generate in mixed PR path when maxGenerate exceeds maxTotal", () => {
|
|
190
|
+
const section = buildScopeAssessmentSection(3, 5, false, undefined, true);
|
|
191
|
+
expect(section).toContain("3 generate + 0 additional");
|
|
192
|
+
expect(section).not.toContain("5 generate");
|
|
193
|
+
expect(section).not.toMatch(/\+ -\d+ additional/);
|
|
194
|
+
});
|
|
104
195
|
});
|
|
@@ -5,9 +5,10 @@ import { logger } from "../../utils/logger.js";
|
|
|
5
5
|
import { buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildToolWorkflows, buildFewShotExamples, buildVerificationChecklist, getAuthSnippets, MAX_TESTS_TO_GENERATE, MAX_RECOMMENDATIONS, } from "./recommendationSections.js";
|
|
6
6
|
import { CATEGORY_PRIORITY } from "../../types/TestRecommendation.js";
|
|
7
7
|
import { buildScopeAssessmentSection, isFrontendFile } from "./scopeAssessment.js";
|
|
8
|
-
import { buildExecutionPlan } from "./diffExecutionPlan.js";
|
|
8
|
+
import { buildExecutionPlan, EXEC_STEP_CODE_REVIEW } from "./diffExecutionPlan.js";
|
|
9
9
|
import { buildFullRepoRecommendations } from "./fullRepoCatalog.js";
|
|
10
|
-
import {
|
|
10
|
+
import { ANALYSIS_STEP_EXTRACT } from "./analysisOutputPrompt.js";
|
|
11
|
+
import { TASK_GENERATE, buildExternalCoverageSet, externalDedupKey, isAttackSurfaceSecurityBoundary, taskRef, } from "./recommendationShared.js";
|
|
11
12
|
// Re-export for backward compatibility (tests and external callers import these from this module)
|
|
12
13
|
export { buildExternalCoverageSet, externalDedupKey };
|
|
13
14
|
function formatTestLocations(locs) {
|
|
@@ -47,7 +48,9 @@ function classifyNovelty(scenario, diffContext) {
|
|
|
47
48
|
return "existing";
|
|
48
49
|
}
|
|
49
50
|
function prioritiseCandidate(scenario, diffContext) {
|
|
50
|
-
const priority =
|
|
51
|
+
const priority = isAttackSurfaceSecurityBoundary(scenario)
|
|
52
|
+
? "CRITICAL"
|
|
53
|
+
: CATEGORY_PRIORITY[scenario.category] ?? "LOW";
|
|
51
54
|
const novelty = classifyNovelty(scenario, diffContext);
|
|
52
55
|
return { priority, novelty };
|
|
53
56
|
}
|
|
@@ -89,6 +92,25 @@ ${isUIOnlyPR ? `\n**UI-only PR** — no backend changes. UI and E2E tests are mo
|
|
|
89
92
|
: ``}
|
|
90
93
|
Output should be concise and immediately actionable.`
|
|
91
94
|
: `You are in **Repo mode**. Comprehensive test strategy across all endpoints.`;
|
|
95
|
+
// ── UI rec authoring rules ──
|
|
96
|
+
// Anchors every UI recommendation regardless of whether the agent ran
|
|
97
|
+
// skyramp_ui_analyze_changes / browser_blueprint before this call. When the
|
|
98
|
+
// agent has prior captures in its own tool-result history, those serve as
|
|
99
|
+
// grounding; when it doesn't, recommendations fall back to source-grounded
|
|
100
|
+
// prose. Either way, inventing elements / leaking jargon / duplicating UI
|
|
101
|
+
// tests are wrong, so the rules fire unconditionally.
|
|
102
|
+
const uiRecRulesSection = `
|
|
103
|
+
|
|
104
|
+
<ui_recommendation_authoring_rules>
|
|
105
|
+
**Blueprints inform *how* you describe UI tests, not *which* tests to recommend.** The recommendation catalog derives from the same change-signals listed elsewhere in this prompt (new endpoints, schema/field changes, security boundaries, business-logic modifications, frontend route or component additions, layout additions, etc.) — refer to those signals for the source-of-truth list, not this section.
|
|
106
|
+
|
|
107
|
+
**Do not generate near-duplicate UI tests of the same surface;** one well-targeted UI test per surface is enough.
|
|
108
|
+
|
|
109
|
+
For UI recommendations you *do* emit, ground the \`reasoning\` field in elements you have actually observed via \`browser_blueprint\` calls earlier in this session. If a recommendation's target element is not in any blueprint you have observed, either rephrase the recommendation around an element that IS observed, or describe the test target in higher-level terms. Do not invent element names from the PR description, source diff, or component name. If you have not captured any blueprints yet (e.g. backend-only PR, or pre-flight skipped), UI recommendations fall back to source-grounded prose drawn from the diff alone — that is a legitimate outcome, not a reason to invent.
|
|
110
|
+
|
|
111
|
+
Write UI recommendation \`reasoning\` fields in **natural prose** that names elements as a human would describe them (e.g. "the Notifications heading", "the disabled Mark all as read button"). **Do NOT mention "blueprint", "captured blueprint", "DOM analyzer", or any other internal MCP terminology in the reasoning text.** The reader of the report is a developer reviewing test recommendations on a PR; they don't know what a blueprint is and shouldn't have to. Phrases like "the captured blueprint shows X" or "visible from the captured blueprint" leak builder internals — instead just describe what the test verifies in plain product terms ("the disabled mark-all-read button in the empty state"). Likewise do NOT use internal-identifier syntax like \`role=button, accessibleName='X', logicalName=...\`.
|
|
112
|
+
</ui_recommendation_authoring_rules>
|
|
113
|
+
`;
|
|
92
114
|
// ── Endpoint listing ──
|
|
93
115
|
const allEndpoints = analysis.apiEndpoints.endpoints;
|
|
94
116
|
// In PR mode, identify which endpoints were changed so we can partition the listing.
|
|
@@ -124,7 +146,7 @@ Output should be concise and immediately actionable.`
|
|
|
124
146
|
changedLines.push(` ${m.method} ${ep.path} [removed]`);
|
|
125
147
|
}
|
|
126
148
|
}
|
|
127
|
-
endpointLines = `**Likely changed in this PR (from static file→endpoint mapping — verify against diff in Step
|
|
149
|
+
endpointLines = `**Likely changed in this PR (from static file→endpoint mapping — verify against diff in Step ${ANALYSIS_STEP_EXTRACT}):**\n${changedLines.join("\n") || " none"}\n\n**Other endpoints (reference only):**\n${otherLines.join("\n") || " none"}`;
|
|
128
150
|
}
|
|
129
151
|
else {
|
|
130
152
|
endpointLines = allEndpoints
|
|
@@ -311,10 +333,11 @@ ${detailBlocks}
|
|
|
311
333
|
}
|
|
312
334
|
mainSection = buildFullRepoRecommendations(scored, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, isFrontendProject, isFrontendOnlyProject, externalCoverageFullRepo);
|
|
313
335
|
}
|
|
314
|
-
else if (isDiffScope
|
|
315
|
-
//
|
|
316
|
-
//
|
|
317
|
-
//
|
|
336
|
+
else if (isDiffScope) {
|
|
337
|
+
// Always use the full execution plan in diff scope — even when scored.length === 0
|
|
338
|
+
// (no pre-ranked scenarios). The execution plan includes the dynamic Code Review step,
|
|
339
|
+
// bug-catching insertion, and the dynamic bug-coverage gate
|
|
340
|
+
// that are critical for catching seeded bugs in new endpoints.
|
|
318
341
|
const externalCoverage = buildExternalCoverageSet(testLocations);
|
|
319
342
|
if (externalCoverage.size > 0) {
|
|
320
343
|
logger.info(`External test coverage keys: ${[...externalCoverage].join(", ")}`);
|
|
@@ -322,6 +345,7 @@ ${detailBlocks}
|
|
|
322
345
|
mainSection = buildExecutionPlan(scored, maxGen, topN, analysis.apiEndpoints.baseUrl, authHeaderValue, authSchemeSnippet, authTypeValue, seed, endpointCount, isUIOnlyPR, hasFrontendChanges, hasTraces, externalCoverage, analysis.existingTests.relevantExternalTestPaths ?? []);
|
|
323
346
|
}
|
|
324
347
|
else {
|
|
348
|
+
// Full-repo scope with no scored items — rare fallback
|
|
325
349
|
mainSection = `
|
|
326
350
|
## Draft Your Execution Plan
|
|
327
351
|
|
|
@@ -407,7 +431,7 @@ Only add NEW recommendations for code paths introduced in the latest commit.
|
|
|
407
431
|
prHistorySection = `
|
|
408
432
|
## PR History (PR #${prContext.prNumber})
|
|
409
433
|
Tests from prior bot runs are still in the working tree — the maintenance pipeline
|
|
410
|
-
(
|
|
434
|
+
(${taskRef(TASK_GENERATE)}) keeps them up to date. Use the history below to **avoid duplicating** existing
|
|
411
435
|
coverage and to fill gaps:
|
|
412
436
|
- **Do NOT re-recommend** tests listed under "Previously Generated Tests" — they already
|
|
413
437
|
exist and are maintained automatically.
|
|
@@ -419,7 +443,7 @@ coverage and to fill gaps:
|
|
|
419
443
|
- **Stability**: When the code diff between commits is small, the recommendation set
|
|
420
444
|
should be mostly stable. Do not churn recommendations without cause.
|
|
421
445
|
- If prior execution results show failures, note the issue but do not re-recommend
|
|
422
|
-
the test —
|
|
446
|
+
the test — ${taskRef(TASK_GENERATE)} handles fixes for existing tests.
|
|
423
447
|
${historyBody}`;
|
|
424
448
|
}
|
|
425
449
|
// ── Compose all sections ──
|
|
@@ -436,7 +460,7 @@ ${modePreamble}
|
|
|
436
460
|
Scope: ${scopeNote}
|
|
437
461
|
|
|
438
462
|
${sourcePriority}
|
|
439
|
-
|
|
463
|
+
${uiRecRulesSection}
|
|
440
464
|
<repository_context>
|
|
441
465
|
## Repository Context
|
|
442
466
|
|
|
@@ -485,7 +509,7 @@ ${isDiffScope
|
|
|
485
509
|
|
|
486
510
|
${mainSection}
|
|
487
511
|
|
|
488
|
-
${isDiffScope ? buildVerificationChecklist(topN, maxGen) : ""}
|
|
512
|
+
${isDiffScope ? buildVerificationChecklist(topN, maxGen, EXEC_STEP_CODE_REVIEW) : ""}
|
|
489
513
|
</instructions>
|
|
490
514
|
`;
|
|
491
515
|
}
|
|
@@ -3,6 +3,7 @@ jest.mock("@skyramp/skyramp", () => ({
|
|
|
3
3
|
}));
|
|
4
4
|
import { TestType } from "../../types/TestTypes.js";
|
|
5
5
|
import { buildRecommendationPrompt, buildExternalCoverageSet, externalDedupKey } from "./test-recommendation-prompt.js";
|
|
6
|
+
import { buildExecutionPlan } from "./diffExecutionPlan.js";
|
|
6
7
|
import { PATH_PARAM_UUID_GUIDANCE, MAX_TESTS_TO_GENERATE, buildTestQualityCriteria, buildArchitectPreamble, buildContextFetchingGuidance, buildReasoningProtocol, buildFewShotExamples, buildVerificationChecklist, } from "./recommendationSections.js";
|
|
7
8
|
import { AnalysisScope } from "../../types/RepositoryAnalysis.js";
|
|
8
9
|
// ---------------------------------------------------------------------------
|
|
@@ -757,6 +758,156 @@ describe("buildRecommendationPrompt — GENERATE slot allocation", () => {
|
|
|
757
758
|
expect(prompt).not.toContain("ui-test-for-changed-components");
|
|
758
759
|
expect(prompt).not.toContain("ui_mixed_pr_trace.zip");
|
|
759
760
|
});
|
|
761
|
+
it("promotes attack-surface security boundaries into generated slots", () => {
|
|
762
|
+
const attackSurface = minimalScenario({
|
|
763
|
+
scenarioName: "flows-bulk-delete-post-auth-boundary",
|
|
764
|
+
description: "Attack-surface auth boundary: /api/flows/bulk_delete is a destructive sibling of changed DELETE /api/flows/{id}; verify it rejects missing authentication",
|
|
765
|
+
category: "security_boundary",
|
|
766
|
+
priority: "high",
|
|
767
|
+
testType: TestType.CONTRACT,
|
|
768
|
+
steps: [{
|
|
769
|
+
order: 1,
|
|
770
|
+
method: "POST",
|
|
771
|
+
path: "/api/flows/bulk_delete",
|
|
772
|
+
description: "POST /api/flows/bulk_delete without auth",
|
|
773
|
+
interactionType: "error",
|
|
774
|
+
expectedStatusCode: 401,
|
|
775
|
+
}],
|
|
776
|
+
});
|
|
777
|
+
const directDelete = minimalScenario({
|
|
778
|
+
scenarioName: "flows-delete-auth-boundary",
|
|
779
|
+
description: "Auth boundary: DELETE /api/flows/{id} rejects missing authentication",
|
|
780
|
+
category: "security_boundary",
|
|
781
|
+
priority: "high",
|
|
782
|
+
testType: TestType.CONTRACT,
|
|
783
|
+
steps: [{
|
|
784
|
+
order: 1,
|
|
785
|
+
method: "DELETE",
|
|
786
|
+
path: "/api/flows/{id}",
|
|
787
|
+
description: "DELETE /api/flows/{id} without auth",
|
|
788
|
+
interactionType: "error",
|
|
789
|
+
expectedStatusCode: 401,
|
|
790
|
+
}],
|
|
791
|
+
});
|
|
792
|
+
const validation = minimalScenario({
|
|
793
|
+
scenarioName: "flows-delete-invalid-id",
|
|
794
|
+
description: "Validate malformed IDs",
|
|
795
|
+
category: "data_validation",
|
|
796
|
+
priority: "medium",
|
|
797
|
+
testType: TestType.CONTRACT,
|
|
798
|
+
steps: [{
|
|
799
|
+
order: 1,
|
|
800
|
+
method: "DELETE",
|
|
801
|
+
path: "/api/flows/not-a-uuid",
|
|
802
|
+
description: "DELETE /api/flows/not-a-uuid",
|
|
803
|
+
interactionType: "error",
|
|
804
|
+
expectedStatusCode: 422,
|
|
805
|
+
}],
|
|
806
|
+
});
|
|
807
|
+
const analysis = minimalAnalysis({
|
|
808
|
+
businessContext: {
|
|
809
|
+
mainPurpose: "Test",
|
|
810
|
+
userFlows: [],
|
|
811
|
+
dataFlows: [],
|
|
812
|
+
integrationPatterns: [],
|
|
813
|
+
draftedScenarios: [directDelete, validation, attackSurface],
|
|
814
|
+
},
|
|
815
|
+
branchDiffContext: {
|
|
816
|
+
baseBranch: "main",
|
|
817
|
+
currentBranch: "feature/admin-key",
|
|
818
|
+
changedFiles: ["src/prefect/server/api/flows.py"],
|
|
819
|
+
newEndpoints: [],
|
|
820
|
+
modifiedEndpoints: [{
|
|
821
|
+
path: "/api/flows/{id}",
|
|
822
|
+
methods: [{ method: "DELETE", sourceFile: "flows.py", changeType: "modified" }],
|
|
823
|
+
}],
|
|
824
|
+
affectedServices: [],
|
|
825
|
+
},
|
|
826
|
+
});
|
|
827
|
+
const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 5, undefined, undefined, undefined, undefined, 2);
|
|
828
|
+
const attackIdx = prompt.indexOf("POST /api/flows/bulk_delete → 401");
|
|
829
|
+
const directIdx = prompt.indexOf("DELETE /api/flows/{id} → 401");
|
|
830
|
+
expect(attackIdx).toBeGreaterThanOrEqual(0);
|
|
831
|
+
expect(directIdx).toBeGreaterThanOrEqual(0);
|
|
832
|
+
expect(attackIdx).toBeLessThan(directIdx);
|
|
833
|
+
expect(prompt).toContain("#1 — GENERATE** | contract | security_boundary");
|
|
834
|
+
expect(prompt).toContain("preserve attack-surface `security_boundary` items");
|
|
835
|
+
});
|
|
836
|
+
it("does not external-dedup protected bug-catching or attack-surface scenarios", () => {
|
|
837
|
+
const attackSurface = minimalScenario({
|
|
838
|
+
scenarioName: "flows-bulk-delete-post-auth-boundary",
|
|
839
|
+
description: "Verify destructive sibling rejects missing authentication",
|
|
840
|
+
category: "security_boundary",
|
|
841
|
+
isAttackSurfaceSecurityBoundary: true,
|
|
842
|
+
testType: TestType.CONTRACT,
|
|
843
|
+
steps: [{ order: 1, method: "POST", path: "/api/flows/bulk_delete", description: "POST /api/flows/bulk_delete without auth", interactionType: "error", expectedStatusCode: 401 }],
|
|
844
|
+
});
|
|
845
|
+
const bugCaught = minimalScenario({
|
|
846
|
+
scenarioName: "orders-discount-bug-caught",
|
|
847
|
+
description: "Bug-catching test: discount should subtract from total",
|
|
848
|
+
category: "bug_caught",
|
|
849
|
+
testType: TestType.CONTRACT,
|
|
850
|
+
steps: [{ order: 1, method: "POST", path: "/api/orders", description: "POST /api/orders exposes discount bug", interactionType: "success", expectedStatusCode: 201 }],
|
|
851
|
+
});
|
|
852
|
+
const ordinary = minimalScenario({
|
|
853
|
+
scenarioName: "customers-create-contract",
|
|
854
|
+
description: "Create customer contract",
|
|
855
|
+
category: "crud",
|
|
856
|
+
testType: TestType.CONTRACT,
|
|
857
|
+
steps: [{ order: 1, method: "POST", path: "/api/customers", description: "POST /api/customers", interactionType: "success", expectedStatusCode: 201 }],
|
|
858
|
+
});
|
|
859
|
+
const prompt = buildExecutionPlan([attackSurface, bugCaught, ordinary].map((scenario) => ({
|
|
860
|
+
scenario,
|
|
861
|
+
priority: scenario.category === "crud" ? "LOW" : "CRITICAL",
|
|
862
|
+
novelty: "modified",
|
|
863
|
+
})), 3, 3, "http://localhost:3000", "Authorization", ", authScheme: \"Bearer\"", "", "seed", 3, false, false, false, new Set(["POST::flows::contract", "POST::orders::contract", "POST::customers::contract"]));
|
|
864
|
+
expect(prompt).toContain("POST /api/flows/bulk_delete → 401");
|
|
865
|
+
expect(prompt).toContain("POST /api/orders → 201");
|
|
866
|
+
expect(prompt).not.toContain("POST /api/customers → 201");
|
|
867
|
+
expect(prompt).toContain("except for `bug_caught` and attack-surface `security_boundary` items");
|
|
868
|
+
});
|
|
869
|
+
it("keeps symmetric attack-surface siblings together before ordinary auth boundaries", () => {
|
|
870
|
+
const directWidgets = minimalScenario({
|
|
871
|
+
scenarioName: "widgets-delete-auth-boundary",
|
|
872
|
+
description: "Auth boundary: DELETE /api/widgets/{id} rejects missing authentication",
|
|
873
|
+
category: "security_boundary",
|
|
874
|
+
testType: TestType.CONTRACT,
|
|
875
|
+
steps: [{ order: 1, method: "DELETE", path: "/api/widgets/{id}", description: "DELETE /api/widgets/{id} without auth", interactionType: "error", expectedStatusCode: 401 }],
|
|
876
|
+
});
|
|
877
|
+
const directGadgets = minimalScenario({
|
|
878
|
+
scenarioName: "gadgets-delete-auth-boundary",
|
|
879
|
+
description: "Auth boundary: DELETE /api/gadgets/{id} rejects missing authentication",
|
|
880
|
+
category: "security_boundary",
|
|
881
|
+
testType: TestType.CONTRACT,
|
|
882
|
+
steps: [{ order: 1, method: "DELETE", path: "/api/gadgets/{id}", description: "DELETE /api/gadgets/{id} without auth", interactionType: "error", expectedStatusCode: 401 }],
|
|
883
|
+
});
|
|
884
|
+
const widgetsBulk = minimalScenario({
|
|
885
|
+
scenarioName: "widgets-bulk-delete-auth-boundary",
|
|
886
|
+
description: "Attack-surface auth boundary: /api/widgets/bulk_delete is a destructive sibling of changed DELETE /api/widgets/{id}; verify it rejects missing authentication",
|
|
887
|
+
category: "security_boundary",
|
|
888
|
+
testType: TestType.CONTRACT,
|
|
889
|
+
steps: [{ order: 1, method: "POST", path: "/api/widgets/bulk_delete", description: "POST /api/widgets/bulk_delete without auth", interactionType: "error", expectedStatusCode: 401 }],
|
|
890
|
+
});
|
|
891
|
+
const gadgetsBulk = minimalScenario({
|
|
892
|
+
scenarioName: "gadgets-bulk-delete-auth-boundary",
|
|
893
|
+
description: "Attack-surface auth boundary: /api/gadgets/bulk_delete is a destructive sibling of changed DELETE /api/gadgets/{id}; verify it rejects missing authentication",
|
|
894
|
+
category: "security_boundary",
|
|
895
|
+
testType: TestType.CONTRACT,
|
|
896
|
+
steps: [{ order: 1, method: "POST", path: "/api/gadgets/bulk_delete", description: "POST /api/gadgets/bulk_delete without auth", interactionType: "error", expectedStatusCode: 401 }],
|
|
897
|
+
});
|
|
898
|
+
const prompt = buildExecutionPlan([directWidgets, directGadgets, widgetsBulk, gadgetsBulk].map((scenario) => ({
|
|
899
|
+
scenario,
|
|
900
|
+
priority: "CRITICAL",
|
|
901
|
+
novelty: "modified",
|
|
902
|
+
})), 3, 4, "http://localhost:3000", "Authorization", ", authScheme: \"Bearer\"", "", "seed", 4, false);
|
|
903
|
+
const generatedBlock = prompt.slice(0, prompt.indexOf("#4 [ADDITIONAL]"));
|
|
904
|
+
const additionalBlock = prompt.slice(prompt.indexOf("#4 [ADDITIONAL]"));
|
|
905
|
+
expect(generatedBlock).toContain("POST /api/widgets/bulk_delete → 401");
|
|
906
|
+
expect(generatedBlock).toContain("POST /api/gadgets/bulk_delete → 401");
|
|
907
|
+
expect(additionalBlock).not.toContain("POST /api/widgets/bulk_delete → 401");
|
|
908
|
+
expect(additionalBlock).not.toContain("POST /api/gadgets/bulk_delete → 401");
|
|
909
|
+
expect(additionalBlock).toContain("DELETE /api/gadgets/{id} → 401");
|
|
910
|
+
});
|
|
760
911
|
});
|
|
761
912
|
// ---------------------------------------------------------------------------
|
|
762
913
|
// Tests — buildTestQualityCriteria contract-test guidance (regression guard)
|
|
@@ -1175,6 +1326,12 @@ describe("buildVerificationChecklist — self-check at end of prompt", () => {
|
|
|
1175
1326
|
const checklist = buildVerificationChecklist(10, 3);
|
|
1176
1327
|
expect(checklist).toContain("bugCatchingTarget");
|
|
1177
1328
|
});
|
|
1329
|
+
it("includes issue coverage check with promotion cap", () => {
|
|
1330
|
+
const checklist = buildVerificationChecklist(10, 3);
|
|
1331
|
+
expect(checklist).toContain("Issue coverage");
|
|
1332
|
+
expect(checklist).toContain("highest-severity flaw");
|
|
1333
|
+
expect(checklist).toContain("At most one promotion per run");
|
|
1334
|
+
});
|
|
1178
1335
|
it("includes distinct code path check", () => {
|
|
1179
1336
|
const checklist = buildVerificationChecklist(10, 3);
|
|
1180
1337
|
expect(checklist).toContain("distinct code path");
|
|
@@ -1243,7 +1400,7 @@ describe("buildRecommendationPrompt — reduced over-prompting", () => {
|
|
|
1243
1400
|
});
|
|
1244
1401
|
const prompt = buildRecommendationPrompt(analysis, AnalysisScope.CurrentBranchDiff, 10);
|
|
1245
1402
|
expect(prompt).not.toContain("(MANDATORY before executing anything)");
|
|
1246
|
-
expect(prompt).toContain("(before
|
|
1403
|
+
expect(prompt).toContain("(before anything else)");
|
|
1247
1404
|
});
|
|
1248
1405
|
it("uses XML tags in context fetching guidance", () => {
|
|
1249
1406
|
const guidance = buildContextFetchingGuidance("session-1");
|
|
@@ -1606,3 +1763,67 @@ describe("UI grounding guidance (Phase C D-1.a)", () => {
|
|
|
1606
1763
|
expect(out).toContain("Do NOT silently produce ungrounded reasoning");
|
|
1607
1764
|
});
|
|
1608
1765
|
});
|
|
1766
|
+
// ---------------------------------------------------------------------------
|
|
1767
|
+
// Tests — UI recommendation authoring rules
|
|
1768
|
+
// ---------------------------------------------------------------------------
|
|
1769
|
+
//
|
|
1770
|
+
// The recommendation prompt always emits a "UI Recommendation Authoring Rules"
|
|
1771
|
+
// section that anchors UI rec reasoning. Earlier iterations of this code
|
|
1772
|
+
// accepted a `capturedBlueprints` parameter and rendered the captures inline,
|
|
1773
|
+
// but that was redundant: the agent has the captures in its own tool-result
|
|
1774
|
+
// history. The prompt now ships the rules; the agent supplies the vocabulary.
|
|
1775
|
+
describe("buildRecommendationPrompt UI authoring rules", () => {
|
|
1776
|
+
function minimalDiffAnalysis() {
|
|
1777
|
+
return minimalAnalysis({
|
|
1778
|
+
branchDiffContext: {
|
|
1779
|
+
baseBranch: "main",
|
|
1780
|
+
currentBranch: "feature/test",
|
|
1781
|
+
changedFiles: ["src/components/OrderForm.tsx"],
|
|
1782
|
+
newEndpoints: [],
|
|
1783
|
+
modifiedEndpoints: [],
|
|
1784
|
+
affectedServices: [],
|
|
1785
|
+
},
|
|
1786
|
+
});
|
|
1787
|
+
}
|
|
1788
|
+
it("emits UI authoring rules wrapped in an XML tag, unconditionally", () => {
|
|
1789
|
+
const analysis = minimalDiffAnalysis();
|
|
1790
|
+
const prompt = buildRecommendationPrompt(analysis);
|
|
1791
|
+
expect(prompt).toContain("<ui_recommendation_authoring_rules>");
|
|
1792
|
+
expect(prompt).toContain("</ui_recommendation_authoring_rules>");
|
|
1793
|
+
expect(prompt).toMatch(/do NOT mention "blueprint"/i);
|
|
1794
|
+
expect(prompt).toMatch(/do not invent element names/i);
|
|
1795
|
+
});
|
|
1796
|
+
it("does not render a 'Captured Blueprints' data section (param removed)", () => {
|
|
1797
|
+
// Regression on the previous design: capturedBlueprints used to be
|
|
1798
|
+
// threaded through the call and rendered as a "## Captured Blueprints"
|
|
1799
|
+
// data section. That path is gone — the agent's own browser_blueprint
|
|
1800
|
+
// tool-result history is the source of truth for element vocabulary.
|
|
1801
|
+
const analysis = minimalDiffAnalysis();
|
|
1802
|
+
const prompt = buildRecommendationPrompt(analysis);
|
|
1803
|
+
expect(prompt).not.toContain("## Captured Blueprints");
|
|
1804
|
+
});
|
|
1805
|
+
it("instructs the LLM to ground UI recommendations in elements observed via earlier browser_blueprint calls", () => {
|
|
1806
|
+
const analysis = minimalDiffAnalysis();
|
|
1807
|
+
const prompt = buildRecommendationPrompt(analysis);
|
|
1808
|
+
expect(prompt).toMatch(/ground the [`]?reasoning[`]?\s*field in elements you have actually observed/i);
|
|
1809
|
+
expect(prompt).toMatch(/inform.*how.*describe.*not.*which/i);
|
|
1810
|
+
});
|
|
1811
|
+
it("instructs the LLM not to leak internal MCP terminology into reasoning", () => {
|
|
1812
|
+
const analysis = minimalDiffAnalysis();
|
|
1813
|
+
const prompt = buildRecommendationPrompt(analysis);
|
|
1814
|
+
expect(prompt).toMatch(/do NOT mention "blueprint"/i);
|
|
1815
|
+
expect(prompt).toMatch(/leak builder internals/i);
|
|
1816
|
+
});
|
|
1817
|
+
it("does not abbreviate 'recommendation' to 'rec' in the rules section", () => {
|
|
1818
|
+
// Per PR review: shortform 'rec' invites the LLM to hallucinate the term.
|
|
1819
|
+
// Spell it out to keep the language consistent with the rest of the prompt.
|
|
1820
|
+
const analysis = minimalDiffAnalysis();
|
|
1821
|
+
const prompt = buildRecommendationPrompt(analysis);
|
|
1822
|
+
const rulesStart = prompt.indexOf("<ui_recommendation_authoring_rules>");
|
|
1823
|
+
const rulesEnd = prompt.indexOf("</ui_recommendation_authoring_rules>");
|
|
1824
|
+
const rulesBlock = prompt.slice(rulesStart, rulesEnd);
|
|
1825
|
+
// Allow "recs" as a substring within "recommendations" — match only the
|
|
1826
|
+
// standalone word.
|
|
1827
|
+
expect(rulesBlock).not.toMatch(/\b(rec|recs)\b/);
|
|
1828
|
+
});
|
|
1829
|
+
});
|