@ryuenn3123/agentic-senior-core 3.0.17 → 3.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-context/prompts/bootstrap-design.md +84 -94
- package/.agent-context/prompts/init-project.md +32 -100
- package/.agent-context/prompts/refactor.md +22 -44
- package/.agent-context/prompts/review-code.md +28 -52
- package/.agent-context/review-checklists/architecture-review.md +31 -62
- package/.agent-context/review-checklists/pr-checklist.md +74 -108
- package/.agent-context/rules/api-docs.md +18 -206
- package/.agent-context/rules/architecture.md +40 -207
- package/.agent-context/rules/database-design.md +10 -199
- package/.agent-context/rules/docker-runtime.md +5 -5
- package/.agent-context/rules/efficiency-vs-hype.md +11 -149
- package/.agent-context/rules/error-handling.md +9 -231
- package/.agent-context/rules/event-driven.md +17 -221
- package/.agent-context/rules/frontend-architecture.md +66 -119
- package/.agent-context/rules/git-workflow.md +1 -1
- package/.agent-context/rules/microservices.md +28 -161
- package/.agent-context/rules/naming-conv.md +8 -138
- package/.agent-context/rules/performance.md +9 -175
- package/.agent-context/rules/realtime.md +11 -44
- package/.agent-context/rules/security.md +11 -295
- package/.agent-context/rules/testing.md +9 -174
- package/.agent-context/state/benchmark-analysis.json +3 -3
- package/.agent-context/state/memory-continuity-benchmark.json +1 -1
- package/.agent-context/state/onboarding-report.json +71 -11
- package/.agents/workflows/init-project.md +7 -24
- package/.agents/workflows/refactor.md +7 -24
- package/.agents/workflows/review-code.md +7 -24
- package/.cursorrules +22 -21
- package/.gemini/instructions.md +2 -2
- package/.github/copilot-instructions.md +2 -2
- package/.instructions.md +112 -213
- package/.windsurfrules +22 -21
- package/AGENTS.md +4 -4
- package/CONTRIBUTING.md +13 -22
- package/README.md +6 -20
- package/lib/cli/commands/init.mjs +102 -148
- package/lib/cli/commands/launch.mjs +3 -3
- package/lib/cli/commands/optimize.mjs +14 -4
- package/lib/cli/commands/upgrade.mjs +25 -23
- package/lib/cli/compiler.mjs +96 -62
- package/lib/cli/constants.mjs +28 -136
- package/lib/cli/detector/design-evidence.mjs +189 -6
- package/lib/cli/detector.mjs +6 -7
- package/lib/cli/init-detection-flow.mjs +10 -93
- package/lib/cli/init-selection.mjs +2 -68
- package/lib/cli/project-scaffolder/constants.mjs +1 -1
- package/lib/cli/project-scaffolder/design-contract.mjs +438 -335
- package/lib/cli/project-scaffolder/discovery.mjs +36 -82
- package/lib/cli/project-scaffolder/prompt-builders.mjs +55 -63
- package/lib/cli/project-scaffolder/storage.mjs +0 -4
- package/lib/cli/token-optimization.mjs +1 -1
- package/lib/cli/utils.mjs +75 -9
- package/package.json +2 -2
- package/scripts/detection-benchmark.mjs +4 -15
- package/scripts/documentation-boundary-audit.mjs +9 -9
- package/scripts/explain-on-demand-audit.mjs +11 -11
- package/scripts/forbidden-content-check.mjs +9 -9
- package/scripts/frontend-usability-audit.mjs +57 -36
- package/scripts/llm-judge.mjs +1 -1
- package/scripts/mcp-server/constants.mjs +60 -0
- package/scripts/mcp-server/tool-registry.mjs +149 -0
- package/scripts/mcp-server/tools.mjs +446 -0
- package/scripts/mcp-server.mjs +23 -661
- package/scripts/release-gate/audit-checks.mjs +426 -0
- package/scripts/release-gate/constants.mjs +53 -0
- package/scripts/release-gate/runtime.mjs +63 -0
- package/scripts/release-gate/static-checks.mjs +182 -0
- package/scripts/release-gate.mjs +13 -794
- package/scripts/rules-guardian-audit.mjs +14 -13
- package/scripts/single-source-lazy-loading-audit.mjs +3 -3
- package/scripts/sync-thin-adapters.mjs +5 -5
- package/scripts/ui-design-judge/constants.mjs +24 -0
- package/scripts/ui-design-judge/design-execution-summary.mjs +259 -0
- package/scripts/ui-design-judge/git-input.mjs +131 -0
- package/scripts/ui-design-judge/prompting.mjs +73 -0
- package/scripts/ui-design-judge/providers.mjs +102 -0
- package/scripts/ui-design-judge/reporting.mjs +182 -0
- package/scripts/ui-design-judge/rubric-calibration.mjs +214 -0
- package/scripts/ui-design-judge/rubric-goldset.json +188 -0
- package/scripts/ui-design-judge.mjs +166 -771
- package/scripts/ui-rubric-calibration.mjs +35 -0
- package/scripts/validate/config.mjs +198 -55
- package/scripts/validate/coverage-checks.mjs +32 -7
- package/scripts/validate.mjs +8 -4
- package/lib/cli/architect.mjs +0 -431
|
@@ -4,740 +4,88 @@
|
|
|
4
4
|
/**
|
|
5
5
|
* ui-design-judge.mjs
|
|
6
6
|
*
|
|
7
|
-
* Advisory-
|
|
7
|
+
* Advisory-default UI design contract judge.
|
|
8
8
|
*
|
|
9
9
|
* Repo-internal workflow audit; no user-facing runtime modes.
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
const DESIGN_INTENT_PATH = resolve(REPOSITORY_ROOT, 'docs', 'design-intent.json');
|
|
25
|
-
const DESIGN_GUIDE_PATH = resolve(REPOSITORY_ROOT, 'docs', 'DESIGN.md');
|
|
26
|
-
const MAX_DIFF_CHARS = 12000;
|
|
27
|
-
const UI_FILE_EXTENSIONS = new Set(['.js', '.jsx', '.ts', '.tsx', '.vue', '.css', '.scss', '.sass']);
|
|
28
|
-
const DEFAULT_VISUAL_DIFF_REPORT_VERSION = 'hybrid-visual-diff-v1';
|
|
29
|
-
const DEFAULT_REQUIRED_VIEWPORTS = ['mobile', 'tablet', 'desktop'];
|
|
30
|
-
|
|
31
|
-
/**
|
|
32
|
-
* @typedef {{
|
|
33
|
-
* area: string,
|
|
34
|
-
* severity: string,
|
|
35
|
-
* problem: string,
|
|
36
|
-
* evidence: string,
|
|
37
|
-
* recommendation: string,
|
|
38
|
-
* blockingRecommended: boolean,
|
|
39
|
-
* }} DriftFinding
|
|
40
|
-
*/
|
|
41
|
-
|
|
42
|
-
/**
|
|
43
|
-
* @typedef {{
|
|
44
|
-
* generatedAt: string,
|
|
45
|
-
* auditName: string,
|
|
46
|
-
* schemaVersion: string,
|
|
47
|
-
* mode: 'advisory',
|
|
48
|
-
* advisoryOnly: boolean,
|
|
49
|
-
* passed: boolean,
|
|
50
|
-
* skipped: boolean,
|
|
51
|
-
* skipReason: string | null,
|
|
52
|
-
* provider: string,
|
|
53
|
-
* ciProvider: string,
|
|
54
|
-
* contractPresent: boolean,
|
|
55
|
-
* summary: {
|
|
56
|
-
* changedUiFileCount: number,
|
|
57
|
-
* alignmentScore: number | null,
|
|
58
|
-
* driftCount: number,
|
|
59
|
-
* blockingCandidateCount: number,
|
|
60
|
-
* meaningfulDiffViewportCount: number,
|
|
61
|
-
* },
|
|
62
|
-
* deterministicVisual: {
|
|
63
|
-
* reportPresent: boolean,
|
|
64
|
-
* reportVersion: string | null,
|
|
65
|
-
* baselineStrategy: string | null,
|
|
66
|
-
* coverageComplete: boolean,
|
|
67
|
-
* sectionCoverageRequired: boolean,
|
|
68
|
-
* requiredViewports: string[],
|
|
69
|
-
* coveredViewports: string[],
|
|
70
|
-
* missingViewports: string[],
|
|
71
|
-
* requiredSectionTypes: string[],
|
|
72
|
-
* coveredSectionTypes: string[],
|
|
73
|
-
* missingSectionTypes: string[],
|
|
74
|
-
* meaningfulDiffViewports: string[],
|
|
75
|
-
* meaningfulDiffSectionTypes: string[],
|
|
76
|
-
* maskedViewportCount: number,
|
|
77
|
-
* sectionCaptureCount: number,
|
|
78
|
-
* tileCaptureCount: number,
|
|
79
|
-
* semanticEscalationRecommended: boolean,
|
|
80
|
-
* notes: string[],
|
|
81
|
-
* },
|
|
82
|
-
* semanticJudge: {
|
|
83
|
-
* attempted: boolean,
|
|
84
|
-
* skipped: boolean,
|
|
85
|
-
* skipReason: string | null,
|
|
86
|
-
* },
|
|
87
|
-
* malformedVerdict: boolean,
|
|
88
|
-
* providerError: boolean,
|
|
89
|
-
* findings: DriftFinding[],
|
|
90
|
-
* notes: string[],
|
|
91
|
-
* }} UiDesignJudgeReport
|
|
10
|
+
* Stays advisory by default for this repository workflow, but genericityAutoFail
|
|
11
|
+
* must escalate findings into blocking required actions when named drift signals
|
|
12
|
+
* or forbidden patterns are detected.
|
|
13
|
+
*
|
|
14
|
+
* Validation anchors for repo governance:
|
|
15
|
+
* - Do not reward generic SaaS defaults or popular template patterns.
|
|
16
|
+
* - UI design judge only evaluates changed UI surfaces.
|
|
17
|
+
* - Structured design execution summary was supplied to semantic review.
|
|
18
|
+
* - designExecutionSignalCount
|
|
19
|
+
* - designExecutionPolicy
|
|
20
|
+
* - designExecutionHandoff
|
|
21
|
+
* - handoffReady
|
|
22
|
+
* - structuredInspectionAvailable
|
|
92
23
|
*/
|
|
93
24
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
function collectGitDiff(baseSha, headSha) {
|
|
125
|
-
const execOptions = {
|
|
126
|
-
cwd: REPOSITORY_ROOT,
|
|
127
|
-
encoding: /** @type {'utf-8'} */ ('utf-8'),
|
|
128
|
-
maxBuffer: 1024 * 1024 * 8,
|
|
129
|
-
};
|
|
130
|
-
|
|
131
|
-
return execSync(`git diff "${baseSha}...${headSha}"`, execOptions);
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
function collectGitChangedFiles(baseSha, headSha) {
|
|
135
|
-
const execOptions = {
|
|
136
|
-
cwd: REPOSITORY_ROOT,
|
|
137
|
-
encoding: /** @type {'utf-8'} */ ('utf-8'),
|
|
138
|
-
maxBuffer: 1024 * 1024 * 2,
|
|
139
|
-
};
|
|
140
|
-
|
|
141
|
-
const output = execSync(`git diff --name-only "${baseSha}...${headSha}"`, execOptions);
|
|
142
|
-
return output
|
|
143
|
-
.split(/\r?\n/u)
|
|
144
|
-
.map((filePath) => filePath.trim())
|
|
145
|
-
.filter(Boolean);
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
function collectPullRequestDiff() {
|
|
149
|
-
if (process.env.PR_DIFF) {
|
|
150
|
-
return process.env.PR_DIFF;
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
const githubBaseSha = process.env.GITHUB_BASE_SHA;
|
|
154
|
-
const githubHeadSha = process.env.GITHUB_HEAD_SHA ?? 'HEAD';
|
|
155
|
-
if (githubBaseSha) {
|
|
156
|
-
return collectGitDiff(githubBaseSha, githubHeadSha);
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
const gitlabBaseSha = process.env.CI_MERGE_REQUEST_DIFF_BASE_SHA;
|
|
160
|
-
const gitlabHeadSha = process.env.CI_COMMIT_SHA ?? 'HEAD';
|
|
161
|
-
if (gitlabBaseSha) {
|
|
162
|
-
return collectGitDiff(gitlabBaseSha, gitlabHeadSha);
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
try {
|
|
166
|
-
return execSync('git diff HEAD~1 HEAD', {
|
|
167
|
-
cwd: REPOSITORY_ROOT,
|
|
168
|
-
encoding: /** @type {'utf-8'} */ ('utf-8'),
|
|
169
|
-
maxBuffer: 1024 * 1024 * 8,
|
|
170
|
-
});
|
|
171
|
-
} catch {
|
|
172
|
-
try {
|
|
173
|
-
const emptyTreeSha = '4b825dc642cb6eb9a060e54bf8d69288fbee4904';
|
|
174
|
-
return execSync(`git diff "${emptyTreeSha}" HEAD`, {
|
|
175
|
-
cwd: REPOSITORY_ROOT,
|
|
176
|
-
encoding: /** @type {'utf-8'} */ ('utf-8'),
|
|
177
|
-
maxBuffer: 1024 * 1024 * 8,
|
|
178
|
-
});
|
|
179
|
-
} catch {
|
|
180
|
-
return '';
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
function collectChangedFiles() {
|
|
186
|
-
if (process.env.PR_DIFF) {
|
|
187
|
-
const filePathSet = new Set();
|
|
188
|
-
for (const diffHeaderMatch of process.env.PR_DIFF.matchAll(/^diff --git a\/(.+?) b\/(.+)$/gm)) {
|
|
189
|
-
filePathSet.add(diffHeaderMatch[2]);
|
|
190
|
-
}
|
|
191
|
-
return Array.from(filePathSet);
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
const githubBaseSha = process.env.GITHUB_BASE_SHA;
|
|
195
|
-
const githubHeadSha = process.env.GITHUB_HEAD_SHA ?? 'HEAD';
|
|
196
|
-
if (githubBaseSha) {
|
|
197
|
-
return collectGitChangedFiles(githubBaseSha, githubHeadSha);
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
const gitlabBaseSha = process.env.CI_MERGE_REQUEST_DIFF_BASE_SHA;
|
|
201
|
-
const gitlabHeadSha = process.env.CI_COMMIT_SHA ?? 'HEAD';
|
|
202
|
-
if (gitlabBaseSha) {
|
|
203
|
-
return collectGitChangedFiles(gitlabBaseSha, gitlabHeadSha);
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
try {
|
|
207
|
-
const output = execSync('git diff --name-only HEAD~1 HEAD', {
|
|
208
|
-
cwd: REPOSITORY_ROOT,
|
|
209
|
-
encoding: /** @type {'utf-8'} */ ('utf-8'),
|
|
210
|
-
maxBuffer: 1024 * 1024 * 2,
|
|
211
|
-
});
|
|
212
|
-
return output.split(/\r?\n/u).map((filePath) => filePath.trim()).filter(Boolean);
|
|
213
|
-
} catch {
|
|
214
|
-
return [];
|
|
215
|
-
}
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
function isUiRelevantFilePath(filePath) {
|
|
219
|
-
const normalizedFilePath = String(filePath || '').replace(/\\/g, '/').toLowerCase();
|
|
220
|
-
const fileExtension = extname(normalizedFilePath);
|
|
221
|
-
|
|
222
|
-
if (!UI_FILE_EXTENSIONS.has(fileExtension)) {
|
|
223
|
-
return false;
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
return (
|
|
227
|
-
normalizedFilePath.startsWith('src/')
|
|
228
|
-
|| normalizedFilePath.startsWith('app/')
|
|
229
|
-
|| normalizedFilePath.startsWith('pages/')
|
|
230
|
-
|| normalizedFilePath.startsWith('components/')
|
|
231
|
-
|| normalizedFilePath.startsWith('styles/')
|
|
232
|
-
|| normalizedFilePath.includes('/components/')
|
|
233
|
-
|| normalizedFilePath.includes('/screens/')
|
|
234
|
-
|| normalizedFilePath.includes('/layouts/')
|
|
235
|
-
);
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
function loadDesignIntent() {
|
|
239
|
-
if (!existsSync(DESIGN_INTENT_PATH)) {
|
|
240
|
-
return null;
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
try {
|
|
244
|
-
return JSON.parse(readFileSync(DESIGN_INTENT_PATH, 'utf8'));
|
|
245
|
-
} catch {
|
|
246
|
-
return null;
|
|
247
|
-
}
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
function loadDesignGuide() {
|
|
251
|
-
if (!existsSync(DESIGN_GUIDE_PATH)) {
|
|
252
|
-
return '';
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
return readFileSync(DESIGN_GUIDE_PATH, 'utf8');
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
function toFiniteRatio(rawValue) {
|
|
259
|
-
return typeof rawValue === 'number' && Number.isFinite(rawValue)
|
|
260
|
-
? rawValue
|
|
261
|
-
: null;
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
function normalizeStringArray(rawValue) {
|
|
265
|
-
if (!Array.isArray(rawValue)) {
|
|
266
|
-
return [];
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
return rawValue
|
|
270
|
-
.map((entryValue) => String(entryValue || '').trim())
|
|
271
|
-
.filter(Boolean);
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
function loadDeterministicVisualReport() {
|
|
275
|
-
if (process.env.UI_VISUAL_DIFF_REPORT_JSON) {
|
|
276
|
-
try {
|
|
277
|
-
return JSON.parse(process.env.UI_VISUAL_DIFF_REPORT_JSON);
|
|
278
|
-
} catch {
|
|
279
|
-
return {
|
|
280
|
-
malformed: true,
|
|
281
|
-
notes: ['UI_VISUAL_DIFF_REPORT_JSON could not be parsed as JSON.'],
|
|
282
|
-
};
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
if (process.env.UI_VISUAL_DIFF_REPORT_PATH) {
|
|
287
|
-
const reportPath = resolve(REPOSITORY_ROOT, process.env.UI_VISUAL_DIFF_REPORT_PATH);
|
|
288
|
-
if (!existsSync(reportPath)) {
|
|
289
|
-
return {
|
|
290
|
-
malformed: true,
|
|
291
|
-
notes: [`UI_VISUAL_DIFF_REPORT_PATH does not exist: ${process.env.UI_VISUAL_DIFF_REPORT_PATH}`],
|
|
292
|
-
};
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
try {
|
|
296
|
-
return JSON.parse(readFileSync(reportPath, 'utf8'));
|
|
297
|
-
} catch {
|
|
298
|
-
return {
|
|
299
|
-
malformed: true,
|
|
300
|
-
notes: [`UI_VISUAL_DIFF_REPORT_PATH could not be parsed as JSON: ${process.env.UI_VISUAL_DIFF_REPORT_PATH}`],
|
|
301
|
-
};
|
|
302
|
-
}
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
return null;
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
function summarizeDeterministicVisualReport(rawVisualReport, designIntentContent) {
|
|
309
|
-
const visualQaPolicy = designIntentContent?.visualQaPolicy && typeof designIntentContent.visualQaPolicy === 'object'
|
|
310
|
-
? designIntentContent.visualQaPolicy
|
|
311
|
-
: {};
|
|
312
|
-
const capturePlan = visualQaPolicy?.capturePlan && typeof visualQaPolicy.capturePlan === 'object'
|
|
313
|
-
? visualQaPolicy.capturePlan
|
|
314
|
-
: {};
|
|
315
|
-
const requiredViewports = normalizeStringArray(visualQaPolicy.requiredViewports);
|
|
316
|
-
const normalizedRequiredViewports = requiredViewports.length > 0 ? requiredViewports : DEFAULT_REQUIRED_VIEWPORTS;
|
|
317
|
-
const requiredSectionTypes = normalizeStringArray(capturePlan.requiredSectionTypes);
|
|
318
|
-
const meaningfulDiffRatioThreshold = toFiniteRatio(visualQaPolicy?.semanticEscalation?.meaningfulDiffRatioThreshold) ?? 0.01;
|
|
319
|
-
const maxUnmaskedDiffRatio = toFiniteRatio(visualQaPolicy?.stability?.maxUnmaskedDiffRatio) ?? 0.005;
|
|
320
|
-
const maxMaskedDiffRatio = toFiniteRatio(visualQaPolicy?.stability?.maxMaskedDiffRatio) ?? 0.02;
|
|
321
|
-
|
|
322
|
-
if (!rawVisualReport) {
|
|
323
|
-
return {
|
|
324
|
-
reportPresent: false,
|
|
325
|
-
reportVersion: null,
|
|
326
|
-
baselineStrategy: visualQaPolicy.baselineStrategy || null,
|
|
327
|
-
coverageComplete: false,
|
|
328
|
-
sectionCoverageRequired: capturePlan.requireSectionCapturesForLongPages === true,
|
|
329
|
-
requiredViewports: normalizedRequiredViewports,
|
|
330
|
-
coveredViewports: [],
|
|
331
|
-
missingViewports: normalizedRequiredViewports,
|
|
332
|
-
requiredSectionTypes,
|
|
333
|
-
coveredSectionTypes: [],
|
|
334
|
-
missingSectionTypes: requiredSectionTypes,
|
|
335
|
-
meaningfulDiffViewports: [],
|
|
336
|
-
meaningfulDiffSectionTypes: [],
|
|
337
|
-
maskedViewportCount: 0,
|
|
338
|
-
sectionCaptureCount: 0,
|
|
339
|
-
tileCaptureCount: 0,
|
|
340
|
-
semanticEscalationRecommended: false,
|
|
341
|
-
notes: ['No deterministic visual diff report was supplied.'],
|
|
342
|
-
};
|
|
343
|
-
}
|
|
344
|
-
|
|
345
|
-
if (rawVisualReport.malformed === true) {
|
|
346
|
-
return {
|
|
347
|
-
reportPresent: false,
|
|
348
|
-
reportVersion: null,
|
|
349
|
-
baselineStrategy: visualQaPolicy.baselineStrategy || null,
|
|
350
|
-
coverageComplete: false,
|
|
351
|
-
sectionCoverageRequired: capturePlan.requireSectionCapturesForLongPages === true,
|
|
352
|
-
requiredViewports: normalizedRequiredViewports,
|
|
353
|
-
coveredViewports: [],
|
|
354
|
-
missingViewports: normalizedRequiredViewports,
|
|
355
|
-
requiredSectionTypes,
|
|
356
|
-
coveredSectionTypes: [],
|
|
357
|
-
missingSectionTypes: requiredSectionTypes,
|
|
358
|
-
meaningfulDiffViewports: [],
|
|
359
|
-
meaningfulDiffSectionTypes: [],
|
|
360
|
-
maskedViewportCount: 0,
|
|
361
|
-
sectionCaptureCount: 0,
|
|
362
|
-
tileCaptureCount: 0,
|
|
363
|
-
semanticEscalationRecommended: true,
|
|
364
|
-
notes: normalizeStringArray(rawVisualReport.notes).length > 0
|
|
365
|
-
? normalizeStringArray(rawVisualReport.notes)
|
|
366
|
-
: ['Deterministic visual diff report was malformed.'],
|
|
367
|
-
};
|
|
368
|
-
}
|
|
369
|
-
|
|
370
|
-
const viewportResults = Array.isArray(rawVisualReport.viewportResults)
|
|
371
|
-
? rawVisualReport.viewportResults
|
|
372
|
-
.map((rawViewportResult) => {
|
|
373
|
-
const viewportName = String(rawViewportResult?.viewport || '').trim().toLowerCase();
|
|
374
|
-
const pixelDiffRatio = toFiniteRatio(rawViewportResult?.pixelDiffRatio);
|
|
375
|
-
const maskedPixelDiffRatio = toFiniteRatio(rawViewportResult?.maskedPixelDiffRatio);
|
|
376
|
-
const withinNoiseBudget = typeof rawViewportResult?.withinNoiseBudget === 'boolean'
|
|
377
|
-
? rawViewportResult.withinNoiseBudget
|
|
378
|
-
: (pixelDiffRatio === null || pixelDiffRatio <= maxUnmaskedDiffRatio)
|
|
379
|
-
&& (maskedPixelDiffRatio === null || maskedPixelDiffRatio <= maxMaskedDiffRatio);
|
|
380
|
-
const meaningfulDiff = typeof rawViewportResult?.meaningfulDiff === 'boolean'
|
|
381
|
-
? rawViewportResult.meaningfulDiff
|
|
382
|
-
: (pixelDiffRatio !== null && pixelDiffRatio > meaningfulDiffRatioThreshold)
|
|
383
|
-
|| (maskedPixelDiffRatio !== null && maskedPixelDiffRatio > meaningfulDiffRatioThreshold);
|
|
384
|
-
|
|
385
|
-
return {
|
|
386
|
-
viewport: viewportName,
|
|
387
|
-
pixelDiffRatio,
|
|
388
|
-
maskedPixelDiffRatio,
|
|
389
|
-
withinNoiseBudget,
|
|
390
|
-
meaningfulDiff,
|
|
391
|
-
dynamicMaskCategories: normalizeStringArray(rawViewportResult?.dynamicMaskCategories),
|
|
392
|
-
notes: normalizeStringArray(rawViewportResult?.notes),
|
|
393
|
-
};
|
|
394
|
-
})
|
|
395
|
-
.filter((viewportResult) => Boolean(viewportResult.viewport))
|
|
396
|
-
: [];
|
|
397
|
-
const sectionResults = Array.isArray(rawVisualReport.sectionResults)
|
|
398
|
-
? rawVisualReport.sectionResults
|
|
399
|
-
.map((rawSectionResult) => {
|
|
400
|
-
const sectionType = String(rawSectionResult?.sectionType || '').trim().toLowerCase();
|
|
401
|
-
const captureKind = String(rawSectionResult?.captureKind || '').trim().toLowerCase();
|
|
402
|
-
const tileIndex = Number.isInteger(rawSectionResult?.tileIndex) ? rawSectionResult.tileIndex : null;
|
|
403
|
-
const pixelDiffRatio = toFiniteRatio(rawSectionResult?.pixelDiffRatio);
|
|
404
|
-
const maskedPixelDiffRatio = toFiniteRatio(rawSectionResult?.maskedPixelDiffRatio);
|
|
405
|
-
const withinNoiseBudget = typeof rawSectionResult?.withinNoiseBudget === 'boolean'
|
|
406
|
-
? rawSectionResult.withinNoiseBudget
|
|
407
|
-
: (pixelDiffRatio === null || pixelDiffRatio <= maxUnmaskedDiffRatio)
|
|
408
|
-
&& (maskedPixelDiffRatio === null || maskedPixelDiffRatio <= maxMaskedDiffRatio);
|
|
409
|
-
const meaningfulDiff = typeof rawSectionResult?.meaningfulDiff === 'boolean'
|
|
410
|
-
? rawSectionResult.meaningfulDiff
|
|
411
|
-
: (pixelDiffRatio !== null && pixelDiffRatio > meaningfulDiffRatioThreshold)
|
|
412
|
-
|| (maskedPixelDiffRatio !== null && maskedPixelDiffRatio > meaningfulDiffRatioThreshold);
|
|
413
|
-
|
|
414
|
-
return {
|
|
415
|
-
sectionType,
|
|
416
|
-
captureKind,
|
|
417
|
-
tileIndex,
|
|
418
|
-
pixelDiffRatio,
|
|
419
|
-
maskedPixelDiffRatio,
|
|
420
|
-
withinNoiseBudget,
|
|
421
|
-
meaningfulDiff,
|
|
422
|
-
notes: normalizeStringArray(rawSectionResult?.notes),
|
|
423
|
-
};
|
|
424
|
-
})
|
|
425
|
-
.filter((sectionResult) => Boolean(sectionResult.sectionType))
|
|
426
|
-
: [];
|
|
427
|
-
|
|
428
|
-
const coveredViewports = Array.from(new Set(viewportResults.map((viewportResult) => viewportResult.viewport)));
|
|
429
|
-
const missingViewports = normalizedRequiredViewports.filter((requiredViewport) => !coveredViewports.includes(requiredViewport));
|
|
430
|
-
const sectionCoverageRequired = capturePlan.requireSectionCapturesForLongPages === true && (
|
|
431
|
-
rawVisualReport.requiresSectionCoverage === true
|
|
432
|
-
|| String(rawVisualReport.pageLengthCategory || '').trim().toLowerCase() === 'long'
|
|
433
|
-
|| sectionResults.length > 0
|
|
434
|
-
);
|
|
435
|
-
const coveredSectionTypes = Array.from(new Set(sectionResults.map((sectionResult) => sectionResult.sectionType)));
|
|
436
|
-
const missingSectionTypes = sectionCoverageRequired
|
|
437
|
-
? requiredSectionTypes.filter((requiredSectionType) => !coveredSectionTypes.includes(requiredSectionType))
|
|
438
|
-
: [];
|
|
439
|
-
const meaningfulDiffViewports = viewportResults
|
|
440
|
-
.filter((viewportResult) => viewportResult.meaningfulDiff)
|
|
441
|
-
.map((viewportResult) => viewportResult.viewport);
|
|
442
|
-
const meaningfulDiffSectionTypes = Array.from(new Set(
|
|
443
|
-
sectionResults
|
|
444
|
-
.filter((sectionResult) => sectionResult.meaningfulDiff)
|
|
445
|
-
.map((sectionResult) => sectionResult.sectionType)
|
|
446
|
-
));
|
|
447
|
-
const maskedViewportCount = viewportResults.filter((viewportResult) => viewportResult.dynamicMaskCategories.length > 0).length;
|
|
448
|
-
const tileCaptureCount = sectionResults.filter((sectionResult) => sectionResult.captureKind === 'tile').length;
|
|
449
|
-
const reportNotes = normalizeStringArray(rawVisualReport.notes);
|
|
450
|
-
|
|
451
|
-
const semanticEscalationRecommended = rawVisualReport?.summary?.semanticEscalationRecommended === true
|
|
452
|
-
|| meaningfulDiffViewports.length > 0
|
|
453
|
-
|| meaningfulDiffSectionTypes.length > 0
|
|
454
|
-
|| (
|
|
455
|
-
visualQaPolicy?.semanticEscalation?.escalateWhenViewportCoverageIncomplete === true
|
|
456
|
-
&& missingViewports.length > 0
|
|
457
|
-
)
|
|
458
|
-
|| (
|
|
459
|
-
sectionCoverageRequired
|
|
460
|
-
&& missingSectionTypes.length > 0
|
|
461
|
-
);
|
|
462
|
-
const fallbackNotes = [];
|
|
463
|
-
if (viewportResults.length === 0) {
|
|
464
|
-
fallbackNotes.push('Deterministic visual diff report did not include viewportResults.');
|
|
465
|
-
}
|
|
466
|
-
if (sectionCoverageRequired && sectionResults.length === 0) {
|
|
467
|
-
fallbackNotes.push('Long-page screenshot coverage was required, but sectionResults were not provided.');
|
|
468
|
-
}
|
|
469
|
-
if (sectionCoverageRequired && missingSectionTypes.length > 0) {
|
|
470
|
-
fallbackNotes.push(`Long-page screenshot coverage is incomplete. Missing section captures: ${missingSectionTypes.join(', ')}.`);
|
|
471
|
-
}
|
|
472
|
-
|
|
473
|
-
return {
|
|
474
|
-
reportPresent: true,
|
|
475
|
-
reportVersion: String(rawVisualReport.reportVersion || DEFAULT_VISUAL_DIFF_REPORT_VERSION),
|
|
476
|
-
baselineStrategy: String(rawVisualReport.baselineStrategy || visualQaPolicy.baselineStrategy || 'deterministic-screenshots'),
|
|
477
|
-
coverageComplete: missingViewports.length === 0 && (!sectionCoverageRequired || missingSectionTypes.length === 0),
|
|
478
|
-
sectionCoverageRequired,
|
|
479
|
-
requiredViewports: normalizedRequiredViewports,
|
|
480
|
-
coveredViewports,
|
|
481
|
-
missingViewports,
|
|
482
|
-
requiredSectionTypes,
|
|
483
|
-
coveredSectionTypes,
|
|
484
|
-
missingSectionTypes,
|
|
485
|
-
meaningfulDiffViewports,
|
|
486
|
-
meaningfulDiffSectionTypes,
|
|
487
|
-
maskedViewportCount,
|
|
488
|
-
sectionCaptureCount: sectionResults.length,
|
|
489
|
-
tileCaptureCount,
|
|
490
|
-
semanticEscalationRecommended,
|
|
491
|
-
notes: reportNotes.length > 0
|
|
492
|
-
? reportNotes
|
|
493
|
-
: fallbackNotes,
|
|
494
|
-
};
|
|
495
|
-
}
|
|
496
|
-
|
|
497
|
-
function buildSystemPrompt() {
|
|
498
|
-
return [
|
|
499
|
-
'You are a Principal UI/UX Design Reviewer.',
|
|
500
|
-
'Compare the changed UI code against the provided design contract.',
|
|
501
|
-
'Treat docs/design-intent.json as the machine-readable source of truth.',
|
|
502
|
-
'Treat docs/DESIGN.md as explanatory context, not a generic style guide.',
|
|
503
|
-
'When deterministic visual diff evidence is provided, treat it as the first layer of truth for noise filtering, viewport coverage, long-page section coverage, and meaningful-drift detection.',
|
|
504
|
-
'Do not reward generic SaaS defaults or popular template patterns.',
|
|
505
|
-
'Do not penalize originality when the implementation still aligns with the contract.',
|
|
506
|
-
'Purposeful motion is allowed and can improve quality. Only flag motion when it drifts from the contract, ignores reduced-motion expectations, or adds avoidable performance/accessibility risk.',
|
|
507
|
-
'Only flag drift when there is a clear mismatch with the contract, accessibility non-negotiables, or cross-viewport adaptation rules.',
|
|
508
|
-
'Treat WCAG 2.2 AA failures as hard accessibility drift.',
|
|
509
|
-
'Treat APCA as advisory perceptual tuning only. Do not recommend blocking solely because APCA would prefer a stronger readability adjustment when WCAG hard requirements still pass.',
|
|
510
|
-
'Check focus visibility, focus appearance, target size, keyboard access, accessible authentication, and status or dynamic state access when the diff touches those surfaces.',
|
|
511
|
-
'This audit always runs in advisory mode for this repository workflow.',
|
|
512
|
-
'Focus on color intent, typographic hierarchy, responsive re-layout, purposeful motion, component morphology across states, interaction behavior, and genericity drift.',
|
|
513
|
-
'Return ONLY one JSON object on a single line prefixed with JSON_VERDICT:.',
|
|
514
|
-
'Schema:',
|
|
515
|
-
'{"alignmentScore": number|null, "notes": string[], "findings": [{"area": string, "severity": "high|medium|low", "problem": string, "evidence": string, "recommendation": string, "blockingRecommended": boolean}]}',
|
|
516
|
-
].join('\n');
|
|
517
|
-
}
|
|
518
|
-
|
|
519
|
-
function buildUserMessage(designIntentContent, designGuideContent, diffContent, changedUiFiles, deterministicVisualSummary) {
|
|
520
|
-
const truncatedDiff = diffContent.length > MAX_DIFF_CHARS
|
|
521
|
-
? `${diffContent.slice(0, MAX_DIFF_CHARS)}\n\n[DIFF TRUNCATED - ${diffContent.length - MAX_DIFF_CHARS} additional characters omitted]`
|
|
522
|
-
: diffContent;
|
|
523
|
-
|
|
524
|
-
return [
|
|
525
|
-
'## Changed UI Files',
|
|
526
|
-
changedUiFiles.length > 0 ? changedUiFiles.map((filePath) => `- ${filePath}`).join('\n') : '- none',
|
|
527
|
-
'',
|
|
528
|
-
'## design-intent.json',
|
|
529
|
-
'```json',
|
|
530
|
-
JSON.stringify(designIntentContent, null, 2),
|
|
531
|
-
'```',
|
|
532
|
-
'',
|
|
533
|
-
'## DESIGN.md',
|
|
534
|
-
'```md',
|
|
535
|
-
designGuideContent.trim() || '(missing DESIGN.md)',
|
|
536
|
-
'```',
|
|
537
|
-
'',
|
|
538
|
-
'## Deterministic Visual Diff Summary',
|
|
539
|
-
'```json',
|
|
540
|
-
JSON.stringify(deterministicVisualSummary, null, 2),
|
|
541
|
-
'```',
|
|
542
|
-
'',
|
|
543
|
-
'## UI Diff',
|
|
544
|
-
'```diff',
|
|
545
|
-
truncatedDiff.trim() || '(no UI diff)',
|
|
546
|
-
'```',
|
|
547
|
-
'',
|
|
548
|
-
'Judge alignment to the contract. Avoid aesthetic bias toward generic web trends or toward motionless/static outputs.',
|
|
549
|
-
].join('\n');
|
|
550
|
-
}
|
|
551
|
-
|
|
552
|
-
async function callOpenAiProvider(systemPrompt, userMessage) {
|
|
553
|
-
const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gpt-4o-mini';
|
|
554
|
-
const apiResponse = await fetch('https://api.openai.com/v1/chat/completions', {
|
|
555
|
-
method: 'POST',
|
|
556
|
-
headers: {
|
|
557
|
-
'Content-Type': 'application/json',
|
|
558
|
-
Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
|
|
559
|
-
},
|
|
560
|
-
body: JSON.stringify({
|
|
561
|
-
model: selectedModel,
|
|
562
|
-
max_tokens: 2048,
|
|
563
|
-
temperature: 0,
|
|
564
|
-
messages: [
|
|
565
|
-
{ role: 'system', content: systemPrompt },
|
|
566
|
-
{ role: 'user', content: userMessage },
|
|
567
|
-
],
|
|
568
|
-
}),
|
|
569
|
-
});
|
|
570
|
-
|
|
571
|
-
if (!apiResponse.ok) {
|
|
572
|
-
const errorBody = await apiResponse.text();
|
|
573
|
-
throw new Error(`OpenAI API returned ${apiResponse.status}: ${errorBody}`);
|
|
574
|
-
}
|
|
575
|
-
|
|
576
|
-
const responsePayload = await apiResponse.json();
|
|
577
|
-
return responsePayload.choices[0].message.content;
|
|
578
|
-
}
|
|
579
|
-
|
|
580
|
-
async function callAnthropicProvider(systemPrompt, userMessage) {
|
|
581
|
-
const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'claude-3-5-haiku-latest';
|
|
582
|
-
const apiResponse = await fetch('https://api.anthropic.com/v1/messages', {
|
|
583
|
-
method: 'POST',
|
|
584
|
-
headers: {
|
|
585
|
-
'Content-Type': 'application/json',
|
|
586
|
-
'x-api-key': process.env.ANTHROPIC_API_KEY ?? '',
|
|
587
|
-
'anthropic-version': '2023-06-01',
|
|
588
|
-
},
|
|
589
|
-
body: JSON.stringify({
|
|
590
|
-
model: selectedModel,
|
|
591
|
-
max_tokens: 2048,
|
|
592
|
-
system: systemPrompt,
|
|
593
|
-
messages: [{ role: 'user', content: userMessage }],
|
|
594
|
-
}),
|
|
595
|
-
});
|
|
596
|
-
|
|
597
|
-
if (!apiResponse.ok) {
|
|
598
|
-
const errorBody = await apiResponse.text();
|
|
599
|
-
throw new Error(`Anthropic API returned ${apiResponse.status}: ${errorBody}`);
|
|
600
|
-
}
|
|
601
|
-
|
|
602
|
-
const responsePayload = await apiResponse.json();
|
|
603
|
-
return responsePayload.content[0].text;
|
|
604
|
-
}
|
|
605
|
-
|
|
606
|
-
/**
 * Sends the judge prompt to the Gemini `generateContent` endpoint and returns
 * the reply text.
 *
 * The model is read from LLM_JUDGE_MODEL (default 'gemini-2.0-flash') and the
 * API key from GEMINI_API_KEY (empty string when unset).
 *
 * @param {string} systemPrompt - Sent as `system_instruction`.
 * @param {string} userMessage - Sent as the single user content entry.
 * @returns {Promise<string>} Text of the first part of the first candidate.
 * @throws {Error} When the API responds with a non-OK status, or when the
 *   response payload contains no text content.
 */
async function callGeminiProvider(systemPrompt, userMessage) {
  const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gemini-2.0-flash';
  const apiKey = process.env.GEMINI_API_KEY ?? '';
  // Encode the model id so an unusual LLM_JUDGE_MODEL value cannot alter the
  // request path or inject query parameters.
  const endpointUrl = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(selectedModel)}:generateContent`;

  const apiResponse = await fetch(endpointUrl, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      // Send the key as a header rather than a `?key=` query parameter so it
      // does not end up in URLs captured by logs, proxies, or error reports.
      'x-goog-api-key': apiKey,
    },
    body: JSON.stringify({
      system_instruction: { parts: [{ text: systemPrompt }] },
      contents: [{ role: 'user', parts: [{ text: userMessage }] }],
      generationConfig: { temperature: 0, maxOutputTokens: 2048 },
    }),
  });

  if (!apiResponse.ok) {
    const errorBody = await apiResponse.text();
    throw new Error(`Gemini API returned ${apiResponse.status}: ${errorBody}`);
  }

  const responsePayload = await apiResponse.json();
  const replyText = responsePayload?.candidates?.[0]?.content?.parts?.[0]?.text;

  // Fail with a readable message instead of a TypeError (or a silent
  // `undefined`) when the payload has no candidate text.
  if (typeof replyText !== 'string') {
    throw new Error('Gemini API response contained no text content.');
  }

  return replyText;
}
|
|
629
|
-
|
|
630
|
-
function selectAvailableProvider() {
|
|
631
|
-
if (process.env.UI_DESIGN_JUDGE_MOCK_RESPONSE) {
|
|
25
|
+
import { collectChangedFiles, collectPullRequestDiff, isUiRelevantFilePath } from './ui-design-judge/git-input.mjs';
|
|
26
|
+
import { buildSystemPrompt, buildUserMessage } from './ui-design-judge/prompting.mjs';
|
|
27
|
+
import { selectAvailableProvider } from './ui-design-judge/providers.mjs';
|
|
28
|
+
import { calibrateGenericityAssessment } from './ui-design-judge/rubric-calibration.mjs';
|
|
29
|
+
import {
|
|
30
|
+
buildReport,
|
|
31
|
+
emitMachineReadableReport,
|
|
32
|
+
extractVerdictObject,
|
|
33
|
+
normalizeFindings,
|
|
34
|
+
normalizeGenericityAssessment,
|
|
35
|
+
normalizeRubricBreakdown,
|
|
36
|
+
} from './ui-design-judge/reporting.mjs';
|
|
37
|
+
import { loadDesignGuide, loadDesignIntent, summarizeDesignExecutionPolicy, summarizeReviewRubric } from './ui-design-judge/design-execution-summary.mjs';
|
|
38
|
+
|
|
39
|
+
function applyGenericityAutoFail({
|
|
40
|
+
reviewRubricSummary,
|
|
41
|
+
calibration,
|
|
42
|
+
findings,
|
|
43
|
+
notes,
|
|
44
|
+
}) {
|
|
45
|
+
const autoFailEnabled = reviewRubricSummary?.genericityAutoFail === true;
|
|
46
|
+
const namedGenericityDetected = Array.isArray(calibration?.matchedGenericitySignals)
|
|
47
|
+
&& calibration.matchedGenericitySignals.length > 0;
|
|
48
|
+
const forbiddenPatternDetected = Array.isArray(calibration?.matchedForbiddenPatterns)
|
|
49
|
+
&& calibration.matchedForbiddenPatterns.length > 0;
|
|
50
|
+
const shouldAutoFail = autoFailEnabled
|
|
51
|
+
&& calibration?.calibratedStatus === 'generic'
|
|
52
|
+
&& (namedGenericityDetected || forbiddenPatternDetected);
|
|
53
|
+
|
|
54
|
+
if (!shouldAutoFail) {
|
|
632
55
|
return {
|
|
633
|
-
|
|
634
|
-
|
|
56
|
+
findings,
|
|
57
|
+
notes,
|
|
58
|
+
autoFailTriggered: false,
|
|
635
59
|
};
|
|
636
60
|
}
|
|
637
61
|
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
const verdictMatch = rawResponseText.match(/JSON_VERDICT:\s*(\{[\s\S]*\})/i);
|
|
655
|
-
if (!verdictMatch) {
|
|
656
|
-
return { verdict: null, malformed: true };
|
|
657
|
-
}
|
|
658
|
-
|
|
659
|
-
try {
|
|
660
|
-
return {
|
|
661
|
-
verdict: JSON.parse(verdictMatch[1]),
|
|
662
|
-
malformed: false,
|
|
663
|
-
};
|
|
664
|
-
} catch {
|
|
665
|
-
return {
|
|
666
|
-
verdict: null,
|
|
667
|
-
malformed: true,
|
|
668
|
-
};
|
|
669
|
-
}
|
|
670
|
-
}
|
|
671
|
-
|
|
672
|
-
/**
 * Converts the `findings` value of a judge verdict into a uniform list.
 *
 * Anything that is not an array yields an empty list. Each entry is mapped to
 * an object with string `area`, `problem`, `evidence` and `recommendation`
 * fields (falsy values replaced by a placeholder), a `severity` produced by
 * `normalizeSeverity`, and a strict-boolean `blockingRecommended`.
 *
 * @param {unknown} rawFindings - Value taken from the parsed verdict.
 * @returns {Array<object>} Normalized findings, in input order.
 */
function normalizeFindings(rawFindings) {
  // Falsy values fall back to the placeholder before string coercion.
  const textOrFallback = (candidate, fallback) => String(candidate || fallback);

  const toNormalizedFinding = (entry) => {
    const area = textOrFallback(entry?.area, 'general');
    const severity = normalizeSeverity(entry?.severity);
    const problem = textOrFallback(entry?.problem, 'No problem description provided.');
    const evidence = textOrFallback(entry?.evidence, 'No evidence provided.');
    const recommendation = textOrFallback(entry?.recommendation, 'No recommendation provided.');
    // Only a literal `true` marks a finding as blocking.
    const blockingRecommended = entry?.blockingRecommended === true;

    return { area, severity, problem, evidence, recommendation, blockingRecommended };
  };

  return Array.isArray(rawFindings)
    ? rawFindings.map((entry) => toNormalizedFinding(entry))
    : [];
}
|
|
62
|
+
const normalizedFindings = Array.isArray(findings) ? findings.map((finding) => ({ ...finding })) : [];
|
|
63
|
+
const updatedFindings = normalizedFindings.length > 0
|
|
64
|
+
? normalizedFindings.map((finding) => ({
|
|
65
|
+
...finding,
|
|
66
|
+
blockingRecommended: true,
|
|
67
|
+
}))
|
|
68
|
+
: [
|
|
69
|
+
{
|
|
70
|
+
area: 'design-contract',
|
|
71
|
+
severity: 'high',
|
|
72
|
+
problem: 'The UI matches named genericity drift signals that the contract marks as auto-fail.',
|
|
73
|
+
evidence: `Matched signals: ${[...(calibration.matchedGenericitySignals || []), ...(calibration.matchedForbiddenPatterns || [])].join(', ')}`,
|
|
74
|
+
requiredAction: 'Rebuild the affected UI surfaces from the contract and remove the named generic patterns instead of polishing them.',
|
|
75
|
+
blockingRecommended: true,
|
|
76
|
+
},
|
|
77
|
+
];
|
|
686
78
|
|
|
687
|
-
/**
|
|
688
|
-
* @param {Partial<UiDesignJudgeReport>} partialReport
|
|
689
|
-
* @returns {UiDesignJudgeReport}
|
|
690
|
-
*/
|
|
691
|
-
function buildReport(partialReport) {
|
|
692
79
|
return {
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
skipped: false,
|
|
700
|
-
skipReason: null,
|
|
701
|
-
provider: 'none',
|
|
702
|
-
ciProvider: detectCiProvider(),
|
|
703
|
-
contractPresent: false,
|
|
704
|
-
summary: {
|
|
705
|
-
changedUiFileCount: 0,
|
|
706
|
-
alignmentScore: null,
|
|
707
|
-
driftCount: 0,
|
|
708
|
-
blockingCandidateCount: 0,
|
|
709
|
-
meaningfulDiffViewportCount: 0,
|
|
710
|
-
},
|
|
711
|
-
deterministicVisual: {
|
|
712
|
-
reportPresent: false,
|
|
713
|
-
reportVersion: null,
|
|
714
|
-
baselineStrategy: null,
|
|
715
|
-
coverageComplete: false,
|
|
716
|
-
requiredViewports: [],
|
|
717
|
-
coveredViewports: [],
|
|
718
|
-
missingViewports: [],
|
|
719
|
-
meaningfulDiffViewports: [],
|
|
720
|
-
maskedViewportCount: 0,
|
|
721
|
-
semanticEscalationRecommended: false,
|
|
722
|
-
notes: [],
|
|
723
|
-
},
|
|
724
|
-
semanticJudge: {
|
|
725
|
-
attempted: false,
|
|
726
|
-
skipped: false,
|
|
727
|
-
skipReason: null,
|
|
728
|
-
},
|
|
729
|
-
malformedVerdict: false,
|
|
730
|
-
providerError: false,
|
|
731
|
-
findings: [],
|
|
732
|
-
notes: [],
|
|
733
|
-
...partialReport,
|
|
80
|
+
findings: updatedFindings,
|
|
81
|
+
notes: [
|
|
82
|
+
...(Array.isArray(notes) ? notes : []),
|
|
83
|
+
'reviewRubric.genericityAutoFail triggered because named genericity drift signals or forbidden patterns were detected.',
|
|
84
|
+
],
|
|
85
|
+
autoFailTriggered: true,
|
|
734
86
|
};
|
|
735
87
|
}
|
|
736
88
|
|
|
737
|
-
/**
 * Prints the report payload to stdout as JSON indented with two spaces.
 *
 * @param {unknown} machineReportPayload - Value to serialize and print.
 */
function emitMachineReadableReport(machineReportPayload) {
  const serializedReport = JSON.stringify(machineReportPayload, null, 2);
  console.log(serializedReport);
}
|
|
740
|
-
|
|
741
89
|
async function main() {
|
|
742
90
|
const changedFiles = collectChangedFiles();
|
|
743
91
|
const changedUiFiles = changedFiles.filter(isUiRelevantFilePath);
|
|
@@ -765,44 +113,15 @@ async function main() {
|
|
|
765
113
|
alignmentScore: null,
|
|
766
114
|
driftCount: 0,
|
|
767
115
|
blockingCandidateCount: 0,
|
|
768
|
-
|
|
116
|
+
designExecutionSignalCount: 0,
|
|
769
117
|
},
|
|
770
118
|
notes: ['UI design judge only evaluates changed UI surfaces.'],
|
|
771
119
|
}));
|
|
772
120
|
return;
|
|
773
121
|
}
|
|
774
122
|
|
|
775
|
-
const
|
|
776
|
-
|
|
777
|
-
designIntentContent
|
|
778
|
-
);
|
|
779
|
-
const shouldRunSemanticJudge = !deterministicVisualSummary.reportPresent
|
|
780
|
-
|| deterministicVisualSummary.semanticEscalationRecommended;
|
|
781
|
-
|
|
782
|
-
if (!shouldRunSemanticJudge) {
|
|
783
|
-
emitMachineReadableReport(buildReport({
|
|
784
|
-
provider: 'none',
|
|
785
|
-
contractPresent: true,
|
|
786
|
-
summary: {
|
|
787
|
-
changedUiFileCount: changedUiFiles.length,
|
|
788
|
-
alignmentScore: null,
|
|
789
|
-
driftCount: 0,
|
|
790
|
-
blockingCandidateCount: 0,
|
|
791
|
-
meaningfulDiffViewportCount: deterministicVisualSummary.meaningfulDiffViewports.length,
|
|
792
|
-
},
|
|
793
|
-
deterministicVisual: deterministicVisualSummary,
|
|
794
|
-
semanticJudge: {
|
|
795
|
-
attempted: false,
|
|
796
|
-
skipped: true,
|
|
797
|
-
skipReason: 'deterministic-clean',
|
|
798
|
-
},
|
|
799
|
-
notes: [
|
|
800
|
-
'Deterministic visual diff reported no meaningful drift, so semantic review was skipped.',
|
|
801
|
-
...deterministicVisualSummary.notes,
|
|
802
|
-
],
|
|
803
|
-
}));
|
|
804
|
-
return;
|
|
805
|
-
}
|
|
123
|
+
const designExecutionSummary = summarizeDesignExecutionPolicy(designIntentContent);
|
|
124
|
+
const reviewRubricSummary = summarizeReviewRubric(designIntentContent);
|
|
806
125
|
|
|
807
126
|
const systemPrompt = buildSystemPrompt();
|
|
808
127
|
const userMessage = buildUserMessage(
|
|
@@ -810,11 +129,21 @@ async function main() {
|
|
|
810
129
|
designGuideContent,
|
|
811
130
|
rawDiff,
|
|
812
131
|
changedUiFiles,
|
|
813
|
-
|
|
132
|
+
designExecutionSummary
|
|
814
133
|
);
|
|
815
134
|
|
|
816
135
|
const selectedProvider = selectAvailableProvider();
|
|
817
136
|
if (!selectedProvider) {
|
|
137
|
+
const calibration = calibrateGenericityAssessment({
|
|
138
|
+
reviewRubricSummary,
|
|
139
|
+
designExecutionSummary,
|
|
140
|
+
genericityAssessment: { status: 'unclear', reason: 'No provider review was run.' },
|
|
141
|
+
rubricBreakdown: [],
|
|
142
|
+
findings: [],
|
|
143
|
+
notes: [],
|
|
144
|
+
tasteVsFailureSeparated: null,
|
|
145
|
+
});
|
|
146
|
+
|
|
818
147
|
emitMachineReadableReport(buildReport({
|
|
819
148
|
provider: 'none',
|
|
820
149
|
contractPresent: true,
|
|
@@ -823,9 +152,21 @@ async function main() {
|
|
|
823
152
|
alignmentScore: null,
|
|
824
153
|
driftCount: 0,
|
|
825
154
|
blockingCandidateCount: 0,
|
|
826
|
-
|
|
155
|
+
designExecutionSignalCount: designExecutionSummary.enabledCapabilities.length,
|
|
156
|
+
structuredInspectionAvailable: designExecutionSummary.structuredInspectionAvailable,
|
|
157
|
+
genericityStatus: calibration.calibratedStatus,
|
|
158
|
+
},
|
|
159
|
+
designExecution: designExecutionSummary,
|
|
160
|
+
rubric: {
|
|
161
|
+
expectedDimensions: reviewRubricSummary.dimensions.map((dimension) => dimension.key),
|
|
162
|
+
breakdown: [],
|
|
163
|
+
genericityAssessment: {
|
|
164
|
+
status: 'unclear',
|
|
165
|
+
reason: 'No provider review was run.',
|
|
166
|
+
},
|
|
167
|
+
tasteVsFailureSeparated: null,
|
|
168
|
+
calibration,
|
|
827
169
|
},
|
|
828
|
-
deterministicVisual: deterministicVisualSummary,
|
|
829
170
|
semanticJudge: {
|
|
830
171
|
attempted: false,
|
|
831
172
|
skipped: true,
|
|
@@ -833,7 +174,7 @@ async function main() {
|
|
|
833
174
|
},
|
|
834
175
|
notes: [
|
|
835
176
|
'No LLM provider configured. UI design judge skipped provider review and stayed advisory.',
|
|
836
|
-
...
|
|
177
|
+
...designExecutionSummary.notes,
|
|
837
178
|
],
|
|
838
179
|
}));
|
|
839
180
|
return;
|
|
@@ -843,6 +184,16 @@ async function main() {
|
|
|
843
184
|
try {
|
|
844
185
|
rawJudgeResponse = await selectedProvider.invokeProvider(systemPrompt, userMessage);
|
|
845
186
|
} catch (providerError) {
|
|
187
|
+
const calibration = calibrateGenericityAssessment({
|
|
188
|
+
reviewRubricSummary,
|
|
189
|
+
designExecutionSummary,
|
|
190
|
+
genericityAssessment: { status: 'unclear', reason: 'Provider review failed before rubric scoring completed.' },
|
|
191
|
+
rubricBreakdown: [],
|
|
192
|
+
findings: [],
|
|
193
|
+
notes: [],
|
|
194
|
+
tasteVsFailureSeparated: null,
|
|
195
|
+
});
|
|
196
|
+
|
|
846
197
|
const providerErrorMessage = providerError instanceof Error
|
|
847
198
|
? providerError.message
|
|
848
199
|
: 'Unknown provider error';
|
|
@@ -856,15 +207,27 @@ async function main() {
|
|
|
856
207
|
alignmentScore: null,
|
|
857
208
|
driftCount: 0,
|
|
858
209
|
blockingCandidateCount: 0,
|
|
859
|
-
|
|
210
|
+
designExecutionSignalCount: designExecutionSummary.enabledCapabilities.length,
|
|
211
|
+
structuredInspectionAvailable: designExecutionSummary.structuredInspectionAvailable,
|
|
212
|
+
genericityStatus: calibration.calibratedStatus,
|
|
213
|
+
},
|
|
214
|
+
designExecution: designExecutionSummary,
|
|
215
|
+
rubric: {
|
|
216
|
+
expectedDimensions: reviewRubricSummary.dimensions.map((dimension) => dimension.key),
|
|
217
|
+
breakdown: [],
|
|
218
|
+
genericityAssessment: {
|
|
219
|
+
status: 'unclear',
|
|
220
|
+
reason: 'Provider review failed before rubric scoring completed.',
|
|
221
|
+
},
|
|
222
|
+
tasteVsFailureSeparated: null,
|
|
223
|
+
calibration,
|
|
860
224
|
},
|
|
861
|
-
deterministicVisual: deterministicVisualSummary,
|
|
862
225
|
semanticJudge: {
|
|
863
226
|
attempted: true,
|
|
864
227
|
skipped: false,
|
|
865
228
|
skipReason: null,
|
|
866
229
|
},
|
|
867
|
-
notes: [`Provider call failed: ${providerErrorMessage}`, ...
|
|
230
|
+
notes: [`Provider call failed: ${providerErrorMessage}`, ...designExecutionSummary.notes],
|
|
868
231
|
passed: true,
|
|
869
232
|
}));
|
|
870
233
|
return;
|
|
@@ -872,13 +235,38 @@ async function main() {
|
|
|
872
235
|
|
|
873
236
|
const { verdict, malformed } = extractVerdictObject(rawJudgeResponse);
|
|
874
237
|
const findings = normalizeFindings(verdict?.findings);
|
|
875
|
-
const
|
|
238
|
+
const rubricBreakdown = normalizeRubricBreakdown(
|
|
239
|
+
verdict?.rubricBreakdown,
|
|
240
|
+
reviewRubricSummary.dimensions.map((dimension) => dimension.key)
|
|
241
|
+
);
|
|
242
|
+
const genericityAssessment = normalizeGenericityAssessment(verdict?.genericityAssessment);
|
|
243
|
+
const tasteVsFailureSeparated = typeof verdict?.tasteVsFailureSeparated === 'boolean'
|
|
244
|
+
? verdict.tasteVsFailureSeparated
|
|
245
|
+
: null;
|
|
876
246
|
const alignmentScore = typeof verdict?.alignmentScore === 'number' ? verdict.alignmentScore : null;
|
|
877
247
|
const notes = Array.isArray(verdict?.notes)
|
|
878
248
|
? verdict.notes.map((note) => String(note))
|
|
879
249
|
: [];
|
|
250
|
+
const calibration = calibrateGenericityAssessment({
|
|
251
|
+
reviewRubricSummary,
|
|
252
|
+
designExecutionSummary,
|
|
253
|
+
genericityAssessment,
|
|
254
|
+
rubricBreakdown,
|
|
255
|
+
findings,
|
|
256
|
+
notes,
|
|
257
|
+
tasteVsFailureSeparated,
|
|
258
|
+
});
|
|
259
|
+
const autoFailResolution = applyGenericityAutoFail({
|
|
260
|
+
reviewRubricSummary,
|
|
261
|
+
calibration,
|
|
262
|
+
findings,
|
|
263
|
+
notes,
|
|
264
|
+
});
|
|
265
|
+
const resolvedFindings = autoFailResolution.findings;
|
|
266
|
+
const resolvedNotes = autoFailResolution.notes;
|
|
267
|
+
const blockingCandidateCount = resolvedFindings.filter((finding) => finding.blockingRecommended || finding.severity === 'high').length;
|
|
880
268
|
|
|
881
|
-
|
|
269
|
+
emitMachineReadableReport(buildReport({
|
|
882
270
|
provider: selectedProvider.providerName,
|
|
883
271
|
contractPresent: true,
|
|
884
272
|
passed: true,
|
|
@@ -886,23 +274,30 @@ async function main() {
|
|
|
886
274
|
summary: {
|
|
887
275
|
changedUiFileCount: changedUiFiles.length,
|
|
888
276
|
alignmentScore,
|
|
889
|
-
driftCount:
|
|
277
|
+
driftCount: resolvedFindings.length,
|
|
890
278
|
blockingCandidateCount,
|
|
891
|
-
|
|
279
|
+
designExecutionSignalCount: designExecutionSummary.enabledCapabilities.length,
|
|
280
|
+
structuredInspectionAvailable: designExecutionSummary.structuredInspectionAvailable,
|
|
281
|
+
genericityStatus: calibration.calibratedStatus,
|
|
282
|
+
},
|
|
283
|
+
designExecution: designExecutionSummary,
|
|
284
|
+
rubric: {
|
|
285
|
+
expectedDimensions: reviewRubricSummary.dimensions.map((dimension) => dimension.key),
|
|
286
|
+
breakdown: rubricBreakdown,
|
|
287
|
+
genericityAssessment,
|
|
288
|
+
tasteVsFailureSeparated,
|
|
289
|
+
calibration,
|
|
892
290
|
},
|
|
893
|
-
deterministicVisual: deterministicVisualSummary,
|
|
894
291
|
semanticJudge: {
|
|
895
292
|
attempted: true,
|
|
896
293
|
skipped: false,
|
|
897
294
|
skipReason: null,
|
|
898
295
|
},
|
|
899
|
-
findings,
|
|
296
|
+
findings: resolvedFindings,
|
|
900
297
|
notes: malformed
|
|
901
|
-
? ['LLM response was malformed. Advisory mode kept the audit non-blocking.', ...
|
|
902
|
-
: [...notes, ...
|
|
903
|
-
});
|
|
904
|
-
|
|
905
|
-
emitMachineReadableReport(reportPayload);
|
|
298
|
+
? ['LLM response was malformed. Advisory mode kept the audit non-blocking.', ...designExecutionSummary.notes]
|
|
299
|
+
: [...resolvedNotes, ...calibration.notes, ...designExecutionSummary.notes],
|
|
300
|
+
}));
|
|
906
301
|
}
|
|
907
302
|
|
|
908
303
|
main().catch((unexpectedError) => {
|