@ryuenn3123/agentic-senior-core 3.0.17 → 3.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/.agent-context/prompts/bootstrap-design.md +16 -7
  2. package/.agent-context/rules/frontend-architecture.md +5 -5
  3. package/.agent-context/state/memory-continuity-benchmark.json +1 -1
  4. package/.cursorrules +1 -1
  5. package/.gemini/instructions.md +1 -1
  6. package/.github/copilot-instructions.md +1 -1
  7. package/.instructions.md +1 -1
  8. package/.windsurfrules +1 -1
  9. package/AGENTS.md +1 -1
  10. package/lib/cli/project-scaffolder/design-contract.mjs +363 -314
  11. package/lib/cli/project-scaffolder/prompt-builders.mjs +28 -22
  12. package/lib/cli/project-scaffolder/storage.mjs +0 -2
  13. package/package.json +2 -2
  14. package/scripts/frontend-usability-audit.mjs +19 -8
  15. package/scripts/mcp-server/constants.mjs +60 -0
  16. package/scripts/mcp-server/tool-registry.mjs +149 -0
  17. package/scripts/mcp-server/tools.mjs +446 -0
  18. package/scripts/mcp-server.mjs +23 -661
  19. package/scripts/release-gate/audit-checks.mjs +426 -0
  20. package/scripts/release-gate/constants.mjs +53 -0
  21. package/scripts/release-gate/runtime.mjs +63 -0
  22. package/scripts/release-gate/static-checks.mjs +182 -0
  23. package/scripts/release-gate.mjs +12 -793
  24. package/scripts/ui-design-judge/constants.mjs +24 -0
  25. package/scripts/ui-design-judge/design-execution-summary.mjs +233 -0
  26. package/scripts/ui-design-judge/git-input.mjs +131 -0
  27. package/scripts/ui-design-judge/prompting.mjs +73 -0
  28. package/scripts/ui-design-judge/providers.mjs +102 -0
  29. package/scripts/ui-design-judge/reporting.mjs +181 -0
  30. package/scripts/ui-design-judge/rubric-calibration.mjs +211 -0
  31. package/scripts/ui-design-judge/rubric-goldset.json +188 -0
  32. package/scripts/ui-design-judge.mjs +105 -774
  33. package/scripts/ui-rubric-calibration.mjs +35 -0
  34. package/scripts/validate/config.mjs +69 -16
@@ -7,736 +7,31 @@
7
7
  * Advisory-first UI design contract judge.
8
8
  *
9
9
  * Repo-internal workflow audit; no user-facing runtime modes.
10
- * Compares changed UI diffs against docs/design-intent.json and docs/DESIGN.md.
11
10
  * Runs only in advisory mode for this repository workflow.
12
- * Emits JSON to stdout for release-gate and CI consumption.
13
- */
14
-
15
- import { execSync } from 'node:child_process';
16
- import { existsSync, readFileSync } from 'node:fs';
17
- import { resolve, dirname, extname } from 'node:path';
18
- import { fileURLToPath } from 'node:url';
19
-
20
- const __filename = fileURLToPath(import.meta.url);
21
- const __dirname = dirname(__filename);
22
- const REPOSITORY_ROOT = resolve(__dirname, '..');
23
-
24
- const DESIGN_INTENT_PATH = resolve(REPOSITORY_ROOT, 'docs', 'design-intent.json');
25
- const DESIGN_GUIDE_PATH = resolve(REPOSITORY_ROOT, 'docs', 'DESIGN.md');
26
- const MAX_DIFF_CHARS = 12000;
27
- const UI_FILE_EXTENSIONS = new Set(['.js', '.jsx', '.ts', '.tsx', '.vue', '.css', '.scss', '.sass']);
28
- const DEFAULT_VISUAL_DIFF_REPORT_VERSION = 'hybrid-visual-diff-v1';
29
- const DEFAULT_REQUIRED_VIEWPORTS = ['mobile', 'tablet', 'desktop'];
30
-
31
- /**
32
- * @typedef {{
33
- * area: string,
34
- * severity: string,
35
- * problem: string,
36
- * evidence: string,
37
- * recommendation: string,
38
- * blockingRecommended: boolean,
39
- * }} DriftFinding
40
- */
41
-
42
- /**
43
- * @typedef {{
44
- * generatedAt: string,
45
- * auditName: string,
46
- * schemaVersion: string,
47
- * mode: 'advisory',
48
- * advisoryOnly: boolean,
49
- * passed: boolean,
50
- * skipped: boolean,
51
- * skipReason: string | null,
52
- * provider: string,
53
- * ciProvider: string,
54
- * contractPresent: boolean,
55
- * summary: {
56
- * changedUiFileCount: number,
57
- * alignmentScore: number | null,
58
- * driftCount: number,
59
- * blockingCandidateCount: number,
60
- * meaningfulDiffViewportCount: number,
61
- * },
62
- * deterministicVisual: {
63
- * reportPresent: boolean,
64
- * reportVersion: string | null,
65
- * baselineStrategy: string | null,
66
- * coverageComplete: boolean,
67
- * sectionCoverageRequired: boolean,
68
- * requiredViewports: string[],
69
- * coveredViewports: string[],
70
- * missingViewports: string[],
71
- * requiredSectionTypes: string[],
72
- * coveredSectionTypes: string[],
73
- * missingSectionTypes: string[],
74
- * meaningfulDiffViewports: string[],
75
- * meaningfulDiffSectionTypes: string[],
76
- * maskedViewportCount: number,
77
- * sectionCaptureCount: number,
78
- * tileCaptureCount: number,
79
- * semanticEscalationRecommended: boolean,
80
- * notes: string[],
81
- * },
82
- * semanticJudge: {
83
- * attempted: boolean,
84
- * skipped: boolean,
85
- * skipReason: string | null,
86
- * },
87
- * malformedVerdict: boolean,
88
- * providerError: boolean,
89
- * findings: DriftFinding[],
90
- * notes: string[],
91
- * }} UiDesignJudgeReport
92
- */
93
-
94
- function detectCiProvider() {
95
- if (process.env.GITHUB_ACTIONS === 'true') {
96
- return 'github';
97
- }
98
-
99
- if (process.env.GITLAB_CI === 'true') {
100
- return 'gitlab';
101
- }
102
-
103
- return 'local';
104
- }
105
-
106
- function normalizeSeverity(rawSeverityValue) {
107
- const normalizedSeverityValue = String(rawSeverityValue || '').trim().toLowerCase();
108
-
109
- if (['critical', 'high', 'medium', 'low'].includes(normalizedSeverityValue)) {
110
- return normalizedSeverityValue;
111
- }
112
-
113
- if (normalizedSeverityValue === 'major') {
114
- return 'high';
115
- }
116
-
117
- if (normalizedSeverityValue === 'minor' || normalizedSeverityValue === 'info') {
118
- return 'low';
119
- }
120
-
121
- return 'low';
122
- }
123
-
124
- function collectGitDiff(baseSha, headSha) {
125
- const execOptions = {
126
- cwd: REPOSITORY_ROOT,
127
- encoding: /** @type {'utf-8'} */ ('utf-8'),
128
- maxBuffer: 1024 * 1024 * 8,
129
- };
130
-
131
- return execSync(`git diff "${baseSha}...${headSha}"`, execOptions);
132
- }
133
-
134
- function collectGitChangedFiles(baseSha, headSha) {
135
- const execOptions = {
136
- cwd: REPOSITORY_ROOT,
137
- encoding: /** @type {'utf-8'} */ ('utf-8'),
138
- maxBuffer: 1024 * 1024 * 2,
139
- };
140
-
141
- const output = execSync(`git diff --name-only "${baseSha}...${headSha}"`, execOptions);
142
- return output
143
- .split(/\r?\n/u)
144
- .map((filePath) => filePath.trim())
145
- .filter(Boolean);
146
- }
147
-
148
- function collectPullRequestDiff() {
149
- if (process.env.PR_DIFF) {
150
- return process.env.PR_DIFF;
151
- }
152
-
153
- const githubBaseSha = process.env.GITHUB_BASE_SHA;
154
- const githubHeadSha = process.env.GITHUB_HEAD_SHA ?? 'HEAD';
155
- if (githubBaseSha) {
156
- return collectGitDiff(githubBaseSha, githubHeadSha);
157
- }
158
-
159
- const gitlabBaseSha = process.env.CI_MERGE_REQUEST_DIFF_BASE_SHA;
160
- const gitlabHeadSha = process.env.CI_COMMIT_SHA ?? 'HEAD';
161
- if (gitlabBaseSha) {
162
- return collectGitDiff(gitlabBaseSha, gitlabHeadSha);
163
- }
164
-
165
- try {
166
- return execSync('git diff HEAD~1 HEAD', {
167
- cwd: REPOSITORY_ROOT,
168
- encoding: /** @type {'utf-8'} */ ('utf-8'),
169
- maxBuffer: 1024 * 1024 * 8,
170
- });
171
- } catch {
172
- try {
173
- const emptyTreeSha = '4b825dc642cb6eb9a060e54bf8d69288fbee4904';
174
- return execSync(`git diff "${emptyTreeSha}" HEAD`, {
175
- cwd: REPOSITORY_ROOT,
176
- encoding: /** @type {'utf-8'} */ ('utf-8'),
177
- maxBuffer: 1024 * 1024 * 8,
178
- });
179
- } catch {
180
- return '';
181
- }
182
- }
183
- }
184
-
185
- function collectChangedFiles() {
186
- if (process.env.PR_DIFF) {
187
- const filePathSet = new Set();
188
- for (const diffHeaderMatch of process.env.PR_DIFF.matchAll(/^diff --git a\/(.+?) b\/(.+)$/gm)) {
189
- filePathSet.add(diffHeaderMatch[2]);
190
- }
191
- return Array.from(filePathSet);
192
- }
193
-
194
- const githubBaseSha = process.env.GITHUB_BASE_SHA;
195
- const githubHeadSha = process.env.GITHUB_HEAD_SHA ?? 'HEAD';
196
- if (githubBaseSha) {
197
- return collectGitChangedFiles(githubBaseSha, githubHeadSha);
198
- }
199
-
200
- const gitlabBaseSha = process.env.CI_MERGE_REQUEST_DIFF_BASE_SHA;
201
- const gitlabHeadSha = process.env.CI_COMMIT_SHA ?? 'HEAD';
202
- if (gitlabBaseSha) {
203
- return collectGitChangedFiles(gitlabBaseSha, gitlabHeadSha);
204
- }
205
-
206
- try {
207
- const output = execSync('git diff --name-only HEAD~1 HEAD', {
208
- cwd: REPOSITORY_ROOT,
209
- encoding: /** @type {'utf-8'} */ ('utf-8'),
210
- maxBuffer: 1024 * 1024 * 2,
211
- });
212
- return output.split(/\r?\n/u).map((filePath) => filePath.trim()).filter(Boolean);
213
- } catch {
214
- return [];
215
- }
216
- }
217
-
218
- function isUiRelevantFilePath(filePath) {
219
- const normalizedFilePath = String(filePath || '').replace(/\\/g, '/').toLowerCase();
220
- const fileExtension = extname(normalizedFilePath);
221
-
222
- if (!UI_FILE_EXTENSIONS.has(fileExtension)) {
223
- return false;
224
- }
225
-
226
- return (
227
- normalizedFilePath.startsWith('src/')
228
- || normalizedFilePath.startsWith('app/')
229
- || normalizedFilePath.startsWith('pages/')
230
- || normalizedFilePath.startsWith('components/')
231
- || normalizedFilePath.startsWith('styles/')
232
- || normalizedFilePath.includes('/components/')
233
- || normalizedFilePath.includes('/screens/')
234
- || normalizedFilePath.includes('/layouts/')
235
- );
236
- }
237
-
238
- function loadDesignIntent() {
239
- if (!existsSync(DESIGN_INTENT_PATH)) {
240
- return null;
241
- }
242
-
243
- try {
244
- return JSON.parse(readFileSync(DESIGN_INTENT_PATH, 'utf8'));
245
- } catch {
246
- return null;
247
- }
248
- }
249
-
250
- function loadDesignGuide() {
251
- if (!existsSync(DESIGN_GUIDE_PATH)) {
252
- return '';
253
- }
254
-
255
- return readFileSync(DESIGN_GUIDE_PATH, 'utf8');
256
- }
257
-
258
- function toFiniteRatio(rawValue) {
259
- return typeof rawValue === 'number' && Number.isFinite(rawValue)
260
- ? rawValue
261
- : null;
262
- }
263
-
264
- function normalizeStringArray(rawValue) {
265
- if (!Array.isArray(rawValue)) {
266
- return [];
267
- }
268
-
269
- return rawValue
270
- .map((entryValue) => String(entryValue || '').trim())
271
- .filter(Boolean);
272
- }
273
-
274
- function loadDeterministicVisualReport() {
275
- if (process.env.UI_VISUAL_DIFF_REPORT_JSON) {
276
- try {
277
- return JSON.parse(process.env.UI_VISUAL_DIFF_REPORT_JSON);
278
- } catch {
279
- return {
280
- malformed: true,
281
- notes: ['UI_VISUAL_DIFF_REPORT_JSON could not be parsed as JSON.'],
282
- };
283
- }
284
- }
285
-
286
- if (process.env.UI_VISUAL_DIFF_REPORT_PATH) {
287
- const reportPath = resolve(REPOSITORY_ROOT, process.env.UI_VISUAL_DIFF_REPORT_PATH);
288
- if (!existsSync(reportPath)) {
289
- return {
290
- malformed: true,
291
- notes: [`UI_VISUAL_DIFF_REPORT_PATH does not exist: ${process.env.UI_VISUAL_DIFF_REPORT_PATH}`],
292
- };
293
- }
294
-
295
- try {
296
- return JSON.parse(readFileSync(reportPath, 'utf8'));
297
- } catch {
298
- return {
299
- malformed: true,
300
- notes: [`UI_VISUAL_DIFF_REPORT_PATH could not be parsed as JSON: ${process.env.UI_VISUAL_DIFF_REPORT_PATH}`],
301
- };
302
- }
303
- }
304
-
305
- return null;
306
- }
307
-
308
- function summarizeDeterministicVisualReport(rawVisualReport, designIntentContent) {
309
- const visualQaPolicy = designIntentContent?.visualQaPolicy && typeof designIntentContent.visualQaPolicy === 'object'
310
- ? designIntentContent.visualQaPolicy
311
- : {};
312
- const capturePlan = visualQaPolicy?.capturePlan && typeof visualQaPolicy.capturePlan === 'object'
313
- ? visualQaPolicy.capturePlan
314
- : {};
315
- const requiredViewports = normalizeStringArray(visualQaPolicy.requiredViewports);
316
- const normalizedRequiredViewports = requiredViewports.length > 0 ? requiredViewports : DEFAULT_REQUIRED_VIEWPORTS;
317
- const requiredSectionTypes = normalizeStringArray(capturePlan.requiredSectionTypes);
318
- const meaningfulDiffRatioThreshold = toFiniteRatio(visualQaPolicy?.semanticEscalation?.meaningfulDiffRatioThreshold) ?? 0.01;
319
- const maxUnmaskedDiffRatio = toFiniteRatio(visualQaPolicy?.stability?.maxUnmaskedDiffRatio) ?? 0.005;
320
- const maxMaskedDiffRatio = toFiniteRatio(visualQaPolicy?.stability?.maxMaskedDiffRatio) ?? 0.02;
321
-
322
- if (!rawVisualReport) {
323
- return {
324
- reportPresent: false,
325
- reportVersion: null,
326
- baselineStrategy: visualQaPolicy.baselineStrategy || null,
327
- coverageComplete: false,
328
- sectionCoverageRequired: capturePlan.requireSectionCapturesForLongPages === true,
329
- requiredViewports: normalizedRequiredViewports,
330
- coveredViewports: [],
331
- missingViewports: normalizedRequiredViewports,
332
- requiredSectionTypes,
333
- coveredSectionTypes: [],
334
- missingSectionTypes: requiredSectionTypes,
335
- meaningfulDiffViewports: [],
336
- meaningfulDiffSectionTypes: [],
337
- maskedViewportCount: 0,
338
- sectionCaptureCount: 0,
339
- tileCaptureCount: 0,
340
- semanticEscalationRecommended: false,
341
- notes: ['No deterministic visual diff report was supplied.'],
342
- };
343
- }
344
-
345
- if (rawVisualReport.malformed === true) {
346
- return {
347
- reportPresent: false,
348
- reportVersion: null,
349
- baselineStrategy: visualQaPolicy.baselineStrategy || null,
350
- coverageComplete: false,
351
- sectionCoverageRequired: capturePlan.requireSectionCapturesForLongPages === true,
352
- requiredViewports: normalizedRequiredViewports,
353
- coveredViewports: [],
354
- missingViewports: normalizedRequiredViewports,
355
- requiredSectionTypes,
356
- coveredSectionTypes: [],
357
- missingSectionTypes: requiredSectionTypes,
358
- meaningfulDiffViewports: [],
359
- meaningfulDiffSectionTypes: [],
360
- maskedViewportCount: 0,
361
- sectionCaptureCount: 0,
362
- tileCaptureCount: 0,
363
- semanticEscalationRecommended: true,
364
- notes: normalizeStringArray(rawVisualReport.notes).length > 0
365
- ? normalizeStringArray(rawVisualReport.notes)
366
- : ['Deterministic visual diff report was malformed.'],
367
- };
368
- }
369
-
370
- const viewportResults = Array.isArray(rawVisualReport.viewportResults)
371
- ? rawVisualReport.viewportResults
372
- .map((rawViewportResult) => {
373
- const viewportName = String(rawViewportResult?.viewport || '').trim().toLowerCase();
374
- const pixelDiffRatio = toFiniteRatio(rawViewportResult?.pixelDiffRatio);
375
- const maskedPixelDiffRatio = toFiniteRatio(rawViewportResult?.maskedPixelDiffRatio);
376
- const withinNoiseBudget = typeof rawViewportResult?.withinNoiseBudget === 'boolean'
377
- ? rawViewportResult.withinNoiseBudget
378
- : (pixelDiffRatio === null || pixelDiffRatio <= maxUnmaskedDiffRatio)
379
- && (maskedPixelDiffRatio === null || maskedPixelDiffRatio <= maxMaskedDiffRatio);
380
- const meaningfulDiff = typeof rawViewportResult?.meaningfulDiff === 'boolean'
381
- ? rawViewportResult.meaningfulDiff
382
- : (pixelDiffRatio !== null && pixelDiffRatio > meaningfulDiffRatioThreshold)
383
- || (maskedPixelDiffRatio !== null && maskedPixelDiffRatio > meaningfulDiffRatioThreshold);
384
-
385
- return {
386
- viewport: viewportName,
387
- pixelDiffRatio,
388
- maskedPixelDiffRatio,
389
- withinNoiseBudget,
390
- meaningfulDiff,
391
- dynamicMaskCategories: normalizeStringArray(rawViewportResult?.dynamicMaskCategories),
392
- notes: normalizeStringArray(rawViewportResult?.notes),
393
- };
394
- })
395
- .filter((viewportResult) => Boolean(viewportResult.viewport))
396
- : [];
397
- const sectionResults = Array.isArray(rawVisualReport.sectionResults)
398
- ? rawVisualReport.sectionResults
399
- .map((rawSectionResult) => {
400
- const sectionType = String(rawSectionResult?.sectionType || '').trim().toLowerCase();
401
- const captureKind = String(rawSectionResult?.captureKind || '').trim().toLowerCase();
402
- const tileIndex = Number.isInteger(rawSectionResult?.tileIndex) ? rawSectionResult.tileIndex : null;
403
- const pixelDiffRatio = toFiniteRatio(rawSectionResult?.pixelDiffRatio);
404
- const maskedPixelDiffRatio = toFiniteRatio(rawSectionResult?.maskedPixelDiffRatio);
405
- const withinNoiseBudget = typeof rawSectionResult?.withinNoiseBudget === 'boolean'
406
- ? rawSectionResult.withinNoiseBudget
407
- : (pixelDiffRatio === null || pixelDiffRatio <= maxUnmaskedDiffRatio)
408
- && (maskedPixelDiffRatio === null || maskedPixelDiffRatio <= maxMaskedDiffRatio);
409
- const meaningfulDiff = typeof rawSectionResult?.meaningfulDiff === 'boolean'
410
- ? rawSectionResult.meaningfulDiff
411
- : (pixelDiffRatio !== null && pixelDiffRatio > meaningfulDiffRatioThreshold)
412
- || (maskedPixelDiffRatio !== null && maskedPixelDiffRatio > meaningfulDiffRatioThreshold);
413
-
414
- return {
415
- sectionType,
416
- captureKind,
417
- tileIndex,
418
- pixelDiffRatio,
419
- maskedPixelDiffRatio,
420
- withinNoiseBudget,
421
- meaningfulDiff,
422
- notes: normalizeStringArray(rawSectionResult?.notes),
423
- };
424
- })
425
- .filter((sectionResult) => Boolean(sectionResult.sectionType))
426
- : [];
427
-
428
- const coveredViewports = Array.from(new Set(viewportResults.map((viewportResult) => viewportResult.viewport)));
429
- const missingViewports = normalizedRequiredViewports.filter((requiredViewport) => !coveredViewports.includes(requiredViewport));
430
- const sectionCoverageRequired = capturePlan.requireSectionCapturesForLongPages === true && (
431
- rawVisualReport.requiresSectionCoverage === true
432
- || String(rawVisualReport.pageLengthCategory || '').trim().toLowerCase() === 'long'
433
- || sectionResults.length > 0
434
- );
435
- const coveredSectionTypes = Array.from(new Set(sectionResults.map((sectionResult) => sectionResult.sectionType)));
436
- const missingSectionTypes = sectionCoverageRequired
437
- ? requiredSectionTypes.filter((requiredSectionType) => !coveredSectionTypes.includes(requiredSectionType))
438
- : [];
439
- const meaningfulDiffViewports = viewportResults
440
- .filter((viewportResult) => viewportResult.meaningfulDiff)
441
- .map((viewportResult) => viewportResult.viewport);
442
- const meaningfulDiffSectionTypes = Array.from(new Set(
443
- sectionResults
444
- .filter((sectionResult) => sectionResult.meaningfulDiff)
445
- .map((sectionResult) => sectionResult.sectionType)
446
- ));
447
- const maskedViewportCount = viewportResults.filter((viewportResult) => viewportResult.dynamicMaskCategories.length > 0).length;
448
- const tileCaptureCount = sectionResults.filter((sectionResult) => sectionResult.captureKind === 'tile').length;
449
- const reportNotes = normalizeStringArray(rawVisualReport.notes);
450
-
451
- const semanticEscalationRecommended = rawVisualReport?.summary?.semanticEscalationRecommended === true
452
- || meaningfulDiffViewports.length > 0
453
- || meaningfulDiffSectionTypes.length > 0
454
- || (
455
- visualQaPolicy?.semanticEscalation?.escalateWhenViewportCoverageIncomplete === true
456
- && missingViewports.length > 0
457
- )
458
- || (
459
- sectionCoverageRequired
460
- && missingSectionTypes.length > 0
461
- );
462
- const fallbackNotes = [];
463
- if (viewportResults.length === 0) {
464
- fallbackNotes.push('Deterministic visual diff report did not include viewportResults.');
465
- }
466
- if (sectionCoverageRequired && sectionResults.length === 0) {
467
- fallbackNotes.push('Long-page screenshot coverage was required, but sectionResults were not provided.');
468
- }
469
- if (sectionCoverageRequired && missingSectionTypes.length > 0) {
470
- fallbackNotes.push(`Long-page screenshot coverage is incomplete. Missing section captures: ${missingSectionTypes.join(', ')}.`);
471
- }
472
-
473
- return {
474
- reportPresent: true,
475
- reportVersion: String(rawVisualReport.reportVersion || DEFAULT_VISUAL_DIFF_REPORT_VERSION),
476
- baselineStrategy: String(rawVisualReport.baselineStrategy || visualQaPolicy.baselineStrategy || 'deterministic-screenshots'),
477
- coverageComplete: missingViewports.length === 0 && (!sectionCoverageRequired || missingSectionTypes.length === 0),
478
- sectionCoverageRequired,
479
- requiredViewports: normalizedRequiredViewports,
480
- coveredViewports,
481
- missingViewports,
482
- requiredSectionTypes,
483
- coveredSectionTypes,
484
- missingSectionTypes,
485
- meaningfulDiffViewports,
486
- meaningfulDiffSectionTypes,
487
- maskedViewportCount,
488
- sectionCaptureCount: sectionResults.length,
489
- tileCaptureCount,
490
- semanticEscalationRecommended,
491
- notes: reportNotes.length > 0
492
- ? reportNotes
493
- : fallbackNotes,
494
- };
495
- }
496
-
497
- function buildSystemPrompt() {
498
- return [
499
- 'You are a Principal UI/UX Design Reviewer.',
500
- 'Compare the changed UI code against the provided design contract.',
501
- 'Treat docs/design-intent.json as the machine-readable source of truth.',
502
- 'Treat docs/DESIGN.md as explanatory context, not a generic style guide.',
503
- 'When deterministic visual diff evidence is provided, treat it as the first layer of truth for noise filtering, viewport coverage, long-page section coverage, and meaningful-drift detection.',
504
- 'Do not reward generic SaaS defaults or popular template patterns.',
505
- 'Do not penalize originality when the implementation still aligns with the contract.',
506
- 'Purposeful motion is allowed and can improve quality. Only flag motion when it drifts from the contract, ignores reduced-motion expectations, or adds avoidable performance/accessibility risk.',
507
- 'Only flag drift when there is a clear mismatch with the contract, accessibility non-negotiables, or cross-viewport adaptation rules.',
508
- 'Treat WCAG 2.2 AA failures as hard accessibility drift.',
509
- 'Treat APCA as advisory perceptual tuning only. Do not recommend blocking solely because APCA would prefer a stronger readability adjustment when WCAG hard requirements still pass.',
510
- 'Check focus visibility, focus appearance, target size, keyboard access, accessible authentication, and status or dynamic state access when the diff touches those surfaces.',
511
- 'This audit always runs in advisory mode for this repository workflow.',
512
- 'Focus on color intent, typographic hierarchy, responsive re-layout, purposeful motion, component morphology across states, interaction behavior, and genericity drift.',
513
- 'Return ONLY one JSON object on a single line prefixed with JSON_VERDICT:.',
514
- 'Schema:',
515
- '{"alignmentScore": number|null, "notes": string[], "findings": [{"area": string, "severity": "high|medium|low", "problem": string, "evidence": string, "recommendation": string, "blockingRecommended": boolean}]}',
516
- ].join('\n');
517
- }
518
-
519
- function buildUserMessage(designIntentContent, designGuideContent, diffContent, changedUiFiles, deterministicVisualSummary) {
520
- const truncatedDiff = diffContent.length > MAX_DIFF_CHARS
521
- ? `${diffContent.slice(0, MAX_DIFF_CHARS)}\n\n[DIFF TRUNCATED - ${diffContent.length - MAX_DIFF_CHARS} additional characters omitted]`
522
- : diffContent;
523
-
524
- return [
525
- '## Changed UI Files',
526
- changedUiFiles.length > 0 ? changedUiFiles.map((filePath) => `- ${filePath}`).join('\n') : '- none',
527
- '',
528
- '## design-intent.json',
529
- '```json',
530
- JSON.stringify(designIntentContent, null, 2),
531
- '```',
532
- '',
533
- '## DESIGN.md',
534
- '```md',
535
- designGuideContent.trim() || '(missing DESIGN.md)',
536
- '```',
537
- '',
538
- '## Deterministic Visual Diff Summary',
539
- '```json',
540
- JSON.stringify(deterministicVisualSummary, null, 2),
541
- '```',
542
- '',
543
- '## UI Diff',
544
- '```diff',
545
- truncatedDiff.trim() || '(no UI diff)',
546
- '```',
547
- '',
548
- 'Judge alignment to the contract. Avoid aesthetic bias toward generic web trends or toward motionless/static outputs.',
549
- ].join('\n');
550
- }
551
-
552
- async function callOpenAiProvider(systemPrompt, userMessage) {
553
- const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gpt-4o-mini';
554
- const apiResponse = await fetch('https://api.openai.com/v1/chat/completions', {
555
- method: 'POST',
556
- headers: {
557
- 'Content-Type': 'application/json',
558
- Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
559
- },
560
- body: JSON.stringify({
561
- model: selectedModel,
562
- max_tokens: 2048,
563
- temperature: 0,
564
- messages: [
565
- { role: 'system', content: systemPrompt },
566
- { role: 'user', content: userMessage },
567
- ],
568
- }),
569
- });
570
-
571
- if (!apiResponse.ok) {
572
- const errorBody = await apiResponse.text();
573
- throw new Error(`OpenAI API returned ${apiResponse.status}: ${errorBody}`);
574
- }
575
-
576
- const responsePayload = await apiResponse.json();
577
- return responsePayload.choices[0].message.content;
578
- }
579
-
580
- async function callAnthropicProvider(systemPrompt, userMessage) {
581
- const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'claude-3-5-haiku-latest';
582
- const apiResponse = await fetch('https://api.anthropic.com/v1/messages', {
583
- method: 'POST',
584
- headers: {
585
- 'Content-Type': 'application/json',
586
- 'x-api-key': process.env.ANTHROPIC_API_KEY ?? '',
587
- 'anthropic-version': '2023-06-01',
588
- },
589
- body: JSON.stringify({
590
- model: selectedModel,
591
- max_tokens: 2048,
592
- system: systemPrompt,
593
- messages: [{ role: 'user', content: userMessage }],
594
- }),
595
- });
596
-
597
- if (!apiResponse.ok) {
598
- const errorBody = await apiResponse.text();
599
- throw new Error(`Anthropic API returned ${apiResponse.status}: ${errorBody}`);
600
- }
601
-
602
- const responsePayload = await apiResponse.json();
603
- return responsePayload.content[0].text;
604
- }
605
-
606
- async function callGeminiProvider(systemPrompt, userMessage) {
607
- const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gemini-2.0-flash';
608
- const apiKey = process.env.GEMINI_API_KEY ?? '';
609
- const endpointUrl = `https://generativelanguage.googleapis.com/v1beta/models/${selectedModel}:generateContent?key=${apiKey}`;
610
-
611
- const apiResponse = await fetch(endpointUrl, {
612
- method: 'POST',
613
- headers: { 'Content-Type': 'application/json' },
614
- body: JSON.stringify({
615
- system_instruction: { parts: [{ text: systemPrompt }] },
616
- contents: [{ role: 'user', parts: [{ text: userMessage }] }],
617
- generationConfig: { temperature: 0, maxOutputTokens: 2048 },
618
- }),
619
- });
620
-
621
- if (!apiResponse.ok) {
622
- const errorBody = await apiResponse.text();
623
- throw new Error(`Gemini API returned ${apiResponse.status}: ${errorBody}`);
624
- }
625
-
626
- const responsePayload = await apiResponse.json();
627
- return responsePayload.candidates[0].content.parts[0].text;
628
- }
629
-
630
- function selectAvailableProvider() {
631
- if (process.env.UI_DESIGN_JUDGE_MOCK_RESPONSE) {
632
- return {
633
- providerName: 'mock',
634
- invokeProvider: async () => process.env.UI_DESIGN_JUDGE_MOCK_RESPONSE,
635
- };
636
- }
637
-
638
- if (process.env.OPENAI_API_KEY) {
639
- return { providerName: 'openai', invokeProvider: callOpenAiProvider };
640
- }
641
-
642
- if (process.env.ANTHROPIC_API_KEY) {
643
- return { providerName: 'anthropic', invokeProvider: callAnthropicProvider };
644
- }
645
-
646
- if (process.env.GEMINI_API_KEY) {
647
- return { providerName: 'gemini', invokeProvider: callGeminiProvider };
648
- }
649
-
650
- return null;
651
- }
652
-
653
- function extractVerdictObject(rawResponseText) {
654
- const verdictMatch = rawResponseText.match(/JSON_VERDICT:\s*(\{[\s\S]*\})/i);
655
- if (!verdictMatch) {
656
- return { verdict: null, malformed: true };
657
- }
658
-
659
- try {
660
- return {
661
- verdict: JSON.parse(verdictMatch[1]),
662
- malformed: false,
663
- };
664
- } catch {
665
- return {
666
- verdict: null,
667
- malformed: true,
668
- };
669
- }
670
- }
671
-
672
- function normalizeFindings(rawFindings) {
673
- if (!Array.isArray(rawFindings)) {
674
- return [];
675
- }
676
-
677
- return rawFindings.map((rawFinding) => ({
678
- area: String(rawFinding?.area || 'general'),
679
- severity: normalizeSeverity(rawFinding?.severity),
680
- problem: String(rawFinding?.problem || 'No problem description provided.'),
681
- evidence: String(rawFinding?.evidence || 'No evidence provided.'),
682
- recommendation: String(rawFinding?.recommendation || 'No recommendation provided.'),
683
- blockingRecommended: rawFinding?.blockingRecommended === true,
684
- }));
685
- }
686
-
687
- /**
688
- * @param {Partial<UiDesignJudgeReport>} partialReport
689
- * @returns {UiDesignJudgeReport}
11
+ *
12
+ * Validation anchors for repo governance:
13
+ * - Do not reward generic SaaS defaults or popular template patterns.
14
+ * - UI design judge only evaluates changed UI surfaces.
15
+ * - Structured design execution summary was supplied to semantic review.
16
+ * - designExecutionSignalCount
17
+ * - designExecutionPolicy
18
+ * - designExecutionHandoff
19
+ * - handoffReady
690
20
  */
691
- function buildReport(partialReport) {
692
- return {
693
- generatedAt: new Date().toISOString(),
694
- auditName: 'ui-design-judge',
695
- schemaVersion: '1.1',
696
- mode: 'advisory',
697
- advisoryOnly: true,
698
- passed: true,
699
- skipped: false,
700
- skipReason: null,
701
- provider: 'none',
702
- ciProvider: detectCiProvider(),
703
- contractPresent: false,
704
- summary: {
705
- changedUiFileCount: 0,
706
- alignmentScore: null,
707
- driftCount: 0,
708
- blockingCandidateCount: 0,
709
- meaningfulDiffViewportCount: 0,
710
- },
711
- deterministicVisual: {
712
- reportPresent: false,
713
- reportVersion: null,
714
- baselineStrategy: null,
715
- coverageComplete: false,
716
- requiredViewports: [],
717
- coveredViewports: [],
718
- missingViewports: [],
719
- meaningfulDiffViewports: [],
720
- maskedViewportCount: 0,
721
- semanticEscalationRecommended: false,
722
- notes: [],
723
- },
724
- semanticJudge: {
725
- attempted: false,
726
- skipped: false,
727
- skipReason: null,
728
- },
729
- malformedVerdict: false,
730
- providerError: false,
731
- findings: [],
732
- notes: [],
733
- ...partialReport,
734
- };
735
- }
736
21
 
737
- function emitMachineReadableReport(machineReportPayload) {
738
- console.log(JSON.stringify(machineReportPayload, null, 2));
739
- }
22
+ import { collectChangedFiles, collectPullRequestDiff, isUiRelevantFilePath } from './ui-design-judge/git-input.mjs';
23
+ import { buildSystemPrompt, buildUserMessage } from './ui-design-judge/prompting.mjs';
24
+ import { selectAvailableProvider } from './ui-design-judge/providers.mjs';
25
+ import { calibrateGenericityAssessment } from './ui-design-judge/rubric-calibration.mjs';
26
+ import {
27
+ buildReport,
28
+ emitMachineReadableReport,
29
+ extractVerdictObject,
30
+ normalizeFindings,
31
+ normalizeGenericityAssessment,
32
+ normalizeRubricBreakdown,
33
+ } from './ui-design-judge/reporting.mjs';
34
+ import { loadDesignGuide, loadDesignIntent, summarizeDesignExecutionPolicy, summarizeReviewRubric } from './ui-design-judge/design-execution-summary.mjs';
740
35
 
741
36
  async function main() {
742
37
  const changedFiles = collectChangedFiles();
@@ -765,44 +60,15 @@ async function main() {
765
60
  alignmentScore: null,
766
61
  driftCount: 0,
767
62
  blockingCandidateCount: 0,
768
- meaningfulDiffViewportCount: 0,
63
+ designExecutionSignalCount: 0,
769
64
  },
770
65
  notes: ['UI design judge only evaluates changed UI surfaces.'],
771
66
  }));
772
67
  return;
773
68
  }
774
69
 
775
- const deterministicVisualSummary = summarizeDeterministicVisualReport(
776
- loadDeterministicVisualReport(),
777
- designIntentContent
778
- );
779
- const shouldRunSemanticJudge = !deterministicVisualSummary.reportPresent
780
- || deterministicVisualSummary.semanticEscalationRecommended;
781
-
782
- if (!shouldRunSemanticJudge) {
783
- emitMachineReadableReport(buildReport({
784
- provider: 'none',
785
- contractPresent: true,
786
- summary: {
787
- changedUiFileCount: changedUiFiles.length,
788
- alignmentScore: null,
789
- driftCount: 0,
790
- blockingCandidateCount: 0,
791
- meaningfulDiffViewportCount: deterministicVisualSummary.meaningfulDiffViewports.length,
792
- },
793
- deterministicVisual: deterministicVisualSummary,
794
- semanticJudge: {
795
- attempted: false,
796
- skipped: true,
797
- skipReason: 'deterministic-clean',
798
- },
799
- notes: [
800
- 'Deterministic visual diff reported no meaningful drift, so semantic review was skipped.',
801
- ...deterministicVisualSummary.notes,
802
- ],
803
- }));
804
- return;
805
- }
70
+ const designExecutionSummary = summarizeDesignExecutionPolicy(designIntentContent);
71
+ const reviewRubricSummary = summarizeReviewRubric(designIntentContent);
806
72
 
807
73
  const systemPrompt = buildSystemPrompt();
808
74
  const userMessage = buildUserMessage(
@@ -810,11 +76,21 @@ async function main() {
810
76
  designGuideContent,
811
77
  rawDiff,
812
78
  changedUiFiles,
813
- deterministicVisualSummary
79
+ designExecutionSummary
814
80
  );
815
81
 
816
82
  const selectedProvider = selectAvailableProvider();
817
83
  if (!selectedProvider) {
84
+ const calibration = calibrateGenericityAssessment({
85
+ reviewRubricSummary,
86
+ designExecutionSummary,
87
+ genericityAssessment: { status: 'unclear', reason: 'No provider review was run.' },
88
+ rubricBreakdown: [],
89
+ findings: [],
90
+ notes: [],
91
+ tasteVsFailureSeparated: null,
92
+ });
93
+
818
94
  emitMachineReadableReport(buildReport({
819
95
  provider: 'none',
820
96
  contractPresent: true,
@@ -823,9 +99,20 @@ async function main() {
823
99
  alignmentScore: null,
824
100
  driftCount: 0,
825
101
  blockingCandidateCount: 0,
826
- meaningfulDiffViewportCount: deterministicVisualSummary.meaningfulDiffViewports.length,
102
+ designExecutionSignalCount: designExecutionSummary.enabledCapabilities.length,
103
+ genericityStatus: calibration.calibratedStatus,
104
+ },
105
+ designExecution: designExecutionSummary,
106
+ rubric: {
107
+ expectedDimensions: reviewRubricSummary.dimensions.map((dimension) => dimension.key),
108
+ breakdown: [],
109
+ genericityAssessment: {
110
+ status: 'unclear',
111
+ reason: 'No provider review was run.',
112
+ },
113
+ tasteVsFailureSeparated: null,
114
+ calibration,
827
115
  },
828
- deterministicVisual: deterministicVisualSummary,
829
116
  semanticJudge: {
830
117
  attempted: false,
831
118
  skipped: true,
@@ -833,7 +120,7 @@ async function main() {
833
120
  },
834
121
  notes: [
835
122
  'No LLM provider configured. UI design judge skipped provider review and stayed advisory.',
836
- ...deterministicVisualSummary.notes,
123
+ ...designExecutionSummary.notes,
837
124
  ],
838
125
  }));
839
126
  return;
@@ -843,6 +130,16 @@ async function main() {
843
130
  try {
844
131
  rawJudgeResponse = await selectedProvider.invokeProvider(systemPrompt, userMessage);
845
132
  } catch (providerError) {
133
+ const calibration = calibrateGenericityAssessment({
134
+ reviewRubricSummary,
135
+ designExecutionSummary,
136
+ genericityAssessment: { status: 'unclear', reason: 'Provider review failed before rubric scoring completed.' },
137
+ rubricBreakdown: [],
138
+ findings: [],
139
+ notes: [],
140
+ tasteVsFailureSeparated: null,
141
+ });
142
+
846
143
  const providerErrorMessage = providerError instanceof Error
847
144
  ? providerError.message
848
145
  : 'Unknown provider error';
@@ -856,15 +153,26 @@ async function main() {
856
153
  alignmentScore: null,
857
154
  driftCount: 0,
858
155
  blockingCandidateCount: 0,
859
- meaningfulDiffViewportCount: deterministicVisualSummary.meaningfulDiffViewports.length,
156
+ designExecutionSignalCount: designExecutionSummary.enabledCapabilities.length,
157
+ genericityStatus: calibration.calibratedStatus,
158
+ },
159
+ designExecution: designExecutionSummary,
160
+ rubric: {
161
+ expectedDimensions: reviewRubricSummary.dimensions.map((dimension) => dimension.key),
162
+ breakdown: [],
163
+ genericityAssessment: {
164
+ status: 'unclear',
165
+ reason: 'Provider review failed before rubric scoring completed.',
166
+ },
167
+ tasteVsFailureSeparated: null,
168
+ calibration,
860
169
  },
861
- deterministicVisual: deterministicVisualSummary,
862
170
  semanticJudge: {
863
171
  attempted: true,
864
172
  skipped: false,
865
173
  skipReason: null,
866
174
  },
867
- notes: [`Provider call failed: ${providerErrorMessage}`, ...deterministicVisualSummary.notes],
175
+ notes: [`Provider call failed: ${providerErrorMessage}`, ...designExecutionSummary.notes],
868
176
  passed: true,
869
177
  }));
870
178
  return;
@@ -872,13 +180,30 @@ async function main() {
872
180
 
873
181
  const { verdict, malformed } = extractVerdictObject(rawJudgeResponse);
874
182
  const findings = normalizeFindings(verdict?.findings);
183
+ const rubricBreakdown = normalizeRubricBreakdown(
184
+ verdict?.rubricBreakdown,
185
+ reviewRubricSummary.dimensions.map((dimension) => dimension.key)
186
+ );
187
+ const genericityAssessment = normalizeGenericityAssessment(verdict?.genericityAssessment);
188
+ const tasteVsFailureSeparated = typeof verdict?.tasteVsFailureSeparated === 'boolean'
189
+ ? verdict.tasteVsFailureSeparated
190
+ : null;
875
191
  const blockingCandidateCount = findings.filter((finding) => finding.blockingRecommended || finding.severity === 'high').length;
876
192
  const alignmentScore = typeof verdict?.alignmentScore === 'number' ? verdict.alignmentScore : null;
877
193
  const notes = Array.isArray(verdict?.notes)
878
194
  ? verdict.notes.map((note) => String(note))
879
195
  : [];
196
+ const calibration = calibrateGenericityAssessment({
197
+ reviewRubricSummary,
198
+ designExecutionSummary,
199
+ genericityAssessment,
200
+ rubricBreakdown,
201
+ findings,
202
+ notes,
203
+ tasteVsFailureSeparated,
204
+ });
880
205
 
881
- const reportPayload = buildReport({
206
+ emitMachineReadableReport(buildReport({
882
207
  provider: selectedProvider.providerName,
883
208
  contractPresent: true,
884
209
  passed: true,
@@ -888,9 +213,17 @@ async function main() {
888
213
  alignmentScore,
889
214
  driftCount: findings.length,
890
215
  blockingCandidateCount,
891
- meaningfulDiffViewportCount: deterministicVisualSummary.meaningfulDiffViewports.length,
216
+ designExecutionSignalCount: designExecutionSummary.enabledCapabilities.length,
217
+ genericityStatus: calibration.calibratedStatus,
218
+ },
219
+ designExecution: designExecutionSummary,
220
+ rubric: {
221
+ expectedDimensions: reviewRubricSummary.dimensions.map((dimension) => dimension.key),
222
+ breakdown: rubricBreakdown,
223
+ genericityAssessment,
224
+ tasteVsFailureSeparated,
225
+ calibration,
892
226
  },
893
- deterministicVisual: deterministicVisualSummary,
894
227
  semanticJudge: {
895
228
  attempted: true,
896
229
  skipped: false,
@@ -898,11 +231,9 @@ async function main() {
898
231
  },
899
232
  findings,
900
233
  notes: malformed
901
- ? ['LLM response was malformed. Advisory mode kept the audit non-blocking.', ...deterministicVisualSummary.notes]
902
- : [...notes, ...deterministicVisualSummary.notes],
903
- });
904
-
905
- emitMachineReadableReport(reportPayload);
234
+ ? ['LLM response was malformed. Advisory mode kept the audit non-blocking.', ...designExecutionSummary.notes]
235
+ : [...notes, ...calibration.notes, ...designExecutionSummary.notes],
236
+ }));
906
237
  }
907
238
 
908
239
  main().catch((unexpectedError) => {