@ryuenn3123/agentic-senior-core 3.0.16 → 3.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-context/prompts/bootstrap-design.md +31 -4
- package/.agent-context/rules/frontend-architecture.md +26 -0
- package/.agent-context/state/memory-continuity-benchmark.json +1 -1
- package/.cursorrules +1 -1
- package/.gemini/instructions.md +7 -1
- package/.github/copilot-instructions.md +7 -1
- package/.instructions.md +3 -0
- package/.windsurfrules +1 -1
- package/AGENTS.md +13 -1
- package/lib/cli/commands/init.mjs +2 -2
- package/lib/cli/memory-continuity.mjs +2 -1
- package/lib/cli/project-scaffolder/constants.mjs +1 -0
- package/lib/cli/project-scaffolder/design-contract.mjs +523 -171
- package/lib/cli/project-scaffolder/prompt-builders.mjs +38 -15
- package/lib/cli/project-scaffolder/storage.mjs +0 -2
- package/package.json +2 -2
- package/scripts/documentation-boundary-audit.mjs +5 -2
- package/scripts/frontend-usability-audit.mjs +34 -0
- package/scripts/mcp-server/constants.mjs +60 -0
- package/scripts/mcp-server/tool-registry.mjs +149 -0
- package/scripts/mcp-server/tools.mjs +446 -0
- package/scripts/mcp-server.mjs +23 -661
- package/scripts/release-gate/audit-checks.mjs +426 -0
- package/scripts/release-gate/constants.mjs +53 -0
- package/scripts/release-gate/runtime.mjs +63 -0
- package/scripts/release-gate/static-checks.mjs +182 -0
- package/scripts/release-gate.mjs +12 -771
- package/scripts/sync-thin-adapters.mjs +24 -0
- package/scripts/ui-design-judge/constants.mjs +24 -0
- package/scripts/ui-design-judge/design-execution-summary.mjs +233 -0
- package/scripts/ui-design-judge/git-input.mjs +131 -0
- package/scripts/ui-design-judge/prompting.mjs +73 -0
- package/scripts/ui-design-judge/providers.mjs +102 -0
- package/scripts/ui-design-judge/reporting.mjs +181 -0
- package/scripts/ui-design-judge/rubric-calibration.mjs +211 -0
- package/scripts/ui-design-judge/rubric-goldset.json +188 -0
- package/scripts/ui-design-judge.mjs +130 -441
- package/scripts/ui-rubric-calibration.mjs +35 -0
- package/scripts/validate/config.mjs +98 -0
|
@@ -7,441 +7,31 @@
|
|
|
7
7
|
* Advisory-first UI design contract judge.
|
|
8
8
|
*
|
|
9
9
|
* Repo-internal workflow audit; no user-facing runtime modes.
|
|
10
|
-
* Compares changed UI diffs against docs/design-intent.json and docs/DESIGN.md.
|
|
11
10
|
* Runs only in advisory mode for this repository workflow.
|
|
12
|
-
*
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
const __dirname = dirname(__filename);
|
|
22
|
-
const REPOSITORY_ROOT = resolve(__dirname, '..');
|
|
23
|
-
|
|
24
|
-
const DESIGN_INTENT_PATH = resolve(REPOSITORY_ROOT, 'docs', 'design-intent.json');
|
|
25
|
-
const DESIGN_GUIDE_PATH = resolve(REPOSITORY_ROOT, 'docs', 'DESIGN.md');
|
|
26
|
-
const MAX_DIFF_CHARS = 12000;
|
|
27
|
-
const UI_FILE_EXTENSIONS = new Set(['.js', '.jsx', '.ts', '.tsx', '.vue', '.css', '.scss', '.sass']);
|
|
28
|
-
|
|
29
|
-
/**
|
|
30
|
-
* @typedef {{
|
|
31
|
-
* area: string,
|
|
32
|
-
* severity: string,
|
|
33
|
-
* problem: string,
|
|
34
|
-
* evidence: string,
|
|
35
|
-
* recommendation: string,
|
|
36
|
-
* blockingRecommended: boolean,
|
|
37
|
-
* }} DriftFinding
|
|
38
|
-
*/
|
|
39
|
-
|
|
40
|
-
/**
|
|
41
|
-
* @typedef {{
|
|
42
|
-
* generatedAt: string,
|
|
43
|
-
* auditName: string,
|
|
44
|
-
* schemaVersion: string,
|
|
45
|
-
* mode: 'advisory',
|
|
46
|
-
* advisoryOnly: boolean,
|
|
47
|
-
* passed: boolean,
|
|
48
|
-
* skipped: boolean,
|
|
49
|
-
* skipReason: string | null,
|
|
50
|
-
* provider: string,
|
|
51
|
-
* ciProvider: string,
|
|
52
|
-
* contractPresent: boolean,
|
|
53
|
-
* summary: {
|
|
54
|
-
* changedUiFileCount: number,
|
|
55
|
-
* alignmentScore: number | null,
|
|
56
|
-
* driftCount: number,
|
|
57
|
-
* blockingCandidateCount: number,
|
|
58
|
-
* },
|
|
59
|
-
* malformedVerdict: boolean,
|
|
60
|
-
* providerError: boolean,
|
|
61
|
-
* findings: DriftFinding[],
|
|
62
|
-
* notes: string[],
|
|
63
|
-
* }} UiDesignJudgeReport
|
|
64
|
-
*/
|
|
65
|
-
|
|
66
|
-
function detectCiProvider() {
|
|
67
|
-
if (process.env.GITHUB_ACTIONS === 'true') {
|
|
68
|
-
return 'github';
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
if (process.env.GITLAB_CI === 'true') {
|
|
72
|
-
return 'gitlab';
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
return 'local';
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
function normalizeSeverity(rawSeverityValue) {
|
|
79
|
-
const normalizedSeverityValue = String(rawSeverityValue || '').trim().toLowerCase();
|
|
80
|
-
|
|
81
|
-
if (['critical', 'high', 'medium', 'low'].includes(normalizedSeverityValue)) {
|
|
82
|
-
return normalizedSeverityValue;
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
if (normalizedSeverityValue === 'major') {
|
|
86
|
-
return 'high';
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
if (normalizedSeverityValue === 'minor' || normalizedSeverityValue === 'info') {
|
|
90
|
-
return 'low';
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
return 'low';
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
function collectGitDiff(baseSha, headSha) {
|
|
97
|
-
const execOptions = {
|
|
98
|
-
cwd: REPOSITORY_ROOT,
|
|
99
|
-
encoding: /** @type {'utf-8'} */ ('utf-8'),
|
|
100
|
-
maxBuffer: 1024 * 1024 * 8,
|
|
101
|
-
};
|
|
102
|
-
|
|
103
|
-
return execSync(`git diff "${baseSha}...${headSha}"`, execOptions);
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
function collectGitChangedFiles(baseSha, headSha) {
|
|
107
|
-
const execOptions = {
|
|
108
|
-
cwd: REPOSITORY_ROOT,
|
|
109
|
-
encoding: /** @type {'utf-8'} */ ('utf-8'),
|
|
110
|
-
maxBuffer: 1024 * 1024 * 2,
|
|
111
|
-
};
|
|
112
|
-
|
|
113
|
-
const output = execSync(`git diff --name-only "${baseSha}...${headSha}"`, execOptions);
|
|
114
|
-
return output
|
|
115
|
-
.split(/\r?\n/u)
|
|
116
|
-
.map((filePath) => filePath.trim())
|
|
117
|
-
.filter(Boolean);
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
function collectPullRequestDiff() {
|
|
121
|
-
if (process.env.PR_DIFF) {
|
|
122
|
-
return process.env.PR_DIFF;
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
const githubBaseSha = process.env.GITHUB_BASE_SHA;
|
|
126
|
-
const githubHeadSha = process.env.GITHUB_HEAD_SHA ?? 'HEAD';
|
|
127
|
-
if (githubBaseSha) {
|
|
128
|
-
return collectGitDiff(githubBaseSha, githubHeadSha);
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
const gitlabBaseSha = process.env.CI_MERGE_REQUEST_DIFF_BASE_SHA;
|
|
132
|
-
const gitlabHeadSha = process.env.CI_COMMIT_SHA ?? 'HEAD';
|
|
133
|
-
if (gitlabBaseSha) {
|
|
134
|
-
return collectGitDiff(gitlabBaseSha, gitlabHeadSha);
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
try {
|
|
138
|
-
return execSync('git diff HEAD~1 HEAD', {
|
|
139
|
-
cwd: REPOSITORY_ROOT,
|
|
140
|
-
encoding: /** @type {'utf-8'} */ ('utf-8'),
|
|
141
|
-
maxBuffer: 1024 * 1024 * 8,
|
|
142
|
-
});
|
|
143
|
-
} catch {
|
|
144
|
-
try {
|
|
145
|
-
const emptyTreeSha = '4b825dc642cb6eb9a060e54bf8d69288fbee4904';
|
|
146
|
-
return execSync(`git diff "${emptyTreeSha}" HEAD`, {
|
|
147
|
-
cwd: REPOSITORY_ROOT,
|
|
148
|
-
encoding: /** @type {'utf-8'} */ ('utf-8'),
|
|
149
|
-
maxBuffer: 1024 * 1024 * 8,
|
|
150
|
-
});
|
|
151
|
-
} catch {
|
|
152
|
-
return '';
|
|
153
|
-
}
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
function collectChangedFiles() {
|
|
158
|
-
if (process.env.PR_DIFF) {
|
|
159
|
-
const filePathSet = new Set();
|
|
160
|
-
for (const diffHeaderMatch of process.env.PR_DIFF.matchAll(/^diff --git a\/(.+?) b\/(.+)$/gm)) {
|
|
161
|
-
filePathSet.add(diffHeaderMatch[2]);
|
|
162
|
-
}
|
|
163
|
-
return Array.from(filePathSet);
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
const githubBaseSha = process.env.GITHUB_BASE_SHA;
|
|
167
|
-
const githubHeadSha = process.env.GITHUB_HEAD_SHA ?? 'HEAD';
|
|
168
|
-
if (githubBaseSha) {
|
|
169
|
-
return collectGitChangedFiles(githubBaseSha, githubHeadSha);
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
const gitlabBaseSha = process.env.CI_MERGE_REQUEST_DIFF_BASE_SHA;
|
|
173
|
-
const gitlabHeadSha = process.env.CI_COMMIT_SHA ?? 'HEAD';
|
|
174
|
-
if (gitlabBaseSha) {
|
|
175
|
-
return collectGitChangedFiles(gitlabBaseSha, gitlabHeadSha);
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
try {
|
|
179
|
-
const output = execSync('git diff --name-only HEAD~1 HEAD', {
|
|
180
|
-
cwd: REPOSITORY_ROOT,
|
|
181
|
-
encoding: /** @type {'utf-8'} */ ('utf-8'),
|
|
182
|
-
maxBuffer: 1024 * 1024 * 2,
|
|
183
|
-
});
|
|
184
|
-
return output.split(/\r?\n/u).map((filePath) => filePath.trim()).filter(Boolean);
|
|
185
|
-
} catch {
|
|
186
|
-
return [];
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
function isUiRelevantFilePath(filePath) {
|
|
191
|
-
const normalizedFilePath = String(filePath || '').replace(/\\/g, '/').toLowerCase();
|
|
192
|
-
const fileExtension = extname(normalizedFilePath);
|
|
193
|
-
|
|
194
|
-
if (!UI_FILE_EXTENSIONS.has(fileExtension)) {
|
|
195
|
-
return false;
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
return (
|
|
199
|
-
normalizedFilePath.startsWith('src/')
|
|
200
|
-
|| normalizedFilePath.startsWith('app/')
|
|
201
|
-
|| normalizedFilePath.startsWith('pages/')
|
|
202
|
-
|| normalizedFilePath.startsWith('components/')
|
|
203
|
-
|| normalizedFilePath.startsWith('styles/')
|
|
204
|
-
|| normalizedFilePath.includes('/components/')
|
|
205
|
-
|| normalizedFilePath.includes('/screens/')
|
|
206
|
-
|| normalizedFilePath.includes('/layouts/')
|
|
207
|
-
);
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
function loadDesignIntent() {
|
|
211
|
-
if (!existsSync(DESIGN_INTENT_PATH)) {
|
|
212
|
-
return null;
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
try {
|
|
216
|
-
return JSON.parse(readFileSync(DESIGN_INTENT_PATH, 'utf8'));
|
|
217
|
-
} catch {
|
|
218
|
-
return null;
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
function loadDesignGuide() {
|
|
223
|
-
if (!existsSync(DESIGN_GUIDE_PATH)) {
|
|
224
|
-
return '';
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
return readFileSync(DESIGN_GUIDE_PATH, 'utf8');
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
function buildSystemPrompt() {
|
|
231
|
-
return [
|
|
232
|
-
'You are a Principal UI/UX Design Reviewer.',
|
|
233
|
-
'Compare the changed UI code against the provided design contract.',
|
|
234
|
-
'Treat docs/design-intent.json as the machine-readable source of truth.',
|
|
235
|
-
'Treat docs/DESIGN.md as explanatory context, not a generic style guide.',
|
|
236
|
-
'Do not reward generic SaaS defaults or popular template patterns.',
|
|
237
|
-
'Do not penalize originality when the implementation still aligns with the contract.',
|
|
238
|
-
'Purposeful motion is allowed and can improve quality. Only flag motion when it drifts from the contract, ignores reduced-motion expectations, or adds avoidable performance/accessibility risk.',
|
|
239
|
-
'Only flag drift when there is a clear mismatch with the contract, accessibility non-negotiables, or cross-viewport adaptation rules.',
|
|
240
|
-
'This audit always runs in advisory mode for this repository workflow.',
|
|
241
|
-
'Focus on color intent, typographic hierarchy, responsive re-layout, purposeful motion, component morphology across states, interaction behavior, and genericity drift.',
|
|
242
|
-
'Return ONLY one JSON object on a single line prefixed with JSON_VERDICT:.',
|
|
243
|
-
'Schema:',
|
|
244
|
-
'{"alignmentScore": number|null, "notes": string[], "findings": [{"area": string, "severity": "high|medium|low", "problem": string, "evidence": string, "recommendation": string, "blockingRecommended": boolean}]}',
|
|
245
|
-
].join('\n');
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
function buildUserMessage(designIntentContent, designGuideContent, diffContent, changedUiFiles) {
|
|
249
|
-
const truncatedDiff = diffContent.length > MAX_DIFF_CHARS
|
|
250
|
-
? `${diffContent.slice(0, MAX_DIFF_CHARS)}\n\n[DIFF TRUNCATED - ${diffContent.length - MAX_DIFF_CHARS} additional characters omitted]`
|
|
251
|
-
: diffContent;
|
|
252
|
-
|
|
253
|
-
return [
|
|
254
|
-
'## Changed UI Files',
|
|
255
|
-
changedUiFiles.length > 0 ? changedUiFiles.map((filePath) => `- ${filePath}`).join('\n') : '- none',
|
|
256
|
-
'',
|
|
257
|
-
'## design-intent.json',
|
|
258
|
-
'```json',
|
|
259
|
-
JSON.stringify(designIntentContent, null, 2),
|
|
260
|
-
'```',
|
|
261
|
-
'',
|
|
262
|
-
'## DESIGN.md',
|
|
263
|
-
'```md',
|
|
264
|
-
designGuideContent.trim() || '(missing DESIGN.md)',
|
|
265
|
-
'```',
|
|
266
|
-
'',
|
|
267
|
-
'## UI Diff',
|
|
268
|
-
'```diff',
|
|
269
|
-
truncatedDiff.trim() || '(no UI diff)',
|
|
270
|
-
'```',
|
|
271
|
-
'',
|
|
272
|
-
'Judge alignment to the contract. Avoid aesthetic bias toward generic web trends or toward motionless/static outputs.',
|
|
273
|
-
].join('\n');
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
async function callOpenAiProvider(systemPrompt, userMessage) {
|
|
277
|
-
const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gpt-4o-mini';
|
|
278
|
-
const apiResponse = await fetch('https://api.openai.com/v1/chat/completions', {
|
|
279
|
-
method: 'POST',
|
|
280
|
-
headers: {
|
|
281
|
-
'Content-Type': 'application/json',
|
|
282
|
-
Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
|
|
283
|
-
},
|
|
284
|
-
body: JSON.stringify({
|
|
285
|
-
model: selectedModel,
|
|
286
|
-
max_tokens: 2048,
|
|
287
|
-
temperature: 0,
|
|
288
|
-
messages: [
|
|
289
|
-
{ role: 'system', content: systemPrompt },
|
|
290
|
-
{ role: 'user', content: userMessage },
|
|
291
|
-
],
|
|
292
|
-
}),
|
|
293
|
-
});
|
|
294
|
-
|
|
295
|
-
if (!apiResponse.ok) {
|
|
296
|
-
const errorBody = await apiResponse.text();
|
|
297
|
-
throw new Error(`OpenAI API returned ${apiResponse.status}: ${errorBody}`);
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
const responsePayload = await apiResponse.json();
|
|
301
|
-
return responsePayload.choices[0].message.content;
|
|
302
|
-
}
|
|
303
|
-
|
|
304
|
-
async function callAnthropicProvider(systemPrompt, userMessage) {
|
|
305
|
-
const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'claude-3-5-haiku-latest';
|
|
306
|
-
const apiResponse = await fetch('https://api.anthropic.com/v1/messages', {
|
|
307
|
-
method: 'POST',
|
|
308
|
-
headers: {
|
|
309
|
-
'Content-Type': 'application/json',
|
|
310
|
-
'x-api-key': process.env.ANTHROPIC_API_KEY ?? '',
|
|
311
|
-
'anthropic-version': '2023-06-01',
|
|
312
|
-
},
|
|
313
|
-
body: JSON.stringify({
|
|
314
|
-
model: selectedModel,
|
|
315
|
-
max_tokens: 2048,
|
|
316
|
-
system: systemPrompt,
|
|
317
|
-
messages: [{ role: 'user', content: userMessage }],
|
|
318
|
-
}),
|
|
319
|
-
});
|
|
320
|
-
|
|
321
|
-
if (!apiResponse.ok) {
|
|
322
|
-
const errorBody = await apiResponse.text();
|
|
323
|
-
throw new Error(`Anthropic API returned ${apiResponse.status}: ${errorBody}`);
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
const responsePayload = await apiResponse.json();
|
|
327
|
-
return responsePayload.content[0].text;
|
|
328
|
-
}
|
|
329
|
-
|
|
330
|
-
async function callGeminiProvider(systemPrompt, userMessage) {
|
|
331
|
-
const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gemini-2.0-flash';
|
|
332
|
-
const apiKey = process.env.GEMINI_API_KEY ?? '';
|
|
333
|
-
const endpointUrl = `https://generativelanguage.googleapis.com/v1beta/models/${selectedModel}:generateContent?key=${apiKey}`;
|
|
334
|
-
|
|
335
|
-
const apiResponse = await fetch(endpointUrl, {
|
|
336
|
-
method: 'POST',
|
|
337
|
-
headers: { 'Content-Type': 'application/json' },
|
|
338
|
-
body: JSON.stringify({
|
|
339
|
-
system_instruction: { parts: [{ text: systemPrompt }] },
|
|
340
|
-
contents: [{ role: 'user', parts: [{ text: userMessage }] }],
|
|
341
|
-
generationConfig: { temperature: 0, maxOutputTokens: 2048 },
|
|
342
|
-
}),
|
|
343
|
-
});
|
|
344
|
-
|
|
345
|
-
if (!apiResponse.ok) {
|
|
346
|
-
const errorBody = await apiResponse.text();
|
|
347
|
-
throw new Error(`Gemini API returned ${apiResponse.status}: ${errorBody}`);
|
|
348
|
-
}
|
|
349
|
-
|
|
350
|
-
const responsePayload = await apiResponse.json();
|
|
351
|
-
return responsePayload.candidates[0].content.parts[0].text;
|
|
352
|
-
}
|
|
353
|
-
|
|
354
|
-
function selectAvailableProvider() {
|
|
355
|
-
if (process.env.UI_DESIGN_JUDGE_MOCK_RESPONSE) {
|
|
356
|
-
return {
|
|
357
|
-
providerName: 'mock',
|
|
358
|
-
invokeProvider: async () => process.env.UI_DESIGN_JUDGE_MOCK_RESPONSE,
|
|
359
|
-
};
|
|
360
|
-
}
|
|
361
|
-
|
|
362
|
-
if (process.env.OPENAI_API_KEY) {
|
|
363
|
-
return { providerName: 'openai', invokeProvider: callOpenAiProvider };
|
|
364
|
-
}
|
|
365
|
-
|
|
366
|
-
if (process.env.ANTHROPIC_API_KEY) {
|
|
367
|
-
return { providerName: 'anthropic', invokeProvider: callAnthropicProvider };
|
|
368
|
-
}
|
|
369
|
-
|
|
370
|
-
if (process.env.GEMINI_API_KEY) {
|
|
371
|
-
return { providerName: 'gemini', invokeProvider: callGeminiProvider };
|
|
372
|
-
}
|
|
373
|
-
|
|
374
|
-
return null;
|
|
375
|
-
}
|
|
376
|
-
|
|
377
|
-
function extractVerdictObject(rawResponseText) {
|
|
378
|
-
const verdictMatch = rawResponseText.match(/JSON_VERDICT:\s*(\{[\s\S]*\})/i);
|
|
379
|
-
if (!verdictMatch) {
|
|
380
|
-
return { verdict: null, malformed: true };
|
|
381
|
-
}
|
|
382
|
-
|
|
383
|
-
try {
|
|
384
|
-
return {
|
|
385
|
-
verdict: JSON.parse(verdictMatch[1]),
|
|
386
|
-
malformed: false,
|
|
387
|
-
};
|
|
388
|
-
} catch {
|
|
389
|
-
return {
|
|
390
|
-
verdict: null,
|
|
391
|
-
malformed: true,
|
|
392
|
-
};
|
|
393
|
-
}
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
function normalizeFindings(rawFindings) {
|
|
397
|
-
if (!Array.isArray(rawFindings)) {
|
|
398
|
-
return [];
|
|
399
|
-
}
|
|
400
|
-
|
|
401
|
-
return rawFindings.map((rawFinding) => ({
|
|
402
|
-
area: String(rawFinding?.area || 'general'),
|
|
403
|
-
severity: normalizeSeverity(rawFinding?.severity),
|
|
404
|
-
problem: String(rawFinding?.problem || 'No problem description provided.'),
|
|
405
|
-
evidence: String(rawFinding?.evidence || 'No evidence provided.'),
|
|
406
|
-
recommendation: String(rawFinding?.recommendation || 'No recommendation provided.'),
|
|
407
|
-
blockingRecommended: rawFinding?.blockingRecommended === true,
|
|
408
|
-
}));
|
|
409
|
-
}
|
|
410
|
-
|
|
411
|
-
/**
|
|
412
|
-
* @param {Partial<UiDesignJudgeReport>} partialReport
|
|
413
|
-
* @returns {UiDesignJudgeReport}
|
|
11
|
+
*
|
|
12
|
+
* Validation anchors for repo governance:
|
|
13
|
+
* - Do not reward generic SaaS defaults or popular template patterns.
|
|
14
|
+
* - UI design judge only evaluates changed UI surfaces.
|
|
15
|
+
* - Structured design execution summary was supplied to semantic review.
|
|
16
|
+
* - designExecutionSignalCount
|
|
17
|
+
* - designExecutionPolicy
|
|
18
|
+
* - designExecutionHandoff
|
|
19
|
+
* - handoffReady
|
|
414
20
|
*/
|
|
415
|
-
function buildReport(partialReport) {
|
|
416
|
-
return {
|
|
417
|
-
generatedAt: new Date().toISOString(),
|
|
418
|
-
auditName: 'ui-design-judge',
|
|
419
|
-
schemaVersion: '1.0',
|
|
420
|
-
mode: 'advisory',
|
|
421
|
-
advisoryOnly: true,
|
|
422
|
-
passed: true,
|
|
423
|
-
skipped: false,
|
|
424
|
-
skipReason: null,
|
|
425
|
-
provider: 'none',
|
|
426
|
-
ciProvider: detectCiProvider(),
|
|
427
|
-
contractPresent: false,
|
|
428
|
-
summary: {
|
|
429
|
-
changedUiFileCount: 0,
|
|
430
|
-
alignmentScore: null,
|
|
431
|
-
driftCount: 0,
|
|
432
|
-
blockingCandidateCount: 0,
|
|
433
|
-
},
|
|
434
|
-
malformedVerdict: false,
|
|
435
|
-
providerError: false,
|
|
436
|
-
findings: [],
|
|
437
|
-
notes: [],
|
|
438
|
-
...partialReport,
|
|
439
|
-
};
|
|
440
|
-
}
|
|
441
21
|
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
}
|
|
22
|
+
import { collectChangedFiles, collectPullRequestDiff, isUiRelevantFilePath } from './ui-design-judge/git-input.mjs';
|
|
23
|
+
import { buildSystemPrompt, buildUserMessage } from './ui-design-judge/prompting.mjs';
|
|
24
|
+
import { selectAvailableProvider } from './ui-design-judge/providers.mjs';
|
|
25
|
+
import { calibrateGenericityAssessment } from './ui-design-judge/rubric-calibration.mjs';
|
|
26
|
+
import {
|
|
27
|
+
buildReport,
|
|
28
|
+
emitMachineReadableReport,
|
|
29
|
+
extractVerdictObject,
|
|
30
|
+
normalizeFindings,
|
|
31
|
+
normalizeGenericityAssessment,
|
|
32
|
+
normalizeRubricBreakdown,
|
|
33
|
+
} from './ui-design-judge/reporting.mjs';
|
|
34
|
+
import { loadDesignGuide, loadDesignIntent, summarizeDesignExecutionPolicy, summarizeReviewRubric } from './ui-design-judge/design-execution-summary.mjs';
|
|
445
35
|
|
|
446
36
|
async function main() {
|
|
447
37
|
const changedFiles = collectChangedFiles();
|
|
@@ -470,17 +60,37 @@ async function main() {
|
|
|
470
60
|
alignmentScore: null,
|
|
471
61
|
driftCount: 0,
|
|
472
62
|
blockingCandidateCount: 0,
|
|
63
|
+
designExecutionSignalCount: 0,
|
|
473
64
|
},
|
|
474
65
|
notes: ['UI design judge only evaluates changed UI surfaces.'],
|
|
475
66
|
}));
|
|
476
67
|
return;
|
|
477
68
|
}
|
|
478
69
|
|
|
70
|
+
const designExecutionSummary = summarizeDesignExecutionPolicy(designIntentContent);
|
|
71
|
+
const reviewRubricSummary = summarizeReviewRubric(designIntentContent);
|
|
72
|
+
|
|
479
73
|
const systemPrompt = buildSystemPrompt();
|
|
480
|
-
const userMessage = buildUserMessage(
|
|
74
|
+
const userMessage = buildUserMessage(
|
|
75
|
+
designIntentContent,
|
|
76
|
+
designGuideContent,
|
|
77
|
+
rawDiff,
|
|
78
|
+
changedUiFiles,
|
|
79
|
+
designExecutionSummary
|
|
80
|
+
);
|
|
481
81
|
|
|
482
82
|
const selectedProvider = selectAvailableProvider();
|
|
483
83
|
if (!selectedProvider) {
|
|
84
|
+
const calibration = calibrateGenericityAssessment({
|
|
85
|
+
reviewRubricSummary,
|
|
86
|
+
designExecutionSummary,
|
|
87
|
+
genericityAssessment: { status: 'unclear', reason: 'No provider review was run.' },
|
|
88
|
+
rubricBreakdown: [],
|
|
89
|
+
findings: [],
|
|
90
|
+
notes: [],
|
|
91
|
+
tasteVsFailureSeparated: null,
|
|
92
|
+
});
|
|
93
|
+
|
|
484
94
|
emitMachineReadableReport(buildReport({
|
|
485
95
|
provider: 'none',
|
|
486
96
|
contractPresent: true,
|
|
@@ -489,8 +99,29 @@ async function main() {
|
|
|
489
99
|
alignmentScore: null,
|
|
490
100
|
driftCount: 0,
|
|
491
101
|
blockingCandidateCount: 0,
|
|
102
|
+
designExecutionSignalCount: designExecutionSummary.enabledCapabilities.length,
|
|
103
|
+
genericityStatus: calibration.calibratedStatus,
|
|
104
|
+
},
|
|
105
|
+
designExecution: designExecutionSummary,
|
|
106
|
+
rubric: {
|
|
107
|
+
expectedDimensions: reviewRubricSummary.dimensions.map((dimension) => dimension.key),
|
|
108
|
+
breakdown: [],
|
|
109
|
+
genericityAssessment: {
|
|
110
|
+
status: 'unclear',
|
|
111
|
+
reason: 'No provider review was run.',
|
|
112
|
+
},
|
|
113
|
+
tasteVsFailureSeparated: null,
|
|
114
|
+
calibration,
|
|
492
115
|
},
|
|
493
|
-
|
|
116
|
+
semanticJudge: {
|
|
117
|
+
attempted: false,
|
|
118
|
+
skipped: true,
|
|
119
|
+
skipReason: 'no-provider-configured',
|
|
120
|
+
},
|
|
121
|
+
notes: [
|
|
122
|
+
'No LLM provider configured. UI design judge skipped provider review and stayed advisory.',
|
|
123
|
+
...designExecutionSummary.notes,
|
|
124
|
+
],
|
|
494
125
|
}));
|
|
495
126
|
return;
|
|
496
127
|
}
|
|
@@ -499,6 +130,16 @@ async function main() {
|
|
|
499
130
|
try {
|
|
500
131
|
rawJudgeResponse = await selectedProvider.invokeProvider(systemPrompt, userMessage);
|
|
501
132
|
} catch (providerError) {
|
|
133
|
+
const calibration = calibrateGenericityAssessment({
|
|
134
|
+
reviewRubricSummary,
|
|
135
|
+
designExecutionSummary,
|
|
136
|
+
genericityAssessment: { status: 'unclear', reason: 'Provider review failed before rubric scoring completed.' },
|
|
137
|
+
rubricBreakdown: [],
|
|
138
|
+
findings: [],
|
|
139
|
+
notes: [],
|
|
140
|
+
tasteVsFailureSeparated: null,
|
|
141
|
+
});
|
|
142
|
+
|
|
502
143
|
const providerErrorMessage = providerError instanceof Error
|
|
503
144
|
? providerError.message
|
|
504
145
|
: 'Unknown provider error';
|
|
@@ -512,8 +153,26 @@ async function main() {
|
|
|
512
153
|
alignmentScore: null,
|
|
513
154
|
driftCount: 0,
|
|
514
155
|
blockingCandidateCount: 0,
|
|
156
|
+
designExecutionSignalCount: designExecutionSummary.enabledCapabilities.length,
|
|
157
|
+
genericityStatus: calibration.calibratedStatus,
|
|
158
|
+
},
|
|
159
|
+
designExecution: designExecutionSummary,
|
|
160
|
+
rubric: {
|
|
161
|
+
expectedDimensions: reviewRubricSummary.dimensions.map((dimension) => dimension.key),
|
|
162
|
+
breakdown: [],
|
|
163
|
+
genericityAssessment: {
|
|
164
|
+
status: 'unclear',
|
|
165
|
+
reason: 'Provider review failed before rubric scoring completed.',
|
|
166
|
+
},
|
|
167
|
+
tasteVsFailureSeparated: null,
|
|
168
|
+
calibration,
|
|
169
|
+
},
|
|
170
|
+
semanticJudge: {
|
|
171
|
+
attempted: true,
|
|
172
|
+
skipped: false,
|
|
173
|
+
skipReason: null,
|
|
515
174
|
},
|
|
516
|
-
notes: [`Provider call failed: ${providerErrorMessage}
|
|
175
|
+
notes: [`Provider call failed: ${providerErrorMessage}`, ...designExecutionSummary.notes],
|
|
517
176
|
passed: true,
|
|
518
177
|
}));
|
|
519
178
|
return;
|
|
@@ -521,13 +180,30 @@ async function main() {
|
|
|
521
180
|
|
|
522
181
|
const { verdict, malformed } = extractVerdictObject(rawJudgeResponse);
|
|
523
182
|
const findings = normalizeFindings(verdict?.findings);
|
|
183
|
+
const rubricBreakdown = normalizeRubricBreakdown(
|
|
184
|
+
verdict?.rubricBreakdown,
|
|
185
|
+
reviewRubricSummary.dimensions.map((dimension) => dimension.key)
|
|
186
|
+
);
|
|
187
|
+
const genericityAssessment = normalizeGenericityAssessment(verdict?.genericityAssessment);
|
|
188
|
+
const tasteVsFailureSeparated = typeof verdict?.tasteVsFailureSeparated === 'boolean'
|
|
189
|
+
? verdict.tasteVsFailureSeparated
|
|
190
|
+
: null;
|
|
524
191
|
const blockingCandidateCount = findings.filter((finding) => finding.blockingRecommended || finding.severity === 'high').length;
|
|
525
192
|
const alignmentScore = typeof verdict?.alignmentScore === 'number' ? verdict.alignmentScore : null;
|
|
526
193
|
const notes = Array.isArray(verdict?.notes)
|
|
527
194
|
? verdict.notes.map((note) => String(note))
|
|
528
195
|
: [];
|
|
196
|
+
const calibration = calibrateGenericityAssessment({
|
|
197
|
+
reviewRubricSummary,
|
|
198
|
+
designExecutionSummary,
|
|
199
|
+
genericityAssessment,
|
|
200
|
+
rubricBreakdown,
|
|
201
|
+
findings,
|
|
202
|
+
notes,
|
|
203
|
+
tasteVsFailureSeparated,
|
|
204
|
+
});
|
|
529
205
|
|
|
530
|
-
|
|
206
|
+
emitMachineReadableReport(buildReport({
|
|
531
207
|
provider: selectedProvider.providerName,
|
|
532
208
|
contractPresent: true,
|
|
533
209
|
passed: true,
|
|
@@ -537,14 +213,27 @@ async function main() {
|
|
|
537
213
|
alignmentScore,
|
|
538
214
|
driftCount: findings.length,
|
|
539
215
|
blockingCandidateCount,
|
|
216
|
+
designExecutionSignalCount: designExecutionSummary.enabledCapabilities.length,
|
|
217
|
+
genericityStatus: calibration.calibratedStatus,
|
|
218
|
+
},
|
|
219
|
+
designExecution: designExecutionSummary,
|
|
220
|
+
rubric: {
|
|
221
|
+
expectedDimensions: reviewRubricSummary.dimensions.map((dimension) => dimension.key),
|
|
222
|
+
breakdown: rubricBreakdown,
|
|
223
|
+
genericityAssessment,
|
|
224
|
+
tasteVsFailureSeparated,
|
|
225
|
+
calibration,
|
|
226
|
+
},
|
|
227
|
+
semanticJudge: {
|
|
228
|
+
attempted: true,
|
|
229
|
+
skipped: false,
|
|
230
|
+
skipReason: null,
|
|
540
231
|
},
|
|
541
232
|
findings,
|
|
542
233
|
notes: malformed
|
|
543
|
-
? ['LLM response was malformed. Advisory mode kept the audit non-blocking.']
|
|
544
|
-
: notes,
|
|
545
|
-
});
|
|
546
|
-
|
|
547
|
-
emitMachineReadableReport(reportPayload);
|
|
234
|
+
? ['LLM response was malformed. Advisory mode kept the audit non-blocking.', ...designExecutionSummary.notes]
|
|
235
|
+
: [...notes, ...calibration.notes, ...designExecutionSummary.notes],
|
|
236
|
+
}));
|
|
548
237
|
}
|
|
549
238
|
|
|
550
239
|
main().catch((unexpectedError) => {
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// @ts-check
|
|
3
|
+
|
|
4
|
+
import { readFileSync } from 'node:fs';
|
|
5
|
+
import { dirname, resolve } from 'node:path';
|
|
6
|
+
import { fileURLToPath } from 'node:url';
|
|
7
|
+
import { buildRubricCalibrationReport } from './ui-design-judge/rubric-calibration.mjs';
|
|
8
|
+
|
|
9
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
10
|
+
const __dirname = dirname(__filename);
|
|
11
|
+
const GOLDSET_PATH = resolve(__dirname, 'ui-design-judge', 'rubric-goldset.json');
|
|
12
|
+
const REPORT_NAME = 'ui-rubric-calibration';
|
|
13
|
+
|
|
14
|
+
function loadGoldset() {
|
|
15
|
+
return JSON.parse(readFileSync(GOLDSET_PATH, 'utf8'));
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
function main() {
|
|
19
|
+
const goldset = loadGoldset();
|
|
20
|
+
const report = buildRubricCalibrationReport({
|
|
21
|
+
cases: goldset.cases,
|
|
22
|
+
reviewRubricSummary: goldset.reviewRubric,
|
|
23
|
+
});
|
|
24
|
+
report.reportName = REPORT_NAME;
|
|
25
|
+
// Keep the report surface explicit so static validation can detect the machine-readable payload contract.
|
|
26
|
+
report.accuracyPercent = report.accuracyPercent;
|
|
27
|
+
|
|
28
|
+
console.log(JSON.stringify(report, null, 2));
|
|
29
|
+
|
|
30
|
+
if (!report.passed) {
|
|
31
|
+
process.exitCode = 1;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
main();
|