@ryuenn3123/agentic-senior-core 3.0.17 → 3.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/.agent-context/prompts/bootstrap-design.md +84 -94
  2. package/.agent-context/prompts/init-project.md +32 -100
  3. package/.agent-context/prompts/refactor.md +22 -44
  4. package/.agent-context/prompts/review-code.md +28 -52
  5. package/.agent-context/review-checklists/architecture-review.md +31 -62
  6. package/.agent-context/review-checklists/pr-checklist.md +74 -108
  7. package/.agent-context/rules/api-docs.md +18 -206
  8. package/.agent-context/rules/architecture.md +40 -207
  9. package/.agent-context/rules/database-design.md +10 -199
  10. package/.agent-context/rules/docker-runtime.md +5 -5
  11. package/.agent-context/rules/efficiency-vs-hype.md +11 -149
  12. package/.agent-context/rules/error-handling.md +9 -231
  13. package/.agent-context/rules/event-driven.md +17 -221
  14. package/.agent-context/rules/frontend-architecture.md +66 -119
  15. package/.agent-context/rules/git-workflow.md +1 -1
  16. package/.agent-context/rules/microservices.md +28 -161
  17. package/.agent-context/rules/naming-conv.md +8 -138
  18. package/.agent-context/rules/performance.md +9 -175
  19. package/.agent-context/rules/realtime.md +11 -44
  20. package/.agent-context/rules/security.md +11 -295
  21. package/.agent-context/rules/testing.md +9 -174
  22. package/.agent-context/state/benchmark-analysis.json +3 -3
  23. package/.agent-context/state/memory-continuity-benchmark.json +1 -1
  24. package/.agent-context/state/onboarding-report.json +71 -11
  25. package/.agents/workflows/init-project.md +7 -24
  26. package/.agents/workflows/refactor.md +7 -24
  27. package/.agents/workflows/review-code.md +7 -24
  28. package/.cursorrules +22 -21
  29. package/.gemini/instructions.md +2 -2
  30. package/.github/copilot-instructions.md +2 -2
  31. package/.instructions.md +112 -213
  32. package/.windsurfrules +22 -21
  33. package/AGENTS.md +4 -4
  34. package/CONTRIBUTING.md +13 -22
  35. package/README.md +6 -20
  36. package/lib/cli/commands/init.mjs +102 -148
  37. package/lib/cli/commands/launch.mjs +3 -3
  38. package/lib/cli/commands/optimize.mjs +14 -4
  39. package/lib/cli/commands/upgrade.mjs +25 -23
  40. package/lib/cli/compiler.mjs +96 -62
  41. package/lib/cli/constants.mjs +28 -136
  42. package/lib/cli/detector/design-evidence.mjs +189 -6
  43. package/lib/cli/detector.mjs +6 -7
  44. package/lib/cli/init-detection-flow.mjs +10 -93
  45. package/lib/cli/init-selection.mjs +2 -68
  46. package/lib/cli/project-scaffolder/constants.mjs +1 -1
  47. package/lib/cli/project-scaffolder/design-contract.mjs +438 -335
  48. package/lib/cli/project-scaffolder/discovery.mjs +36 -82
  49. package/lib/cli/project-scaffolder/prompt-builders.mjs +55 -63
  50. package/lib/cli/project-scaffolder/storage.mjs +0 -4
  51. package/lib/cli/token-optimization.mjs +1 -1
  52. package/lib/cli/utils.mjs +75 -9
  53. package/package.json +2 -2
  54. package/scripts/detection-benchmark.mjs +4 -15
  55. package/scripts/documentation-boundary-audit.mjs +9 -9
  56. package/scripts/explain-on-demand-audit.mjs +11 -11
  57. package/scripts/forbidden-content-check.mjs +9 -9
  58. package/scripts/frontend-usability-audit.mjs +57 -36
  59. package/scripts/llm-judge.mjs +1 -1
  60. package/scripts/mcp-server/constants.mjs +60 -0
  61. package/scripts/mcp-server/tool-registry.mjs +149 -0
  62. package/scripts/mcp-server/tools.mjs +446 -0
  63. package/scripts/mcp-server.mjs +23 -661
  64. package/scripts/release-gate/audit-checks.mjs +426 -0
  65. package/scripts/release-gate/constants.mjs +53 -0
  66. package/scripts/release-gate/runtime.mjs +63 -0
  67. package/scripts/release-gate/static-checks.mjs +182 -0
  68. package/scripts/release-gate.mjs +13 -794
  69. package/scripts/rules-guardian-audit.mjs +14 -13
  70. package/scripts/single-source-lazy-loading-audit.mjs +3 -3
  71. package/scripts/sync-thin-adapters.mjs +5 -5
  72. package/scripts/ui-design-judge/constants.mjs +24 -0
  73. package/scripts/ui-design-judge/design-execution-summary.mjs +259 -0
  74. package/scripts/ui-design-judge/git-input.mjs +131 -0
  75. package/scripts/ui-design-judge/prompting.mjs +73 -0
  76. package/scripts/ui-design-judge/providers.mjs +102 -0
  77. package/scripts/ui-design-judge/reporting.mjs +182 -0
  78. package/scripts/ui-design-judge/rubric-calibration.mjs +214 -0
  79. package/scripts/ui-design-judge/rubric-goldset.json +188 -0
  80. package/scripts/ui-design-judge.mjs +166 -771
  81. package/scripts/ui-rubric-calibration.mjs +35 -0
  82. package/scripts/validate/config.mjs +198 -55
  83. package/scripts/validate/coverage-checks.mjs +32 -7
  84. package/scripts/validate.mjs +8 -4
  85. package/lib/cli/architect.mjs +0 -431
@@ -0,0 +1,102 @@
1
+ // @ts-check
2
+
3
/**
 * Sends the judge prompt to the OpenAI Chat Completions endpoint and returns the reply text.
 *
 * The model comes from `LLM_JUDGE_MODEL` (default `gpt-4o-mini`) and the bearer token from
 * `OPENAI_API_KEY`. Temperature is pinned to 0 and the reply is capped at 2048 tokens.
 *
 * @param {string} systemPrompt - Text sent as the `system` message.
 * @param {string} userMessage - Text sent as the `user` message.
 * @returns {Promise<string>} The `content` of the first returned choice.
 * @throws {Error} When the HTTP status is not OK, or when the payload carries no string content.
 */
async function callOpenAiProvider(systemPrompt, userMessage) {
  const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gpt-4o-mini';
  const apiResponse = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: JSON.stringify({
      model: selectedModel,
      max_tokens: 2048,
      temperature: 0,
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: userMessage },
      ],
    }),
  });

  if (!apiResponse.ok) {
    const errorBody = await apiResponse.text();
    throw new Error(`OpenAI API returned ${apiResponse.status}: ${errorBody}`);
  }

  const responsePayload = await apiResponse.json();
  const replyText = responsePayload?.choices?.[0]?.message?.content;

  // A successful status can still carry no usable text (empty `choices`, null `content`).
  // Fail with a readable message instead of an opaque TypeError or a non-string return value.
  if (typeof replyText !== 'string') {
    throw new Error('OpenAI API response did not include any message content.');
  }

  return replyText;
}
30
+
31
/**
 * Sends the judge prompt to the Anthropic Messages endpoint and returns the reply text.
 *
 * The model comes from `LLM_JUDGE_MODEL` (default `claude-3-5-haiku-latest`) and the key from
 * `ANTHROPIC_API_KEY`. The reply is capped at 2048 tokens.
 *
 * @param {string} systemPrompt - Text sent as the top-level `system` field.
 * @param {string} userMessage - Text sent as the single `user` message.
 * @returns {Promise<string>} The text of the first content block that carries string text.
 * @throws {Error} When the HTTP status is not OK, or when no content block carries text.
 */
async function callAnthropicProvider(systemPrompt, userMessage) {
  const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'claude-3-5-haiku-latest';
  const apiResponse = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': process.env.ANTHROPIC_API_KEY ?? '',
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify({
      model: selectedModel,
      max_tokens: 2048,
      // Pinned to 0 so this provider matches the OpenAI and Gemini callers in this module.
      temperature: 0,
      system: systemPrompt,
      messages: [{ role: 'user', content: userMessage }],
    }),
  });

  if (!apiResponse.ok) {
    const errorBody = await apiResponse.text();
    throw new Error(`Anthropic API returned ${apiResponse.status}: ${errorBody}`);
  }

  const responsePayload = await apiResponse.json();
  const contentBlocks = Array.isArray(responsePayload?.content) ? responsePayload.content : [];
  const textBlock = contentBlocks.find((contentBlock) => typeof contentBlock?.text === 'string');

  // Guard against an empty or text-less content array instead of returning undefined
  // or crashing with an opaque TypeError.
  if (!textBlock) {
    throw new Error('Anthropic API response did not include any text content.');
  }

  return textBlock.text;
}
56
+
57
/**
 * Sends the judge prompt to the Gemini `generateContent` endpoint and returns the reply text.
 *
 * The model comes from `LLM_JUDGE_MODEL` (default `gemini-2.0-flash`) and the key from
 * `GEMINI_API_KEY`. Temperature is pinned to 0 and output is capped at 2048 tokens.
 *
 * @param {string} systemPrompt - Text sent as the `system_instruction`.
 * @param {string} userMessage - Text sent as the single `user` content entry.
 * @returns {Promise<string>} The text of the first part of the first candidate.
 * @throws {Error} When the HTTP status is not OK, or when the payload carries no candidate text.
 */
async function callGeminiProvider(systemPrompt, userMessage) {
  const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gemini-2.0-flash';
  const apiKey = process.env.GEMINI_API_KEY ?? '';
  // The model name is environment-controlled, so encode it before placing it in the path.
  const endpointUrl = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(selectedModel)}:generateContent`;

  const apiResponse = await fetch(endpointUrl, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      // Send the key as a header rather than a query parameter so the secret does not
      // end up inside URLs that may be logged or echoed in error output.
      'x-goog-api-key': apiKey,
    },
    body: JSON.stringify({
      system_instruction: { parts: [{ text: systemPrompt }] },
      contents: [{ role: 'user', parts: [{ text: userMessage }] }],
      generationConfig: { temperature: 0, maxOutputTokens: 2048 },
    }),
  });

  if (!apiResponse.ok) {
    const errorBody = await apiResponse.text();
    throw new Error(`Gemini API returned ${apiResponse.status}: ${errorBody}`);
  }

  const responsePayload = await apiResponse.json();
  const replyText = responsePayload?.candidates?.[0]?.content?.parts?.[0]?.text;

  // A successful status can still carry no candidates or no text part; fail readably.
  if (typeof replyText !== 'string') {
    throw new Error('Gemini API response did not include any candidate text.');
  }

  return replyText;
}
80
+
81
/**
 * Picks the judge provider based on which environment variables are set.
 *
 * Precedence: `UI_DESIGN_JUDGE_MOCK_RESPONSE` (mock), then `OPENAI_API_KEY`,
 * `ANTHROPIC_API_KEY`, `GEMINI_API_KEY`.
 *
 * @returns {{ providerName: string, invokeProvider: Function } | null}
 *   The selected provider descriptor, or `null` when none of the variables is set.
 */
export function selectAvailableProvider() {
  const { env } = process;

  if (env.UI_DESIGN_JUDGE_MOCK_RESPONSE) {
    // The mock reads the variable again at call time, so it returns whatever is set then.
    const invokeMock = async () => process.env.UI_DESIGN_JUDGE_MOCK_RESPONSE;
    return { providerName: 'mock', invokeProvider: invokeMock };
  }

  // Ordered by precedence; the first entry whose key is set wins.
  const keyedProviders = [
    { envKey: 'OPENAI_API_KEY', providerName: 'openai', resolveInvoker: () => callOpenAiProvider },
    { envKey: 'ANTHROPIC_API_KEY', providerName: 'anthropic', resolveInvoker: () => callAnthropicProvider },
    { envKey: 'GEMINI_API_KEY', providerName: 'gemini', resolveInvoker: () => callGeminiProvider },
  ];

  const matchedProvider = keyedProviders.find((candidate) => env[candidate.envKey]);
  if (!matchedProvider) {
    return null;
  }

  return {
    providerName: matchedProvider.providerName,
    invokeProvider: matchedProvider.resolveInvoker(),
  };
}
@@ -0,0 +1,182 @@
1
+ // @ts-check
2
+
3
+ import { detectCiProvider } from './git-input.mjs';
4
+
5
/**
 * Maps a raw severity label onto one of `critical`, `high`, `medium`, `low`.
 *
 * The input is stringified, trimmed and lower-cased. `major` maps to `high`;
 * anything unrecognised (including `minor`, `info` and empty input) maps to `low`.
 *
 * @param {unknown} rawSeverityValue - Severity label as supplied by the provider.
 * @returns {string} The canonical severity.
 */
function normalizeSeverity(rawSeverityValue) {
  const severityKey = String(rawSeverityValue || '').trim().toLowerCase();

  switch (severityKey) {
    case 'critical':
    case 'high':
    case 'medium':
    case 'low':
      return severityKey;
    case 'major':
      return 'high';
    default:
      // Covers 'minor', 'info' and every unknown label.
      return 'low';
  }
}
22
+
23
/**
 * Extracts the JSON object that follows a `JSON_VERDICT:` marker in a provider reply.
 *
 * The capture is greedy: it runs from the first `{` after the marker to the last `}`
 * in the text, and that span is handed to `JSON.parse`.
 *
 * @param {unknown} rawResponseText - Raw reply text from the provider.
 * @returns {{ verdict: any, malformed: boolean }} The parsed verdict with `malformed: false`,
 *   or `{ verdict: null, malformed: true }` when the input is not a string, the marker is
 *   missing, or the captured span is not valid JSON.
 */
export function extractVerdictObject(rawResponseText) {
  // Non-string input has no `.match`; report it as malformed instead of throwing a TypeError.
  if (typeof rawResponseText !== 'string') {
    return { verdict: null, malformed: true };
  }

  const verdictMatch = rawResponseText.match(/JSON_VERDICT:\s*(\{[\s\S]*\})/i);
  if (!verdictMatch) {
    return { verdict: null, malformed: true };
  }

  try {
    return {
      verdict: JSON.parse(verdictMatch[1]),
      malformed: false,
    };
  } catch {
    // Invalid JSON is an expected outcome and is reported through the `malformed` flag.
    return {
      verdict: null,
      malformed: true,
    };
  }
}
41
+
42
/**
 * Converts provider findings into a uniform list of finding records.
 *
 * Each record gets string `area`, `problem`, `evidence` and `requiredAction` fields (with
 * placeholder text when the source value is falsy), a severity from `normalizeSeverity`,
 * and `blockingRecommended`, which is true only for a literal `true`.
 *
 * @param {unknown} rawFindings - Findings as supplied by the provider.
 * @returns {Array<object>} Normalized findings, or an empty array when the input is not an array.
 */
export function normalizeFindings(rawFindings) {
  const textOrFallback = (candidateValue, fallbackText) => String(candidateValue || fallbackText);

  const toNormalizedFinding = (rawFinding) => {
    const area = textOrFallback(rawFinding?.area, 'general');
    const severity = normalizeSeverity(rawFinding?.severity);
    const problem = textOrFallback(rawFinding?.problem, 'No problem description provided.');
    const evidence = textOrFallback(rawFinding?.evidence, 'No evidence provided.');
    const requiredAction = textOrFallback(rawFinding?.requiredAction, 'No required action provided.');
    const blockingRecommended = rawFinding?.blockingRecommended === true;

    return { area, severity, problem, evidence, requiredAction, blockingRecommended };
  };

  return Array.isArray(rawFindings) ? rawFindings.map(toNormalizedFinding) : [];
}
56
+
57
/**
 * Maps a raw rubric verdict onto `strong`, `acceptable`, `weak` or `unclear`.
 *
 * The input is stringified, trimmed and lower-cased; unknown values become `unclear`.
 *
 * @param {unknown} rawVerdictValue - Verdict label as supplied by the provider.
 * @returns {string} The canonical verdict.
 */
function normalizeRubricVerdict(rawVerdictValue) {
  const knownVerdicts = new Set(['strong', 'acceptable', 'weak', 'unclear']);
  const candidateVerdict = String(rawVerdictValue || '').trim().toLowerCase();

  return knownVerdicts.has(candidateVerdict) ? candidateVerdict : 'unclear';
}
64
+
65
/**
 * Converts a provider rubric breakdown into uniform dimension entries.
 *
 * Entries without a dimension name are dropped. When `expectedDimensions` yields at least
 * one non-empty name, only entries whose dimension exactly matches one of them are kept.
 *
 * @param {unknown} rawRubricBreakdown - Breakdown entries as supplied by the provider.
 * @param {unknown} [expectedDimensions=[]] - Dimension names to keep; non-arrays mean "keep all".
 * @returns {Array<object>} Normalized entries, or an empty array when the input is not an array.
 */
export function normalizeRubricBreakdown(rawRubricBreakdown, expectedDimensions = []) {
  if (!Array.isArray(rawRubricBreakdown)) {
    return [];
  }

  const allowedDimensions = new Set(
    Array.isArray(expectedDimensions)
      ? expectedDimensions.map((dimension) => String(dimension || '').trim()).filter(Boolean)
      : []
  );

  const toNormalizedEntry = (rawDimensionEntry) => {
    const rawScore = rawDimensionEntry?.score;

    return {
      dimension: String(rawDimensionEntry?.dimension || '').trim(),
      // Number.isFinite is false for every non-number, so it also covers the type check.
      score: Number.isFinite(rawScore) ? rawScore : null,
      verdict: normalizeRubricVerdict(rawDimensionEntry?.verdict),
      reason: String(rawDimensionEntry?.reason || 'No rubric reason provided.'),
      blocking: rawDimensionEntry?.blocking === true,
    };
  };

  const isRetained = ({ dimension }) => {
    if (dimension === '') {
      return false;
    }
    return allowedDimensions.size === 0 || allowedDimensions.has(dimension);
  };

  return rawRubricBreakdown.map(toNormalizedEntry).filter(isRetained);
}
91
+
92
/**
 * Normalizes a provider genericity assessment into a `{ status, reason }` pair.
 *
 * `status` is trimmed and lower-cased, and must be one of `distinctive`, `mixed`, `generic`
 * or `unclear`; anything else becomes `unclear`. A falsy `reason` gets a placeholder text.
 *
 * @param {unknown} rawGenericityAssessment - Assessment as supplied by the provider.
 * @returns {{ status: string, reason: string }} The normalized assessment.
 */
export function normalizeGenericityAssessment(rawGenericityAssessment) {
  const knownStatuses = new Set(['distinctive', 'mixed', 'generic', 'unclear']);
  const candidateStatus = String(rawGenericityAssessment?.status || '').trim().toLowerCase();

  const status = knownStatuses.has(candidateStatus) ? candidateStatus : 'unclear';
  const reason = String(rawGenericityAssessment?.reason || 'No genericity assessment provided.');

  return { status, reason };
}
101
+
102
/**
 * Builds a full ui-design-judge report by laying caller overrides on top of defaults.
 *
 * The merge is shallow: a top-level key present in `partialReport` (for example `summary`
 * or `rubric`) replaces the whole default value for that key rather than being merged into it.
 *
 * @param {object} [partialReport] - Top-level fields that override the defaults.
 * @returns {object} The report, stamped with the current time and the detected CI provider.
 */
export function buildReport(partialReport) {
  // The only two dynamic defaults, evaluated in the order their keys appear in the report.
  const generatedAt = new Date().toISOString();
  const ciProvider = detectCiProvider();

  const defaultSummary = {
    changedUiFileCount: 0,
    alignmentScore: null,
    driftCount: 0,
    blockingCandidateCount: 0,
    designExecutionSignalCount: 0,
    genericityStatus: 'unclear',
  };

  const defaultDesignExecution = {
    policyPresent: false,
    representationStrategy: null,
    contractReady: false,
    screenshotDependencyForbidden: false,
    repoEvidenceAvailable: false,
    handoffPresent: false,
    handoffVersion: null,
    handoffReady: false,
    handoffArtifactCount: 0,
    presentHandoffArtifacts: [],
    missingHandoffArtifacts: [],
    repoEvidenceSummaryVersion: null,
    requiredCapabilities: [],
    enabledCapabilities: [],
    missingCapabilities: [],
    semanticReviewFocus: [],
    notes: [],
  };

  const defaultCalibration = {
    version: 'ui-rubric-calibration-v1',
    providerStatus: 'unclear',
    calibratedStatus: 'unclear',
    statusChanged: false,
    namedGenericityRequired: false,
    matchedGenericitySignals: [],
    matchedForbiddenPatterns: [],
    matchedValidBoldSignals: [],
    blockingFindingCount: 0,
    contractFidelityWeak: false,
    contractDriftDetected: false,
    tasteVsFailureSeparated: null,
    evidenceTextCount: 0,
    notes: ['No rubric calibration was performed.'],
  };

  const defaultRubric = {
    expectedDimensions: [],
    breakdown: [],
    genericityAssessment: {
      status: 'unclear',
      reason: 'No genericity assessment provided.',
    },
    tasteVsFailureSeparated: null,
    calibration: defaultCalibration,
  };

  const defaultSemanticJudge = {
    attempted: false,
    skipped: false,
    skipReason: null,
  };

  const defaultReport = {
    generatedAt,
    auditName: 'ui-design-judge',
    schemaVersion: '1.2',
    mode: 'advisory',
    advisoryOnly: true,
    passed: true,
    skipped: false,
    skipReason: null,
    provider: 'none',
    ciProvider,
    contractPresent: false,
    summary: defaultSummary,
    designExecution: defaultDesignExecution,
    rubric: defaultRubric,
    semanticJudge: defaultSemanticJudge,
    malformedVerdict: false,
    providerError: false,
    findings: [],
    notes: [],
  };

  // Caller-supplied keys win; keys already in the defaults keep their position.
  return { ...defaultReport, ...partialReport };
}
179
+
180
/**
 * Writes the report to standard output as JSON indented with two spaces.
 *
 * @param {unknown} machineReportPayload - Value to serialize with `JSON.stringify`.
 * @returns {void}
 */
export function emitMachineReadableReport(machineReportPayload) {
  const serializedReport = JSON.stringify(machineReportPayload, null, 2);
  console.log(serializedReport);
}
@@ -0,0 +1,214 @@
1
+ // @ts-check
2
+
3
/**
 * Reduces a value to lower-case alphanumeric words separated by single spaces.
 *
 * Falsy input becomes an empty string. Every run of characters outside `a-z0-9`
 * (after lower-casing) acts as a word separator, so `Non-Template!` becomes `non template`.
 *
 * @param {unknown} rawValue - Value to normalize.
 * @returns {string} The normalized text, with no leading or trailing space.
 */
function normalizeForSignalMatch(rawValue) {
  const loweredText = String(rawValue || '').toLowerCase();
  const alphanumericWords = loweredText
    .split(/[^a-z0-9]+/)
    .filter((word) => word.length > 0);

  return alphanumericWords.join(' ');
}
10
+
11
/**
 * Gathers every free-text field of a review and normalizes it for signal matching.
 *
 * Sources, in order: the genericity reason, each rubric entry's dimension and reason,
 * each finding's area, problem, evidence and required action, then the notes.
 * Non-array collections are ignored, and texts that normalize to an empty string are dropped.
 *
 * @param {object} review - Review fields to read.
 * @param {object} [review.genericityAssessment] - Assessment whose `reason` is read.
 * @param {unknown} [review.rubricBreakdown] - Rubric dimension entries.
 * @param {unknown} [review.findings] - Review findings.
 * @param {unknown} [review.notes] - Free-form notes.
 * @returns {string[]} Normalized, non-empty evidence texts.
 */
function collectEvidenceTexts({
  genericityAssessment,
  rubricBreakdown,
  findings,
  notes,
}) {
  const rubricTexts = Array.isArray(rubricBreakdown)
    ? rubricBreakdown.flatMap((dimensionEntry) => [dimensionEntry?.dimension, dimensionEntry?.reason])
    : [];

  const findingTexts = Array.isArray(findings)
    ? findings.flatMap((finding) => [
      finding?.area,
      finding?.problem,
      finding?.evidence,
      finding?.requiredAction,
    ])
    : [];

  const noteTexts = Array.isArray(notes) ? notes : [];

  const rawTexts = [genericityAssessment?.reason, ...rubricTexts, ...findingTexts, ...noteTexts];

  return rawTexts
    .map((rawText) => normalizeForSignalMatch(rawText))
    .filter((normalizedText) => normalizedText.length > 0);
}
37
+
38
/**
 * Returns the signals whose normalized form occurs inside the evidence text.
 *
 * An array of evidence texts is joined with single spaces as-is; any other value is passed
 * through `normalizeForSignalMatch`. Matching is a plain substring test on that text.
 *
 * @param {unknown} signalList - Candidate signal names; non-arrays are treated as empty.
 * @param {unknown} evidenceTexts - Evidence texts (array) or a single raw text.
 * @returns {string[]} The trimmed original spelling of each matched signal, in input order.
 */
export function collectMatchedSignals(signalList, evidenceTexts) {
  const evidenceHaystack = Array.isArray(evidenceTexts)
    ? evidenceTexts.join(' ')
    : normalizeForSignalMatch(evidenceTexts);

  if (evidenceHaystack === '' || !Array.isArray(signalList)) {
    return [];
  }

  const matchedSignals = [];

  signalList.forEach((signalValue) => {
    const trimmedSignal = String(signalValue || '').trim();
    const normalizedSignal = normalizeForSignalMatch(signalValue);

    // Skip signals that are empty, or that normalize to nothing (e.g. punctuation only).
    if (trimmedSignal && normalizedSignal && evidenceHaystack.includes(normalizedSignal)) {
      matchedSignals.push(trimmedSignal);
    }
  });

  return matchedSignals;
}
56
+
57
/**
 * Derives contract-drift indicators from the rubric, the findings and the execution summary.
 *
 * - A finding is blocking when `blockingRecommended` is `true` or its severity is
 *   `critical` or `high` (case-insensitive).
 * - Contract fidelity is weak when the `contractFidelity` rubric entry has verdict `weak`
 *   or `unclear`, or a numeric score below 70. A missing entry is not weak.
 * - Drift is detected when the contract is not ready, fidelity is weak, or any finding blocks.
 *
 * @param {unknown} rubricBreakdown - Rubric dimension entries.
 * @param {unknown} findings - Review findings.
 * @param {object} [designExecutionSummary] - Summary whose `contractReady` flag is read.
 * @returns {{ blockingFindingCount: number, contractFidelityWeak: boolean, contractDriftDetected: boolean }}
 */
function detectContractDrift(rubricBreakdown, findings, designExecutionSummary) {
  const blockingSeverities = ['critical', 'high'];

  const isBlockingFinding = (finding) => {
    if (finding?.blockingRecommended === true) {
      return true;
    }
    return blockingSeverities.includes(String(finding?.severity || '').toLowerCase());
  };

  const blockingFindingCount = Array.isArray(findings)
    ? findings.reduce((runningCount, finding) => (isBlockingFinding(finding) ? runningCount + 1 : runningCount), 0)
    : 0;

  const fidelityEntry = Array.isArray(rubricBreakdown)
    ? rubricBreakdown.find((dimensionEntry) => dimensionEntry?.dimension === 'contractFidelity')
    : null;

  let contractFidelityWeak = false;
  if (fidelityEntry) {
    const { verdict, score } = fidelityEntry;
    const hasWeakVerdict = verdict === 'weak' || verdict === 'unclear';
    const hasLowScore = typeof score === 'number' && score < 70;
    contractFidelityWeak = hasWeakVerdict || hasLowScore;
  }

  const contractNotReady = designExecutionSummary?.contractReady !== true;

  return {
    blockingFindingCount,
    contractFidelityWeak,
    contractDriftDetected: contractNotReady || contractFidelityWeak || blockingFindingCount > 0,
  };
}
77
+
78
/**
 * Re-evaluates the provider's genericity status against named rubric signals and contract drift.
 *
 * Steps, in order:
 * 1. A `generic`/`mixed` claim with no named genericity signal or forbidden pattern drops to
 *    `unclear` when the rubric requires genericity to be explained.
 * 2. The status is then overridden by the mix of matched evidence:
 *    - generic evidence only: `generic` on drift or two-plus matches, otherwise `mixed`;
 *    - bold evidence only: `mixed` on drift, `distinctive` with two-plus bold signals,
 *      `mixed` when the provider said `unclear`, otherwise unchanged;
 *    - both kinds: `mixed`;
 *    - neither kind: a `distinctive` provider status becomes `mixed` on drift.
 * 3. A `distinctive` result is downgraded to `mixed` when `tasteVsFailureSeparated` is `false`.
 *
 * @param {object} input - Calibration inputs.
 * @param {object} [input.reviewRubricSummary] - Rubric with signal lists and reporting rules.
 * @param {object} [input.designExecutionSummary] - Summary carrying `contractReady`.
 * @param {object} [input.genericityAssessment] - Provider assessment (`status`, `reason`).
 * @param {unknown} [input.rubricBreakdown] - Rubric dimension entries.
 * @param {unknown} [input.findings] - Review findings.
 * @param {unknown} [input.notes] - Free-form notes.
 * @param {boolean|null} [input.tasteVsFailureSeparated] - Provider flag, echoed in the result.
 * @returns {object} Calibration record with provider and calibrated status, matches and notes.
 */
export function calibrateGenericityAssessment({
  reviewRubricSummary,
  designExecutionSummary,
  genericityAssessment,
  rubricBreakdown,
  findings,
  notes,
  tasteVsFailureSeparated,
}) {
  const providerStatus = String(genericityAssessment?.status || 'unclear').trim().toLowerCase() || 'unclear';
  const evidenceTexts = collectEvidenceTexts({ genericityAssessment, rubricBreakdown, findings, notes });

  const matchedGenericitySignals = collectMatchedSignals(reviewRubricSummary?.genericitySignals, evidenceTexts);
  const matchedValidBoldSignals = collectMatchedSignals(reviewRubricSummary?.validBoldSignals, evidenceTexts);
  const matchedForbiddenPatterns = collectMatchedSignals(reviewRubricSummary?.forbiddenPatterns, evidenceTexts);

  const driftSummary = detectContractDrift(rubricBreakdown, findings, designExecutionSummary);
  const { contractDriftDetected } = driftSummary;

  const namedGenericityRequired = reviewRubricSummary?.reportingRules?.mustExplainGenericity === true;
  const genericEvidenceCount = matchedGenericitySignals.length + matchedForbiddenPatterns.length;
  const boldEvidenceCount = matchedValidBoldSignals.length;
  const hasGenericEvidence = genericEvidenceCount > 0;
  const hasBoldEvidence = boldEvidenceCount > 0;

  const calibrationNotes = [];
  let calibratedStatus = providerStatus;

  // Step 1: an unexplained genericity claim cannot stand when the rubric demands named signals.
  const providerClaimsGenericity = providerStatus === 'generic' || providerStatus === 'mixed';
  if (namedGenericityRequired && providerClaimsGenericity && !hasGenericEvidence) {
    calibratedStatus = 'unclear';
    calibrationNotes.push('Genericity claim was not backed by any named drift signal.');
  }

  // Step 2: the matched evidence mix overrides the status.
  if (hasGenericEvidence) {
    if (hasBoldEvidence) {
      calibratedStatus = 'mixed';
      calibrationNotes.push('The evidence contains both generic drift and legitimate authored moves.');
    } else {
      calibratedStatus = contractDriftDetected || genericEvidenceCount >= 2 ? 'generic' : 'mixed';
      calibrationNotes.push('Named genericity drift signals dominate the review evidence.');
    }
  } else if (hasBoldEvidence) {
    if (contractDriftDetected) {
      calibratedStatus = 'mixed';
      calibrationNotes.push('Authored signals are present, but contract drift prevents a distinctive verdict.');
    } else if (boldEvidenceCount >= 2) {
      calibratedStatus = 'distinctive';
      calibrationNotes.push('Multiple valid bold signals were named without generic drift evidence.');
    } else if (providerStatus === 'unclear') {
      calibratedStatus = 'mixed';
      calibrationNotes.push('One valid bold signal was named, but evidence is not strong enough for a distinctive verdict.');
    }
  } else if (providerStatus === 'distinctive' && contractDriftDetected) {
    calibratedStatus = 'mixed';
    calibrationNotes.push('Distinctive tone does not override contract drift or blocking findings.');
  }

  // Step 3: a distinctive verdict requires taste to be separated from real failures.
  if (tasteVsFailureSeparated === false && calibratedStatus === 'distinctive') {
    calibratedStatus = 'mixed';
    calibrationNotes.push('The review did not separate taste preference from real failure conditions.');
  }

  if (calibrationNotes.length === 0) {
    calibrationNotes.push('Provider verdict stayed intact after rubric calibration.');
  }

  return {
    version: 'ui-rubric-calibration-v1',
    providerStatus,
    calibratedStatus,
    statusChanged: calibratedStatus !== providerStatus,
    namedGenericityRequired,
    matchedGenericitySignals,
    matchedForbiddenPatterns,
    matchedValidBoldSignals,
    blockingFindingCount: driftSummary.blockingFindingCount,
    contractFidelityWeak: driftSummary.contractFidelityWeak,
    contractDriftDetected,
    tasteVsFailureSeparated,
    evidenceTextCount: evidenceTexts.length,
    notes: calibrationNotes,
  };
}
166
+
167
/**
 * Runs the calibration over a list of cases and summarizes how many matched expectations.
 *
 * A case passes when its calibrated status equals `expected.calibratedStatus` and, if
 * `expected.contractDriftDetected` is a boolean, the detected drift flag equals it too.
 * With zero cases the report has `passed: true` and `accuracyPercent: 0`.
 *
 * @param {object} input - Report inputs.
 * @param {unknown} input.cases - Cases to evaluate; non-arrays are treated as empty.
 * @param {object} [input.reviewRubricSummary] - Rubric handed to every calibration run.
 * @returns {object} Report with per-case results, pass counts and accuracy to one decimal.
 */
export function buildRubricCalibrationReport({
  cases,
  reviewRubricSummary,
}) {
  const evaluateCase = (caseEntry) => {
    const calibration = calibrateGenericityAssessment({
      reviewRubricSummary,
      designExecutionSummary: caseEntry.designExecutionSummary,
      genericityAssessment: caseEntry.genericityAssessment,
      rubricBreakdown: caseEntry.rubricBreakdown,
      findings: caseEntry.findings,
      notes: caseEntry.notes,
      tasteVsFailureSeparated: caseEntry.tasteVsFailureSeparated,
    });

    const hasExpectationObject = Boolean(caseEntry.expected) && typeof caseEntry.expected === 'object';
    const expected = hasExpectationObject ? caseEntry.expected : {};

    const statusMatches = String(expected.calibratedStatus || '') === calibration.calibratedStatus;
    // The drift flag is only checked when the case states a boolean expectation for it.
    const contractDriftMatches = typeof expected.contractDriftDetected !== 'boolean'
      || expected.contractDriftDetected === calibration.contractDriftDetected;

    return {
      id: String(caseEntry.id || 'unknown-case'),
      label: String(caseEntry.label || ''),
      passed: statusMatches && contractDriftMatches,
      expected,
      calibration,
    };
  };

  const results = (Array.isArray(cases) ? cases : []).map(evaluateCase);
  const totalCases = results.length;
  const passedCaseCount = results.reduce(
    (runningCount, resultEntry) => (resultEntry.passed ? runningCount + 1 : runningCount),
    0
  );

  let accuracyPercent = 0;
  if (totalCases !== 0) {
    accuracyPercent = Number(((passedCaseCount / totalCases) * 100).toFixed(1));
  }

  return {
    generatedAt: new Date().toISOString(),
    reportName: 'ui-rubric-calibration',
    schemaVersion: '1.0',
    passed: passedCaseCount === totalCases,
    failureCount: totalCases - passedCaseCount,
    totalCases,
    passedCaseCount,
    accuracyPercent,
    results,
  };
}
@@ -0,0 +1,188 @@
1
+ {
2
+ "version": "ui-rubric-goldset-v1",
3
+ "reviewRubric": {
4
+ "version": "ui-rubric-v1",
5
+ "genericitySignals": [
6
+ "safe-centered-hero-without-product-rationale",
7
+ "balanced-card-grid-without-priority-shift",
8
+ "default-framework-button-and-input-treatment",
9
+ "trend-gradient-without-structural-role",
10
+ "interchangeable-dashboard-chrome"
11
+ ],
12
+ "validBoldSignals": [
13
+ "one-clear-signature-move",
14
+ "project-specific-layout-tension",
15
+ "purposeful-motion-as-identity",
16
+ "distinct-typographic-hierarchy",
17
+ "non-template-task-priority"
18
+ ],
19
+ "reportingRules": {
20
+ "mustExplainGenericity": true,
21
+ "mustSeparateTasteFromFailure": true,
22
+ "contractFidelityOverridesPersonalTaste": true
23
+ }
24
+ },
25
+ "cases": [
26
+ {
27
+ "id": "distinctive-authored-valid",
28
+ "label": "Authored UI with clear valid bold signals and no contract drift",
29
+ "designExecutionSummary": {
30
+ "contractReady": true
31
+ },
32
+ "genericityAssessment": {
33
+ "status": "distinctive",
34
+ "reason": "The UI lands one clear signature move with project specific layout tension and non template task priority."
35
+ },
36
+ "rubricBreakdown": [
37
+ {
38
+ "dimension": "contractFidelity",
39
+ "score": 88,
40
+ "verdict": "strong",
41
+ "reason": "Contract fidelity stays intact while the layout uses non template task priority.",
42
+ "blocking": true
43
+ }
44
+ ],
45
+ "findings": [],
46
+ "notes": [
47
+ "The composition keeps one clear signature move instead of collapsing into default chrome."
48
+ ],
49
+ "tasteVsFailureSeparated": true,
50
+ "expected": {
51
+ "calibratedStatus": "distinctive",
52
+ "contractDriftDetected": false
53
+ }
54
+ },
55
+ {
56
+ "id": "bold-but-contract-drift",
57
+ "label": "Bold ideas exist but contract drift keeps the result mixed",
58
+ "designExecutionSummary": {
59
+ "contractReady": true
60
+ },
61
+ "genericityAssessment": {
62
+ "status": "distinctive",
63
+ "reason": "There is project specific layout tension and one clear signature move, but the mobile hierarchy drifted."
64
+ },
65
+ "rubricBreakdown": [
66
+ {
67
+ "dimension": "contractFidelity",
68
+ "score": 54,
69
+ "verdict": "weak",
70
+ "reason": "Contract fidelity weakened when the primary CTA moved below secondary content.",
71
+ "blocking": true
72
+ }
73
+ ],
74
+ "findings": [
75
+ {
76
+ "area": "responsive",
77
+ "severity": "high",
78
+ "problem": "Mobile layout no longer follows the contract.",
79
+ "evidence": "The primary action now drops below supporting proof.",
80
+ "requiredAction": "Restore the intended task order.",
81
+ "blockingRecommended": true
82
+ }
83
+ ],
84
+ "notes": [
85
+ "The distinctive direction is real, but contract fidelity still drifted."
86
+ ],
87
+ "tasteVsFailureSeparated": true,
88
+ "expected": {
89
+ "calibratedStatus": "mixed",
90
+ "contractDriftDetected": true
91
+ }
92
+ },
93
+ {
94
+ "id": "generic-template-drift",
95
+ "label": "Generic template drift is named clearly and stays generic",
96
+ "designExecutionSummary": {
97
+ "contractReady": false
98
+ },
99
+ "genericityAssessment": {
100
+ "status": "mixed",
101
+ "reason": "The redesign fell back to balanced card grid without priority shift plus default framework button and input treatment."
102
+ },
103
+ "rubricBreakdown": [
104
+ {
105
+ "dimension": "contractFidelity",
106
+ "score": 49,
107
+ "verdict": "weak",
108
+ "reason": "The hierarchy collapsed into interchangeable dashboard chrome.",
109
+ "blocking": true
110
+ }
111
+ ],
112
+ "findings": [
113
+ {
114
+ "area": "layout",
115
+ "severity": "high",
116
+ "problem": "The page uses balanced card grid without priority shift.",
117
+ "evidence": "Every block carries equal weight and the CTA no longer leads.",
118
+ "requiredAction": "Rebuild the page around one dominant task surface.",
119
+ "blockingRecommended": true
120
+ }
121
+ ],
122
+ "notes": [
123
+ "Interchangeable dashboard chrome now dominates the flow."
124
+ ],
125
+ "tasteVsFailureSeparated": true,
126
+ "expected": {
127
+ "calibratedStatus": "generic",
128
+ "contractDriftDetected": true
129
+ }
130
+ },
131
+ {
132
+ "id": "provider-overcalls-generic",
133
+ "label": "Provider called the work generic, but the evidence points to valid authored moves",
134
+ "designExecutionSummary": {
135
+ "contractReady": true
136
+ },
137
+ "genericityAssessment": {
138
+ "status": "generic",
139
+ "reason": "The work feels generic, although it carries one clear signature move and distinct typographic hierarchy."
140
+ },
141
+ "rubricBreakdown": [
142
+ {
143
+ "dimension": "contractFidelity",
144
+ "score": 82,
145
+ "verdict": "strong",
146
+ "reason": "Contract fidelity is strong and the task order remains deliberate.",
147
+ "blocking": true
148
+ }
149
+ ],
150
+ "findings": [],
151
+ "notes": [
152
+ "The surface uses one clear signature move with distinct typographic hierarchy and non template task priority."
153
+ ],
154
+ "tasteVsFailureSeparated": true,
155
+ "expected": {
156
+ "calibratedStatus": "distinctive",
157
+ "contractDriftDetected": false
158
+ }
159
+ },
160
+ {
161
+ "id": "unnamed-genericity-claim",
162
+ "label": "Genericity claim without named drift signal drops to unclear",
163
+ "designExecutionSummary": {
164
+ "contractReady": true
165
+ },
166
+ "genericityAssessment": {
167
+ "status": "generic",
168
+ "reason": "The layout still feels a little too safe and familiar."
169
+ },
170
+ "rubricBreakdown": [
171
+ {
172
+ "dimension": "contractFidelity",
173
+ "score": 78,
174
+ "verdict": "acceptable",
175
+ "reason": "The contract is mostly intact.",
176
+ "blocking": true
177
+ }
178
+ ],
179
+ "findings": [],
180
+ "notes": [],
181
+ "tasteVsFailureSeparated": true,
182
+ "expected": {
183
+ "calibratedStatus": "unclear",
184
+ "contractDriftDetected": false
185
+ }
186
+ }
187
+ ]
188
+ }