npm - mstro-app - Versions diffs - 0.5.1 → 0.5.5 - Mend

mstro-app 0.5.1 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (240) hide show

package/dist/server/services/websocket/quality-grading.js CHANGED Viewed

@@ -3,7 +3,7 @@
 // Category -> Dimension Mapping
 // ============================================================================
 const SECURITY_CATEGORIES = new Set(['security']);
-const RELIABILITY_CATEGORIES = new Set(['bugs', 'logic', 'performance', 'complexity']);
+const RELIABILITY_CATEGORIES = new Set(['bugs', 'logic', 'performance', 'complexity', 'build']);
 const MAINTAINABILITY_CATEGORIES = new Set([
     'lint',
     'linting',
@@ -28,52 +28,102 @@ export function categoryToDimension(category) {
         return 'maintainability';
     return 'maintainability';
 }
+/** Categories that represent architectural problems — used by the arch penalty. */
+const ARCHITECTURE_CATEGORIES = new Set(['architecture', 'oop']);
 // ============================================================================
-// Legacy Fallback
+// Score Bands & Modifier Math
 // ============================================================================
 /**
- * Score-to-grade conversion used by legacy callers that still operate on a
- * single 0-100 number. The new multi-dimensional path computes grades
- * directly from finding shape; this remains for backward compatibility.
+ * Score boundaries for each base grade. Note that there is no D band: C
+ * bottoms out at 70 and everything at or below 69 is F-tier (65-69 is F+,
+ * 56-64 is F, 0-55 is F-), per product spec ("60s and below is F").
  */
-export function gradeFromScore(score) {
-    if (score >= 90)
-        return 'A';
-    if (score >= 80)
-        return 'B';
-    if (score >= 70)
-        return 'C';
-    if (score >= 60)
-        return 'D';
-    return 'F';
-}
-// ============================================================================
-// Score Bands
-// ============================================================================
-const BAND_TOP = {
+const BASE_BAND_TOP = {
     A: 100,
     B: 89,
     C: 79,
-    D: 69,
-    F: 59,
+    F: 69, // F covers 56-69 (F+ for 65-69, F for 56-64) — F- splits off below
 };
-const BAND_BOTTOM = {
+const BASE_BAND_BOTTOM = {
     A: 90,
     B: 80,
     C: 70,
-    D: 60,
-    F: 0,
+    F: 56, // F- covers 0-55 — handled specially in scoreToGrade()
 };
 /**
- * Linearly interpolate a score within a grade's band.
+ * Convert a 0-100 score to the full letter grade including +/- modifier.
  *
- * `position` is in [0, 1]: 0 means "as bad as this grade gets" (band bottom),
- * 1 means "as good as this grade gets" (band top, just below the next grade).
+ * Within an A/B/C band, the band is split into thirds:
+ *   X-  bottom third (e.g., A-: 90-92)
+ *   X   middle third (e.g., A : 93-96)
+ *   X+  top third    (e.g., A+: 97-100)
+ *
+ * The F tier is not split into equal thirds: there is no academic "F0"
+ * anchor and the user wanted F+/F/F-, so it uses three uneven slices:
+ *   F-  0-55           "critically broken"
+ *   F   56-64          "broken"
+ *   F+  65-69          "barely failing"
+ *
+ * Compile/critical-severity hard caps are applied separately, not by score.
+ */
+export function scoreToGrade(score) {
+    if (score >= 97)
+        return 'A+';
+    if (score >= 93)
+        return 'A';
+    if (score >= 90)
+        return 'A-';
+    if (score >= 87)
+        return 'B+';
+    if (score >= 83)
+        return 'B';
+    if (score >= 80)
+        return 'B-';
+    if (score >= 77)
+        return 'C+';
+    if (score >= 73)
+        return 'C';
+    if (score >= 70)
+        return 'C-';
+    if (score >= 65)
+        return 'F+';
+    if (score >= 56)
+        return 'F';
+    return 'F-';
+}
+/**
+ * Legacy single-letter conversion. Returns the *base* grade only (no
+ * modifier) for compatibility with callers that pre-date the +/- rollout
+ * (`scoreBreakdown.categoryPenalties[].grade`, etc.). New surfaces should
+ * call `scoreToGrade()` instead.
+ */
+export function gradeFromScore(score) {
+    const full = scoreToGrade(score);
+    // Strip the modifier so legacy callers still see exactly one of A/B/C/F.
+    return baseGradeOf(full);
+}
+/** Strip the +/- modifier from a letter grade. */
+function baseGradeOf(g) {
+    if (g === 'N/A' || g === 'D')
+        return g;
+    if (g.startsWith('A'))
+        return 'A';
+    if (g.startsWith('B'))
+        return 'B';
+    if (g.startsWith('C'))
+        return 'C';
+    return 'F';
+}
+/**
+ * Linearly interpolate a score within a base band.
+ *
+ * `position` is in [0, 1]: 0 = "as bad as this grade gets" (band bottom),
+ * 1 = "as good as this grade gets" (band top, just below the next grade).
  */
 function scoreInBand(grade, position) {
     const clamped = Math.max(0, Math.min(1, position));
-    const bottom = BAND_BOTTOM[grade];
-    const top = BAND_TOP[grade];
+    const bottom = BASE_BAND_BOTTOM[grade];
+    const top = BASE_BAND_TOP[grade];
     return Math.round(bottom + (top - bottom) * clamped);
 }
 // ============================================================================
@@ -111,132 +161,111 @@ function worstSeverity(counts) {
  * security finding immediately drops the grade below B because security
  * issues can't be amortized over codebase size.
  *
- * Within-band score: more findings at the threshold severity -> lower score.
- * The interpolation favors "fewer issues is meaningfully better" so 1 medium
- * scores higher than 5 mediums even though both are grade C.
+ * A critical security issue caps at F- (the worst grade). Low-severity-only
+ * findings stay in the B band because every team has a few: a single low
+ * scores at the top of the band (B+), sliding toward B- as the count grows.
  */
 function gradeSecurity(findings) {
     const counts = countSeverities(findings);
     const worst = worstSeverity(counts);
     if (counts.total === 0) {
-        return {
-            name: 'security',
-            score: 100,
-            grade: 'A',
-            rationale: '0 security findings',
-            available: true,
-            findingCount: 0,
-            worstSeverity: null,
-        };
+        return makeDimension('security', 100, '0 security findings', 0, null);
     }
-    let grade;
-    let position;
-    let rationale;
     if (counts.critical > 0) {
-        grade = 'F';
-        // F band: fewer criticals -> higher within-band, but still F.
-        position = 1 / (1 + counts.critical);
-        rationale = `${counts.critical} critical-severity security ${pluralize('issue', counts.critical)}`;
+        // Critical security issue → F-, not just F. There's no recovering by
+        // averaging this away across a clean codebase.
+        return makeDimension('security', Math.max(0, 55 - counts.critical * 5), `${counts.critical} critical-severity security ${pluralize('issue', counts.critical)}`, counts.total, worst);
     }
-    else if (counts.high > 0) {
-        grade = 'D';
+    let baseGrade;
+    let position;
+    let rationale;
+    if (counts.high > 0) {
+        baseGrade = 'F';
         position = 1 / (1 + counts.high);
         rationale = `${counts.high} high-severity security ${pluralize('issue', counts.high)}`;
     }
     else if (counts.medium > 0) {
-        grade = 'C';
+        baseGrade = 'C';
         position = 1 / (1 + counts.medium);
         rationale = `${counts.medium} medium-severity security ${pluralize('issue', counts.medium)}`;
     }
     else {
         // Only low-severity findings.
-        grade = 'B';
-        // 1 low -> top of B (89); more lows -> down toward 80.
+        baseGrade = 'B';
         position = 1 / Math.max(1, counts.low);
         rationale = `${counts.low} low-severity security ${pluralize('issue', counts.low)}`;
     }
-    return {
-        name: 'security',
-        score: scoreInBand(grade, position),
-        grade,
-        rationale,
-        available: true,
-        findingCount: counts.total,
-        worstSeverity: worst,
-    };
+    const score = scoreInBand(baseGrade, position);
+    return makeDimension('security', score, rationale, counts.total, worst);
 }
-function reliabilityBandClean(counts) {
-    const position = counts.total === 0 ? 1 : 0.5;
-    const rationale = counts.total === 0 ? '0 reliability findings' : '1 low-severity reliability issue';
-    return { grade: 'A', position, rationale };
+function reliabilityByCount(n) {
+    // Stricter than Maintainability's count ladder: a couple of real bugs hurt
+    // more than a couple of lint warnings, but a single isolated medium bug on
+    // a small project shouldn't pin the codebase at C.
+    const label = `${n} reliability ${pluralize('issue', n)}`;
+    if (n <= 2)
+        return { grade: 'A', position: 1 - n / 2, label };
+    if (n <= 6)
+        return { grade: 'B', position: 1 - (n - 2) / 4, label };
+    if (n <= 15)
+        return { grade: 'C', position: 1 - (n - 6) / 9, label };
+    return { grade: 'F', position: 1 / (1 + (n - 15) / 15), label };
+}
+function reliabilityByDensity(n, kloc) {
+    // Density thresholds are tighter than Maintainability (5/10/25). A 50 KLOC
+    // codebase with 100 reliability bugs (density 2) is "minor cleanup", not
+    // pristine — but 1.4/KLOC is still A-band because real-world projects
+    // never get to zero. The escape hatch handles severity outliers above this.
+    const density = n / kloc;
+    const label = `${roundOne(density)} reliability ${pluralize('issue', n)} / KLOC`;
+    if (density < 1.5)
+        return { grade: 'A', position: 1 - density / 1.5, label };
+    if (density < 4)
+        return { grade: 'B', position: 1 - (density - 1.5) / 2.5, label };
+    if (density < 8)
+        return { grade: 'C', position: 1 - (density - 4) / 4, label };
+    return { grade: 'F', position: 1 / (1 + (density - 8) / 8), label };
 }
-function reliabilityBandSevere(counts) {
+function reliabilityEscape(counts) {
     if (counts.critical > 0) {
-        return {
-            grade: 'F',
-            position: 1 / (1 + counts.critical),
-            rationale: `${counts.critical} critical-severity ${pluralize('bug', counts.critical)}`,
-        };
+        return { grade: 'F', note: `${counts.critical} critical-severity ${pluralize('bug', counts.critical)}` };
     }
-    if (counts.high >= 2) {
-        return {
-            grade: 'D',
-            position: 1 / (1 + (counts.high - 1)),
-            rationale: `${counts.high} high-severity ${pluralize('bug', counts.high)}`,
-        };
+    if (counts.high > 0) {
+        return { grade: 'C', note: `${counts.high} high-severity ${pluralize('bug', counts.high)}` };
     }
     return null;
 }
-function reliabilityBandMid(counts) {
-    if (counts.high >= 1) {
-        return {
-            grade: 'C',
-            position: 1 / (1 + counts.high),
-            rationale: `${counts.high} high-severity ${pluralize('bug', counts.high)}`,
-        };
-    }
-    if (counts.medium >= 3) {
-        return {
-            grade: 'C',
-            position: 1 / Math.max(1, counts.medium - 2),
-            rationale: `${counts.medium} medium-severity reliability ${pluralize('issue', counts.medium)}`,
-        };
-    }
-    if (counts.medium >= 1) {
-        return {
-            grade: 'B',
-            position: 1 / Math.max(1, counts.medium),
-            rationale: `${counts.medium} medium-severity reliability ${pluralize('issue', counts.medium)}`,
-        };
-    }
-    // Only low-severity findings, > 1 of them.
-    return {
-        grade: 'B',
-        position: 1 / Math.max(1, counts.low - 1),
-        rationale: `${counts.low} low-severity reliability ${pluralize('issue', counts.low)}`,
-    };
-}
 /**
- * Reliability grading — slightly more lenient than Security because not every
- * complexity warning is a runtime defect. A single low-severity logic issue
- * still earns an A; medium issues escalate gradually.
+ * Reliability grading — density-based with a severity escape hatch.
+ *
+ * - Empty / ≤1 low: A-band (clean by convention).
+ * - Density-based grade (≥5 KLOC) or count-based grade (<5 KLOC) drives
+ *   the baseline. Both ladders have the same A/B/C/F shape as
+ *   Maintainability's, but with tighter thresholds, so reliability and
+ *   maintainability remain comparable at a glance.
+ * - Severity escape: critical → F, high → C. This matches Maintainability and
+ *   prevents a handful of medium-density bugs from being silently rated A
+ *   when at least one is severe.
+ *
+ * Build/compile errors flow in via `build` category with severity `critical`
+ * and therefore land at F via the escape hatch — no special-case branching.
  */
-function gradeReliability(findings) {
+function gradeReliability(findings, totalLines) {
     const counts = countSeverities(findings);
     const worst = worstSeverity(counts);
-    const isClean = counts.total === 0 || (counts.low <= 1 && counts.medium === 0 && counts.high === 0 && counts.critical === 0);
-    const band = isClean
-        ? reliabilityBandClean(counts)
-        : reliabilityBandSevere(counts) ?? reliabilityBandMid(counts);
-    return {
-        name: 'reliability',
-        score: scoreInBand(band.grade, band.position),
-        grade: band.grade,
-        rationale: band.rationale,
-        available: true,
-        findingCount: counts.total,
-        worstSeverity: worst,
-    };
+    const kloc = Math.max(totalLines / 1000, 1.0);
+    if (counts.total === 0) {
+        return makeDimension('reliability', 100, '0 reliability findings', 0, null);
+    }
+    // ≤1 low and nothing else is treated as clean — every team has one.
+    if (counts.low <= 1 && counts.medium === 0 && counts.high === 0 && counts.critical === 0) {
+        return makeDimension('reliability', scoreInBand('A', 0.5), '1 low-severity reliability issue', counts.total, worst);
+    }
+    const band = kloc < 5 ? reliabilityByCount(counts.total) : reliabilityByDensity(counts.total, kloc);
+    const severityCap = reliabilityEscape(counts);
+    const useCap = severityCap && baseIsWorse(severityCap.grade, band.grade);
+    const finalGrade = useCap ? severityCap.grade : band.grade;
+    const finalPosition = useCap ? 0.5 : band.position;
+    const rationale = useCap ? `${band.label}, ${severityCap.note}` : band.label;
+    return makeDimension('reliability', scoreInBand(finalGrade, finalPosition), rationale, counts.total, worst);
 }
 function maintainabilityByCount(n) {
     const label = `${n} maintainability ${pluralize('issue', n)}`;
@@ -246,9 +275,7 @@ function maintainabilityByCount(n) {
         return { grade: 'B', position: 1 - (n - 5) / 10, label };
     if (n <= 30)
         return { grade: 'C', position: 1 - (n - 15) / 15, label };
-    if (n <= 60)
-        return { grade: 'D', position: 1 - (n - 30) / 30, label };
-    return { grade: 'F', position: 1 / (1 + (n - 60) / 30), label };
+    return { grade: 'F', position: 1 / (1 + (n - 30) / 30), label };
 }
 function maintainabilityByDensity(n, kloc) {
     const density = n / kloc;
@@ -259,13 +286,11 @@ function maintainabilityByDensity(n, kloc) {
         return { grade: 'B', position: 1 - (density - 5) / 5, label };
     if (density < 25)
         return { grade: 'C', position: 1 - (density - 10) / 15, label };
-    if (density < 50)
-        return { grade: 'D', position: 1 - (density - 25) / 25, label };
-    return { grade: 'F', position: 1 / (1 + (density - 50) / 25), label };
+    return { grade: 'F', position: 1 / (1 + (density - 25) / 25), label };
 }
 function maintainabilityEscape(counts) {
     if (counts.critical > 0) {
-        return { grade: 'D', note: `${counts.critical} critical-severity ${pluralize('issue', counts.critical)}` };
+        return { grade: 'F', note: `${counts.critical} critical-severity ${pluralize('issue', counts.critical)}` };
     }
     if (counts.high > 0) {
         return { grade: 'C', note: `${counts.high} high-severity ${pluralize('issue', counts.high)}` };
@@ -278,57 +303,173 @@ function maintainabilityEscape(counts) {
  * (one extra lint issue moves density by 1.0+), so we fall back to absolute
  * counts — preventing tiny projects from being unfairly penalized.
  *
- * Severity escape hatch: a single high-severity maintainability finding
- * (e.g., a 1500-line file) caps the grade at C; a critical caps at D.
- * "Worst wins" — we take min of density-grade and severity-cap.
+ * Severity escape hatch: a critical maintainability finding (e.g., a 3000-
+ * line file with high cohesion-violation severity) caps at F; a high-severity
+ * one caps at C. "Worst wins" — we take min of density-grade and severity-cap.
  */
 function gradeMaintainability(findings, totalLines) {
     const counts = countSeverities(findings);
     const kloc = Math.max(totalLines / 1000, 1.0);
     if (counts.total === 0) {
-        return {
-            name: 'maintainability',
-            score: 100,
-            grade: 'A',
-            rationale: '0 maintainability findings',
-            available: true,
-            findingCount: 0,
-            worstSeverity: null,
-        };
+        return makeDimension('maintainability', 100, '0 maintainability findings', 0, null);
     }
     const band = kloc < 5 ? maintainabilityByCount(counts.total) : maintainabilityByDensity(counts.total, kloc);
     const severityCap = maintainabilityEscape(counts);
-    const useCap = severityCap && gradeIsWorse(severityCap.grade, band.grade);
+    const useCap = severityCap && baseIsWorse(severityCap.grade, band.grade);
     const finalGrade = useCap ? severityCap.grade : band.grade;
     const finalPosition = useCap ? 0.5 : band.position;
     const rationale = useCap ? `${band.label}, ${severityCap.note}` : band.label;
+    return makeDimension('maintainability', scoreInBand(finalGrade, finalPosition), rationale, counts.total, worstSeverity(counts));
+}
+// ============================================================================
+// Architectural Penalty
+// ============================================================================
+/**
+ * Drop a dimension's grade by N letters because of architectural findings.
+ *
+ * Rationale: a high-severity architectural problem (god class, leaky
+ * abstraction, broken layering) is qualitatively different from a long-file
+ * lint warning — it pollutes every change that touches the affected code.
+ * The user spec calls for explicit letter-grade drops:
+ *
+ *   - 1 high-severity arch issue        → drop 1 letter
+ *   - 2+ high-severity arch issues      → drop 2 letters
+ *   - any critical-severity arch issue  → drop 2 letters
+ *
+ * Letters drop A → B → C → F → F-. We never go lower than F-. The drop is
+ * applied AFTER the dimension's normal grading, so the letter grade carries
+ * the architectural weight that a density-based score would otherwise miss.
+ * The numeric score is then re-anchored into the dropped grade's band so the
+ * displayed score and letter agree.
+ */
+function archDropCount(archFindings) {
+    let highCount = 0;
+    let criticalCount = 0;
+    for (const f of archFindings) {
+        if (f.severity === 'critical')
+            criticalCount++;
+        else if (f.severity === 'high')
+            highCount++;
+    }
+    if (criticalCount >= 1)
+        return 2;
+    if (highCount >= 2)
+        return 2;
+    if (highCount >= 1)
+        return 1;
+    return 0;
+}
+const BASE_LETTERS = ['A', 'B', 'C', 'F'];
+function gradeModifier(grade) {
+    if (grade.endsWith('+'))
+        return '+';
+    if (grade.endsWith('-'))
+        return '-';
+    return '';
+}
+function applyModifierToTargetBase(targetBase, modifier) {
+    // F's modifier semantics differ from A/B/C: F+ is "barely failing" while
+    // A+/B+/C+ are "top of band." For simplicity we map any modifier on F to
+    // its matching variant, and use F- (the worst) for any post-F overshoot.
+    if (targetBase === 'F') {
+        if (modifier === '+')
+            return 'F+';
+        if (modifier === '-')
+            return 'F-';
+        return 'F';
+    }
+    if (modifier === '+')
+        return `${targetBase}+`;
+    if (modifier === '-')
+        return `${targetBase}-`;
+    return targetBase;
+}
+/**
+ * Drop a grade by N "letters." A "letter" here means a full base-grade step
+ * (A → B → C → F → F-), preserving the modifier when possible. So A+ dropped
+ * by 1 becomes B+, not A. Stops at F-.
+ */
+function dropGradeByLetters(grade, letters) {
+    if (letters <= 0 || grade === 'N/A' || grade === 'D')
+        return grade;
+    const baseLetter = baseGradeOf(grade);
+    const baseIdx = BASE_LETTERS.indexOf(baseLetter);
+    if (baseIdx === -1)
+        return grade;
+    const targetBaseIdx = baseIdx + letters;
+    // Past the F base — bottom out at F- (the absolute worst grade).
+    if (targetBaseIdx > 3)
+        return 'F-';
+    const targetBase = BASE_LETTERS[targetBaseIdx];
+    return applyModifierToTargetBase(targetBase, gradeModifier(grade));
+}
+function applyArchPenalty(dim, archFindings) {
+    const drop = archDropCount(archFindings);
+    if (drop === 0)
+        return dim;
+    const dropped = dropGradeByLetters(dim.grade, drop);
+    if (dropped === dim.grade)
+        return dim;
+    const archCount = archFindings.length;
+    const noun = pluralize('architectural finding', archCount);
+    const note = `dropped ${drop} ${pluralize('letter', drop)} by ${archCount} ${noun}`;
     return {
-        name: 'maintainability',
-        score: scoreInBand(finalGrade, finalPosition),
-        grade: finalGrade,
-        rationale,
-        available: true,
-        findingCount: counts.total,
-        worstSeverity: worstSeverity(counts),
+        ...dim,
+        grade: dropped,
+        // Re-anchor score to the new band's midpoint so score and letter agree.
+        score: anchorScoreToGrade(dropped, dim.score),
+        rationale: dim.rationale === '0 maintainability findings' || dim.findingCount === 0
+            ? note
+            : `${dim.rationale}; ${note}`,
     };
 }
+/**
+ * Re-snap a score to fall within the band of the given grade. Used after
+ * applying the architectural penalty so the displayed score never disagrees
+ * with the displayed letter (e.g., grade C with score 89 would be jarring).
+ *
+ * If the original score is already in-band, keep it; otherwise pick the
+ * band's midpoint as a sensible default.
+ */
+function anchorScoreToGrade(grade, originalScore) {
+    if (grade === 'N/A' || grade === 'D')
+        return originalScore;
+    const ranges = {
+        'A+': [97, 100], A: [93, 96], 'A-': [90, 92],
+        'B+': [87, 89], B: [83, 86], 'B-': [80, 82],
+        'C+': [77, 79], C: [73, 76], 'C-': [70, 72],
+        'F+': [65, 69], F: [56, 64], 'F-': [0, 55],
+    };
+    const [lo, hi] = ranges[grade];
+    if (originalScore >= lo && originalScore <= hi)
+        return originalScore;
+    return Math.round((lo + hi) / 2);
+}
 // ============================================================================
 // Grade Comparison Helpers
 // ============================================================================
-const GRADE_RANK = {
-    F: 1,
-    D: 2,
-    C: 3,
-    B: 4,
-    A: 5,
+const BASE_RANK = { F: 1, C: 2, B: 3, A: 4 };
+function baseIsWorse(a, b) {
+    return BASE_RANK[a] < BASE_RANK[b];
+}
+const FULL_RANK = {
+    'F-': 0, F: 1, 'F+': 2,
+    'C-': 3, C: 4, 'C+': 5,
+    'B-': 6, B: 7, 'B+': 8,
+    'A-': 9, A: 10, 'A+': 11,
 };
-function gradeIsWorse(a, b) {
-    return GRADE_RANK[a] < GRADE_RANK[b];
+function gradeRank(g) {
+    if (g === 'N/A')
+        return -1;
+    if (g === 'D')
+        return 1.5; // legacy: between F+ and C-
+    return FULL_RANK[g];
 }
 function worstOf(grades) {
-    let worst = 'A';
+    let worst = 'A+';
     for (const g of grades) {
-        if (gradeIsWorse(g, worst))
+        if (g === 'N/A')
+            continue;
+        if (gradeRank(g) < gradeRank(worst))
             worst = g;
     }
     return worst;
@@ -345,6 +486,17 @@ function roundOne(n) {
 function dimensionDisplayName(name) {
     return name.charAt(0).toUpperCase() + name.slice(1);
 }
+function makeDimension(name, score, rationale, findingCount, worst) {
+    return {
+        name,
+        score,
+        grade: scoreToGrade(score),
+        rationale,
+        available: true,
+        findingCount,
+        worstSeverity: worst,
+    };
+}
 function naDimension(name) {
     return {
         name,
@@ -359,26 +511,14 @@ function naDimension(name) {
 // ============================================================================
 // Top-Level Entry Point
 // ============================================================================
-/**
- * Compute the full multi-dimensional quality rating from the merged finding
- * set. Callers can override availability in two ways:
- *   - `availableDimensions`: hard whitelist — only listed dims are graded.
- *   - `forceNA`: forces specific dims to N/A even if they would otherwise
- *     auto-detect as available. Use this when the underlying tools didn't
- *     run (e.g., no linter installed -> Maintainability has limited coverage).
- *
- * Default availability rules:
- *   - maintainability is always available (lint/format/length checks always run)
- *   - security/reliability are available iff at least one finding maps there
- *
- * Overall score uses min(avg, worst) so a single bad dimension caps the
- * total — you cannot earn a great overall score by averaging away a hole.
- */
 function bucketByDimension(findings) {
     const security = [];
     const reliability = [];
     const maintainability = [];
+    const architecture = [];
     for (const f of findings) {
+        if (ARCHITECTURE_CATEGORIES.has(f.category))
+            architecture.push(f);
         const dim = categoryToDimension(f.category);
         if (dim === 'security')
             security.push(f);
@@ -387,7 +527,7 @@ function bucketByDimension(findings) {
         else
             maintainability.push(f);
     }
-    return { security, reliability, maintainability };
+    return { security, reliability, maintainability, architecture };
 }
 function isDimensionAvailable(dim, hasFindings, options) {
     if (options?.forceNA?.has(dim))
@@ -398,6 +538,14 @@ function isDimensionAvailable(dim, hasFindings, options) {
     // Auto-detect: maintainability always on, security/reliability iff findings exist.
     return dim === 'maintainability' ? true : hasFindings;
 }
+/**
+ * Combine the available dimensions into a single overall grade + score.
+ *
+ * "Worst dimension wins" for the letter grade — a single failing dimension
+ * caps the overall score, matching how SonarQube's quality gate behaves.
+ * The numeric score is `min(avg, worst)` so a great Maintainability score
+ * can't paper over a Security failure.
+ */
 function computeOverall(availableDims) {
     if (availableDims.length === 0) {
         return { grade: 'N/A', score: 0 };
@@ -405,28 +553,34 @@ function computeOverall(availableDims) {
     const grades = availableDims.map((d) => d.grade);
     const scores = availableDims.map((d) => d.score);
     const avg = scores.reduce((s, n) => s + n, 0) / scores.length;
-    return {
-        grade: worstOf(grades),
-        score: Math.round(Math.min(avg, Math.min(...scores))),
-    };
+    const worst = worstOf(grades);
+    // Re-snap the displayed score so it lives in the worst dimension's band —
+    // otherwise we'd display a B-letter with a C-numeric score (or vice versa).
+    const blendedScore = Math.round(Math.min(avg, Math.min(...scores)));
+    return { grade: worst, score: anchorScoreToGrade(worst, blendedScore) };
 }
 export function computeQualityRating(allFindings, totalLines, options) {
     const buckets = bucketByDimension(allFindings);
+    // Initial dimension grades, before architectural penalty.
     const security = isDimensionAvailable('security', buckets.security.length > 0, options)
         ? gradeSecurity(buckets.security)
         : naDimension('security');
-    const reliability = isDimensionAvailable('reliability', buckets.reliability.length > 0, options)
-        ? gradeReliability(buckets.reliability)
+    const reliabilityRaw = isDimensionAvailable('reliability', buckets.reliability.length > 0, options)
+        ? gradeReliability(buckets.reliability, totalLines)
         : naDimension('reliability');
-    const maintainability = isDimensionAvailable('maintainability', true, options)
+    const maintainabilityRaw = isDimensionAvailable('maintainability', true, options)
         ? gradeMaintainability(buckets.maintainability, totalLines)
         : naDimension('maintainability');
-    const dimensions = [security, reliability, maintainability];
+    // Architectural penalty: hits whichever dimension(s) have arch findings
+    // bucketed into them (currently maintainability via the category map).
+    const archFindings = buckets.architecture;
+    const maintainability = maintainabilityRaw.available
+        ? applyArchPenalty(maintainabilityRaw, archFindings)
+        : maintainabilityRaw;
+    const dimensions = [security, reliabilityRaw, maintainability];
     const availableDims = dimensions.filter((d) => d.available);
     const overall = computeOverall(availableDims);
-    // Quality gate.
-    const qualityGate = computeQualityGate(security, reliability);
-    // Grade rationale.
+    const qualityGate = computeQualityGate(security, reliabilityRaw, archFindings.length);
     const gradeRationale = computeGradeRationale(availableDims, overall.grade, allFindings.length);
     return {
         overall,
@@ -440,17 +594,27 @@ export function computeQualityRating(allFindings, totalLines, options) {
 // ============================================================================
 /**
  * The Quality Gate is a coarse PASS/FAIL signal layered on top of the grades.
- * It only fires for the most user-actionable thresholds — any medium+ security
- * finding, or any critical bug. N/A dimensions never trigger a fail (we don't
- * fail on missing data).
+ * It only fires for the most user-actionable thresholds — any C-or-worse
+ * security grade, any F-tier reliability grade, or 2+ architectural
+ * findings (counted regardless of severity). N/A dimensions never trigger a fail (we don't fail
+ * on missing data).
  */
-function computeQualityGate(security, reliability) {
+function isFTier(g) {
+    return g === 'F+' || g === 'F' || g === 'F-' || g === 'D';
+}
+function isCorWorse(g) {
+    return baseGradeOf(g) === 'C' || isFTier(g);
+}
+function computeQualityGate(security, reliability, archFindingCount) {
     const failingConditions = [];
-    if (security.available && (security.grade === 'C' || security.grade === 'D' || security.grade === 'F')) {
+    if (security.available && isCorWorse(security.grade)) {
         failingConditions.push(`Security grade ${security.grade} — ${security.rationale}`);
     }
-    if (reliability.available && reliability.grade === 'F') {
-        failingConditions.push(`Reliability grade F — ${reliability.rationale}`);
+    if (reliability.available && isFTier(reliability.grade)) {
+        failingConditions.push(`Reliability grade ${reliability.grade} — ${reliability.rationale}`);
+    }
+    if (archFindingCount >= 2) {
+        failingConditions.push(`${archFindingCount} architectural findings`);
     }
     return {
         passed: failingConditions.length === 0,
@@ -467,11 +631,15 @@ function computeGradeRationale(availableDims, overallGrade, totalFindingCount) {
     if (availableDims.length === 0 || overallGrade === 'N/A') {
         return 'No dimensions available to grade';
     }
-    // All available dimensions equal -> "consistent quality".
-    const firstGrade = availableDims[0].grade;
-    const allEqual = availableDims.every((d) => d.grade === firstGrade);
-    if (allEqual) {
-        return `All dimensions ${firstGrade} — consistent quality`;
+    // All available dimensions share the same base letter -> "consistent
+    // quality". With +/- modifiers it's normal for sibling dimensions to land
+    // at A vs A+ depending on within-band position; calling that "inconsistent"
+    // would be misleading. We compare base letters so the user-facing message
+    // captures the high-level shape rather than every minor band difference.
+    const firstBase = baseGradeOf(availableDims[0].grade);
+    const allSameBase = availableDims.every((d) => baseGradeOf(d.grade) === firstBase);
+    if (allSameBase) {
+        return `All dimensions ${firstBase}-tier — consistent quality`;
     }
     // Find the dimension that pinned the overall grade (worst available).
     const worstDim = availableDims.find((d) => d.grade === overallGrade) ??