npm - mstro-app - Versions diffs - 0.5.1 → 0.5.6 - Mend

mstro-app 0.5.1 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (283) hide show

package/server/mcp/classifier/factory.ts ADDED Viewed

@@ -0,0 +1,195 @@
+// Copyright (c) 2025-present Mstro, Inc. All rights reserved.
+// Licensed under the MIT License. See LICENSE file for details.
+/**
+ * Bouncer classifier factory.
+ *
+ * Two entry points:
+ *
+ * - `getClassifier()` — production path. Reads
+ *   `settings.bouncerClassifier: { engine, model }` and returns the
+ *   matching `BouncerClassifier` instance. If the persisted config is
+ *   missing, malformed, or names a non-eligible model, it logs a clear
+ *   warning and falls back to `ClaudeBouncerClassifier` + Haiku — the
+ *   Bouncer must always have a classifier to call, so "no config" and
+ *   "bad config" both collapse to the known-safe default rather than
+ *   throwing.
+ *
+ * - `createBouncerClassifier(options?)` — direct-construction helper used
+ *   by the engineSwap feature-flag gate (see `engine-swap-flag.test.ts`).
+ *   Accepts an explicit `engineId` and is deliberately feature-flag-aware:
+ *   when `engineSwap` is disabled, the flag short-circuits to Claude.
+ *
+ * New callers should prefer `getClassifier()` so the user-selected model
+ * takes effect without plumbing. The bouncer-integration layer constructs
+ * its default classifier lazily so env var changes and settings edits
+ * propagate on the next classification call.
+ */
+import { OpenCodeServerManager } from '../../engines/opencode/OpenCodeServerManager.js';
+import type { EngineId } from '../../engines/types.js';
+import {
+  BOUNCER_ELIGIBLE_MODELS,
+  type BouncerClassifierConfig,
+  DEFAULT_BOUNCER_CLASSIFIER,
+  getBouncerClassifier,
+  isEngineSwapEnabled,
+} from '../../services/settings.js';
+import type { BouncerClassifier } from './BouncerClassifier.js';
+import { ClaudeBouncerClassifier } from './ClaudeBouncerClassifier.js';
+import { OpenCodeBouncerClassifier } from './OpenCodeBouncerClassifier.js';
+/**
+ * Options accepted by `createBouncerClassifier`.
+ */
+export interface ClassifierFactoryOptions {
+  /**
+   * Which engine backs the classifier. Defaults to `'claude-code'` when
+   * omitted. With `engineSwap` off this is ignored and a Claude classifier
+   * is always returned; with the flag on, `'opencode'` throws from
+   * `createBouncerClassifier` — the OpenCode classifier is only built by
+   * the settings-driven `createClassifierForConfig` / `getClassifier` path.
+   */
+  engineId?: EngineId;
+}
+/**
+ * Build the Layer-2 Bouncer classifier straight from an engine id, without
+ * consulting persisted settings. This is the entry point exercised by the
+ * `engineSwap` feature-flag gate; production callers should go through
+ * {@link getClassifier} so the user's configured model is honoured.
+ *
+ * @param options Optional engine selection; `engineId` defaults to
+ *   `'claude-code'`.
+ * @returns A `ClaudeBouncerClassifier` whenever `engineSwap` is disabled or
+ *   the Claude engine is requested.
+ * @throws Error when `engineSwap` is enabled and `'opencode'` (or an
+ *   unrecognised engine id) is requested.
+ */
+export function createBouncerClassifier(
+  options: ClassifierFactoryOptions = {},
+): BouncerClassifier {
+  // Flag off: the requested engine is not even looked at.
+  if (!isEngineSwapEnabled()) return new ClaudeBouncerClassifier();
+  const requested = options.engineId ?? 'claude-code';
+  if (requested === 'claude-code') {
+    return new ClaudeBouncerClassifier();
+  }
+  if (requested === 'opencode') {
+    // The OpenCode classifier is wired through the settings path
+    // (`getClassifier()`), where the shared `OpenCodeServerManager` is
+    // available. Direct engine-id construction is kept deliberately narrow.
+    const notAvailable =
+      'OpenCode bouncer classifier is not implemented yet (Epic 4). ' +
+      'Keep engineSwap off until the OpenCode classifier ships.';
+    throw new Error(notAvailable);
+  }
+  // Compile-time exhaustiveness guard: adding a new EngineId member makes
+  // this assignment a type error until a branch is written for it.
+  const unhandled: never = requested;
+  throw new Error(`Unknown classifier engine id: ${String(unhandled)}`);
+}
+/**
+ * Module-level cache for the `OpenCodeServerManager` handed to OpenCode
+ * classifiers, together with the factory that creates it. The manager is
+ * built lazily on first request and then reused; the factory is a mutable
+ * binding so tests can substitute their own manager (see
+ * `__setOpenCodeManagerFactoryForTests`).
+ */
+let sharedOpenCodeManager: OpenCodeServerManager | null = null;
+let openCodeManagerFactory: () => OpenCodeServerManager = () => {
+  return new OpenCodeServerManager({ registerProcessHandlers: true });
+};
+/** Return the cached manager, creating it through the current factory on first use. */
+function getSharedOpenCodeServerManager(): OpenCodeServerManager {
+  const manager = sharedOpenCodeManager ?? openCodeManagerFactory();
+  sharedOpenCodeManager = manager;
+  return manager;
+}
+/**
+ * Test hook: replace the factory that builds the classifier's OpenCode
+ * manager. Any cached manager is discarded so the next lookup goes through
+ * the new factory.
+ *
+ * @param factory Replacement factory, or `null` to restore the default one
+ *   (a new `OpenCodeServerManager` with `registerProcessHandlers: true`).
+ */
+export function __setOpenCodeManagerFactoryForTests(
+  factory: (() => OpenCodeServerManager) | null,
+): void {
+  sharedOpenCodeManager = null;
+  if (factory != null) {
+    openCodeManagerFactory = factory;
+  } else {
+    openCodeManagerFactory = () =>
+      new OpenCodeServerManager({ registerProcessHandlers: true });
+  }
+}
+/**
+ * Single choke point for reporting that the classifier factory dropped back
+ * to Claude+Haiku, so every fallback path shares one greppable prefix.
+ * Emitted with `console.warn`.
+ *
+ * @param reason Human-readable cause, appended after the fixed prefix.
+ */
+function logFallback(reason: string): void {
+  const line = `[Bouncer] Classifier config invalid, falling back to Claude+Haiku: ${reason}`;
+  console.warn(line);
+}
+/**
+ * Build the `BouncerClassifier` described by `config`, with no fallback.
+ * Callers that must always end up with a classifier should use
+ * {@link getClassifier}, which wraps this function.
+ *
+ * @param config Engine + model pair to construct a classifier for.
+ * @returns A Claude or OpenCode classifier matching `config.engine`.
+ * @throws Error when `config.model` is not listed in
+ *   `BOUNCER_ELIGIBLE_MODELS` for `config.engine`, or the engine id is
+ *   unknown.
+ */
+export function createClassifierForConfig(
+  config: BouncerClassifierConfig,
+): BouncerClassifier {
+  const { engine, model } = config;
+  const allowedModels = BOUNCER_ELIGIBLE_MODELS[engine];
+  const isEligible = Boolean(allowedModels) && allowedModels.includes(model);
+  if (!isEligible) {
+    throw new Error(
+      `Model '${model}' is not bouncer-eligible for engine '${engine}'`,
+    );
+  }
+  if (engine === 'claude-code') {
+    // The model is not forwarded here: per the original note, the Claude
+    // classifier currently hardcodes `--model haiku` in its subprocess
+    // call, so an eligible non-Haiku choice still runs Haiku until the
+    // model is threaded through in a follow-up.
+    return new ClaudeBouncerClassifier();
+  }
+  if (engine === 'opencode') {
+    const manager = getSharedOpenCodeServerManager();
+    return new OpenCodeBouncerClassifier({ manager, model });
+  }
+  // Compile-time exhaustiveness guard for newly added engine ids.
+  const unhandled: never = engine;
+  throw new Error(`Unknown classifier engine id: ${String(unhandled)}`);
+}
+/**
+ * Production classifier accessor. Reads the user's Bouncer classifier
+ * choice via `getBouncerClassifier()` and returns a fresh
+ * `BouncerClassifier` for it.
+ *
+ * This function never throws on bad configuration: if the settings read
+ * fails, or the stored engine/model pair cannot be turned into a
+ * classifier, the reason is logged through `logFallback` and a
+ * `ClaudeBouncerClassifier` is returned instead. The Bouncer is a required
+ * security layer, so "no classifier available" is not an acceptable
+ * outcome.
+ *
+ * @returns The configured classifier, or the Claude fallback.
+ */
+export function getClassifier(): BouncerClassifier {
+  let config: BouncerClassifierConfig;
+  try {
+    config = getBouncerClassifier();
+  } catch (err) {
+    logFallback(err instanceof Error ? err.message : String(err));
+    return new ClaudeBouncerClassifier();
+  }
+  try {
+    return createClassifierForConfig(config);
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    // Both outcomes return the same fallback, so the only useful thing the
+    // "was it already the default?" check can do is change the log line: a
+    // rejected default points at a broken eligibility catalogue rather than
+    // a bad user choice. Optional chaining keeps this handler from throwing
+    // if a corrupted settings store ever yields a nullish config.
+    const failedOnDefault =
+      config?.engine === DEFAULT_BOUNCER_CLASSIFIER.engine &&
+      config?.model === DEFAULT_BOUNCER_CLASSIFIER.model;
+    logFallback(
+      failedOnDefault
+        ? `default classifier config could not be constructed (${message})`
+        : message,
+    );
+    return new ClaudeBouncerClassifier();
+  }
+}

package/server/mcp/server.ts CHANGED Viewed

@@ -62,6 +62,51 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
   };
 });
+/**
+ * Forward an AskUserQuestion tool call to the running CLI server and wait
+ * for the user's answers.
+ *
+ * Routing context comes from the environment: `MSTRO_PORT`, `MSTRO_TAB_ID`
+ * and `MSTRO_BOUNCER_SECRET` are required; `MSTRO_CURRENT_TOOL_USE_ID` is
+ * optional and replaced by a generated `aq-…` id when unset or empty.
+ *
+ * The bridge fails open. Missing routing context, a non-OK HTTP status, or
+ * any thrown error (network failure, unparseable body) is logged to stderr
+ * and answered with `behavior: 'allow'` and the input untouched, so the
+ * bridge itself never blocks the run.
+ *
+ * @param input Raw tool input; only `input.questions` is sent to the server.
+ * @returns An `allow` decision. On success `updatedInput` carries the
+ *   original `questions` plus the response's `answers` object (or `{}` when
+ *   the response has no object-valued `answers`).
+ */
+async function bridgeAskUserQuestion(
+  input: Record<string, unknown>,
+): Promise<{ behavior: 'allow'; updatedInput: Record<string, unknown> }> {
+  const {
+    MSTRO_PORT: port,
+    MSTRO_TAB_ID: tabId,
+    MSTRO_BOUNCER_SECRET: secret,
+  } = process.env;
+  const toolUseId = process.env.MSTRO_CURRENT_TOOL_USE_ID || `aq-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
+  // Every failure path answers the same way: allow, input unchanged.
+  const passThrough = () => ({ behavior: 'allow' as const, updatedInput: input });
+  if (!(port && tabId && secret)) {
+    console.error('[MCP Bouncer] AskUserQuestion: missing routing context (port/tabId/secret) — passing through with no answers');
+    return passThrough();
+  }
+  try {
+    const endpoint = `http://127.0.0.1:${port}/internal/ask-user-question`;
+    const res = await fetch(endpoint, {
+      method: 'POST',
+      headers: { 'content-type': 'application/json', 'x-mstro-bouncer-secret': secret },
+      body: JSON.stringify({ toolUseId, tabId, questions: input.questions }),
+    });
+    if (!res.ok) {
+      console.error(`[MCP Bouncer] AskUserQuestion bridge returned ${res.status} — passing through with no answers`);
+      return passThrough();
+    }
+    const parsed = (await res.json()) as { answers?: Record<string, string> };
+    // Anything that is not an object-valued `answers` collapses to "no answers".
+    const answers = parsed.answers && typeof parsed.answers === 'object' ? parsed.answers : {};
+    return {
+      behavior: 'allow',
+      updatedInput: { questions: input.questions, answers },
+    };
+  } catch (err) {
+    const detail = err instanceof Error ? err.message : String(err);
+    console.error(`[MCP Bouncer] AskUserQuestion bridge failed: ${detail} — passing through with no answers`);
+    return passThrough();
+  }
+}
 /**
  * Handle tool calls (approval_prompt)
  */
@@ -75,6 +120,18 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
     input: Record<string, unknown>;
   };
+  // AskUserQuestion is a clarifying-question tool — Claude needs the user's
+  // answers in `updatedInput.answers`, not a yes/no permission decision. Skip
+  // the security review entirely (the prior pattern fast-path also auto-allowed
+  // this) and route to the web UI bridge for real interactive answering.
+  if (tool_name === 'AskUserQuestion') {
+    console.error('[MCP Bouncer] AskUserQuestion received — bridging to web UI');
+    const response = await bridgeAskUserQuestion(input);
+    return {
+      content: [{ type: 'text', text: JSON.stringify(response) }],
+    };
+  }
   console.error(`[MCP Bouncer] Analyzing ${tool_name} request...`);
   // Format operation string for bouncer analysis

package/server/routes/index.ts CHANGED Viewed

@@ -9,4 +9,5 @@
 export { createFileRoutes } from './files.js'
 export { createImproviseRoutes } from './improvise.js'
 export { createInstanceRoutes, createShutdownRoute } from './instances.js'
+export { createInternalRoutes } from './internal.js'
 export { createNotificationRoutes } from './notifications.js'

package/server/routes/internal.ts ADDED Viewed

@@ -0,0 +1,112 @@
+// Copyright (c) 2025-present Mstro, Inc. All rights reserved.
+/**
+ * Internal Routes
+ *
+ * HTTP endpoints used by sibling subprocesses (like the MCP bouncer) to talk
+ * back to the running CLI server. NOT mounted under `/api/*` — these are gated
+ * by the per-process bouncer secret instead of the user's session token.
+ *
+ * Currently a single endpoint:
+ *   POST /internal/ask-user-question
+ *     Bouncer pauses Claude on AskUserQuestion; this blocks until the web
+ *     user answers, then returns the answers Claude needs to continue.
+ */
+import { Hono } from 'hono'
+import {
+  isValidBouncerSecret,
+  registerPendingQuestion,
+} from '../services/websocket/ask-user-question-bridge.js'
+import type { HandlerContext } from '../services/websocket/handler-context.js'
+import { broadcastTabEvent } from '../services/websocket/tab-broadcast.js'
+import type {
+  AskUserQuestionItem,
+  AskUserQuestionPayload,
+} from '../services/websocket/types.js'
+/**
+ * Shape of the JSON body posted to `/ask-user-question`, before validation.
+ * Every field is `unknown` because the body is only cast from parsed JSON;
+ * the route handler narrows each field itself.
+ */
+interface AskUserQuestionRequestBody {
+  /** Identifier of the tool call being answered; must be a non-empty string. */
+  toolUseId?: unknown
+  /** Tab the question is broadcast to; must be a non-empty string. */
+  tabId?: unknown
+  /** Question list; validated by `parseQuestions`. */
+  questions?: unknown
+  /** Optional override (ms) for the default wait timeout — 15 minutes per the original note. */
+  timeoutMs?: unknown
+}
+/**
+ * Validate untrusted input as a list of questions without throwing.
+ *
+ * Each entry needs string `question` and `header` fields and an `options`
+ * array whose members each have a string `label`. Non-string
+ * `description` becomes `''`, non-string `preview` becomes `undefined`,
+ * and `multiSelect` is `true` only for a literal `true`.
+ *
+ * @param value Anything; typically the `questions` field of a parsed body.
+ * @returns The normalised questions, or `null` as soon as any part of the
+ *   structure is malformed. An empty array is returned as-is.
+ */
+function parseQuestions(value: unknown): AskUserQuestionItem[] | null {
+  if (!Array.isArray(value)) return null
+  // Normalise one option; `null` marks it as invalid.
+  const toOption = (candidate: unknown) => {
+    if (typeof candidate !== 'object' || !candidate) return null
+    const fields = candidate as Record<string, unknown>
+    const { label, description, preview } = fields
+    if (typeof label !== 'string') return null
+    return {
+      label,
+      description: typeof description === 'string' ? description : '',
+      preview: typeof preview === 'string' ? preview : undefined,
+    }
+  }
+  const parsed: AskUserQuestionItem[] = []
+  for (const entry of value) {
+    if (typeof entry !== 'object' || !entry) return null
+    const fields = entry as Record<string, unknown>
+    const { question, header } = fields
+    if (typeof question !== 'string' || typeof header !== 'string') return null
+    const rawOptions = fields.options
+    if (!Array.isArray(rawOptions)) return null
+    const options = rawOptions.map((candidate) => toOption(candidate))
+    if (options.some((option) => option === null)) return null
+    parsed.push({
+      question,
+      header,
+      options: options as AskUserQuestionItem['options'],
+      multiSelect: fields.multiSelect === true,
+    })
+  }
+  return parsed
+}
+/**
+ * Build the Hono sub-app serving the internal (subprocess → CLI server)
+ * endpoints.
+ *
+ * `POST /ask-user-question`:
+ *   - 403 when the `x-mstro-bouncer-secret` header fails
+ *     `isValidBouncerSecret`.
+ *   - 400 when the body is not JSON, or `toolUseId`, `tabId`, or a
+ *     non-empty well-formed `questions[]` is missing.
+ *   - Otherwise broadcasts `askUserQuestion` to the tab, waits on
+ *     `registerPendingQuestion`, and responds `{ answers }`.
+ *   - If that wait rejects, broadcasts `askUserQuestionDismissed` and
+ *     responds 504 when the rejection message is exactly `'timeout'`,
+ *     410 for anything else.
+ *
+ * @param ctx Handler context forwarded to `broadcastTabEvent`.
+ * @returns The configured Hono app; the mount prefix is chosen by the caller.
+ */
+export function createInternalRoutes(ctx: HandlerContext): Hono {
+  // Largest delay a Node timer accepts (2^31 - 1 ms, roughly 24.8 days).
+  const MAX_TIMER_DELAY_MS = 2147483647
+  const app = new Hono()
+  app.post('/ask-user-question', async (c) => {
+    const secret = c.req.header('x-mstro-bouncer-secret')
+    if (!isValidBouncerSecret(secret)) {
+      return c.json({ error: 'Forbidden' }, 403)
+    }
+    let body: AskUserQuestionRequestBody
+    try {
+      body = (await c.req.json()) as AskUserQuestionRequestBody
+    } catch {
+      return c.json({ error: 'Invalid JSON' }, 400)
+    }
+    const toolUseId = typeof body.toolUseId === 'string' ? body.toolUseId : ''
+    const tabId = typeof body.tabId === 'string' ? body.tabId : ''
+    const questions = parseQuestions(body.questions)
+    if (!toolUseId || !tabId || !questions || questions.length === 0) {
+      return c.json({ error: 'toolUseId, tabId, and non-empty questions[] are required' }, 400)
+    }
+    // Only honour a finite, positive override, clamped to the timer limit.
+    // `JSON.parse` turns an out-of-range literal such as `1e999` into
+    // `Infinity`, and Node timers replace any delay above 2^31-1 ms with
+    // 1 ms — which would dismiss the question at once instead of extending
+    // the wait. NOTE(review): assumes `registerPendingQuestion` arms a
+    // `setTimeout` with this value — confirm.
+    const requestedTimeout = body.timeoutMs
+    const timeoutMs =
+      typeof requestedTimeout === 'number' &&
+      Number.isFinite(requestedTimeout) &&
+      requestedTimeout > 0
+        ? Math.min(requestedTimeout, MAX_TIMER_DELAY_MS)
+        : undefined
+    const payload: AskUserQuestionPayload = { toolUseId, questions }
+    broadcastTabEvent(ctx, tabId, 'askUserQuestion', payload)
+    try {
+      const answers = await registerPendingQuestion({ toolUseId, tabId, timeoutMs })
+      return c.json({ answers })
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : 'cancelled'
+      const timedOut = reason === 'timeout'
+      // Tell every web client to dismiss the card so users don't keep poking
+      // an already-dead question.
+      broadcastTabEvent(ctx, tabId, 'askUserQuestionDismissed', {
+        toolUseId,
+        reason: timedOut ? 'timeout' : 'cancelled',
+      })
+      const status = timedOut ? 504 : 410
+      return c.json({ error: reason }, status)
+    }
+  })
+  return app
+}

package/server/services/plan/agent-resolver.ts ADDED Viewed

@@ -0,0 +1,115 @@
+// Copyright (c) 2025-present Mstro, Inc. All rights reserved.
+/**
+ * Agent Resolver — Maps issue.agents hints to subagents installed on the user's system.
+ *
+ * Issue front matter may specify `agents` as either canonical Claude Code subagent
+ * names (e.g. `backend-architect`) or general role pointers (e.g. `backend engineer`).
+ * This module bridges the two: it consults AgentManager (project / global / bundled
+ * `.claude/agents/`) and resolves each hint to a concrete agent name when possible,
+ * falling back to the original hint when no match is found so the executor can still
+ * surface the user's intent in the prompt.
+ */
+import { type AgentInfo, agentManager } from '../../utils/agent-manager.js';
+/**
+ * Outcome of resolving one `agents` hint. Unmatched hints are kept, with
+ * both `resolvedName` and `info` set to `null`.
+ */
+export interface ResolvedAgent {
+  /** The hint text after trimming surrounding whitespace. */
+  hint: string;
+  /** Name of the best-matching installed agent, or `null` when nothing matched. */
+  resolvedName: string | null;
+  /** Full record of the matched agent, or `null` when nothing matched. */
+  info: AgentInfo | null;
+}
+/** Matches every run of characters other than lowercase ASCII letters and digits. */
+const NON_WORD = /[^a-z0-9]+/g;
+/**
+ * Canonicalise text for comparison: lowercase it, turn each run of
+ * non-alphanumeric characters into a single space, and trim the ends.
+ */
+function normalize(input: string): string {
+  const lowered = input.toLowerCase();
+  const spaced = lowered.replace(NON_WORD, ' ');
+  return spaced.trim();
+}
+/** Split text into its normalised, non-empty words. */
+function tokenize(input: string): string[] {
+  const words = normalize(input).split(' ');
+  return words.filter((word) => word.length > 0);
+}
+/**
+ * Collect the agents visible from `workingDir`, one entry per agent name.
+ * Sources are consulted in priority order — project, then global, then
+ * bundled — and the first agent seen under a given name wins.
+ *
+ * @param workingDir Directory passed to `agentManager.listProjectAgents`.
+ * @returns De-duplicated agents in first-seen order.
+ */
+function listAvailableAgents(workingDir: string): AgentInfo[] {
+  const projectAgents = agentManager.listProjectAgents(workingDir);
+  const globalAgents = agentManager.listGlobalAgents();
+  const bundledAgents = agentManager.listBundledAgents();
+  const byName = new Map<string, AgentInfo>();
+  for (const source of [projectAgents, globalAgents, bundledAgents]) {
+    for (const candidate of source) {
+      // A higher-priority source already claimed this name.
+      if (byName.has(candidate.name)) continue;
+      byName.set(candidate.name, candidate);
+    }
+  }
+  return [...byName.values()];
+}
+/**
+ * Rate how well `agent` fits `hint`; larger is better.
+ *
+ * An exact match between the normalised hint and the normalised agent name
+ * scores `Infinity`. Otherwise each hint token of two or more characters
+ * earns 1 point when it occurs as a substring of the agent's normalised
+ * name or description, plus 0.5 more when it occurs in the name itself.
+ *
+ * @returns 0 when no qualifying token is found anywhere.
+ */
+function matchScore(hint: string, agent: AgentInfo): number {
+  const hintKey = normalize(hint);
+  const nameKey = normalize(agent.name);
+  if (hintKey === nameKey) return Number.POSITIVE_INFINITY;
+  const tokens = tokenize(hint);
+  if (tokens.length === 0) return 0;
+  const searchable = `${nameKey} ${normalize(agent.description ?? '')}`;
+  let anywhereHits = 0;
+  let nameHits = 0;
+  for (const token of tokens) {
+    // Single-character tokens are too noisy to count.
+    if (token.length < 2) continue;
+    if (searchable.includes(token)) anywhereHits += 1;
+    if (nameKey.includes(token)) nameHits += 1;
+  }
+  // Name hits are a bonus on top of the general hit count.
+  return anywhereHits === 0 ? 0 : anywhereHits + nameHits * 0.5;
+}
+/**
+ * Pick the agent that best matches `hint`.
+ *
+ * @param hint Hint text to resolve.
+ * @param available Candidate agents; on equal scores the earlier one wins.
+ * @returns The top-scoring agent, or `null` when every candidate scores 0.
+ */
+function resolveHint(hint: string, available: AgentInfo[]): AgentInfo | null {
+  let winner: AgentInfo | null = null;
+  let winningScore = 0;
+  for (const candidate of available) {
+    const candidateScore = matchScore(hint, candidate);
+    // Strictly-greater only: ties keep the earlier candidate.
+    if (!(candidateScore > winningScore)) continue;
+    winningScore = candidateScore;
+    winner = candidate;
+  }
+  return winner;
+}
+/**
+ * Resolve each entry of `agents` against the agents available from
+ * `workingDir`.
+ *
+ * Hints are trimmed and blank ones dropped. A hint with no match is still
+ * returned, with `resolvedName` and `info` set to `null`, so callers can
+ * surface the original wording.
+ *
+ * @param agents Hint strings, e.g. from issue front matter.
+ * @param workingDir Directory used to discover project-level agents.
+ * @returns One result per non-blank hint, in input order; `[]` for a
+ *   missing or empty list.
+ */
+export function resolveAgentHints(agents: string[], workingDir: string): ResolvedAgent[] {
+  if (!agents?.length) return [];
+  const catalog = listAvailableAgents(workingDir);
+  const resolved: ResolvedAgent[] = [];
+  for (const raw of agents) {
+    const hint = raw.trim();
+    if (!hint) continue;
+    const info = resolveHint(hint, catalog);
+    resolved.push({ hint, resolvedName: info?.name ?? null, info });
+  }
+  return resolved;
+}

package/server/services/plan/agents/code-review.md CHANGED Viewed

@@ -74,19 +74,49 @@ For each finding, use this reasoning process:
 ## Scoring Guidelines
-The overall grade is computed deterministically from your findings, not from a number you supply. Severity and category on each finding are what drive the grade — pick them carefully.
+The overall grade is computed deterministically from your findings, not from a number you supply. **Severity and category on each finding are what drive the grade — pick them carefully.** When in doubt, downgrade.
-Three independent dimension grades are computed:
+### Severity Ladder — calibrate by likelihood × user impact, not just by topic
-- **Security** (category: `security`) — uses a severity-threshold rule: A = 0 findings, B = only low, C = ≥1 medium, D = ≥1 high, F = ≥1 critical.
-- **Reliability** (categories: `bugs`, `logic`, `performance`) — severity-threshold rule, slightly more lenient: A = 0 findings or ≤1 low, B = ≥2 low or ≤2 medium, C = ≥3 medium or ≥1 high, D = ≥2 high, F = ≥1 critical.
-- **Maintainability** (categories: `architecture`, `oop`, `maintainability`) — density-based (issues per 1000 lines), with a severity escape hatch: any high finding caps at C, any critical caps at D.
+Severity should answer two questions:
+1. **How likely is this to actually trigger?** (Common path vs. edge case vs. theoretical)
+2. **What happens when it triggers?** (User-visible breakage / data loss vs. internal-only / cosmetic)
-Overall grade = the worst of the three dimensions. A single critical security finding caps the entire codebase at F.
+Use this ladder. Worked examples follow each level.
-This means **severity is load-bearing**: marking something `high` when it's really `low` will swing the grade unfairly. When in doubt, downgrade. A finding without clear evidence of harm is `low`.
+- **`critical`** — Reserved for "this is broken in production today on common code paths." Active data corruption, RCE, auth bypass for normal users, unrecoverable crash on the happy path. If the on-call would page at 3 AM for it, it's critical.
+  - ✅ SQL injection on a public form. Hard-coded production credentials in a deployed file. A `null`-deref on the homepage render path.
+  - ❌ "Could become a problem if traffic 100×". "Edge case where two clients race within 50ms." A theoretical bug in error-handling code that has never run.
-You may still emit `score`, `grade`, and `scoreRationale` for reference — they are persisted but ignored when computing the displayed grade. Focus your effort on accurate findings, not on guessing the overall number.
+- **`high`** — A real bug or vulnerability that **definitely affects normal users on common code paths** with **user-visible consequences** (broken UI, wrong data shown, action silently fails). Or an exploitable security issue that requires only realistic conditions.
+  - ✅ Wrong state shown after a successful save (UI/UX bug). XSS via reflected URL parameter on a logged-in dashboard. Wrong calculation in a money-handling code path. Memory leak that grows on every page-view.
+  - ❌ Race condition on degraded shutdown paths. Edge-case exploit gated behind admin auth on a feature that hasn't shipped. A theoretical SSRF on an internal endpoint with no user reach. Defense-in-depth gaps (rate limit absent, header missing) — those are `low`.
+- **`medium`** — Real issue but affects an edge case OR has limited user impact OR requires unusual conditions to trigger. Worth fixing eventually; not blocking.
+  - ✅ Missing error handling on a rarely-failing dependency. Logic bug in an admin-only page. A bug only reachable when two specific feature flags are both on. Performance issue that adds 50 ms but isn't user-perceptible.
+  - ❌ "Best practice" preferences with no user impact. Theoretical bugs in unreachable code.
+- **`low`** — Improbable, theoretical, or cosmetic. Defense-in-depth missing, style/preference, "could be cleaner." Many of these are fine to leave for years.
+  - ✅ Missing rate limit on a low-traffic admin endpoint. SQL injection-shaped pattern that ends up safely parameterized. A `console.log` left in code. A nullable field that's only null in a code path that never executes.
+### Likelihood-weighted severity rules
+Apply these as veto rules **after** you've chosen a severity from topic alone:
+- If the bug only fires on a path that **realistically never executes in production**, downgrade by at least one step (high→medium, medium→low). A bug that requires "the network connection drops between line 42 and 43 of the shutdown handler" is `low` even if its consequences would be severe.
+- If the issue has **no user-visible effect** (no UI/UX impact, no incorrect data shown, no security boundary crossed), it caps at `medium`. UI/UX wiring bugs and broken interactive flows skew higher; pure-internal architecture / observability gaps skew lower.
+- If the issue is a **defense-in-depth gap** (rate limits, hardening headers, additional validation on already-validated input), cap at `low` unless you can articulate the realistic exploit chain that survives the existing defenses.
+- If exploitability requires **conditions that only matter at high traffic / wide user attack surface**, downgrade for early-stage projects: this is `low` or `medium`, not `high`. (Make this explicit in the description so the reader knows the call.)
+### Three dimension grades the engine derives
+- **Security** (category: `security`) — strictest. A = 0 findings, B = only low, C = ≥1 medium, F = ≥1 high, F- = ≥1 critical.
+- **Reliability** (categories: `bugs`, `logic`, `performance`) — density-based grade per KLOC with severity escape: critical → F, any high → caps at C. Multiple medium findings escalate gradually rather than auto-failing.
+- **Maintainability** (categories: `architecture`, `oop`, `maintainability`) — density-based with severity escape: critical → F, any high → C.
+Overall grade = the worst of the three. A single critical security finding caps the entire codebase at F-.
+You may still emit `score`, `grade`, and `scoreRationale` for reference — they are persisted but ignored when computing the displayed grade. Focus your effort on accurate severity classification, not on guessing the overall number.
 ## Output