npm - agentboss - Versions diffs - 0.1.0 - Mend

agentboss 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/README.md +34 -0
package/bin/aboss.js +288 -0
package/client/dist/assets/index-C1wFD_Vo.css +1 -0
package/client/dist/assets/index-DBj1Ujlx.js +137 -0
package/client/dist/index.html +34 -0
package/package.json +64 -0
package/server/analysis/daily-aggregator.js +258 -0
package/server/analysis/difficulty.js +129 -0
package/server/analysis/dimensions/ai-knowledge.js +172 -0
package/server/analysis/dimensions/ai-tools.js +161 -0
package/server/analysis/dimensions/judgement.js +107 -0
package/server/analysis/dimensions/llm-merge.js +57 -0
package/server/analysis/dimensions/output-quality.js +167 -0
package/server/analysis/dimensions/problem-definition.js +104 -0
package/server/analysis/dimensions/system-thinking.js +225 -0
package/server/analysis/evidence-builder.js +104 -0
package/server/analysis/job.js +273 -0
package/server/analysis/report-builder.js +581 -0
package/server/analysis/scoring-v2.js +72 -0
package/server/analysis/text-signals.js +179 -0
package/server/analysis/thresholds-v2.js +358 -0
package/server/api/advice.js +124 -0
package/server/api/analysis.js +141 -0
package/server/api/execution.js +330 -0
package/server/api/metrics.js +277 -0
package/server/api/overview.js +308 -0
package/server/api/project.js +255 -0
package/server/api/reports.js +125 -0
package/server/api/sessions.js +118 -0
package/server/api/settings.js +119 -0
package/server/db/connection.js +175 -0
package/server/db/queries.js +1051 -0
package/server/db/schema.js +487 -0
package/server/etl/active-time.js +150 -0
package/server/etl/backfill-subagents.js +178 -0
package/server/etl/claude-code.js +826 -0
package/server/etl/detect.js +341 -0
package/server/etl/judge-filter.js +117 -0
package/server/etl/opencode.js +606 -0
package/server/execution/job.js +662 -0
package/server/execution/prompt.js +227 -0
package/server/execution/runner.js +218 -0
package/server/index.js +94 -0
package/server/llm/advice-prompt.js +339 -0
package/server/llm/advice.js +384 -0
package/server/llm/analysis-prompt.js +162 -0
package/server/llm/cli-runner.js +249 -0
package/server/llm/judge-prompts.js +179 -0
package/server/llm/judge.js +118 -0
package/server/llm/project-advice-prompt.js +332 -0
package/server/llm/project-advice.js +491 -0
package/server/llm/session-analyzer.js +122 -0
package/server/utils/project.js +80 -0

package/client/dist/index.html ADDED Viewed

@@ -0,0 +1,34 @@
+<!DOCTYPE html>
+<html lang="zh-Hans">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Agent Boss</title>
+    <link rel="preconnect" href="https://fonts.googleapis.com" />
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
+    <link
+      href="https://fonts.googleapis.com/css2?family=Geist+Mono:wght@400;500;600&family=Space+Grotesk:wght@400;500;600;700&display=swap"
+      rel="stylesheet"
+    />
+    <style>
+      html, body { background: #F5F7FA; }
+    </style>
+    <script>
+      // Apply the saved theme before first paint to avoid a flash of the
+      // wrong palette. Mirrors the localStorage key used by ThemeToggle.
+      (function () {
+        try {
+          if (localStorage.getItem('ab-theme') === 'hack') {
+            document.documentElement.setAttribute('data-theme', 'hack');
+            document.documentElement.style.background = '#0a0e0f';
+          }
+        } catch (e) {}
+      })();
+    </script>
+    <script type="module" crossorigin src="/assets/index-DBj1Ujlx.js"></script>
+    <link rel="stylesheet" crossorigin href="/assets/index-C1wFD_Vo.css">
+  </head>
+  <body>
+    <div id="root"></div>
+  </body>
+</html>

package/package.json ADDED Viewed

@@ -0,0 +1,64 @@
+{
+  "name": "agentboss",
+  "version": "0.1.0",
+  "description": "AI Agent collaboration analytics - become your AI agent's boss, not its babysitter",
+  "main": "server/index.js",
+  "bin": {
+    "aboss": "./bin/aboss.js"
+  },
+  "scripts": {
+    "start": "npm run client:build && node bin/aboss.js",
+    "server": "node bin/aboss.js",
+    "dev": "node bin/aboss.js --no-open",
+    "client:dev": "cd client && npx vite",
+    "client:build": "cd client && npx vite build",
+    "test": "node --test server/**/*.test.js",
+    "prepublishOnly": "npm run client:build"
+  },
+  "files": [
+    "bin/",
+    "server/**/*.js",
+    "!server/**/*.test.js",
+    "!server/**/__fixtures__/**",
+    "!server/**/__tests__/**",
+    "!server/test-utils/**",
+    "client/dist/",
+    "README.md"
+  ],
+  "keywords": [
+    "ai",
+    "agent",
+    "analytics",
+    "opencode",
+    "claude-code",
+    "developer-tools"
+  ],
+  "author": "Felix",
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/2026hackathon/AgentBoss.git"
+  },
+  "homepage": "https://github.com/2026hackathon/AgentBoss#readme",
+  "bugs": {
+    "url": "https://github.com/2026hackathon/AgentBoss/issues"
+  },
+  "dependencies": {
+    "express": "^4.21.0",
+    "open": "^10.1.0",
+    "sql.js": "^1.12.0",
+    "sqlite3": "^6.0.1",
+    "uuid": "^11.1.0"
+  },
+  "devDependencies": {
+    "@vitejs/plugin-react": "^4.3.0",
+    "react": "^18.3.0",
+    "react-dom": "^18.3.0",
+    "react-router-dom": "^6.28.0",
+    "recharts": "^2.15.0",
+    "vite": "^5.4.0"
+  },
+  "engines": {
+    "node": ">=18.0.0"
+  }
+}

package/server/analysis/daily-aggregator.js ADDED Viewed

@@ -0,0 +1,258 @@
+/**
+ * Daily Summary Aggregation for Agent Boss
+ *
+ * Computes daily_summary and hourly_activity rows for a given date by
+ * aggregating data from unified_session and unified_message.
+ *
+ * @author Felix
+ */
+const {
+  getSessionsByDate,
+  getMessagesBySession,
+  upsertDailySummary,
+  upsertHourlyActivity,
+} = require('../db/queries');
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
/**
 * Group an array of objects by the value of one of their properties.
 *
 * Items whose value for `key` is falsy (undefined, null, '', 0, ...) are
 * collected under the literal bucket name 'unknown'.
 *
 * The returned dictionary has no prototype, so values that collide with
 * Object.prototype members ('constructor', 'toString', '__proto__', ...)
 * are grouped like any other value instead of resolving to an inherited
 * member and throwing on `.push`. It still works with Object.keys,
 * Object.entries and bracket access.
 *
 * @param {Object[]} items
 * @param {string} key
 * @returns {Object<string, Object[]>}
 */
function groupBy(items, key) {
  const groups = Object.create(null);
  for (const item of items) {
    const bucket = item[key] || 'unknown';
    if (!groups[bucket]) groups[bucket] = [];
    groups[bucket].push(item);
  }
  return groups;
}
/**
 * Extract the hour of day (0–23) from a timestamp.
 *
 * The hour comes from Date#getHours, i.e. it is expressed in the local
 * timezone of the running process.
 *
 * A missing timestamp maps to hour 0. A timestamp that cannot be parsed
 * also maps to hour 0, so callers that use the result as an array index
 * never receive NaN.
 *
 * @param {string} timestamp  ISO 8601 datetime
 * @returns {number}  Integer in the range 0–23
 */
function getHour(timestamp) {
  if (!timestamp) return 0;
  const hour = new Date(timestamp).getHours();
  // `Invalid Date` reports NaN for every component.
  return Number.isNaN(hour) ? 0 : hour;
}
/**
 * Sum a numeric field across an array of objects.
 *
 * Each value is coerced with Number() before being added, so a numeric
 * string such as '3' contributes 3 instead of turning the running total
 * into a concatenated string. Values that are missing or do not coerce to
 * a finite number (undefined, NaN, non-numeric strings, ...) contribute 0.
 *
 * @param {Object[]} items
 * @param {string} field
 * @returns {number}
 */
function sumField(items, field) {
  let total = 0;
  for (const item of items) {
    const value = Number(item[field]);
    if (Number.isFinite(value)) total += value;
  }
  return total;
}
/**
 * Determine which hour of the day holds the most messages.
 *
 * Every message is bucketed by the hour that getHour() reports for its
 * `timestamp`. When several hours share the highest count, the earliest
 * of them is returned.
 *
 * @param {Object[]} messages
 * @returns {number|null}  Hour 0–23, or null if no messages
 */
function findPeakHour(messages) {
  if (messages.length === 0) return null;

  const perHour = new Array(24).fill(0);
  for (const { timestamp } of messages) {
    const hour = getHour(timestamp);
    // Only the 24 real slots take part in the comparison below.
    if (Number.isInteger(hour) && hour >= 0 && hour < 24) {
      perHour[hour] += 1;
    }
  }

  const busiest = Math.max(...perHour);
  // indexOf yields the first (earliest) hour that reaches the maximum.
  return busiest > 0 ? perHour.indexOf(busiest) : null;
}
+// ---------------------------------------------------------------------------
+// Source-level summary builder
+// ---------------------------------------------------------------------------
/**
 * Build a daily_summary object for a group of sessions belonging to one source.
 *
 * Counters (messages, tool calls, tokens, cost, errors, additions,
 * deletions, active minutes) are summed from the session rows themselves.
 * The per-session messages are loaded only to determine the peak hour.
 *
 * @param {Object} db
 * @param {string} date
 * @param {string} source
 * @param {Object[]} sessions
 * @returns {Object}  daily_summary row
 */
function buildSourceSummary(db, date, source, sessions) {
  // Collect all messages for the sessions in this group. Messages are
  // appended one at a time on purpose: `push(...msgs)` passes every element
  // as a call argument and throws a RangeError for very large sessions.
  const allMessages = [];
  for (const sess of sessions) {
    for (const msg of getMessagesBySession(db, sess.id)) {
      allMessages.push(msg);
    }
  }

  // Timestamps: first and last activity. A session without `ended_at`
  // counts as ending when it started.
  const startedTimes = sessions
    .map((s) => s.started_at)
    .filter(Boolean)
    .sort();
  const endedTimes = sessions
    .map((s) => s.ended_at || s.started_at)
    .filter(Boolean)
    .sort();

  return {
    id: `${date}_${source}`,
    date,
    source,
    session_count: sessions.length,
    message_count: sumField(sessions, 'message_count'),
    tool_call_count: sumField(sessions, 'tool_call_count'),
    tokens_input: sumField(sessions, 'tokens_input'),
    tokens_output: sumField(sessions, 'tokens_output'),
    tokens_reasoning: sumField(sessions, 'tokens_reasoning'),
    tokens_cache_read: sumField(sessions, 'tokens_cache_read'),
    tokens_cache_write: sumField(sessions, 'tokens_cache_write'),
    cost_usd: sumField(sessions, 'cost_usd'),
    first_activity_at: startedTimes[0] || null,
    last_activity_at: endedTimes[endedTimes.length - 1] || null,
    active_minutes: sumField(sessions, 'active_minutes'),
    // Peak hour: the hour with the most messages.
    peak_hour: findPeakHour(allMessages),
    error_count: sumField(sessions, 'error_count'),
    revert_count: sessions.filter((s) => s.reverted).length,
    additions: sumField(sessions, 'summary_additions'),
    deletions: sumField(sessions, 'summary_deletions'),
  };
}
+// ---------------------------------------------------------------------------
+// Hourly activity builder
+// ---------------------------------------------------------------------------
/**
 * Compute hourly_activity rows for a date from all sessions/messages.
 *
 * One set of rows is written per distinct session source, plus one set for
 * the pseudo-source 'all' that covers every session. A row is written via
 * upsertHourlyActivity only for hours that have at least one message or at
 * least one session whose time range overlaps the hour.
 *
 * Hour windows are built from `date` without a timezone offset, so they
 * are interpreted in the local timezone of the process.
 *
 * @param {Object} db
 * @param {string} date  YYYY-MM-DD
 * @param {Object[]} allSessions  All sessions for this date
 */
function computeHourlyActivity(db, date, allSessions) {
  // Group sessions by source
  const sourceGroups = groupBy(allSessions, 'source');
  const sources = [...Object.keys(sourceGroups), 'all'];

  // Every session appears in its own source group and again in 'all';
  // memoise the lookup so each session's messages are loaded only once.
  const messageCache = new Map();
  const loadMessages = (sess) => {
    if (!messageCache.has(sess)) {
      messageCache.set(sess, getMessagesBySession(db, sess.id));
    }
    return messageCache.get(sess);
  };

  // Inclusive [start, end] window of each hour in epoch milliseconds. The
  // end carries the final millisecond so a session that lives entirely in
  // the last second of an hour still overlaps that hour.
  const hourWindows = Array.from({ length: 24 }, (_, hour) => {
    const hh = String(hour).padStart(2, '0');
    return {
      start: new Date(`${date}T${hh}:00:00`).getTime(),
      end: new Date(`${date}T${hh}:59:59.999`).getTime(),
    };
  });

  for (const source of sources) {
    const sessions = source === 'all' ? allSessions : sourceGroups[source];
    if (!sessions || sessions.length === 0) continue;

    // Bucket messages per hour in a single pass.
    const messageCounts = new Array(24).fill(0);
    const errorCounts = new Array(24).fill(0);
    for (const sess of sessions) {
      for (const msg of loadMessages(sess)) {
        const hour = getHour(msg.timestamp);
        if (!Number.isInteger(hour) || hour < 0 || hour > 23) continue;
        messageCounts[hour] += 1;
        if (msg.is_error) errorCounts[hour] += 1;
      }
    }

    // A session without `ended_at` is treated as a single instant.
    const spans = sessions.map((s) => {
      const start = new Date(s.started_at).getTime();
      const end = s.ended_at ? new Date(s.ended_at).getTime() : start;
      return { start, end };
    });

    for (let hour = 0; hour < 24; hour++) {
      const { start: hourStart, end: hourEnd } = hourWindows[hour];
      // Sessions active in this hour: time range overlaps the hour window.
      const activeSessionCount = spans.filter(
        (span) => span.start <= hourEnd && span.end >= hourStart
      ).length;

      // Only write rows where there is activity
      if (messageCounts[hour] > 0 || activeSessionCount > 0) {
        upsertHourlyActivity(db, {
          date,
          hour,
          source,
          message_count: messageCounts[hour],
          session_count: activeSessionCount,
          error_count: errorCounts[hour],
          tool_call_count: 0, // Would need tool call data per hour for precision
        });
      }
    }
  }
}
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
/**
 * Aggregate session data into daily_summary (and hourly_activity) rows for
 * one calendar date.
 *
 * Does nothing when the date has no sessions. Otherwise it upserts one
 * summary per session source, one combined summary under the source name
 * 'all', and finally delegates the hourly breakdown to
 * computeHourlyActivity.
 *
 * @param {object} db  sql.js Database instance
 * @param {string} date  YYYY-MM-DD
 */
function aggregateDailySummary(db, date) {
  const sessionsForDate = getSessionsByDate(db, date);
  if (sessionsForDate.length === 0) return;

  // Per-source summaries.
  const bySource = groupBy(sessionsForDate, 'source');
  Object.keys(bySource).forEach((source) => {
    upsertDailySummary(db, buildSourceSummary(db, date, source, bySource[source]));
  });

  // Combined summary across every source.
  const combined = buildSourceSummary(db, date, 'all', sessionsForDate);
  combined.id = `${date}_all`;
  combined.source = 'all';
  upsertDailySummary(db, combined);

  // Hourly activity breakdown.
  computeHourlyActivity(db, date, sessionsForDate);
}
+module.exports = { aggregateDailySummary };

package/server/analysis/difficulty.js ADDED Viewed

@@ -0,0 +1,129 @@
+/**
+ * Task difficulty classifier.
+ *
+ * Buckets a unified_session into one of {1, 2, 3, 4} so that the v2
+ * capability thresholds can be normalised against task scale.  See
+ * docs/superpowers/specs/2026-06-13-capability-model-v2.md §3 for the
+ * rule table and rationale.
+ *
+ * The classifier is intentionally pure: it does not touch the database
+ * and takes its inputs from a single session row plus a small set of
+ * aggregates the caller already has on hand.  This keeps it cheap to
+ * recompute (which we do every time a session is re-scored).
+ *
+ * @author Felix
+ */
+'use strict';
/** Bucket constants — exported so scorers can reference them by name. */
const TRIVIAL = 1;
const ROUTINE = 2;
const COMPLEX = 3;
const HEAVY   = 4;
/**
 * Human-readable label for each bucket number.
 *
 * Frozen because the table is exported and shared by every consumer of
 * this module; an accidental write in one place must not change the
 * labels seen everywhere else.
 */
const LABELS = Object.freeze({
  [TRIVIAL]: 'trivial',
  [ROUTINE]: 'routine',
  [COMPLEX]: 'complex',
  [HEAVY]:   'heavy',
});
/**
 * @typedef {Object} DifficultyInput
 * @property {number=} messageCount
 * @property {number=} toolCallCount
 * @property {number=} durationMinutes
 * @property {number=} totalTokens             input + output + reasoning
 * @property {number=} filesChanged            number of distinct files written
 * @property {boolean|number=} reverted        session was reverted (1 / true)
 */
/**
 * Assign a session to one of the four difficulty buckets.
 *
 * Buckets are tried from the heaviest down. The first bucket with at least
 * one matching rule wins, and every rule of that bucket that matched is
 * reported in `reasons`. A session that matches nothing is TRIVIAL. Because
 * HEAVY is checked first, a 50-message burst that lasted under five minutes
 * is never reported as trivial.
 *
 * Missing or falsy inputs are treated as 0 / false.
 *
 * @param {DifficultyInput} input
 * @returns {{ bucket: 1|2|3|4, label: string, reasons: string[] }}
 */
function classify(input) {
  const src = input || {};
  const toNumber = (value) => Number(value || 0);
  const msgs = toNumber(src.messageCount);
  const tools = toNumber(src.toolCallCount);
  const dur = toNumber(src.durationMinutes);
  const tokens = toNumber(src.totalTokens);
  const files = toNumber(src.filesChanged);
  const reverted = Boolean(src.reverted);

  // Each rule is a [matched, reason] pair; keep the reasons of those that hit.
  const matched = (rules) =>
    rules.filter(([applies]) => applies).map(([, reason]) => reason);
  const verdict = (bucket, reasons) => ({ bucket, label: LABELS[bucket], reasons });

  // ---- HEAVY (4) ----------------------------------------------------
  const heavy = matched([
    [msgs > 40, `HEAVY: messages>${40} (=${msgs})`],
    [tools > 40, `HEAVY: tool_calls>${40} (=${tools})`],
    [tokens > 200_000, `HEAVY: tokens>200k (=${tokens})`],
    [dur > 120, `HEAVY: duration>120min (=${dur})`],
    [reverted, 'HEAVY: session reverted'],
  ]);
  if (heavy.length > 0) return verdict(HEAVY, heavy);

  // ---- COMPLEX (3) --------------------------------------------------
  const complex = matched([
    [msgs >= 16 && msgs <= 40, `COMPLEX: messages in 16-40 (=${msgs})`],
    [tools >= 13 && tools <= 40, `COMPLEX: tool_calls in 13-40 (=${tools})`],
    [files > 3, `COMPLEX: files_changed>3 (=${files})`],
    [dur >= 30 && dur <= 120, `COMPLEX: duration 30-120min (=${dur})`],
  ]);
  if (complex.length > 0) return verdict(COMPLEX, complex);

  // ---- ROUTINE (2) --------------------------------------------------
  const routine = matched([
    [msgs >= 5 && msgs <= 15, `ROUTINE: messages 5-15 (=${msgs})`],
    [tools >= 3 && tools <= 12, `ROUTINE: tool_calls 3-12 (=${tools})`],
    [dur >= 5 && dur < 30, `ROUTINE: duration 5-30min (=${dur})`],
  ]);
  if (routine.length > 0) return verdict(ROUTINE, routine);

  // ---- TRIVIAL (1) — default ---------------------------------------
  return verdict(TRIVIAL, [
    `TRIVIAL: messages<5 (${msgs}), tools<=2 (${tools}), duration<5min (${dur})`,
  ]);
}
/**
 * Classify a unified_session row by translating its columns into the
 * input shape that classify() expects.
 *
 * - Duration uses `active_minutes`, falling back to `duration_minutes`
 *   only when `active_minutes` is null or undefined.
 * - Total tokens is input + output + reasoning, with missing parts
 *   counted as 0.
 * - `extras.filesChanged` takes precedence over the row's `summary_files`
 *   whenever it is not null or undefined.
 *
 * No database access happens here; a missing session is classified as an
 * empty input.
 *
 * @param {Object} session            unified_session row
 * @param {Object} [extras]           optional pre-computed extras
 * @param {number} [extras.filesChanged]
 * @returns {{ bucket: number, label: string, reasons: string[] }}
 */
function classifySession(session, extras = {}) {
  if (!session) return classify({});

  const durationMinutes = session.active_minutes ?? session.duration_minutes;
  const inputTokens = session.tokens_input || 0;
  const outputTokens = session.tokens_output || 0;
  const reasoningTokens = session.tokens_reasoning || 0;
  const filesChanged = extras.filesChanged ?? session.summary_files;

  return classify({
    messageCount: session.message_count,
    toolCallCount: session.tool_call_count,
    durationMinutes,
    totalTokens: inputTokens + outputTokens + reasoningTokens,
    filesChanged,
    reverted: session.reverted,
  });
}
+module.exports = {
+  classify,
+  classifySession,
+  // bucket constants — handy for `const { HEAVY } = require('./difficulty')`
+  TRIVIAL,
+  ROUTINE,
+  COMPLEX,
+  HEAVY,
+  LABELS,
+};

package/server/analysis/dimensions/ai-knowledge.js ADDED Viewed

@@ -0,0 +1,172 @@
+/**
+ * E1 — AI Knowledge Coverage.
+ *
+ * Captures whether the AI knows your stack well enough.
+ *
+ * Two paths:
+ *   • LLM judge   — uses cli-runner to ask another agent to audit the
+ *                   assistant outputs.  Cached in
+ *                   session_analysis.llm_judge_v2.
+ *   • Rule fallback — keyword spotting in the user follow-up text to
+ *                   infer "AI said something the user had to correct".
+ *
+ * @author Felix
+ */
+'use strict';
+const {
+  fetchMessages,
+  matchesAny,
+  STALENESS_PATTERNS,
+} = require('../text-signals');
+const { explainIndicator, rollupDimension, scoreToLevel, E1 } = require('../thresholds-v2');
+const { mergeIndicator, dimensionSource } = require('./llm-merge');
+const { makeEvidence } = require('../evidence-builder');
/**
 * Phrases in a user message that suggest the user is pointing the
 * assistant towards a standard or better approach. Chinese phrases come
 * first, followed by case-insensitive English ones.
 */
const BEST_PRACTICE_PATTERNS = [
  /标准做法/,
  /最佳实践/,
  /应该用/,
  /更好的方式/,
  /推荐使用/,
  /建议(用|换)/,
  /best practice/i,
  /should use/i,
  /recommended (way|approach)/i,
  /a better way/i,
];
/**
 * Rule-based (no LLM) evaluation of the E1 dimension for one session.
 *
 * Scans the session's messages and, for every user message that has text
 * and directly follows an assistant message, checks the text against
 * STALENESS_PATTERNS first and, only if that did not match, against
 * BEST_PRACTICE_PATTERNS. From those tallies it derives three raw values:
 *
 *   - domain_errors: staleness-matching follow-ups / assistant turns
 *   - staleness:     number of staleness-matching follow-ups
 *   - best_practice: 1 - best-practice follow-ups / assistant turns
 *
 * When no message carries any text, all three stay null. When there is
 * text but no assistant turn, only `staleness` is set.
 *
 * @param {Object} db
 * @param {Object} session          row whose `id` is used to fetch messages
 * @param {number} [difficulty=2]   passed through to the threshold helpers
 * @returns {Object} subScores / subLevels / subEvidence / raw plus the
 *   rolled-up score and level; `judgeSource` is always 'rules' and
 *   `llmJudge` is always null
 */
function analyzeRules(db, session, difficulty = 2) {
  const messages = fetchMessages(db, session.id);
  const haveText = messages.some((msg) => msg.text && msg.text.length > 0);

  let assistantTurns = 0;
  let correctionsAfterAssistant = 0;
  let stalenessHits = 0;
  let bestPracticeHits = 0;

  if (haveText) {
    for (const [idx, msg] of messages.entries()) {
      if (msg.role === 'assistant') {
        assistantTurns += 1;
        continue;
      }
      if (msg.role !== 'user' || !msg.text) continue;

      // Only user messages that immediately follow an assistant message count.
      const previous = idx > 0 ? messages[idx - 1] : null;
      if (!previous || previous.role !== 'assistant') continue;

      if (matchesAny(msg.text, STALENESS_PATTERNS)) {
        correctionsAfterAssistant += 1;
        stalenessHits += 1;
      } else if (matchesAny(msg.text, BEST_PRACTICE_PATTERNS)) {
        bestPracticeHits += 1;
      }
    }
  }

  // Ratios need at least one assistant turn; otherwise they stay null.
  const hasTurns = haveText && assistantTurns > 0;
  const domain_errors = hasTurns ? correctionsAfterAssistant / assistantTurns : null;
  const staleness = haveText ? stalenessHits : null;
  const best_practice = hasTurns ? 1 - bestPracticeHits / assistantTurns : null;

  const explained = {
    de: explainIndicator(E1.domain_errors, domain_errors, difficulty),
    st: explainIndicator(E1.staleness, staleness, difficulty),
    bp: explainIndicator(E1.best_practice, best_practice, difficulty),
  };

  const subScores = {
    domain_errors: explained.de.score,
    staleness: explained.st.score,
    best_practice: explained.bp.score,
  };
  const subLevels = {
    domain_errors: explained.de.level,
    staleness: explained.st.level,
    best_practice: explained.bp.level,
  };

  const tallies = { assistantTurns, correctionsAfterAssistant, stalenessHits, bestPracticeHits };
  const subEvidence = buildSubEvidence(
    { de: explained.de, st: explained.st, bp: explained.bp },
    { mode: 'rules', haveText, ...tallies },
    difficulty
  );

  const score = rollupDimension('E1', subScores);

  return {
    subScores,
    subLevels,
    subEvidence,
    raw: { domain_errors, staleness, best_practice, ...tallies },
    score,
    level: scoreToLevel(score),
    judgeSource: 'rules',
    llmJudge: null,
  };
}
/**
 * Public entry — synchronous.  `llmCell` is llmJudge.E1 (or null).
 *
 * Always computes the rule-based result first. Without an `llmCell` that
 * result is returned unchanged. With one, each of the three indicators is
 * passed through mergeIndicator together with its rule score and level,
 * and the dimension score, level and judge source are recomputed from the
 * merged values. The evidence text of an indicator is replaced only when
 * the merge produced a non-empty `evidence`.
 *
 * @param {Object} db
 * @param {Object} session
 * @param {number} [difficulty=2]
 * @param {Object|null} [llmCell=null]
 * @returns {Object} same shape as the analyzeRules result
 */
function analyze(db, session, difficulty = 2, llmCell = null) {
  const ruleResult = analyzeRules(db, session, difficulty);
  if (!llmCell) return ruleResult;

  const indicatorKeys = ['domain_errors', 'staleness', 'best_practice'];
  const subScores = {};
  const subLevels = {};
  const subEvidence = {};
  const sources = [];

  for (const key of indicatorKeys) {
    const merged = mergeIndicator(
      llmCell[key],
      ruleResult.subScores[key],
      ruleResult.subLevels[key]
    );
    const ruleEvidence = ruleResult.subEvidence[key];

    subScores[key] = merged.score;
    subLevels[key] = merged.level;
    subEvidence[key] = {
      ...ruleEvidence,
      what: merged.evidence || ruleEvidence.what,
      level: merged.level,
      score: merged.score,
    };
    sources.push(merged.source);
  }

  const score = rollupDimension('E1', subScores);

  return {
    subScores,
    subLevels,
    subEvidence,
    raw: { llmCell, ruleRaw: ruleResult.raw },
    score,
    level: scoreToLevel(score),
    judgeSource: dimensionSource(sources),
    llmJudge: null,
  };
}
/**
 * Assemble the evidence records for the three E1 indicators in rules mode.
 *
 * Each record is produced by makeEvidence from the indicator's key, its
 * display label, a `what` sentence explaining how the rule-based value was
 * obtained, the indicator's threshold explanation, a unit and the
 * difficulty. The `what` sentence switches to a "cannot evaluate" wording
 * when the session had no message text (and, for the two ratio indicators,
 * when there were no assistant turns).
 *
 * @param {{de: Object, st: Object, bp: Object}} expls  threshold explanations
 * @param {Object} ctx  tallies collected by the rule scan
 * @param {number} difficulty
 * @returns {{domain_errors: Object, staleness: Object, best_practice: Object}}
 */
function buildSubEvidence(expls, ctx, difficulty) {
  const { haveText, assistantTurns, correctionsAfterAssistant, stalenessHits, bestPracticeHits } = ctx;
  // The two ratio indicators need both message text and at least one assistant turn.
  const ratioAvailable = haveText && assistantTurns > 0;

  let domainErrorsWhat = '规则版：消息文本缺失或无 AI 回合，无法评估（未启用 LLM judge）。';
  if (ratioAvailable) {
    domainErrorsWhat = `规则版：紧跟 AI 回复后的用户消息中含纠错关键词（"废弃"、"不存在"、"应该是"等）的比例：${correctionsAfterAssistant} / ${assistantTurns} 个助手回合。`;
  }

  let stalenessWhat = '规则版：消息文本缺失，无法识别过时引用。';
  if (haveText) {
    stalenessWhat = `规则版：用户消息中含过时/废弃关键词命中 ${stalenessHits} 次。`;
  }

  let bestPracticeWhat = '规则版：消息文本缺失或无 AI 回合，使用默认估算。';
  if (ratioAvailable) {
    bestPracticeWhat = `规则版：1 - (用户提示"应该用 X"的比例) = 1 - ${bestPracticeHits} / ${assistantTurns}。开启 LLM judge 可获得真实 LLM 评估。`;
  }

  const domain_errors = makeEvidence({
    key: 'domain_errors',
    label: '领域错误率',
    what: domainErrorsWhat,
    expl: expls.de,
    unit: '%',
    difficulty,
  });
  const staleness = makeEvidence({
    key: 'staleness',
    label: '知识时效性',
    what: stalenessWhat,
    expl: expls.st,
    unit: '次',
    difficulty,
  });
  const best_practice = makeEvidence({
    key: 'best_practice',
    label: '最佳实践采纳率',
    what: bestPracticeWhat,
    expl: expls.bp,
    unit: '%',
    difficulty,
  });

  return { domain_errors, staleness, best_practice };
}
+module.exports = { analyze, analyzeRules };