agentboss 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +34 -0
  2. package/bin/aboss.js +288 -0
  3. package/client/dist/assets/index-C1wFD_Vo.css +1 -0
  4. package/client/dist/assets/index-DBj1Ujlx.js +137 -0
  5. package/client/dist/index.html +34 -0
  6. package/package.json +64 -0
  7. package/server/analysis/daily-aggregator.js +258 -0
  8. package/server/analysis/difficulty.js +129 -0
  9. package/server/analysis/dimensions/ai-knowledge.js +172 -0
  10. package/server/analysis/dimensions/ai-tools.js +161 -0
  11. package/server/analysis/dimensions/judgement.js +107 -0
  12. package/server/analysis/dimensions/llm-merge.js +57 -0
  13. package/server/analysis/dimensions/output-quality.js +167 -0
  14. package/server/analysis/dimensions/problem-definition.js +104 -0
  15. package/server/analysis/dimensions/system-thinking.js +225 -0
  16. package/server/analysis/evidence-builder.js +104 -0
  17. package/server/analysis/job.js +273 -0
  18. package/server/analysis/report-builder.js +581 -0
  19. package/server/analysis/scoring-v2.js +72 -0
  20. package/server/analysis/text-signals.js +179 -0
  21. package/server/analysis/thresholds-v2.js +358 -0
  22. package/server/api/advice.js +124 -0
  23. package/server/api/analysis.js +141 -0
  24. package/server/api/execution.js +330 -0
  25. package/server/api/metrics.js +277 -0
  26. package/server/api/overview.js +308 -0
  27. package/server/api/project.js +255 -0
  28. package/server/api/reports.js +125 -0
  29. package/server/api/sessions.js +118 -0
  30. package/server/api/settings.js +119 -0
  31. package/server/db/connection.js +175 -0
  32. package/server/db/queries.js +1051 -0
  33. package/server/db/schema.js +487 -0
  34. package/server/etl/active-time.js +150 -0
  35. package/server/etl/backfill-subagents.js +178 -0
  36. package/server/etl/claude-code.js +826 -0
  37. package/server/etl/detect.js +341 -0
  38. package/server/etl/judge-filter.js +117 -0
  39. package/server/etl/opencode.js +606 -0
  40. package/server/execution/job.js +662 -0
  41. package/server/execution/prompt.js +227 -0
  42. package/server/execution/runner.js +218 -0
  43. package/server/index.js +94 -0
  44. package/server/llm/advice-prompt.js +339 -0
  45. package/server/llm/advice.js +384 -0
  46. package/server/llm/analysis-prompt.js +162 -0
  47. package/server/llm/cli-runner.js +249 -0
  48. package/server/llm/judge-prompts.js +179 -0
  49. package/server/llm/judge.js +118 -0
  50. package/server/llm/project-advice-prompt.js +332 -0
  51. package/server/llm/project-advice.js +491 -0
  52. package/server/llm/session-analyzer.js +122 -0
  53. package/server/utils/project.js +80 -0
@@ -0,0 +1,34 @@
1
+ <!DOCTYPE html>
2
+ <html lang="zh-Hans">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Agent Boss</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
8
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
9
+ <link
10
+ href="https://fonts.googleapis.com/css2?family=Geist+Mono:wght@400;500;600&family=Space+Grotesk:wght@400;500;600;700&display=swap"
11
+ rel="stylesheet"
12
+ />
13
+ <style>
14
+ html, body { background: #F5F7FA; }
15
+ </style>
16
+ <script>
17
+ // Apply the saved theme before first paint to avoid a flash of the
18
+ // wrong palette. Mirrors the localStorage key used by ThemeToggle.
19
+ (function () {
20
+ try {
21
+ if (localStorage.getItem('ab-theme') === 'hack') {
22
+ document.documentElement.setAttribute('data-theme', 'hack');
23
+ document.documentElement.style.background = '#0a0e0f';
24
+ }
25
+ } catch (e) {}
26
+ })();
27
+ </script>
28
+ <script type="module" crossorigin src="/assets/index-DBj1Ujlx.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-C1wFD_Vo.css">
30
+ </head>
31
+ <body>
32
+ <div id="root"></div>
33
+ </body>
34
+ </html>
package/package.json ADDED
@@ -0,0 +1,64 @@
1
+ {
2
+ "name": "agentboss",
3
+ "version": "0.1.0",
4
+ "description": "AI Agent collaboration analytics - become your AI agent's boss, not its babysitter",
5
+ "main": "server/index.js",
6
+ "bin": {
7
+ "aboss": "./bin/aboss.js"
8
+ },
9
+ "scripts": {
10
+ "start": "npm run client:build && node bin/aboss.js",
11
+ "server": "node bin/aboss.js",
12
+ "dev": "node bin/aboss.js --no-open",
13
+ "client:dev": "cd client && npx vite",
14
+ "client:build": "cd client && npx vite build",
15
+ "test": "node --test server/**/*.test.js",
16
+ "prepublishOnly": "npm run client:build"
17
+ },
18
+ "files": [
19
+ "bin/",
20
+ "server/**/*.js",
21
+ "!server/**/*.test.js",
22
+ "!server/**/__fixtures__/**",
23
+ "!server/**/__tests__/**",
24
+ "!server/test-utils/**",
25
+ "client/dist/",
26
+ "README.md"
27
+ ],
28
+ "keywords": [
29
+ "ai",
30
+ "agent",
31
+ "analytics",
32
+ "opencode",
33
+ "claude-code",
34
+ "developer-tools"
35
+ ],
36
+ "author": "Felix",
37
+ "license": "MIT",
38
+ "repository": {
39
+ "type": "git",
40
+ "url": "git+https://github.com/2026hackathon/AgentBoss.git"
41
+ },
42
+ "homepage": "https://github.com/2026hackathon/AgentBoss#readme",
43
+ "bugs": {
44
+ "url": "https://github.com/2026hackathon/AgentBoss/issues"
45
+ },
46
+ "dependencies": {
47
+ "express": "^4.21.0",
48
+ "open": "^10.1.0",
49
+ "sql.js": "^1.12.0",
50
+ "sqlite3": "^6.0.1",
51
+ "uuid": "^11.1.0"
52
+ },
53
+ "devDependencies": {
54
+ "@vitejs/plugin-react": "^4.3.0",
55
+ "react": "^18.3.0",
56
+ "react-dom": "^18.3.0",
57
+ "react-router-dom": "^6.28.0",
58
+ "recharts": "^2.15.0",
59
+ "vite": "^5.4.0"
60
+ },
61
+ "engines": {
62
+ "node": ">=18.0.0"
63
+ }
64
+ }
@@ -0,0 +1,258 @@
1
+ /**
2
+ * Daily Summary Aggregation for Agent Boss
3
+ *
4
+ * Computes daily_summary and hourly_activity rows for a given date by
5
+ * aggregating data from unified_session and unified_message.
6
+ *
7
+ * @author Felix
8
+ */
9
+
10
+ const {
11
+ getSessionsByDate,
12
+ getMessagesBySession,
13
+ upsertDailySummary,
14
+ upsertHourlyActivity,
15
+ } = require('../db/queries');
16
+
17
+ // ---------------------------------------------------------------------------
18
+ // Helpers
19
+ // ---------------------------------------------------------------------------
20
+
21
/**
 * Group an array of objects by the value of one property.
 *
 * Items whose key value is missing or falsy are collected under 'unknown'.
 *
 * The result is a prototype-less dictionary. With a plain `{}` a key value
 * that collides with an Object.prototype member (for example "constructor"
 * or "toString") looks like an existing group and makes `.push` blow up, and
 * "__proto__" would rewrite the prototype instead of creating a group.
 * Callers can still use Object.keys / Object.entries / bracket access.
 *
 * @param {Object[]} items
 * @param {string} key  Name of the property to group by
 * @returns {Object<string, Object[]>} Dictionary of key value -> items
 */
function groupBy(items, key) {
  const groups = Object.create(null);
  for (const item of items) {
    const k = item[key] || 'unknown';
    if (!groups[k]) groups[k] = [];
    groups[k].push(item);
  }
  return groups;
}
37
+
38
/**
 * Hour of day for a timestamp, read in the process's local time zone.
 *
 * A missing / falsy timestamp is reported as hour 0. A value that `Date`
 * cannot parse yields NaN (whatever `getHours()` returns for an invalid date).
 *
 * @param {string} timestamp  ISO 8601 datetime
 * @returns {number} Hour 0–23
 */
function getHour(timestamp) {
  return timestamp ? new Date(timestamp).getHours() : 0;
}
49
+
50
/**
 * Add up one field over a list of objects.
 *
 * Missing or falsy values (undefined, null, 0, '') contribute 0.
 *
 * @param {Object[]} items
 * @param {string} field  Name of the property to total
 * @returns {number}
 */
function sumField(items, field) {
  let total = 0;
  for (const entry of items) {
    total = total + (entry[field] || 0);
  }
  return total;
}
60
+
61
/**
 * Determine which hour of the day saw the most messages.
 *
 * Each message is bucketed by `getHour(msg.timestamp)`. When several hours
 * tie for the maximum, the earliest hour wins.
 *
 * @param {Object[]} messages  Flat list of message rows with a `timestamp`
 * @returns {number|null} Hour 0–23, or null when nothing could be counted
 */
function findPeakHour(messages) {
  if (messages.length === 0) return null;

  const tally = new Array(24).fill(0);
  for (const message of messages) {
    tally[getHour(message.timestamp)]++;
  }

  // indexOf returns the first slot holding the maximum, i.e. the earliest hour.
  const busiest = Math.max(...tally);
  return busiest > 0 ? tally.indexOf(busiest) : null;
}
87
+
88
+ // ---------------------------------------------------------------------------
89
+ // Source-level summary builder
90
+ // ---------------------------------------------------------------------------
91
+
92
/**
 * Assemble one daily_summary row from the sessions of a single source.
 *
 * Counters are totals of the corresponding session columns; `peak_hour` is
 * derived from every message of every session in the group.
 *
 * @param {Object} db          Database handle passed through to the queries
 * @param {string} date        Day being summarised
 * @param {string} source      Source name, also used in the row id
 * @param {Object[]} sessions  Session rows belonging to this source
 * @returns {Object} daily_summary row
 */
function buildSourceSummary(db, date, source, sessions) {
  // Gather every message of the group; only needed for the peak-hour lookup.
  const messages = [];
  for (const session of sessions) {
    for (const message of getMessagesBySession(db, session.id)) {
      messages.push(message);
    }
  }

  const total = (field) => sumField(sessions, field);

  // Earliest start and latest end; a session with no end falls back to its start.
  const starts = sessions
    .map((session) => session.started_at)
    .filter(Boolean)
    .sort();
  const ends = sessions
    .map((session) => session.ended_at || session.started_at)
    .filter(Boolean)
    .sort();

  return {
    id: `${date}_${source}`,
    date,
    source,
    session_count: sessions.length,
    message_count: total('message_count'),
    tool_call_count: total('tool_call_count'),
    tokens_input: total('tokens_input'),
    tokens_output: total('tokens_output'),
    tokens_reasoning: total('tokens_reasoning'),
    tokens_cache_read: total('tokens_cache_read'),
    tokens_cache_write: total('tokens_cache_write'),
    cost_usd: total('cost_usd'),
    first_activity_at: starts[0] || null,
    last_activity_at: ends[ends.length - 1] || null,
    active_minutes: total('active_minutes'),
    peak_hour: findPeakHour(messages),
    error_count: total('error_count'),
    revert_count: sessions.filter((session) => session.reverted).length,
    additions: total('summary_additions'),
    deletions: total('summary_deletions'),
  };
}
164
+
165
+ // ---------------------------------------------------------------------------
166
+ // Hourly activity builder
167
+ // ---------------------------------------------------------------------------
168
+
169
/**
 * Compute and upsert hourly_activity rows for one date.
 *
 * Rows are written for every source present in `allSessions` plus the
 * combined pseudo-source 'all'. An hour only gets a row when it has at least
 * one message or at least one session whose time range overlaps it.
 *
 * Hours are local-time hours: the window boundaries are built from
 * `${date}THH:00:00` (no offset, so `Date` reads it as local time), which
 * matches `getHour()`.
 *
 * @param {Object} db              Database handle passed through to the queries
 * @param {string} date            YYYY-MM-DD
 * @param {Object[]} allSessions   All sessions for this date
 */
function computeHourlyActivity(db, date, allSessions) {
  const HOURS_PER_DAY = 24;
  const MS_PER_HOUR = 60 * 60 * 1000;

  const sourceGroups = groupBy(allSessions, 'source');
  // A Set keeps 'all' from being processed twice when a real source has that name.
  const sources = [...new Set([...Object.keys(sourceGroups), 'all'])];

  // Each session is visited once for its own source and once for 'all';
  // remember its messages so the database is only asked once per session.
  const messagesBySession = new Map();
  const messagesFor = (sessionId) => {
    if (!messagesBySession.has(sessionId)) {
      messagesBySession.set(sessionId, getMessagesBySession(db, sessionId));
    }
    return messagesBySession.get(sessionId);
  };

  // Half-open hour windows [start, start + 1h). The previous inclusive end of
  // HH:59:59.000 lost sessions that lived entirely in the last second of an hour.
  const hourWindows = Array.from({ length: HOURS_PER_DAY }, (_, hour) => {
    const start = new Date(`${date}T${String(hour).padStart(2, '0')}:00:00`);
    return { start, end: new Date(start.getTime() + MS_PER_HOUR) };
  });

  for (const source of sources) {
    const sessions = source === 'all' ? allSessions : sourceGroups[source];
    if (!sessions || sessions.length === 0) continue;

    // Bucket messages by hour in a single pass instead of re-filtering 24 times.
    const messageCounts = new Array(HOURS_PER_DAY).fill(0);
    const errorCounts = new Array(HOURS_PER_DAY).fill(0);
    for (const sess of sessions) {
      for (const msg of messagesFor(sess.id)) {
        const h = getHour(msg.timestamp);
        // Unparseable timestamps give NaN and belong to no hour.
        if (!Number.isInteger(h) || h < 0 || h >= HOURS_PER_DAY) continue;
        messageCounts[h]++;
        if (msg.is_error) errorCounts[h]++;
      }
    }

    // A session without ended_at is treated as a single instant at its start.
    const ranges = sessions.map((s) => {
      const start = new Date(s.started_at);
      const end = s.ended_at ? new Date(s.ended_at) : start;
      return { start, end };
    });

    for (let hour = 0; hour < HOURS_PER_DAY; hour++) {
      const { start: hourStart, end: hourEnd } = hourWindows[hour];
      // Invalid dates compare false on both sides, so they are never "active".
      const activeCount = ranges.filter(
        (r) => r.start < hourEnd && r.end >= hourStart
      ).length;

      // Only write rows where there is activity
      if (messageCounts[hour] > 0 || activeCount > 0) {
        upsertHourlyActivity(db, {
          date,
          hour,
          source,
          message_count: messageCounts[hour],
          session_count: activeCount,
          error_count: errorCounts[hour],
          tool_call_count: 0, // Would need tool call data per hour for precision
        });
      }
    }
  }
}
223
+
224
+ // ---------------------------------------------------------------------------
225
+ // Main
226
+ // ---------------------------------------------------------------------------
227
+
228
/**
 * Rebuild the daily_summary and hourly_activity rows for one day.
 *
 * Writes one summary per source, one combined summary under the source name
 * 'all', then delegates the hourly breakdown. Does nothing when the day has
 * no sessions.
 *
 * @param {object} db    sql.js Database instance
 * @param {string} date  YYYY-MM-DD
 */
function aggregateDailySummary(db, date) {
  const daySessions = getSessionsByDate(db, date);
  if (daySessions.length === 0) return;

  // Per-source rows
  const bySource = groupBy(daySessions, 'source');
  for (const [source, group] of Object.entries(bySource)) {
    upsertDailySummary(db, buildSourceSummary(db, date, source, group));
  }

  // Combined row across every source
  const combined = Object.assign(buildSourceSummary(db, date, 'all', daySessions), {
    id: `${date}_all`,
    source: 'all',
  });
  upsertDailySummary(db, combined);

  // Hourly breakdown
  computeHourlyActivity(db, date, daySessions);
}
257
+
258
+ module.exports = { aggregateDailySummary };
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Task difficulty classifier.
3
+ *
4
+ * Buckets a unified_session into one of {1, 2, 3, 4} so that the v2
5
+ * capability thresholds can be normalised against task scale. See
6
+ * docs/superpowers/specs/2026-06-13-capability-model-v2.md §3 for the
7
+ * rule table and rationale.
8
+ *
9
+ * The classifier is intentionally pure: it does not touch the database
10
+ * and takes its inputs from a single session row plus a small set of
11
+ * aggregates the caller already has on hand. This keeps it cheap to
12
+ * recompute (which we do every time a session is re-scored).
13
+ *
14
+ * @author Felix
15
+ */
16
+
17
+ 'use strict';
18
+
19
/**
 * Difficulty bucket identifiers, ordered from easiest to hardest.
 * Exported so scorers can refer to a bucket by name instead of by number.
 */
const TRIVIAL = 1;
const ROUTINE = 2;
const COMPLEX = 3;
const HEAVY = 4;

/** Lower-case display label for each bucket, keyed by bucket number. */
const LABELS = Object.fromEntries([
  [TRIVIAL, 'trivial'],
  [ROUTINE, 'routine'],
  [COMPLEX, 'complex'],
  [HEAVY, 'heavy'],
]);
31
+
32
/**
 * @typedef {Object} DifficultyInput
 * @property {number=} messageCount
 * @property {number=} toolCallCount
 * @property {number=} durationMinutes
 * @property {number=} totalTokens       input + output + reasoning
 * @property {number=} filesChanged      number of distinct files written
 * @property {boolean|number=} reverted  session was reverted (1 / true)
 */

/**
 * Assign a session to a difficulty bucket (1-4).
 *
 * Tiers are tried from hardest to easiest and the first tier with at least
 * one matching rule wins, so a signal for a harder bucket always overrides
 * softer signals (e.g. a 50-message burst that lasted under five minutes is
 * still HEAVY, not trivial). `reasons` lists every rule of the winning tier
 * that matched. Missing or falsy inputs are treated as 0 / false.
 *
 * @param {DifficultyInput} input
 * @returns {{ bucket: 1|2|3|4, label: string, reasons: string[] }}
 */
function classify(input) {
  const src = input || {};
  const msgs = Number(src.messageCount || 0);
  const tools = Number(src.toolCallCount || 0);
  const dur = Number(src.durationMinutes || 0);
  const tokens = Number(src.totalTokens || 0);
  const files = Number(src.filesChanged || 0);
  const reverted = Boolean(src.reverted);

  // Each tier is a list of [matched?, explanation] pairs, hardest tier first.
  const tiers = [
    {
      bucket: HEAVY,
      rules: [
        [msgs > 40, `HEAVY: messages>40 (=${msgs})`],
        [tools > 40, `HEAVY: tool_calls>40 (=${tools})`],
        [tokens > 200_000, `HEAVY: tokens>200k (=${tokens})`],
        [dur > 120, `HEAVY: duration>120min (=${dur})`],
        [reverted, 'HEAVY: session reverted'],
      ],
    },
    {
      bucket: COMPLEX,
      rules: [
        [msgs >= 16 && msgs <= 40, `COMPLEX: messages in 16-40 (=${msgs})`],
        [tools >= 13 && tools <= 40, `COMPLEX: tool_calls in 13-40 (=${tools})`],
        [files > 3, `COMPLEX: files_changed>3 (=${files})`],
        [dur >= 30 && dur <= 120, `COMPLEX: duration 30-120min (=${dur})`],
      ],
    },
    {
      bucket: ROUTINE,
      rules: [
        [msgs >= 5 && msgs <= 15, `ROUTINE: messages 5-15 (=${msgs})`],
        [tools >= 3 && tools <= 12, `ROUTINE: tool_calls 3-12 (=${tools})`],
        [dur >= 5 && dur < 30, `ROUTINE: duration 5-30min (=${dur})`],
      ],
    },
  ];

  for (const { bucket, rules } of tiers) {
    const reasons = rules.filter(([matched]) => matched).map(([, why]) => why);
    if (reasons.length) return { bucket, label: LABELS[bucket], reasons };
  }

  // Nothing matched: TRIVIAL is the default bucket.
  return {
    bucket: TRIVIAL,
    label: LABELS[TRIVIAL],
    reasons: [`TRIVIAL: messages<5 (${msgs}), tools<=2 (${tools}), duration<5min (${dur})`],
  };
}
93
+
94
/**
 * Classify a unified_session row by mapping its columns onto the inputs of
 * `classify`.
 *
 * No database access happens here: the caller supplies `filesChanged` via
 * `extras` when it has it, otherwise the row's `summary_files` is used.
 * Duration prefers `active_minutes` and falls back to `duration_minutes`
 * only when the former is null/undefined. A missing session is classified
 * as an empty input.
 *
 * @param {Object} session   unified_session row
 * @param {Object} [extras]  optional pre-computed extras
 * @param {number} [extras.filesChanged]
 * @returns {{ bucket: number, label: string, reasons: string[] }}
 */
function classifySession(session, extras = {}) {
  if (!session) return classify({});

  const {
    message_count,
    tool_call_count,
    active_minutes,
    duration_minutes,
    tokens_input,
    tokens_output,
    tokens_reasoning,
    summary_files,
    reverted,
  } = session;

  const totalTokens = (tokens_input || 0) + (tokens_output || 0) + (tokens_reasoning || 0);

  return classify({
    messageCount: message_count,
    toolCallCount: tool_call_count,
    durationMinutes: active_minutes ?? duration_minutes,
    totalTokens,
    filesChanged: extras.filesChanged ?? summary_files,
    reverted,
  });
}
119
+
120
+ module.exports = {
121
+ classify,
122
+ classifySession,
123
+ // bucket constants — handy for `const { HEAVY } = require('./difficulty')`
124
+ TRIVIAL,
125
+ ROUTINE,
126
+ COMPLEX,
127
+ HEAVY,
128
+ LABELS,
129
+ };
@@ -0,0 +1,172 @@
1
+ /**
2
+ * E1 — AI Knowledge Coverage.
3
+ *
4
+ * Captures whether the AI knows your stack well enough.
5
+ *
6
+ * Two paths:
7
+ * • LLM judge — uses cli-runner to ask another agent to audit the
8
+ * assistant outputs. Cached in
9
+ * session_analysis.llm_judge_v2.
10
+ * • Rule fallback — keyword spotting in the user follow-up text to
11
+ * infer "AI said something the user had to correct".
12
+ *
13
+ * @author Felix
14
+ */
15
+
16
+ 'use strict';
17
+
18
+ const {
19
+ fetchMessages,
20
+ matchesAny,
21
+ STALENESS_PATTERNS,
22
+ } = require('../text-signals');
23
+ const { explainIndicator, rollupDimension, scoreToLevel, E1 } = require('../thresholds-v2');
24
+ const { mergeIndicator, dimensionSource } = require('./llm-merge');
25
+ const { makeEvidence } = require('../evidence-builder');
26
+
27
/**
 * Phrases in a user message that suggest the user is steering the AI toward
 * a better / standard way of doing something. Chinese phrases first, then
 * their case-insensitive English counterparts.
 */
const BEST_PRACTICE_PATTERNS = [
  // Chinese
  /标准做法/,
  /最佳实践/,
  /应该用/,
  /更好的方式/,
  /推荐使用/,
  /建议(用|换)/,
  // English
  /best practice/i,
  /should use/i,
  /recommended (way|approach)/i,
  /a better way/i,
];
31
+
32
/**
 * Rule-based (no LLM) scoring of the E1 "AI knowledge" dimension.
 *
 * Walks the session's messages and looks at user messages that directly
 * follow an assistant message:
 *   - a STALENESS_PATTERNS match counts as both a correction and a staleness hit;
 *   - otherwise a BEST_PRACTICE_PATTERNS match counts as a best-practice hint.
 *
 * Raw indicator values stay null when they cannot be computed (no message
 * text at all, or no assistant turns for the two ratio indicators); they are
 * still passed to `explainIndicator`, which decides the resulting score.
 *
 * @param {Object} db
 * @param {Object} session            Session row; only `id` is read here
 * @param {number} [difficulty=2]     Difficulty bucket forwarded to the thresholds
 * @returns {Object} subScores / subLevels / subEvidence / raw / score / level,
 *                   with judgeSource 'rules' and llmJudge null
 */
function analyzeRules(db, session, difficulty = 2) {
  const messages = fetchMessages(db, session.id);
  const haveText = messages.some((msg) => msg.text && msg.text.length > 0);

  let assistantTurns = 0;
  let correctionsAfterAssistant = 0;
  let stalenessHits = 0;
  let bestPracticeHits = 0;

  if (haveText) {
    for (let idx = 0; idx < messages.length; idx++) {
      const msg = messages[idx];
      if (msg.role === 'assistant') assistantTurns++;
      if (msg.role !== 'user' || !msg.text) continue;

      // Only user messages that immediately follow an assistant message count.
      const previous = idx > 0 ? messages[idx - 1] : null;
      if (!previous || previous.role !== 'assistant') continue;

      if (matchesAny(msg.text, STALENESS_PATTERNS)) {
        correctionsAfterAssistant++;
        stalenessHits++;
      } else if (matchesAny(msg.text, BEST_PRACTICE_PATTERNS)) {
        bestPracticeHits++;
      }
    }
  }

  // Ratios need at least one assistant turn; the staleness count only needs text.
  const hasTurns = haveText && assistantTurns > 0;
  const domain_errors = hasTurns ? correctionsAfterAssistant / assistantTurns : null;
  const staleness = haveText ? stalenessHits : null;
  const best_practice = hasTurns ? 1 - bestPracticeHits / assistantTurns : null;

  const explained = {
    de: explainIndicator(E1.domain_errors, domain_errors, difficulty),
    st: explainIndicator(E1.staleness, staleness, difficulty),
    bp: explainIndicator(E1.best_practice, best_practice, difficulty),
  };

  const subScores = {
    domain_errors: explained.de.score,
    staleness: explained.st.score,
    best_practice: explained.bp.score,
  };
  const subLevels = {
    domain_errors: explained.de.level,
    staleness: explained.st.level,
    best_practice: explained.bp.level,
  };

  const subEvidence = buildSubEvidence(
    { de: explained.de, st: explained.st, bp: explained.bp },
    {
      mode: 'rules',
      haveText,
      assistantTurns,
      correctionsAfterAssistant,
      stalenessHits,
      bestPracticeHits,
    },
    difficulty
  );

  const score = rollupDimension('E1', subScores);

  return {
    subScores,
    subLevels,
    subEvidence,
    raw: {
      domain_errors,
      staleness,
      best_practice,
      assistantTurns,
      correctionsAfterAssistant,
      stalenessHits,
      bestPracticeHits,
    },
    score,
    level: scoreToLevel(score),
    judgeSource: 'rules',
    llmJudge: null,
  };
}
111
+
112
/**
 * Public, synchronous entry point for the E1 dimension.
 *
 * Always computes the rule-based result. Without an LLM cell that result is
 * returned untouched; with one, each indicator is passed through
 * `mergeIndicator` together with its rule score/level, and the dimension
 * score, level and judge source are recomputed from the merged values.
 *
 * @param {Object} db
 * @param {Object} session
 * @param {number} [difficulty=2]
 * @param {Object|null} [llmCell=null]  llmJudge.E1 (or null)
 * @returns {Object} Same shape as the `analyzeRules` result
 */
function analyze(db, session, difficulty = 2, llmCell = null) {
  const ruleResult = analyzeRules(db, session, difficulty);
  if (!llmCell) return ruleResult;

  const INDICATORS = ['domain_errors', 'staleness', 'best_practice'];

  const merged = Object.fromEntries(
    INDICATORS.map((name) => [
      name,
      mergeIndicator(llmCell[name], ruleResult.subScores[name], ruleResult.subLevels[name]),
    ])
  );

  // Project one property of every merged indicator into a { indicator: value } map.
  const pluck = (prop) => Object.fromEntries(INDICATORS.map((name) => [name, merged[name][prop]]));
  const subScores = pluck('score');
  const subLevels = pluck('level');

  // Keep the rule evidence, but prefer the merged explanation text when there is one.
  const subEvidence = Object.fromEntries(
    INDICATORS.map((name) => {
      const ruleEvidence = ruleResult.subEvidence[name];
      return [
        name,
        {
          ...ruleEvidence,
          what: merged[name].evidence || ruleEvidence.what,
          level: subLevels[name],
          score: subScores[name],
        },
      ];
    })
  );

  const score = rollupDimension('E1', subScores);

  return {
    subScores,
    subLevels,
    subEvidence,
    raw: { llmCell, ruleRaw: ruleResult.raw },
    score,
    level: scoreToLevel(score),
    judgeSource: dimensionSource(INDICATORS.map((name) => merged[name].source)),
    llmJudge: null,
  };
}
141
+
142
/**
 * Build the per-indicator evidence entries for the rule-based E1 result.
 *
 * Each entry is produced by `makeEvidence` from the indicator's explanation
 * plus a human-readable `what` text that either reports the counted signals
 * or states why the indicator could not be evaluated.
 *
 * @param {{de: Object, st: Object, bp: Object}} expls  Explanations for
 *        domain_errors, staleness and best_practice
 * @param {Object} ctx         Counters gathered by the rule pass
 * @param {number} difficulty  Difficulty bucket forwarded to `makeEvidence`
 * @returns {{domain_errors: Object, staleness: Object, best_practice: Object}}
 */
function buildSubEvidence(expls, ctx, difficulty) {
  const { de, st, bp } = expls;

  // rules mode
  const { haveText, assistantTurns, correctionsAfterAssistant, stalenessHits, bestPracticeHits } = ctx;

  // The two ratio indicators need both message text and at least one AI turn.
  const ratioAvailable = haveText && assistantTurns > 0;

  let domainErrorsWhat;
  let bestPracticeWhat;
  if (ratioAvailable) {
    domainErrorsWhat = `规则版:紧跟 AI 回复后的用户消息中含纠错关键词("废弃"、"不存在"、"应该是"等)的比例:${correctionsAfterAssistant} / ${assistantTurns} 个助手回合。`;
    bestPracticeWhat = `规则版:1 - (用户提示"应该用 X"的比例) = 1 - ${bestPracticeHits} / ${assistantTurns}。开启 LLM judge 可获得真实 LLM 评估。`;
  } else {
    domainErrorsWhat = '规则版:消息文本缺失或无 AI 回合,无法评估(未启用 LLM judge)。';
    bestPracticeWhat = '规则版:消息文本缺失或无 AI 回合,使用默认估算。';
  }

  let stalenessWhat;
  if (haveText) {
    stalenessWhat = `规则版:用户消息中含过时/废弃关键词命中 ${stalenessHits} 次。`;
  } else {
    stalenessWhat = '规则版:消息文本缺失,无法识别过时引用。';
  }

  const domain_errors = makeEvidence({
    key: 'domain_errors',
    label: '领域错误率',
    what: domainErrorsWhat,
    expl: de,
    unit: '%',
    difficulty,
  });
  const staleness = makeEvidence({
    key: 'staleness',
    label: '知识时效性',
    what: stalenessWhat,
    expl: st,
    unit: '次',
    difficulty,
  });
  const best_practice = makeEvidence({
    key: 'best_practice',
    label: '最佳实践采纳率',
    what: bestPracticeWhat,
    expl: bp,
    unit: '%',
    difficulty,
  });

  return { domain_errors, staleness, best_practice };
}
171
+
172
+ module.exports = { analyze, analyzeRules };