agentboss 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +34 -0
  2. package/bin/aboss.js +288 -0
  3. package/client/dist/assets/index-C1wFD_Vo.css +1 -0
  4. package/client/dist/assets/index-DBj1Ujlx.js +137 -0
  5. package/client/dist/index.html +34 -0
  6. package/package.json +64 -0
  7. package/server/analysis/daily-aggregator.js +258 -0
  8. package/server/analysis/difficulty.js +129 -0
  9. package/server/analysis/dimensions/ai-knowledge.js +172 -0
  10. package/server/analysis/dimensions/ai-tools.js +161 -0
  11. package/server/analysis/dimensions/judgement.js +107 -0
  12. package/server/analysis/dimensions/llm-merge.js +57 -0
  13. package/server/analysis/dimensions/output-quality.js +167 -0
  14. package/server/analysis/dimensions/problem-definition.js +104 -0
  15. package/server/analysis/dimensions/system-thinking.js +225 -0
  16. package/server/analysis/evidence-builder.js +104 -0
  17. package/server/analysis/job.js +273 -0
  18. package/server/analysis/report-builder.js +581 -0
  19. package/server/analysis/scoring-v2.js +72 -0
  20. package/server/analysis/text-signals.js +179 -0
  21. package/server/analysis/thresholds-v2.js +358 -0
  22. package/server/api/advice.js +124 -0
  23. package/server/api/analysis.js +141 -0
  24. package/server/api/execution.js +330 -0
  25. package/server/api/metrics.js +277 -0
  26. package/server/api/overview.js +308 -0
  27. package/server/api/project.js +255 -0
  28. package/server/api/reports.js +125 -0
  29. package/server/api/sessions.js +118 -0
  30. package/server/api/settings.js +119 -0
  31. package/server/db/connection.js +175 -0
  32. package/server/db/queries.js +1051 -0
  33. package/server/db/schema.js +487 -0
  34. package/server/etl/active-time.js +150 -0
  35. package/server/etl/backfill-subagents.js +178 -0
  36. package/server/etl/claude-code.js +826 -0
  37. package/server/etl/detect.js +341 -0
  38. package/server/etl/judge-filter.js +117 -0
  39. package/server/etl/opencode.js +606 -0
  40. package/server/execution/job.js +662 -0
  41. package/server/execution/prompt.js +227 -0
  42. package/server/execution/runner.js +218 -0
  43. package/server/index.js +94 -0
  44. package/server/llm/advice-prompt.js +339 -0
  45. package/server/llm/advice.js +384 -0
  46. package/server/llm/analysis-prompt.js +162 -0
  47. package/server/llm/cli-runner.js +249 -0
  48. package/server/llm/judge-prompts.js +179 -0
  49. package/server/llm/judge.js +118 -0
  50. package/server/llm/project-advice-prompt.js +332 -0
  51. package/server/llm/project-advice.js +491 -0
  52. package/server/llm/session-analyzer.js +122 -0
  53. package/server/utils/project.js +80 -0
@@ -0,0 +1,581 @@
1
+ /**
2
+ * Report builder for Agent Boss API responses.
3
+ *
4
+ * Assembles structured report payloads by fetching v2 dimension scores,
5
+ * daily summaries, and session lists from boss.db.
6
+ *
7
+ * @author Felix
8
+ */
9
+
10
+ const {
11
+ queryAll,
12
+ getSessionsByDateRange,
13
+ getSessionById,
14
+ getAnalysisBySession,
15
+ getDailySummaries,
16
+ getAnalysisState,
17
+ getOverviewTopProjects,
18
+ } = require('../db/queries');
19
+ const { mapTopProjects } = require('../utils/project');
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // Date helpers
23
+ // ---------------------------------------------------------------------------
24
+
25
/**
 * Render a Date as a `YYYY-MM-DD` string.
 *
 * Uses the local-time getters (getFullYear / getMonth / getDate), so the
 * result reflects the process time zone rather than UTC.
 *
 * @param {Date} d
 * @returns {string}
 */
function _fmt(d) {
  const pad2 = (n) => String(n).padStart(2, '0');
  // getMonth() is zero-based, hence the + 1.
  return [d.getFullYear(), pad2(d.getMonth() + 1), pad2(d.getDate())].join('-');
}
36
+
37
/**
 * Calendar date of the day before "now", formatted as YYYY-MM-DD.
 *
 * @returns {string}
 */
function _yesterday() {
  const cursor = new Date();
  // setDate() normalises out-of-range values, so day 0 rolls back into the
  // previous month (and year, when needed).
  cursor.setDate(cursor.getDate() - 1);
  return _fmt(cursor);
}
46
+
47
/**
 * Monday of the week that contains the given date (weeks run Mon–Sun),
 * formatted as YYYY-MM-DD. Falls back to the current date when no
 * argument is supplied.
 *
 * @param {Date} [d]
 * @returns {string}
 */
function _mondayOf(d) {
  const cursor = d ? new Date(d) : new Date();
  // getDay() yields 0=Sun … 6=Sat; rotating by 6 (mod 7) gives the number of
  // days elapsed since Monday (Mon → 0, Sun → 6).
  const sinceMonday = (cursor.getDay() + 6) % 7;
  cursor.setDate(cursor.getDate() - sinceMonday);
  return _fmt(cursor);
}
60
+
61
/**
 * Calendar date `n` days before "now", formatted as YYYY-MM-DD.
 *
 * @param {number} n Number of days to step back.
 * @returns {string}
 */
function _daysAgo(n) {
  const cursor = new Date();
  // setDate() normalises underflow across month and year boundaries.
  cursor.setDate(cursor.getDate() - n);
  return _fmt(cursor);
}
71
+
72
+ // ---------------------------------------------------------------------------
73
+ // Shared helpers
74
+ // ---------------------------------------------------------------------------
75
+
76
/**
 * Compute the current v2 dimension scores for a date range.
 *
 * - H1/H2/E1/E2/O1 are averaged (rounded to one decimal) over every session
 *   in the range whose analysis row has status 'done'. A session whose score
 *   for a dimension is null/undefined is left out of that dimension's mean.
 * - H3 is taken from the system-thinking range analyzer over the same window;
 *   it is null when the analyzer throws or returns no score.
 * - ENV ("AI 能力环境诊断") is the E1/E2 composite produced by envScore().
 *
 * @param {object} db
 * @param {string} fromDate
 * @param {string} toDate
 * @returns {{ H1:number|null, H2:number|null, H3:number|null, E1:number|null, E2:number|null, O1:number|null, ENV:number|null }}
 */
function getCurrentDimensionsV2(db, fromDate, toDate) {
  const sessions = getSessionsByDateRange(db, fromDate, toDate, undefined, 10000, 0);

  // Dimension key → analysis column holding the per-session score.
  const scoreColumns = [
    ['H1', 'score_h1'],
    ['H2', 'score_h2'],
    ['E1', 'score_e1'],
    ['E2', 'score_e2'],
    ['O1', 'score_o1'],
  ];

  // Running sum (s) and sample count (n) per dimension.
  const acc = {};
  for (const [key] of scoreColumns) {
    acc[key] = { s: 0, n: 0 };
  }

  for (const session of sessions) {
    const analysis = getAnalysisBySession(db, session.id);
    if (!analysis || analysis.status !== 'done') continue;
    for (const [key, column] of scoreColumns) {
      if (analysis[column] != null) {
        acc[key].s += analysis[column];
        acc[key].n++;
      }
    }
  }

  const result = {};
  for (const [key, { s, n }] of Object.entries(acc)) {
    result[key] = n > 0 ? Math.round((s / n) * 10) / 10 : null;
  }

  // H3 is a rolling aggregate — compute on demand for this window. The
  // analyzer is best-effort: a failure degrades H3 to null instead of failing
  // the whole report, but it is logged so the problem is not invisible.
  let h3 = null;
  try {
    const { analyzeRange } = require('./dimensions/system-thinking');
    const range = analyzeRange(db, fromDate, toDate);
    h3 = range.score != null ? range.score : null;
  } catch (err) {
    console.warn('[report-builder] H3 range analysis failed:', err && err.message);
    h3 = null;
  }
  result.H3 = h3;

  // ENV uses the same combination rule as the per-session composite: the
  // rounded mean when both sides exist, the lone side when only one exists,
  // null when neither does.
  result.ENV = envScore(result.E1, result.E2);

  return result;
}
126
+
127
/**
 * Fuse E1 and E2 into the v2.1 "AI 能力环境诊断" composite for one session.
 *
 * - both missing  → null
 * - one missing   → the other value, returned unchanged
 * - both present  → their mean, rounded to one decimal place
 *
 * @param {number|null} e1
 * @param {number|null} e2
 * @returns {number|null}
 */
function envScore(e1, e2) {
  // `!= null` treats undefined the same as null.
  const present = [e1, e2].filter((value) => value != null);

  if (present.length === 0) {
    return null;
  }
  if (present.length === 1) {
    return present[0];
  }

  const mean = (present[0] + present[1]) / 2;
  return Math.round(mean * 10) / 10;
}
141
+
142
/**
 * Bucket a score into levels 1–4 using fixed cut-offs:
 * ≥85 → 4, ≥65 → 3, ≥40 → 2, anything lower → 1.
 *
 * Per the original note this mirrors thresholds-v2#scoreToLevel and is
 * duplicated here so the report builder does not depend on that module.
 *
 * @param {number|null} score
 * @returns {1|2|3|4|null} null when the score is null/undefined
 */
function levelFromScore(score) {
  if (score == null) {
    return null;
  }

  // Ordered from the highest band down; the first floor the score reaches wins.
  const bands = [
    { floor: 85, level: 4 },
    { floor: 65, level: 3 },
    { floor: 40, level: 2 },
  ];
  const hit = bands.find((band) => score >= band.floor);
  return hit ? hit.level : 1;
}
158
+
159
/**
 * Aggregate the daily summary rows of a date range into one stats object.
 *
 * Totals are plain sums over the rows (missing fields count as 0); token
 * totals add input, output and reasoning tokens. Per-session averages are 0
 * when the range contains no sessions.
 *
 * @param {object} db
 * @param {string} fromDate
 * @param {string} toDate
 * @returns {object}
 */
function buildStats(db, fromDate, toDate) {
  const round2 = (value) => Math.round(value * 100) / 100;

  const totals = getDailySummaries(db, fromDate, toDate).reduce(
    (sum, row) => {
      sum.sessions += row.session_count || 0;
      sum.cost += row.cost_usd || 0;
      sum.activeMinutes += row.active_minutes || 0;
      sum.totalTokens +=
        (row.tokens_input || 0) + (row.tokens_output || 0) + (row.tokens_reasoning || 0);
      sum.errors += row.error_count || 0;
      return sum;
    },
    { sessions: 0, cost: 0, activeMinutes: 0, totalTokens: 0, errors: 0 }
  );

  const { sessions, cost, activeMinutes, totalTokens, errors } = totals;
  const hasSessions = sessions > 0;

  return {
    sessions,
    cost: round2(cost),
    activeMinutes,
    totalTokens,
    errors,
    // Averages are taken from the unrounded totals, then rounded.
    avgCost: hasSessions ? round2(cost / sessions) : 0,
    avgActiveMinutes: hasSessions ? Math.round(activeMinutes / sessions) : 0,
    avgTokens: hasSessions ? Math.round(totalTokens / sessions) : 0,
    avgErrors: hasSessions ? round2(errors / sessions) : 0,
  };
}
195
+
196
/**
 * Build the session list for a date range, one entry per top-level session,
 * each carrying its per-session scores and basic metadata.
 *
 * Rows with a parent_session_id (subagents) are left out so the
 * "会话列表" UI shows top-level work only. Per the original note, aggregate
 * figures on the same page come from other queries that still include them.
 *
 * @param {object} db
 * @param {string} fromDate
 * @param {string} toDate
 * @returns {object[]}
 */
function buildSessionList(db, fromDate, toDate) {
  const toEntry = (row) => {
    const analysis = getAnalysisBySession(db, row.id);
    return {
      id: row.id,
      title: row.title || '(untitled)',
      source: row.source,
      // date/startedAt let the UI group sessions by day (weekly report)
      date: row.date,
      startedAt: row.started_at,
      cost: Math.round((row.cost_usd || 0) * 100) / 100,
      duration: row.duration_minutes || 0,
      // v2 main-axis scores. ENV is pre-combined from E1 + E2 here via
      // envScore(), so E1 / E2 do not need columns of their own.
      scoreH1: analysis ? analysis.score_h1 : null,
      scoreH2: analysis ? analysis.score_h2 : null,
      scoreH3: analysis ? analysis.score_h3 : null,
      scoreEnv: analysis ? envScore(analysis.score_e1, analysis.score_e2) : null,
      scoreO1: analysis ? analysis.score_o1 : null,
      // No analysis row yet → reported as 'pending'.
      status: analysis ? analysis.status : 'pending',
    };
  };

  return getSessionsByDateRange(db, fromDate, toDate, undefined, 10000, 0)
    .filter((row) => !row.parent_session_id) // skip subagents
    .map(toEntry);
}
240
+
241
/**
 * Summarise analysis progress for a date range.
 *
 * `status` comes from the analysis_state row ('idle' when there is none);
 * `analyzedCount` is the number of sessions in the range whose analysis row
 * has status 'done'; `totalCount` is the number of sessions in the range.
 *
 * @param {object} db
 * @param {string} fromDate
 * @param {string} toDate
 * @returns {{ status: string, analyzedCount: number, totalCount: number }}
 */
function buildAnalysisStatus(db, fromDate, toDate) {
  const state = getAnalysisState(db);
  const sessions = getSessionsByDateRange(db, fromDate, toDate, undefined, 10000, 0);

  const isAnalyzed = (row) => {
    const analysis = getAnalysisBySession(db, row.id);
    return Boolean(analysis) && analysis.status === 'done';
  };

  return {
    status: state ? state.status : 'idle',
    analyzedCount: sessions.filter(isAnalyzed).length,
    totalCount: sessions.length,
  };
}
264
+
265
+ // ---------------------------------------------------------------------------
266
+ // Public API
267
+ // ---------------------------------------------------------------------------
268
+
269
/**
 * Assemble the report payload for yesterday (a single-day range).
 *
 * @param {object} db sql.js Database instance
 * @returns {object} Report payload for the API
 */
function buildYesterdayReport(db) {
  // A one-day window: the same date is both the lower and the upper bound.
  const date = _yesterday();

  // Fetch a larger candidate pool than is finally shown so that collapsing
  // canonical-key duplicates still leaves enough rows for the top 5.
  const CANDIDATE_POOL = 40;
  const TOP_N = 5;

  const stats = buildStats(db, date, date);
  const currentV2 = getCurrentDimensionsV2(db, date, date); // H1/H2/H3/E1/E2/O1
  const sessions = buildSessionList(db, date, date);
  const topProjects = mapTopProjects(
    getOverviewTopProjects(db, date, date, CANDIDATE_POOL),
    TOP_N
  );
  const analysisStatus = buildAnalysisStatus(db, date, date);

  return {
    date,
    stats,
    dimensionsV2: { current: currentV2 },
    sessions,
    topProjects,
    analysisStatus,
  };
}
305
+
306
/**
 * Assemble the report payload for a seven-day window starting at `weekStart`.
 *
 * @param {object} db
 * @param {string} [weekStart] ISO date of Monday, defaults to current week
 * @returns {object}
 */
function buildWeeklyReport(db, weekStart) {
  const monday = weekStart || _mondayOf();

  // The window ends six days after its start. The explicit T00:00:00 suffix
  // (no zone designator) makes the string parse as local time.
  const weekEndDate = new Date(`${monday}T00:00:00`);
  weekEndDate.setDate(weekEndDate.getDate() + 6);
  const sunday = _fmt(weekEndDate);

  const toDailyEntry = (row) => ({
    date: row.date,
    sessions: row.session_count || 0,
    cost: Math.round((row.cost_usd || 0) * 100) / 100,
    activeMinutes: row.active_minutes || 0,
    errors: row.error_count || 0,
  });

  const stats = buildStats(db, monday, sunday);
  // H1..O1 over the week, H3 over the same window.
  const currentV2 = getCurrentDimensionsV2(db, monday, sunday);
  const sessions = buildSessionList(db, monday, sunday);
  // One entry per daily summary row.
  const dailyBreakdown = getDailySummaries(db, monday, sunday).map(toDailyEntry);
  // Top projects within the week: 40 candidates collapsed to the top 5.
  const topProjects = mapTopProjects(getOverviewTopProjects(db, monday, sunday, 40), 5);
  const analysisStatus = buildAnalysisStatus(db, monday, sunday);

  return {
    weekStart: monday,
    weekEnd: sunday,
    stats,
    dimensionsV2: { current: currentV2 },
    sessions,
    topProjects,
    dailyBreakdown,
    analysisStatus,
  };
}
354
+
355
/**
 * Parse a JSON text column.
 * @param {string|null|undefined} raw
 * @returns {*} the parsed value, or null when the column is empty or malformed
 */
function _parseJsonColumn(raw) {
  if (!raw) return null;
  try {
    return JSON.parse(raw);
  } catch (_e) {
    // A malformed cached payload is treated the same as "never generated".
    return null;
  }
}

/**
 * Mean `content_length` over a list of message rows, rounded to an integer.
 * @param {object[]} rows
 * @returns {number} 0 for an empty list
 */
function _avgContentLength(rows) {
  if (rows.length === 0) return 0;
  const total = rows.reduce((sum, m) => sum + (m.content_length || 0), 0);
  return Math.round(total / rows.length);
}

/**
 * Build session detail data: session metadata, v2 scores and levels, cached
 * LLM payloads, a per-tool call breakdown with sample invocations, and a
 * chronological transcript that interleaves messages and tool calls.
 *
 * @param {object} db
 * @param {string} sessionId
 * @returns {object|null} null if session not found
 */
function buildSessionDetail(db, sessionId) {
  const session = getSessionById(db, sessionId);
  if (!session) return null;

  const analysis = getAnalysisBySession(db, sessionId);

  // v2: sub_scores_v2 bundles sub-scores, sub-levels and sub-evidence.
  const subPayload = analysis ? _parseJsonColumn(analysis.sub_scores_v2) : null;
  const subScoresV2 = (subPayload && subPayload.subScores) || null;
  const subLevelsV2 = (subPayload && subPayload.subLevels) || null;
  const subEvidenceV2 = (subPayload && subPayload.subEvidence) || null;

  // v2: LLM judge payload — raw 0..1 values, per-field scoring evidence
  // (details) and the one-line rationale, keyed e1/o1.
  const llmJudgeV2 = analysis ? _parseJsonColumn(analysis.llm_judge_v2) : null;

  // Per-session AI advice (see server/llm/advice.js). Bundled into the
  // session-detail response so the SPA renders cached suggestions on the
  // first paint without a second round-trip. null if never generated.
  const advice = analysis ? _parseJsonColumn(analysis.llm_advice) : null;

  // Tool call breakdown
  const toolBreakdown = queryAll(
    db,
    `SELECT tool_name, COUNT(*) AS total,
            SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS errors
     FROM unified_tool_call
     WHERE session_id = ?
     GROUP BY tool_name
     ORDER BY total DESC`,
    [sessionId]
  );

  // Per-tool sample invocations (up to N most recent) for UI hover details.
  // We fetch a flat list ordered by tool then time-desc, then bucket in JS so
  // we don't issue one query per tool.
  const SAMPLE_LIMIT_PER_TOOL = 5;
  const allCalls = queryAll(
    db,
    `SELECT tool_name, status, error_message, target_file, timestamp
     FROM unified_tool_call
     WHERE session_id = ?
     ORDER BY tool_name ASC, timestamp DESC`,
    [sessionId]
  );

  // A Map rather than a plain object: tool names come from session data, and
  // a name such as "constructor" or "toString" would otherwise resolve to an
  // inherited Object.prototype member and break the bucket lookup.
  const samplesByTool = new Map();
  for (const call of allCalls) {
    let bucket = samplesByTool.get(call.tool_name);
    if (!bucket) {
      bucket = [];
      samplesByTool.set(call.tool_name, bucket);
    }
    if (bucket.length < SAMPLE_LIMIT_PER_TOOL) {
      bucket.push({
        timestamp: call.timestamp,
        status: call.status,
        errorMessage: call.error_message,
        targetFile: call.target_file,
      });
    }
  }

  // Message timeline summary + full transcript (for the "原始对话" panel).
  // `text` is the 4KB-capped payload the ETL writes; tool calls live in
  // unified_tool_call and we mix them into the same timeline below.
  const messages = queryAll(
    db,
    `SELECT id, role, content_length, is_error, timestamp, text, model_id, tokens_input, tokens_output
     FROM unified_message
     WHERE session_id = ?
     ORDER BY timestamp`,
    [sessionId]
  );

  const userMessages = messages.filter((m) => m.role === 'user');
  const assistantMessages = messages.filter((m) => m.role === 'assistant');

  // Interleave messages and tool calls by timestamp so the transcript
  // reads in true chronological order.
  const toolCallsForTimeline = queryAll(
    db,
    `SELECT id, tool_name, status, error_message, target_file, timestamp
     FROM unified_tool_call
     WHERE session_id = ?
     ORDER BY timestamp`,
    [sessionId]
  );

  const byTimestamp = (a, b) => {
    if (a.timestamp < b.timestamp) return -1;
    if (a.timestamp > b.timestamp) return 1;
    return 0; // equal timestamps keep their relative order (sort is stable)
  };

  const transcript = [
    ...messages.map((m) => ({
      kind: 'message',
      id: m.id,
      role: m.role,
      text: m.text || '',
      isError: !!m.is_error,
      timestamp: m.timestamp,
      contentLength: m.content_length || 0,
      modelId: m.model_id || null,
      tokensInput: m.tokens_input || 0,
      tokensOutput: m.tokens_output || 0,
    })),
    ...toolCallsForTimeline.map((t) => ({
      kind: 'tool',
      id: t.id,
      tool: t.tool_name,
      status: t.status,
      errorMessage: t.error_message,
      targetFile: t.target_file,
      timestamp: t.timestamp,
    })),
  ].sort(byTimestamp);

  // ENV is a *display* dimension that fuses E1 + E2 into "AI 能力环境诊断"
  // so the UI can show 5 axes instead of 6. E1 / E2 stay in the payload for
  // callers who want the raw breakdown.
  const env = analysis ? envScore(analysis.score_e1, analysis.score_e2) : null;

  return {
    id: session.id,
    source: session.source,
    date: session.date,
    title: session.title || '(untitled)',
    project: session.project,
    model: session.model,
    startedAt: session.started_at,
    endedAt: session.ended_at,
    durationMinutes: session.duration_minutes || 0,
    activeMinutes: session.active_minutes || 0,
    cost: Math.round((session.cost_usd || 0) * 100) / 100,
    tokens: {
      input: session.tokens_input || 0,
      output: session.tokens_output || 0,
      reasoning: session.tokens_reasoning || 0,
      cacheRead: session.tokens_cache_read || 0,
      cacheWrite: session.tokens_cache_write || 0,
    },
    messageCount: session.message_count || 0,
    errorCount: session.error_count || 0,
    toolCallCount: session.tool_call_count || 0,
    reverted: !!session.reverted,
    summary: {
      additions: session.summary_additions || 0,
      deletions: session.summary_deletions || 0,
      files: session.summary_files || 0,
    },
    dimensionsV2: analysis
      ? {
          H1: analysis.score_h1,
          H2: analysis.score_h2,
          H3: analysis.score_h3,
          E1: analysis.score_e1,
          E2: analysis.score_e2,
          ENV: env,
          O1: analysis.score_o1,
        }
      : null,
    levelsV2: analysis
      ? {
          H1: analysis.level_h1,
          H2: analysis.level_h2,
          H3: analysis.level_h3,
          E1: analysis.level_e1,
          E2: analysis.level_e2,
          // ENV level derived from its score band (≥85 L4, ≥65 L3, ≥40 L2)
          ENV: levelFromScore(env),
          O1: analysis.level_o1,
        }
      : null,
    subScoresV2,
    subLevelsV2,
    subEvidenceV2,
    llmJudgeV2,
    advice,
    difficulty: analysis ? analysis.difficulty : null,
    judgeSource: analysis ? analysis.judge_source : null,
    analysisStatus: analysis ? analysis.status : 'pending',
    toolBreakdown: toolBreakdown.map((t) => ({
      tool: t.tool_name,
      total: t.total,
      errors: t.errors,
      samples: samplesByTool.get(t.tool_name) || [],
    })),
    transcript,
    messageSummary: {
      userCount: userMessages.length,
      assistantCount: assistantMessages.length,
      avgUserLength: _avgContentLength(userMessages),
      avgAssistantLength: _avgContentLength(assistantMessages),
      errorMessages: messages.filter((m) => m.is_error).length,
    },
  };
}
575
+
576
+ module.exports = {
577
+ buildYesterdayReport,
578
+ buildWeeklyReport,
579
+ buildSessionDetail,
580
+ getCurrentDimensionsV2,
581
+ };
@@ -0,0 +1,72 @@
1
+ 'use strict';
2
+
3
+ const { classifySession } = require('./difficulty');
4
+ const problemDef = require('./dimensions/problem-definition');
5
+ const judgement = require('./dimensions/judgement');
6
+ const aiTools = require('./dimensions/ai-tools');
7
+ const aiKnowledge = require('./dimensions/ai-knowledge');
8
+ const outputQuality = require('./dimensions/output-quality');
9
+ const { analyzeSessionLLM } = require('../llm/session-analyzer');
10
+ const { dimensionSource } = require('./dimensions/llm-merge');
11
+ const { rollupDimension, scoreToLevel } = require('./thresholds-v2');
12
+
13
/**
 * Score a single session through the v2.1 dimensions.
 *
 * ONE consolidated LLM call per session returns BOTH the dimension scores
 * AND the collaboration advice. All difficulties (including trivial) are
 * assessed; per the original note the prompt is difficulty-conditioned so
 * short sessions are graded on an easier curve.
 *
 * Every scorer runs through safe(), so a throwing dimension contributes null
 * rather than failing the whole analysis. A failing LLM call is likewise
 * downgraded to "no LLM result", and is logged.
 *
 * @param {object} db
 * @param {object} session
 * @param {object} [opts] { force?: boolean } bypass the analyzer cache
 * @returns {Promise<object>} the result object built by assemble()
 */
async function analyzeSessionV2(db, session, opts = {}) {
  const difficulty = classifySession(session);

  // E2 (tool coverage) is structural — always computed.
  const e2 = safe(() => aiTools.analyze(db, session, difficulty.bucket));

  // One consolidated LLM call → { scores, advice } (null if disabled/failed).
  // `opts != null` guards against an explicit null, which the `= {}` default
  // does not cover.
  const force = opts != null && opts.force === true;
  let llm = null;
  try {
    llm = await analyzeSessionLLM(db, session, { force });
  } catch (err) {
    // Best-effort: continue without LLM cells, but surface the failure the
    // same way safe() surfaces dimension errors.
    console.warn('[scoring-v2] LLM analysis error:', err && err.message);
    llm = null;
  }
  const cells = (llm && llm.scores) || {};

  const h1 = safe(() => problemDef.analyze(db, session, difficulty.bucket, cells.H1));
  const h2 = safe(() => judgement.analyze(db, session, difficulty.bucket, cells.H2));
  const e1 = safe(() => aiKnowledge.analyze(db, session, difficulty.bucket, cells.E1));
  const o1 = safe(() => outputQuality.analyze(db, session, difficulty.bucket, cells.O1));

  // E2 takes no LLM cell, so it is not part of the judge-source summary.
  const judgeSrc = dimensionSource([h1, h2, e1, o1].map((r) => r && r.judgeSource));
  return assemble(difficulty, { h1, h2, e1, e2, o1 }, llm, judgeSrc);
}
44
+
45
/**
 * Build the standard result object from the per-dimension scorer outputs.
 *
 * Each of subScores / subLevels / subEvidence / scores / levels is keyed
 * H1, H2, H3, E1, E2, O1. H3 has no scorer input here, so its slot is always
 * null. A missing (null) scorer result yields null for every field of that
 * dimension.
 *
 * @param {object} difficulty
 * @param {{h1:?object, h2:?object, e1:?object, e2:?object, o1:?object}} dims
 * @param {?object} llm consolidated analyzer payload, if any
 * @param {*} judgeSource
 * @returns {object}
 */
function assemble(difficulty, dims, llm, judgeSource) {
  const { h1, h2, e1, e2, o1 } = dims;
  const read = (result, field) => (result ? result[field] : null);

  // Collect one field across all dimensions, keeping the canonical key order.
  const gather = (field) => ({
    H1: read(h1, field),
    H2: read(h2, field),
    H3: null,
    E1: read(e1, field),
    E2: read(e2, field),
    O1: read(o1, field),
  });

  const subScores = gather('subScores');
  const subLevels = gather('subLevels');
  const subEvidence = gather('subEvidence');
  const scores = gather('score');

  // Levels are derived from the rolled-up scores rather than read from the
  // scorer output.
  const levels = {};
  for (const key of ['H1', 'H2', 'H3', 'E1', 'E2', 'O1']) {
    levels[key] = key === 'H3' ? null : scoreToLevel(scores[key]);
  }

  const hasAdvice = Boolean(llm && llm.advice);

  return {
    difficulty,
    subScores,
    subLevels,
    subEvidence,
    scores,
    levels,
    judgeSource,
    // Full analyzer payload (scores+advice) cached verbatim in llm_judge_v2.
    llmJudge: llm || null,
    // Raw advice + the meta the advice normaliser needs (job persists to llm_advice).
    llmAdvice: hasAdvice ? llm.advice : null,
    llmAdviceMeta: llm ? { msgCount: llm.msgCount, cli: llm.cli } : null,
  };
}
66
+
67
/**
 * Run a scorer callback, converting a thrown error into a logged warning.
 *
 * @param {function(): *} fn
 * @returns {*} whatever `fn` returns, or null if it threw
 */
function safe(fn) {
  let outcome = null;
  try {
    outcome = fn();
  } catch (err) {
    console.warn('[scoring-v2] dimension error:', err.message);
    outcome = null;
  }
  return outcome;
}
71
+
72
+ module.exports = { analyzeSessionV2, rollupDimension };