agentboss 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/bin/aboss.js +288 -0
- package/client/dist/assets/index-C1wFD_Vo.css +1 -0
- package/client/dist/assets/index-DBj1Ujlx.js +137 -0
- package/client/dist/index.html +34 -0
- package/package.json +64 -0
- package/server/analysis/daily-aggregator.js +258 -0
- package/server/analysis/difficulty.js +129 -0
- package/server/analysis/dimensions/ai-knowledge.js +172 -0
- package/server/analysis/dimensions/ai-tools.js +161 -0
- package/server/analysis/dimensions/judgement.js +107 -0
- package/server/analysis/dimensions/llm-merge.js +57 -0
- package/server/analysis/dimensions/output-quality.js +167 -0
- package/server/analysis/dimensions/problem-definition.js +104 -0
- package/server/analysis/dimensions/system-thinking.js +225 -0
- package/server/analysis/evidence-builder.js +104 -0
- package/server/analysis/job.js +273 -0
- package/server/analysis/report-builder.js +581 -0
- package/server/analysis/scoring-v2.js +72 -0
- package/server/analysis/text-signals.js +179 -0
- package/server/analysis/thresholds-v2.js +358 -0
- package/server/api/advice.js +124 -0
- package/server/api/analysis.js +141 -0
- package/server/api/execution.js +330 -0
- package/server/api/metrics.js +277 -0
- package/server/api/overview.js +308 -0
- package/server/api/project.js +255 -0
- package/server/api/reports.js +125 -0
- package/server/api/sessions.js +118 -0
- package/server/api/settings.js +119 -0
- package/server/db/connection.js +175 -0
- package/server/db/queries.js +1051 -0
- package/server/db/schema.js +487 -0
- package/server/etl/active-time.js +150 -0
- package/server/etl/backfill-subagents.js +178 -0
- package/server/etl/claude-code.js +826 -0
- package/server/etl/detect.js +341 -0
- package/server/etl/judge-filter.js +117 -0
- package/server/etl/opencode.js +606 -0
- package/server/execution/job.js +662 -0
- package/server/execution/prompt.js +227 -0
- package/server/execution/runner.js +218 -0
- package/server/index.js +94 -0
- package/server/llm/advice-prompt.js +339 -0
- package/server/llm/advice.js +384 -0
- package/server/llm/analysis-prompt.js +162 -0
- package/server/llm/cli-runner.js +249 -0
- package/server/llm/judge-prompts.js +179 -0
- package/server/llm/judge.js +118 -0
- package/server/llm/project-advice-prompt.js +332 -0
- package/server/llm/project-advice.js +491 -0
- package/server/llm/session-analyzer.js +122 -0
- package/server/utils/project.js +80 -0
|
@@ -0,0 +1,581 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Report builder for Agent Boss API responses.
|
|
3
|
+
*
|
|
4
|
+
* Assembles structured report payloads by fetching v2 dimension scores,
|
|
5
|
+
* daily summaries, and session lists from boss.db.
|
|
6
|
+
*
|
|
7
|
+
* @author Felix
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
const {
|
|
11
|
+
queryAll,
|
|
12
|
+
getSessionsByDateRange,
|
|
13
|
+
getSessionById,
|
|
14
|
+
getAnalysisBySession,
|
|
15
|
+
getDailySummaries,
|
|
16
|
+
getAnalysisState,
|
|
17
|
+
getOverviewTopProjects,
|
|
18
|
+
} = require('../db/queries');
|
|
19
|
+
const { mapTopProjects } = require('../utils/project');
|
|
20
|
+
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Date helpers
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
/**
 * Render a Date as a zero-padded `YYYY-MM-DD` string, using the date's
 * local-time calendar fields.
 * @param {Date} d
 * @returns {string}
 */
function _fmt(d) {
  const pad2 = (value) => String(value).padStart(2, '0');
  return [d.getFullYear(), pad2(d.getMonth() + 1), pad2(d.getDate())].join('-');
}
|
|
36
|
+
|
|
37
|
+
/**
 * Local calendar date one day before "now", formatted as YYYY-MM-DD.
 * @returns {string}
 */
function _yesterday() {
  const cursor = new Date();
  // setDate() rolls over month/year boundaries for us.
  cursor.setDate(cursor.getDate() - 1);
  return _fmt(cursor);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Return the Monday (as YYYY-MM-DD) of the week that contains `d`, or of
 * the current week when `d` is omitted. The input is copied, never mutated.
 *
 * NOTE(review): `d` is passed straight to `new Date(d)`; a date-only string
 * would be parsed as UTC by the Date constructor, so callers should pass a
 * Date object — confirm against callers.
 *
 * @param {Date} [d]
 * @returns {string}
 */
function _mondayOf(d) {
  const cursor = d ? new Date(d) : new Date();
  // getDay(): 0 = Sunday … 6 = Saturday. Shift so Monday = 0 … Sunday = 6.
  const daysSinceMonday = (cursor.getDay() + 6) % 7;
  cursor.setDate(cursor.getDate() - daysSinceMonday);
  return _fmt(cursor);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Local calendar date `n` days before "now", formatted as YYYY-MM-DD.
 * @param {number} n  number of days to step back
 * @returns {string}
 */
function _daysAgo(n) {
  const cursor = new Date();
  // setDate() normalises out-of-range day numbers across month boundaries.
  cursor.setDate(cursor.getDate() - n);
  return _fmt(cursor);
}
|
|
71
|
+
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
// Shared helpers
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
/**
 * Compute the current v2 dimension scores for a date range.
 *
 * H1/H2/E1/E2/O1 are each the mean of the non-null per-session scores over
 * the sessions in the range whose analysis row has status 'done', rounded to
 * one decimal place (null when no session contributed a value). H3 is taken
 * from the system-thinking range analyzer over the same window; if that
 * analyzer cannot be loaded or throws, H3 is null and a warning is logged.
 * ENV is the E1/E2 composite produced by envScore.
 *
 * @param {object} db
 * @param {string} fromDate
 * @param {string} toDate
 * @returns {{ H1:number|null, H2:number|null, H3:number|null, E1:number|null, E2:number|null, O1:number|null, ENV:number|null }}
 */
function getCurrentDimensionsV2(db, fromDate, toDate) {
  const sessions = getSessionsByDateRange(db, fromDate, toDate, undefined, 10000, 0);

  // Dimension key -> analysis column holding that dimension's score.
  const columns = [
    ['H1', 'score_h1'],
    ['H2', 'score_h2'],
    ['E1', 'score_e1'],
    ['E2', 'score_e2'],
    ['O1', 'score_o1'],
  ];

  const acc = {};
  for (const [key] of columns) acc[key] = { s: 0, n: 0 };

  for (const session of sessions) {
    const analysis = getAnalysisBySession(db, session.id);
    if (!analysis || analysis.status !== 'done') continue;
    for (const [key, col] of columns) {
      if (analysis[col] != null) {
        acc[key].s += analysis[col];
        acc[key].n++;
      }
    }
  }

  const result = {};
  for (const [key, { s, n }] of Object.entries(acc)) {
    result[key] = n > 0 ? Math.round((s / n) * 10) / 10 : null;
  }

  // H3 is a rolling aggregate — compute on demand for this window. It is
  // best-effort: a failure must not take the whole report down, but it is
  // logged rather than swallowed so a broken analyzer is visible.
  let h3 = null;
  try {
    const { analyzeRange } = require('./dimensions/system-thinking');
    h3 = analyzeRange(db, fromDate, toDate).score;
  } catch (err) {
    h3 = null;
    console.warn('[report-builder] H3 range analysis failed:', err && err.message);
  }
  result.H3 = h3;

  // ENV = average of E1 (knowledge) and E2 (tools); falls back to whichever
  // side is present, null when neither is. Shared with the per-session path.
  result.ENV = envScore(result.E1, result.E2);

  return result;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Fuse E1 and E2 into the single "AI 能力环境诊断" (ENV) composite score.
 *
 * With both inputs present the result is their mean rounded to one decimal
 * place; with exactly one present that value is returned unchanged; with
 * neither present the result is null.
 *
 * @param {number|null} e1
 * @param {number|null} e2
 * @returns {number|null}
 */
function envScore(e1, e2) {
  const present = [e1, e2].filter((value) => value != null);
  if (present.length === 0) return null;
  if (present.length === 1) return present[0];
  return Math.round(((e1 + e2) / 2) * 10) / 10;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Bucket a score into levels 1-4: ≥85 → 4, ≥65 → 3, ≥40 → 2, anything
 * lower → 1. A null/undefined score has no level and yields null.
 *
 * Per the original note this mirrors thresholds-v2#scoreToLevel and is kept
 * as a local copy so the report builder does not depend on that module.
 *
 * @param {number|null} score
 * @returns {1|2|3|4|null}
 */
function levelFromScore(score) {
  if (score == null) return null;
  // [inclusive lower bound, level], highest band first.
  const bands = [[85, 4], [65, 3], [40, 2]];
  const match = bands.find(([floor]) => score >= floor);
  return match ? match[1] : 1;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Sum the daily_summary rows for a date range into headline totals and
 * per-session averages. Missing/zero columns count as 0; all averages are 0
 * when the range contains no sessions.
 *
 * @param {object} db
 * @param {string} fromDate
 * @param {string} toDate
 * @returns {object} totals (sessions, cost, activeMinutes, totalTokens,
 *   errors) plus avgCost, avgActiveMinutes, avgTokens, avgErrors
 */
function buildStats(db, fromDate, toDate) {
  const totals = { sessions: 0, cost: 0, activeMinutes: 0, totalTokens: 0, errors: 0 };

  for (const row of getDailySummaries(db, fromDate, toDate)) {
    totals.sessions += row.session_count || 0;
    totals.cost += row.cost_usd || 0;
    totals.activeMinutes += row.active_minutes || 0;
    totals.totalTokens +=
      (row.tokens_input || 0) + (row.tokens_output || 0) + (row.tokens_reasoning || 0);
    totals.errors += row.error_count || 0;
  }

  const { sessions, cost, activeMinutes, totalTokens, errors } = totals;
  const hasSessions = sessions > 0;
  // Round to two decimal places.
  const twoDecimals = (value) => Math.round(value * 100) / 100;

  return {
    sessions,
    cost: twoDecimals(cost),
    activeMinutes,
    totalTokens,
    errors,
    avgCost: hasSessions ? twoDecimals(cost / sessions) : 0,
    avgActiveMinutes: hasSessions ? Math.round(activeMinutes / sessions) : 0,
    avgTokens: hasSessions ? Math.round(totalTokens / sessions) : 0,
    avgErrors: hasSessions ? twoDecimals(errors / sessions) : 0,
  };
}
|
|
195
|
+
|
|
196
|
+
/**
 * Build the per-session rows (metadata + v2 scores) for a date range.
 *
 * Sessions with a parent_session_id (subagents) are left out so the
 * "会话列表" UI lists top-level work only. Per the original note, the
 * aggregate figures on the same page come from other queries that still
 * include them — see the parent_session_id column comment in schema.js.
 *
 * @param {object} db
 * @param {string} fromDate
 * @param {string} toDate
 * @returns {object[]}
 */
function buildSessionList(db, fromDate, toDate) {
  return getSessionsByDateRange(db, fromDate, toDate, undefined, 10000, 0)
    .filter((session) => !session.parent_session_id) // drop subagents
    .map((session) => {
      const analysis = getAnalysisBySession(db, session.id);
      // Analysis column value, or null when the session has no analysis row.
      const column = (name) => (analysis ? analysis[name] : null);

      return {
        id: session.id,
        title: session.title || '(untitled)',
        source: session.source,
        // date/startedAt let the UI group sessions by day (weekly report).
        date: session.date,
        startedAt: session.started_at,
        cost: Math.round((session.cost_usd || 0) * 100) / 100,
        duration: session.duration_minutes || 0,
        // v2 main-axis scores. ENV is the E1/E2 composite from envScore, so
        // the row carries one environment column instead of two.
        scoreH1: column('score_h1'),
        scoreH2: column('score_h2'),
        scoreH3: column('score_h3'),
        scoreEnv: analysis ? envScore(analysis.score_e1, analysis.score_e2) : null,
        scoreO1: column('score_o1'),
        status: analysis ? analysis.status : 'pending',
      };
    });
}
|
|
240
|
+
|
|
241
|
+
/**
 * Report analysis progress for a date range: the status from the
 * analysis_state row ('idle' when there is none), how many sessions in the
 * range have an analysis with status 'done', and how many sessions the
 * range contains in total.
 *
 * @param {object} db
 * @param {string} fromDate
 * @param {string} toDate
 * @returns {{ status: string, analyzedCount: number, totalCount: number }}
 */
function buildAnalysisStatus(db, fromDate, toDate) {
  const state = getAnalysisState(db);
  const sessions = getSessionsByDateRange(db, fromDate, toDate, undefined, 10000, 0);

  const analyzedCount = sessions.reduce((count, session) => {
    const analysis = getAnalysisBySession(db, session.id);
    return analysis && analysis.status === 'done' ? count + 1 : count;
  }, 0);

  return {
    status: state ? state.status : 'idle',
    analyzedCount,
    totalCount: sessions.length,
  };
}
|
|
264
|
+
|
|
265
|
+
// ---------------------------------------------------------------------------
|
|
266
|
+
// Public API
|
|
267
|
+
// ---------------------------------------------------------------------------
|
|
268
|
+
|
|
269
|
+
/**
 * Assemble the "yesterday" report: every section is computed over the
 * single-day range [yesterday, yesterday].
 *
 * @param {object} db  sql.js Database instance
 * @returns {object} Report payload for the API: date, stats, dimensionsV2,
 *   sessions, topProjects, analysisStatus
 */
function buildYesterdayReport(db) {
  const date = _yesterday();

  // Object-literal properties are evaluated top to bottom, so the sections
  // are built in the order they are listed here.
  return {
    date,
    stats: buildStats(db, date, date),
    // Dimensions v2 (H1/H2/H3/E1/E2/O1).
    dimensionsV2: { current: getCurrentDimensionsV2(db, date, date) },
    sessions: buildSessionList(db, date, date),
    // Top 5 projects by cost after canonical-key de-duplication; 40 raw
    // candidates are fetched so collapsing duplicates still leaves enough rows.
    topProjects: mapTopProjects(getOverviewTopProjects(db, date, date, 40), 5),
    analysisStatus: buildAnalysisStatus(db, date, date),
  };
}
|
|
305
|
+
|
|
306
|
+
/**
 * Assemble the weekly report for the 7-day window that starts on
 * `weekStart` (the Monday of the current week when omitted) and ends six
 * days later.
 *
 * @param {object} db
 * @param {string} [weekStart] ISO date (YYYY-MM-DD) of the week's Monday
 * @returns {object} weekStart, weekEnd, stats, dimensionsV2, sessions,
 *   topProjects, dailyBreakdown, analysisStatus
 */
function buildWeeklyReport(db, weekStart) {
  const monday = weekStart || _mondayOf();

  // The explicit 'T00:00:00' suffix makes the string parse as local
  // midnight; then step forward six days to reach the end of the week.
  const weekEndDate = new Date(monday + 'T00:00:00');
  weekEndDate.setDate(weekEndDate.getDate() + 6);
  const sunday = _fmt(weekEndDate);

  // One compact entry per daily_summary row.
  const toDailyEntry = (row) => ({
    date: row.date,
    sessions: row.session_count || 0,
    cost: Math.round((row.cost_usd || 0) * 100) / 100,
    activeMinutes: row.active_minutes || 0,
    errors: row.error_count || 0,
  });

  const stats = buildStats(db, monday, sunday);
  // H1..O1 averaged over the week; H3 over the same window.
  const current = getCurrentDimensionsV2(db, monday, sunday);
  const sessions = buildSessionList(db, monday, sunday);
  const dailyBreakdown = getDailySummaries(db, monday, sunday).map((row) => toDailyEntry(row));
  // Top 5 projects by cost after canonical-key de-duplication (40 candidates).
  const topProjects = mapTopProjects(getOverviewTopProjects(db, monday, sunday, 40), 5);
  const analysisStatus = buildAnalysisStatus(db, monday, sunday);

  return {
    weekStart: monday,
    weekEnd: sunday,
    stats,
    dimensionsV2: { current },
    sessions,
    topProjects,
    dailyBreakdown,
    analysisStatus,
  };
}
|
|
354
|
+
|
|
355
|
+
/**
 * Parse a JSON text column. Returns null when the column is empty or does
 * not contain valid JSON (a corrupt cached payload is treated the same as
 * "never generated").
 * @param {string|null|undefined} raw
 * @returns {*} the parsed value, or null
 */
function _parseJsonColumn(raw) {
  if (!raw) return null;
  try {
    return JSON.parse(raw);
  } catch (_e) {
    return null;
  }
}

/**
 * Rounded mean of `content_length` over a list of message rows; 0 for an
 * empty list. Missing lengths count as 0.
 * @param {object[]} rows
 * @returns {number}
 */
function _meanContentLength(rows) {
  if (rows.length === 0) return 0;
  const total = rows.reduce((sum, row) => sum + (row.content_length || 0), 0);
  return Math.round(total / rows.length);
}

/**
 * Build the session-detail payload: session metadata, v2 dimension scores
 * and levels, parsed sub-score / LLM-judge / advice payloads, a per-tool
 * call breakdown with sample invocations, the chronological transcript
 * (messages interleaved with tool calls) and message summary statistics.
 *
 * @param {object} db
 * @param {string} sessionId
 * @returns {object|null} null if session not found
 */
function buildSessionDetail(db, sessionId) {
  const session = getSessionById(db, sessionId);
  if (!session) return null;

  const analysis = getAnalysisBySession(db, sessionId);

  // v2 sub-score payload: { subScores, subLevels, subEvidence }. Anything
  // missing or unparseable degrades to null fields.
  const subPayload = (analysis && _parseJsonColumn(analysis.sub_scores_v2)) || {};

  // LLM judge payload, and the per-session AI advice (see
  // server/llm/advice.js). Advice is bundled into this response so the SPA
  // can render cached suggestions without a second round-trip; null if
  // never generated.
  const llmJudgeV2 = analysis ? _parseJsonColumn(analysis.llm_judge_v2) : null;
  const advice = analysis ? _parseJsonColumn(analysis.llm_advice) : null;

  // ENV is a display dimension that fuses E1 + E2.
  const env = analysis ? envScore(analysis.score_e1, analysis.score_e2) : null;

  // Tool call breakdown: one row per tool with total and error counts.
  const toolBreakdown = queryAll(
    db,
    `SELECT tool_name, COUNT(*) AS total,
            SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS errors
     FROM unified_tool_call
     WHERE session_id = ?
     GROUP BY tool_name
     ORDER BY total DESC`,
    [sessionId]
  );

  // Per-tool sample invocations (up to N most recent) for UI hover details.
  // One flat query ordered by tool then time-desc, bucketed here, instead of
  // one query per tool.
  const SAMPLE_LIMIT_PER_TOOL = 5;
  const recentCalls = queryAll(
    db,
    `SELECT tool_name, status, error_message, target_file, timestamp
     FROM unified_tool_call
     WHERE session_id = ?
     ORDER BY tool_name ASC, timestamp DESC`,
    [sessionId]
  );
  // A Map, not a plain object: tool names come from recorded sessions, and a
  // name such as "constructor" or "toString" would otherwise resolve to an
  // Object.prototype member instead of a sample list.
  const samplesByTool = new Map();
  for (const call of recentCalls) {
    let samples = samplesByTool.get(call.tool_name);
    if (!samples) {
      samples = [];
      samplesByTool.set(call.tool_name, samples);
    }
    if (samples.length >= SAMPLE_LIMIT_PER_TOOL) continue;
    samples.push({
      timestamp: call.timestamp,
      status: call.status,
      errorMessage: call.error_message,
      targetFile: call.target_file,
    });
  }

  // Messages for the summary and the full transcript ("原始对话" panel).
  // Per the original note, `text` is the 4KB-capped payload the ETL writes.
  const messages = queryAll(
    db,
    `SELECT id, role, content_length, is_error, timestamp, text, model_id, tokens_input, tokens_output
     FROM unified_message
     WHERE session_id = ?
     ORDER BY timestamp`,
    [sessionId]
  );
  const userMessages = messages.filter((m) => m.role === 'user');
  const assistantMessages = messages.filter((m) => m.role === 'assistant');

  const toolCallsForTimeline = queryAll(
    db,
    `SELECT id, tool_name, status, error_message, target_file, timestamp
     FROM unified_tool_call
     WHERE session_id = ?
     ORDER BY timestamp`,
    [sessionId]
  );

  const messageEntries = messages.map((m) => ({
    kind: 'message',
    id: m.id,
    role: m.role,
    text: m.text || '',
    isError: !!m.is_error,
    timestamp: m.timestamp,
    contentLength: m.content_length || 0,
    modelId: m.model_id || null,
    tokensInput: m.tokens_input || 0,
    tokensOutput: m.tokens_output || 0,
  }));
  const toolEntries = toolCallsForTimeline.map((t) => ({
    kind: 'tool',
    id: t.id,
    tool: t.tool_name,
    status: t.status,
    errorMessage: t.error_message,
    targetFile: t.target_file,
    timestamp: t.timestamp,
  }));
  // Interleave by timestamp so the transcript reads chronologically. The
  // sort is stable, so a message precedes a tool call with the same stamp.
  const transcript = [...messageEntries, ...toolEntries].sort((a, b) => {
    if (a.timestamp < b.timestamp) return -1;
    if (a.timestamp > b.timestamp) return 1;
    return 0;
  });

  return {
    id: session.id,
    source: session.source,
    date: session.date,
    title: session.title || '(untitled)',
    project: session.project,
    model: session.model,
    startedAt: session.started_at,
    endedAt: session.ended_at,
    durationMinutes: session.duration_minutes || 0,
    activeMinutes: session.active_minutes || 0,
    cost: Math.round((session.cost_usd || 0) * 100) / 100,
    tokens: {
      input: session.tokens_input || 0,
      output: session.tokens_output || 0,
      reasoning: session.tokens_reasoning || 0,
      cacheRead: session.tokens_cache_read || 0,
      cacheWrite: session.tokens_cache_write || 0,
    },
    messageCount: session.message_count || 0,
    errorCount: session.error_count || 0,
    toolCallCount: session.tool_call_count || 0,
    reverted: !!session.reverted,
    summary: {
      additions: session.summary_additions || 0,
      deletions: session.summary_deletions || 0,
      files: session.summary_files || 0,
    },
    // v2 capability model — see docs/superpowers/specs/2026-06-13-…
    // ENV lets the UI show 5 axes instead of 6; E1 / E2 stay in the payload
    // for callers who want the raw breakdown.
    dimensionsV2: analysis
      ? {
          H1: analysis.score_h1,
          H2: analysis.score_h2,
          H3: analysis.score_h3,
          E1: analysis.score_e1,
          E2: analysis.score_e2,
          ENV: env,
          O1: analysis.score_o1,
        }
      : null,
    levelsV2: analysis
      ? {
          H1: analysis.level_h1,
          H2: analysis.level_h2,
          H3: analysis.level_h3,
          E1: analysis.level_e1,
          E2: analysis.level_e2,
          // ENV has no stored level; derive it from the composite score.
          ENV: levelFromScore(env),
          O1: analysis.level_o1,
        }
      : null,
    subScoresV2: subPayload.subScores || null,
    subLevelsV2: subPayload.subLevels || null,
    subEvidenceV2: subPayload.subEvidence || null,
    llmJudgeV2,
    advice,
    difficulty: analysis ? analysis.difficulty : null,
    judgeSource: analysis ? analysis.judge_source : null,
    analysisStatus: analysis ? analysis.status : 'pending',
    toolBreakdown: toolBreakdown.map((t) => ({
      tool: t.tool_name,
      total: t.total,
      errors: t.errors,
      samples: samplesByTool.get(t.tool_name) || [],
    })),
    transcript,
    messageSummary: {
      userCount: userMessages.length,
      assistantCount: assistantMessages.length,
      avgUserLength: _meanContentLength(userMessages),
      avgAssistantLength: _meanContentLength(assistantMessages),
      errorMessages: messages.filter((m) => m.is_error).length,
    },
  };
}
|
|
575
|
+
|
|
576
|
+
module.exports = {
|
|
577
|
+
buildYesterdayReport,
|
|
578
|
+
buildWeeklyReport,
|
|
579
|
+
buildSessionDetail,
|
|
580
|
+
getCurrentDimensionsV2,
|
|
581
|
+
};
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { classifySession } = require('./difficulty');
|
|
4
|
+
const problemDef = require('./dimensions/problem-definition');
|
|
5
|
+
const judgement = require('./dimensions/judgement');
|
|
6
|
+
const aiTools = require('./dimensions/ai-tools');
|
|
7
|
+
const aiKnowledge = require('./dimensions/ai-knowledge');
|
|
8
|
+
const outputQuality = require('./dimensions/output-quality');
|
|
9
|
+
const { analyzeSessionLLM } = require('../llm/session-analyzer');
|
|
10
|
+
const { dimensionSource } = require('./dimensions/llm-merge');
|
|
11
|
+
const { rollupDimension, scoreToLevel } = require('./thresholds-v2');
|
|
12
|
+
|
|
13
|
+
/**
 * Score one session across the v2.1 dimensions.
 *
 * E2 is computed first without any LLM input. A single analyzeSessionLLM
 * call then supplies the per-dimension `scores` cells (and the advice that
 * assemble() forwards); if that call throws, scoring continues with no
 * cells. H1, H2, E1 and O1 are each scored with their own cell, and every
 * scorer runs through safe(), so one failing dimension yields null rather
 * than aborting the session.
 *
 * @param {object} db
 * @param {object} session
 * @param {object} [opts]  { force?: boolean } — only a strict `true` is
 *   forwarded to the analyzer as `force`
 * @returns {Promise<object>} the result object built by assemble()
 */
async function analyzeSessionV2(db, session, opts = {}) {
  const difficulty = classifySession(session);

  // Runs one LLM-assisted scorer with its cell, shielded by safe().
  const scoreWith = (scorer, cell) =>
    safe(() => scorer.analyze(db, session, difficulty.bucket, cell));

  // E2 (tool coverage) takes no LLM cell and is always computed.
  const e2 = safe(() => aiTools.analyze(db, session, difficulty.bucket));

  // One consolidated LLM call → { scores, advice }; null when it throws.
  let llm;
  try {
    llm = await analyzeSessionLLM(db, session, { force: opts.force === true });
  } catch (_) {
    llm = null;
  }
  const cells = (llm && llm.scores) || {};

  const h1 = scoreWith(problemDef, cells.H1);
  const h2 = scoreWith(judgement, cells.H2);
  const e1 = scoreWith(aiKnowledge, cells.E1);
  const o1 = scoreWith(outputQuality, cells.O1);

  const judgeSources = [h1, h2, e1, o1].map((result) => result && result.judgeSource);
  return assemble(difficulty, { h1, h2, e1, e2, o1 }, llm, dimensionSource(judgeSources));
}
|
|
44
|
+
|
|
45
|
+
/**
 * Build the standard result object from the per-dimension scorer outputs.
 *
 * Each of subScores / subLevels / subEvidence / scores / levels is keyed
 * H1, H2, H3, E1, E2, O1. H3 is always null here (no per-session scorer is
 * passed in for it). A dimension whose scorer result is missing contributes
 * null; levels are derived from the scores via scoreToLevel.
 *
 * @param {object} difficulty
 * @param {{h1:?object, h2:?object, e1:?object, e2:?object, o1:?object}} dims
 * @param {?object} llm          analyzer payload, or null
 * @param {*} judgeSource
 * @returns {object}
 */
function assemble(difficulty, dims, llm, judgeSource) {
  // Output key → scorer result, in output order. H3 has no result.
  const resultsByKey = [
    ['H1', dims.h1],
    ['H2', dims.h2],
    ['H3', null],
    ['E1', dims.e1],
    ['E2', dims.e2],
    ['O1', dims.o1],
  ];

  // Collect one field from every scorer result (null where there is none).
  const collect = (field) => {
    const out = {};
    for (const [key, result] of resultsByKey) {
      out[key] = result ? result[field] : null;
    }
    return out;
  };

  const subScores = collect('subScores');
  const subLevels = collect('subLevels');
  const subEvidence = collect('subEvidence');
  const scores = collect('score');

  const levels = {};
  for (const key of Object.keys(scores)) {
    levels[key] = key === 'H3' ? null : scoreToLevel(scores[key]);
  }

  return {
    difficulty,
    subScores,
    subLevels,
    subEvidence,
    scores,
    levels,
    judgeSource,
    // Full analyzer payload (scores + advice), cached verbatim in llm_judge_v2.
    llmJudge: llm || null,
    // Raw advice + the meta the advice normaliser needs (job persists to llm_advice).
    llmAdvice: llm && llm.advice ? llm.advice : null,
    llmAdviceMeta: llm ? { msgCount: llm.msgCount, cli: llm.cli } : null,
  };
}
|
|
66
|
+
|
|
67
|
+
/**
 * Run a dimension scorer, converting a thrown error into a null result.
 * The error's message is logged as a warning so the failure stays visible.
 *
 * @param {function(): *} fn
 * @returns {*} whatever `fn` returns, or null if it threw
 */
function safe(fn) {
  try {
    return fn();
  } catch (err) {
    console.warn('[scoring-v2] dimension error:', err.message);
    return null;
  }
}
|
|
71
|
+
|
|
72
|
+
module.exports = { analyzeSessionV2, rollupDimension };
|