agentboss 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +34 -0
  2. package/bin/aboss.js +288 -0
  3. package/client/dist/assets/index-C1wFD_Vo.css +1 -0
  4. package/client/dist/assets/index-DBj1Ujlx.js +137 -0
  5. package/client/dist/index.html +34 -0
  6. package/package.json +64 -0
  7. package/server/analysis/daily-aggregator.js +258 -0
  8. package/server/analysis/difficulty.js +129 -0
  9. package/server/analysis/dimensions/ai-knowledge.js +172 -0
  10. package/server/analysis/dimensions/ai-tools.js +161 -0
  11. package/server/analysis/dimensions/judgement.js +107 -0
  12. package/server/analysis/dimensions/llm-merge.js +57 -0
  13. package/server/analysis/dimensions/output-quality.js +167 -0
  14. package/server/analysis/dimensions/problem-definition.js +104 -0
  15. package/server/analysis/dimensions/system-thinking.js +225 -0
  16. package/server/analysis/evidence-builder.js +104 -0
  17. package/server/analysis/job.js +273 -0
  18. package/server/analysis/report-builder.js +581 -0
  19. package/server/analysis/scoring-v2.js +72 -0
  20. package/server/analysis/text-signals.js +179 -0
  21. package/server/analysis/thresholds-v2.js +358 -0
  22. package/server/api/advice.js +124 -0
  23. package/server/api/analysis.js +141 -0
  24. package/server/api/execution.js +330 -0
  25. package/server/api/metrics.js +277 -0
  26. package/server/api/overview.js +308 -0
  27. package/server/api/project.js +255 -0
  28. package/server/api/reports.js +125 -0
  29. package/server/api/sessions.js +118 -0
  30. package/server/api/settings.js +119 -0
  31. package/server/db/connection.js +175 -0
  32. package/server/db/queries.js +1051 -0
  33. package/server/db/schema.js +487 -0
  34. package/server/etl/active-time.js +150 -0
  35. package/server/etl/backfill-subagents.js +178 -0
  36. package/server/etl/claude-code.js +826 -0
  37. package/server/etl/detect.js +341 -0
  38. package/server/etl/judge-filter.js +117 -0
  39. package/server/etl/opencode.js +606 -0
  40. package/server/execution/job.js +662 -0
  41. package/server/execution/prompt.js +227 -0
  42. package/server/execution/runner.js +218 -0
  43. package/server/index.js +94 -0
  44. package/server/llm/advice-prompt.js +339 -0
  45. package/server/llm/advice.js +384 -0
  46. package/server/llm/analysis-prompt.js +162 -0
  47. package/server/llm/cli-runner.js +249 -0
  48. package/server/llm/judge-prompts.js +179 -0
  49. package/server/llm/judge.js +118 -0
  50. package/server/llm/project-advice-prompt.js +332 -0
  51. package/server/llm/project-advice.js +491 -0
  52. package/server/llm/session-analyzer.js +122 -0
  53. package/server/utils/project.js +80 -0
@@ -0,0 +1,384 @@
1
+ /**
2
+ * High-level entry point for per-session AI advice generation.
3
+ *
4
+ * Pipeline (see docs/superpowers/specs/2026-06-13-session-advice-design.md §3.2):
5
+ *
6
+ * 1. settings gate — reuses enable_llm_judge
7
+ * 2. cache check — session_analysis.llm_advice with VERSION + msgCount keys (in code this runs after step 4, because context assembly supplies msgCount)
8
+ * 3. CLI detection — opencode > claude
9
+ * 4. context assembly — session row + messages + tool-call summary
10
+ * (NO scores, NO sub-scores — see advice-prompt.js
11
+ * header for why)
12
+ * 5. truncateContext — keep prompt within 80 KB
13
+ * 6. runJudge under withSlot — 90 s timeout, JSON parsing, sentinel
14
+ * 7. persist — UPDATE session_analysis SET llm_advice = ?
15
+ *
16
+ * Failures bubble up as `{ ok: false, reason }` so the API layer can map
17
+ * them onto HTTP codes; nothing in here throws on normal failures.
18
+ *
19
+ * @author Felix
20
+ */
21
+
22
+ 'use strict';
23
+
24
+ const {
25
+ detectAvailableCli,
26
+ runJudge,
27
+ withSlot,
28
+ } = require('./cli-runner');
29
+ const {
30
+ ADVICE_PROMPT_VERSION,
31
+ buildAdvicePrompt,
32
+ truncateContext,
33
+ } = require('./advice-prompt');
34
+ const {
35
+ queryAll,
36
+ queryOne,
37
+ getSessionById,
38
+ getMessagesBySession,
39
+ } = require('../db/queries');
40
+ const { saveDb } = require('../db/connection');
41
+
42
+ // ---------------------------------------------------------------------------
43
+ // Settings gate — reuse the existing LLM-judge toggle so users don't have
44
+ // to flip a second switch. Tiny in-process cache (10 s) avoids hitting
45
+ // the DB for every button click.
46
+ // ---------------------------------------------------------------------------
47
+
48
/** Most recent settings snapshot, or null when nothing is cached. */
let _settingsCache = null;
/** Date.now() value recorded when _settingsCache was last filled. */
let _settingsCacheAt = 0;
/** Maximum age (ms) at which a cached snapshot is still served. */
const SETTINGS_TTL_MS = 10_000;

/**
 * Read the `enable_llm_judge` flag from user_settings.
 *
 * A snapshot younger than SETTINGS_TTL_MS is returned as-is; otherwise the
 * table is queried through `db.exec` and the snapshot is replaced. The flag
 * counts as enabled when the stored value stringifies to '1' or to 'true'
 * (any letter case). If several rows match, the last one wins.
 *
 * @param {Object} db - handle exposing `exec(sql)`, which returns an array of
 *   result sets shaped like `{ values: Array<[key, value]> }`
 * @returns {{ enable_llm_judge: boolean }} the (possibly cached) snapshot
 */
function getSettings(db) {
  const now = Date.now();
  const cacheAge = now - _settingsCacheAt;
  if (_settingsCache && cacheAge < SETTINGS_TTL_MS) return _settingsCache;

  const resultSets = db.exec(
    "SELECT key, value FROM user_settings WHERE key = 'enable_llm_judge'"
  );
  const firstSet = resultSets[0];
  let enabled = false;
  if (firstSet) {
    for (const row of firstSet.values) {
      const text = String(row[1]);
      enabled = text === '1' || text.toLowerCase() === 'true';
    }
  }

  _settingsCacheAt = now;
  _settingsCache = { enable_llm_judge: enabled };
  return _settingsCache;
}
70
+
71
/**
 * Public: forget the cached settings snapshot so that the next settings
 * read goes back to the database (used by PUT /api/settings).
 *
 * @returns {void}
 */
function invalidateAdviceSettingsCache() {
  _settingsCache = null;
}
75
+
76
+ // ---------------------------------------------------------------------------
77
+ // Cache load / store
78
+ // ---------------------------------------------------------------------------
79
+
80
/**
 * Fetch and decode the stored llm_advice payload of one session.
 *
 * This is a plain read: it performs no version / message-count freshness
 * check, so callers that care about staleness must apply isCacheFresh()
 * themselves.
 *
 * @param {Object} db
 * @param {string} sessionId
 * @returns {Object|null} the parsed payload, or null when there is no row,
 *   the column is empty, or the stored text is not valid JSON
 */
function loadAdvice(db, sessionId) {
  const record = queryOne(
    db,
    'SELECT llm_advice FROM session_analysis WHERE session_id = ?',
    [sessionId]
  );
  const serialised = record ? record.llm_advice : null;
  if (!serialised) return null;

  try {
    return JSON.parse(serialised);
  } catch {
    // Corrupt cache content is treated the same as "nothing cached".
    return null;
  }
}
96
+
97
/**
 * Tell whether a cached advice payload may be served as-is.
 *
 * A payload is fresh when it exists, was produced under the current
 * ADVICE_PROMPT_VERSION, and was computed for the same number of messages
 * the session has now.
 *
 * @param {Object|null|undefined} cache - payload as returned by loadAdvice()
 * @param {number} currentMsgCount - message count of the session right now
 * @returns {boolean}
 */
function isCacheFresh(cache, currentMsgCount) {
  if (!cache) return false;
  if (cache.v !== ADVICE_PROMPT_VERSION) return false;
  return cache.msgCount === currentMsgCount;
}
105
+
106
/**
 * Write an advice payload into session_analysis.llm_advice.
 *
 * Only this one column is touched: an existing row gets an UPDATE of
 * llm_advice, a missing row is INSERTed with status 'pending'. Afterwards
 * saveDb() is called to flush the database; a failure of that flush is
 * deliberately ignored (best effort).
 *
 * @param {Object} db - handle exposing `run(sql, params)`
 * @param {string} sessionId
 * @param {Object} payload - JSON-serialisable advice payload
 * @returns {void}
 */
function storeAdvice(db, sessionId, payload) {
  const rowExists = Boolean(
    queryOne(db, 'SELECT 1 FROM session_analysis WHERE session_id = ?', [sessionId])
  );
  const serialised = JSON.stringify(payload);

  const [sql, params] = rowExists
    ? [
        'UPDATE session_analysis SET llm_advice = ? WHERE session_id = ?',
        [serialised, sessionId],
      ]
    : [
        `INSERT INTO session_analysis (session_id, status, llm_advice)
         VALUES (?, 'pending', ?)`,
        [sessionId, serialised],
      ];
  db.run(sql, params);

  try {
    saveDb();
  } catch {
    // Best-effort flush; ignore failure (auto-save will catch up).
  }
}
136
+
137
+ // ---------------------------------------------------------------------------
138
+ // Context assembly
139
+ // ---------------------------------------------------------------------------
140
+
141
/**
 * Gather everything buildAdvicePrompt() needs for one session.
 *
 * Sources:
 *   - the session row (getSessionById): model, duration, cost, token and
 *     counter columns
 *   - the session's messages (getMessagesBySession), reduced to
 *     { role, text } with a missing text replaced by ''
 *   - unified_tool_call, grouped by tool name and limited to the 20 most
 *     frequently called tools
 *
 * Nothing is read from session_analysis, and `difficulty` is always emitted
 * as null: only plain facts are handed to the model, never derived scores.
 *
 * @param {Object} db
 * @param {string} sessionId
 * @returns {Object|null} the context object, or null if the session does not exist
 */
function assembleContext(db, sessionId) {
  const session = getSessionById(db, sessionId);
  if (!session) return null;

  // One pass over the transcript: project each row and tally the two roles.
  const messages = [];
  let userCount = 0;
  let assistantCount = 0;
  for (const row of getMessagesBySession(db, sessionId)) {
    messages.push({ role: row.role, text: row.text || '' });
    if (row.role === 'user') userCount += 1;
    else if (row.role === 'assistant') assistantCount += 1;
  }

  // Per-tool call count, error count and one sample target file.
  const toolRows = queryAll(
    db,
    `SELECT tool_name,
            COUNT(*) AS count,
            SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS error_count,
            MAX(COALESCE(target_file, '')) AS args_preview
     FROM unified_tool_call
     WHERE session_id = ?
     GROUP BY tool_name
     ORDER BY count DESC
     LIMIT 20`,
    [sessionId]
  );
  const toolBreakdown = toolRows.map(
    ({ tool_name: name, count, error_count: errorCount, args_preview: preview }) => ({
      name,
      count,
      errorCount,
      avgDurationMs: 0, // not tracked by the ETL; field kept for spec parity
      argsPreview: preview || '',
    })
  );

  const orZero = (value) => value || 0;
  const tokens = {
    input: orZero(session.tokens_input),
    output: orZero(session.tokens_output),
    reasoning: orZero(session.tokens_reasoning),
    cacheRead: orZero(session.tokens_cache_read),
    cacheWrite: orZero(session.tokens_cache_write),
  };

  return {
    session: {
      id: session.id,
      model: session.model,
      difficulty: null, // derived value, deliberately withheld from the model
      durationMinutes: orZero(session.duration_minutes),
      cost: orZero(session.cost_usd),
      tokens,
      errorCount: orZero(session.error_count),
      toolCallCount: orZero(session.tool_call_count),
      messageCount: session.message_count || messages.length,
      userCount,
      assistantCount,
      reverted: Boolean(session.reverted),
    },
    toolBreakdown,
    messages,
    truncated: false,
    omittedMessages: 0,
  };
}
218
+
219
+ // ---------------------------------------------------------------------------
220
+ // Public: generateAdvice
221
+ // ---------------------------------------------------------------------------
222
+
223
/**
 * Return advice for a session, generating it with the CLI judge when no
 * fresh cached payload exists.
 *
 * Order of work: settings gate → context assembly (its message count keys
 * the cache) → cache lookup (skipped when forced) → CLI detection →
 * truncation and prompt building → runJudge inside withSlot with a 90 s
 * timeout → normalisation → persistence.
 *
 * Any exception raised inside the pipeline is converted into an
 * `{ ok: false, reason: 'internal' }` result instead of propagating.
 *
 * @param {Object} db
 * @param {string} sessionId
 * @param {Object} [opts]
 * @param {boolean} [opts.force=false] - bypass the cache and always re-judge
 * @returns {Promise<
 *     { ok: true, data: object, fromCache: boolean }
 *   | { ok: false, reason: 'no-session'|'llm-disabled'|'no-cli'|'timeout'|'bad-json'|'spawn-error'|'exit-non-zero'|'internal',
 *       error?: string }
 * >}
 */
async function generateAdvice(db, sessionId, opts = {}) {
  const force = opts.force === true;
  // Debug trace to stderr, emitted only when ABOSS_ADVICE_DEBUG=1.
  const log = (...parts) => {
    if (process.env.ABOSS_ADVICE_DEBUG !== '1') return;
    console.error('[advice]', sessionId, ...parts);
  };

  try {
    // 1. settings gate
    if (!getSettings(db).enable_llm_judge) {
      log('disabled');
      return { ok: false, reason: 'llm-disabled' };
    }

    // 2. context comes first: the cache freshness check needs msgCount
    const fullContext = assembleContext(db, sessionId);
    if (!fullContext) {
      log('no-session');
      return { ok: false, reason: 'no-session' };
    }
    const msgCount = fullContext.messages.length;

    if (!force) {
      const cached = loadAdvice(db, sessionId);
      if (isCacheFresh(cached, msgCount)) {
        log('cache hit');
        return { ok: true, data: cached, fromCache: true };
      }
    }

    // 3. CLI detection
    const cli = await detectAvailableCli();
    if (!cli) {
      log('no CLI');
      return { ok: false, reason: 'no-cli' };
    }

    // 4–5. truncate + build prompt
    const ctx = truncateContext(fullContext);
    const prompt = buildAdvicePrompt(ctx);
    log('spawning', cli.name, 'prompt bytes=', prompt.length, 'truncated=', ctx.truncated);

    // 6. run under a concurrency slot with a 90 s timeout
    const judged = await withSlot(() => runJudge({ prompt, timeoutMs: 90_000 }));
    log('result ok=', judged.ok, 'reason=', judged.reason);
    if (!judged.ok) {
      // cli-runner reasons are passed through 1:1; the API layer maps them to HTTP codes.
      return { ok: false, reason: judged.reason, error: judged.error };
    }

    const { truncated, omittedMessages } = ctx;
    const payload = normaliseAdvicePayload(judged.data, {
      msgCount,
      cli: judged.cli,
      truncated,
      omittedMessages,
    });

    // 7. persist
    storeAdvice(db, sessionId, payload);
    return { ok: true, data: payload, fromCache: false };
  } catch (err) {
    const message = err && err.message;
    log('internal error', message);
    return { ok: false, reason: 'internal', error: message };
  }
}
294
+
295
+ // ---------------------------------------------------------------------------
296
+ // Payload normalisation
297
+ // ---------------------------------------------------------------------------
298
+
299
// Whitelists used when normalising model output. They are frozen because
// they are shared, module-wide lookup tables: an accidental push/splice
// elsewhere would silently change what every later payload is validated
// against.

/** Category keys every advice payload carries (each maps to an item array). */
const ALL_CATEGORIES = Object.freeze(['cost', 'accuracy', 'context', 'skills', 'workflow']);
/** Accepted item severities. */
const ALL_SEVERITIES = Object.freeze(['high', 'medium', 'low']);
/** Accepted item executors. */
const ALL_EXECUTORS = Object.freeze(['opencode', 'claude', 'manual']);
/** Accepted working-directory hints. */
const ALL_CWD_HINTS = Object.freeze(['project_root']);
303
+
304
/**
 * Turn whatever the model returned into the payload shape that is cached
 * and sent to the UI. Deliberately forgiving:
 *   - a missing / non-string summary or rationale becomes ''
 *   - a missing / non-array category becomes []
 *   - every item is rebuilt through normaliseItem(); items it rejects, and
 *     items that end up without evidence, are dropped
 *   - category keys outside ALL_CATEGORIES are ignored
 *
 * The result is also stamped with the bookkeeping fields the cache layer
 * relies on: v (ADVICE_PROMPT_VERSION), msgCount, cli, cachedAt (ISO
 * timestamp of this call), truncated and omittedMessages.
 *
 * @param {*} raw - parsed model output; any value is tolerated
 * @param {Object} meta
 * @param {number} meta.msgCount
 * @param {*} meta.cli
 * @param {*} [meta.truncated] - falsy values are stored as false
 * @param {number} [meta.omittedMessages] - falsy values are stored as 0
 * @returns {Object} the normalised payload
 */
function normaliseAdvicePayload(raw, meta) {
  let source = {};
  if (raw && typeof raw.categories === 'object' && raw.categories) {
    source = raw.categories;
  }

  const categories = Object.fromEntries(
    ALL_CATEGORIES.map((key) => {
      const items = Array.isArray(source[key]) ? source[key] : [];
      const kept = items
        .map(normaliseItem)
        .filter((item) => item && item.evidence); // evidence is mandatory
      return [key, kept];
    })
  );

  const stringOrEmpty = (value) => (typeof value === 'string' ? value : '');

  return {
    v: ADVICE_PROMPT_VERSION,
    msgCount: meta.msgCount,
    cli: meta.cli,
    cachedAt: new Date().toISOString(),
    truncated: meta.truncated || false,
    omittedMessages: meta.omittedMessages || 0,
    summary: stringOrEmpty(raw?.summary),
    categories,
    rationale: stringOrEmpty(raw?.rationale),
  };
}
345
+
346
/**
 * Normalise one advice item produced by the model.
 *
 * Only the known fields are copied; anything else on the input is dropped.
 *   - severity: must be one of ALL_SEVERITIES, otherwise 'low'
 *   - executor: must be one of ALL_EXECUTORS, otherwise 'manual'
 *   - cwd_hint: must be one of ALL_CWD_HINTS, otherwise 'project_root'
 *   - title / why / action / evidence: trimmed strings, '' when not a string
 *   - actionable: true only when the input is exactly `true` AND the
 *     executor is not 'manual'
 *
 * severity and executor are matched after trimming and lower-casing, so
 * 'High' or ' OPENCODE ' from a sloppy model still resolve to the intended
 * value instead of falling back to the default.
 *
 * A 'manual' item is never actionable. An item whose executor is missing or
 * unknown therefore resolves to ('manual', not actionable) — the safe state
 * in which the UI renders no action button.
 *
 * @param {*} it - raw item from the model
 * @returns {Object|null} the normalised item, or null when `it` is not an object
 */
function normaliseItem(it) {
  if (!it || typeof it !== 'object') return null;

  // Canonical spelling of an enum-like value; non-strings pass through and
  // then simply fail the whitelist check.
  const canonical = (value) => (typeof value === 'string' ? value.trim().toLowerCase() : value);
  const trimmedText = (value) => (typeof value === 'string' ? value.trim() : '');

  const severityIn = canonical(it.severity);
  const severity = ALL_SEVERITIES.includes(severityIn) ? severityIn : 'low';

  const executorIn = canonical(it.executor);
  const executor = ALL_EXECUTORS.includes(executorIn) ? executorIn : 'manual';

  const cwd_hint = ALL_CWD_HINTS.includes(it.cwd_hint) ? it.cwd_hint : 'project_root';

  // Single consistency rule: manual work cannot be triggered from the UI.
  // (The former follow-up check "actionable && executor === 'manual'" could
  // never be true after this rule and has been removed.)
  const actionable = executor !== 'manual' && it.actionable === true;

  return {
    severity,
    title: trimmedText(it.title),
    why: trimmedText(it.why),
    action: trimmedText(it.action),
    evidence: trimmedText(it.evidence),
    actionable,
    executor,
    cwd_hint,
  };
}
371
+
372
+ // ---------------------------------------------------------------------------
373
+ // Exports
374
+ // ---------------------------------------------------------------------------
375
+
376
+ module.exports = {
377
+ generateAdvice,
378
+ loadAdvice,
379
+ isCacheFresh,
380
+ invalidateAdviceSettingsCache,
381
+ // exported for tests / debugging:
382
+ assembleContext,
383
+ normaliseAdvicePayload,
384
+ };
@@ -0,0 +1,162 @@
1
+ /**
2
+ * Unified session-analysis prompt — one LLM call that returns BOTH the
3
+ * v2.1 capability scores (H1/H2/E1/O1) AND the per-session collaboration
4
+ * advice, as a single strict-JSON object:
5
+ *
6
+ * { "scores": { H1:{clarity,converge,drift cells}, H2:{...}, E1:{...}, O1:{...} },
7
+ * "advice": { summary, categories:{cost,accuracy,context,skills,workflow}, rationale } }
8
+ *
9
+ * Replaces the two separate calls (judge-prompts.buildSessionJudgePrompt +
10
+ * advice-prompt.buildAdvicePrompt).
11
+ *
12
+ * CRITICAL — score laundering guard: the model produces the scores AND the
13
+ * advice in the same context, but the advice section MUST NOT reference any
14
+ * score / level / dimension key. Earlier (separate-call) experience showed
15
+ * the model otherwise echoes the numbers back as fake "evidence". The
16
+ * `advice` rules below repeat this prohibition; the two JSON sections are
17
+ * independent.
18
+ *
19
+ * @author Felix
20
+ */
21
+
22
+ 'use strict';
23
+
24
+ const { truncateContext } = require('./advice-prompt');
25
+
26
+ /** First line marker so the resulting CLI session is filtered by the ETL
27
+ * (registered in server/etl/judge-filter.js). */
28
+ const ANALYSIS_SENTINEL = '[ABOSS-ANALYZE]';
29
+
30
+ /** Bump when the JSON output contract changes.
31
+ * v2: sub-indicator cells return a granular 0–100 `score` (was `level`). */
32
+ const ANALYSIS_PROMPT_VERSION = 2;
33
+
34
+ const CATEGORIES = ['cost', 'accuracy', 'context', 'skills', 'workflow'];
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // Context formatting (mirrors advice-prompt; kept local so this module is
38
+ // self-contained and changes here can't break advice generation).
39
+ // ---------------------------------------------------------------------------
40
+
41
/**
 * Render a statistic for the prompt.
 *
 * @param {*} n
 * @returns {string} '–' for null / undefined / NaN; en-US grouped digits for
 *   integers; three decimals for other numbers; String(n) for non-numbers
 */
function fmtNum(n) {
  const missing = n == null || Number.isNaN(n);
  if (missing) return '–';

  if (typeof n === 'number') {
    return Number.isInteger(n) ? n.toLocaleString('en-US') : n.toFixed(3);
  }
  return String(n);
}
47
+
48
/**
 * Render the tool breakdown as one text row per tool (at most 20 rows).
 *
 * Each row shows the name padded to 20 columns, the call count, the error
 * count, and the args preview with whitespace runs collapsed and cut to 120
 * characters.
 *
 * @param {Array<{name?: string, count?: number, errorCount?: number, argsPreview?: string}>} tools
 * @returns {string} the rows joined by newlines, or a placeholder when
 *   `tools` is not an array or is empty
 */
function fmtToolTable(tools) {
  if (!Array.isArray(tools) || tools.length === 0) return '（无工具调用）';

  const rows = [];
  for (const tool of tools.slice(0, 20)) {
    const name = (tool.name || '?').padEnd(20);
    const calls = String(tool.count ?? 0).padStart(4);
    const errors = String(tool.errorCount ?? 0).padStart(3);
    const preview = (tool.argsPreview || '').replace(/\s+/g, ' ').slice(0, 120);
    rows.push(` ${name} count=${calls} err=${errors} args="${preview}"`);
  }
  return rows.join('\n');
}
56
+
57
/**
 * Render the transcript for the prompt: one `[ROLE     ] text` entry per
 * message, entries separated by a `---` line.
 *
 * Messages whose text is null, undefined or '' are skipped, as are null /
 * undefined entries. When nothing printable remains — including the case
 * where every message has an empty text, which previously produced an empty
 * string — the "no messages" placeholder is returned so the prompt never
 * contains a blank transcript section.
 *
 * @param {Array<{role?: string, text?: string}>} messages
 * @returns {string}
 */
function fmtMessages(messages) {
  const placeholder = '（无消息）';
  if (!Array.isArray(messages)) return placeholder;

  const printable = messages.filter(
    (m) => m != null && m.text != null && m.text !== ''
  );
  if (printable.length === 0) return placeholder;

  return printable
    .map((m) => `[${(m.role || '?').toUpperCase().padEnd(9)}] ${m.text}`)
    .join('\n---\n');
}
64
+
65
+ // ---------------------------------------------------------------------------
66
+ // Rubric (scores) — difficulty-conditioned L1–L4 anchors.
67
+ // ---------------------------------------------------------------------------
68
+
69
+ const RUBRIC = `
70
+ H1 立意(把模糊需求收敛成可执行的精确问题的功力)
71
+ clarity 初始指令清晰度 · converge 收敛效率 · drift 方向稳定性
72
+ L4 开局即给出目标+约束+验收,几乎无需追问;L3 信息基本充分,少量澄清;
73
+ L2 需多轮补全;L1 一句话甩任务、反复改方向。
74
+ H2 判断(不盲从、敢质疑、敢推翻 vs 橡皮图章式照单全收)
75
+ challenge 合理质疑 · override 该推翻时推翻 · accept_rate 采纳前是否有判断
76
+ L4 在该质疑处质疑、在合理处高效采纳,质疑有依据;L3 大体审视偶有盲从;
77
+ L2 多数直接采纳少量质疑;L1 几乎全程"好的/继续"式盲从。
78
+ E1 知识(AI 对该技术栈的掌握)
79
+ domain_errors 领域错误(越少越好)· staleness 过时引用(越少越好)· best_practice 最佳实践
80
+ L4 无错误、无过时、全程最佳实践;L3 偶有小瑕疵;L2 多处问题;L1 频繁错误/过时。
81
+ O1 产出(结果是否真的可用)
82
+ first_take 一次采纳 · code_style 代码规范 · completeness 边界/异常/测试完备
83
+ L4 几乎一次过且规范完备;L3 小修即可;L2 需明显返工;L1 大量返工或缺失完备性。
84
+ `;
85
+
86
+ // ---------------------------------------------------------------------------
87
+ // Prompt
88
+ // ---------------------------------------------------------------------------
89
+
90
+ /**
91
+ * Build the unified analysis prompt.
92
+ *
93
+ * @param {object} ctx advice-style context (assembleContext output) plus a
94
+ * numeric `difficulty` (1-4). Shape:
95
+ * { session:{model,difficulty,durationMinutes,cost,tokens,errorCount,
96
+ * toolCallCount,messageCount,userCount,assistantCount,reverted},
97
+ * toolBreakdown:[...], messages:[{role,text}], truncated, omittedMessages }
98
+ * @returns {string}
99
+ */
100
+ function buildSessionAnalysisPrompt(ctx) {
101
+ const s = ctx.session || {};
102
+ const t = s.tokens || {};
103
+ const difficulty = s.difficulty ?? ctx.difficulty ?? '?';
104
+ const truncatedNote =
105
+ ctx.truncated === 'hard' ? '(注意:会话很长,已强力截断。)'
106
+ : ctx.truncated ? `(注意:中段已省略 ${ctx.omittedMessages} 条消息。)`
107
+ : '';
108
+
109
+ return `${ANALYSIS_SENTINEL}(内部标记,忽略本行)
110
+ 你是一名严格、客观的"人机协作"审计员兼协作教练。下面是一段开发者与 AI 编程助手的会话原文与基础统计。
111
+ 请一次性完成两件事,返回**单个严格 JSON 对象**,不要任何额外文字 / markdown:
112
+ (A) scores —— 按 rubric 给五维能力打分;(B) advice —— 给出可执行的协作改进建议。
113
+
114
+ ================ (A) 评分 rubric(难度越高合格线越宽松,本会话难度 ${difficulty}/4) ================
115
+ ${RUBRIC}
116
+ scores 每个子指标返回三元组:
117
+ - score: 0–100 的数值,**可含一位小数**(如 78.5);证据不足或对话太短无法判断 → null(不要硬给低分)。
118
+ 档位↔分数区间:L4 = 85–100 · L3 = 65–84 · L2 = 40–64 · L1 = 0–39;
119
+ 请在区间内按实际好坏细分到具体分值,**不要只给 25/55/80/95 这种档位中点或整十的死板分**。
120
+ - confidence: 0.0–1.0。
121
+ - evidence: 引用对话中的具体表现(哪条消息、什么内容),说明为何是这个分值。没有证据写"未发现相关证据"。
122
+ 评分客观性:① 不因输出更长/更礼貌而加分;② 证据不足必须 null;③ 难度低不等于做得差。
123
+
124
+ ================ (B) advice —— 只评"如何使用 AI",不评业务对错 ================
125
+ 评估开发者的协作方式(提问清晰度、上下文准备、工具/模型使用、流程节奏、是否该用 skill/subagent、成本)。
126
+ 不要评对话里讨论的具体技术/代码是否正确(那属于代码评审)。
127
+ **关键禁令**:advice 部分禁止引用、推测或编造任何"分数/等级/Lx/子分/H1/H2/E1/O1"字眼;
128
+ 每条 evidence 必须是对话事实(如"第 3 条用户消息只说'改一下',未给文件路径"),不得换算成分数。
129
+ 5 个类别:cost 省钱 · accuracy 协作层提准确(暴露假设/要求自检/加验证,非业务对错)· context 上下文准备 ·
130
+ skills 推荐 opencode skill/subagent · workflow 流程节奏。
131
+ AdviceItem: { severity: high|medium|low, title:≤20字, why:1句, action:下次怎么做(协作动作), evidence:对话事实,
132
+ actionable:bool, executor: opencode|claude|manual, cwd_hint: "project_root" }
133
+ 硬规则:5 个类别键必须存在(无内容给空数组);AdviceItem 总数 ≤ 6,按 severity 由高到低;
134
+ executor='manual' 时 actionable 必须 false。
135
+
136
+ ================ 会话基础(只作事实参考) ================
137
+ 模型 ${s.model || '未知'} · 难度 ${difficulty}/4 · 时长 ${fmtNum(s.durationMinutes)} 分钟 · 已回退 ${s.reverted ? '是' : '否'}
138
+ 消息 ${fmtNum(s.messageCount)}(用户 ${fmtNum(s.userCount)} / 助手 ${fmtNum(s.assistantCount)})· 工具 ${fmtNum(s.toolCallCount)} 次 错误 ${fmtNum(s.errorCount)}
139
+ Token in ${fmtNum(t.input)} / out ${fmtNum(t.output)} / reasoning ${fmtNum(t.reasoning)} / cacheR ${fmtNum(t.cacheRead)} / cacheW ${fmtNum(t.cacheWrite)} · 成本 $${typeof s.cost === 'number' ? s.cost.toFixed(4) : '–'}
140
+
141
+ 工具使用 Top 20:
142
+ ${fmtToolTable(ctx.toolBreakdown)}
143
+
144
+ 消息全文 ${truncatedNote}:
145
+ ${fmtMessages(ctx.messages)}
146
+
147
+ ================ 输出 JSON(仅此对象) ================
148
+ {"scores":{
149
+ "H1":{"clarity":{"score":78.5,"confidence":0.8,"evidence":"…"},"converge":{"score":90,"confidence":0.9,"evidence":"…"},"drift":{"score":null,"confidence":0,"evidence":"无法判断"}},
150
+ "H2":{"challenge":{…},"override":{…},"accept_rate":{…}},
151
+ "E1":{"domain_errors":{…},"staleness":{…},"best_practice":{…}},
152
+ "O1":{"first_take":{…},"code_style":{…},"completeness":{…}}},
153
+ "advice":{"summary":"≤60字只谈协作","categories":{"cost":[],"accuracy":[],"context":[],"skills":[],"workflow":[]},"rationale":"≤80字只谈协作"}}`;
154
+ }
155
+
156
+ module.exports = {
157
+ ANALYSIS_SENTINEL,
158
+ ANALYSIS_PROMPT_VERSION,
159
+ CATEGORIES,
160
+ buildSessionAnalysisPrompt,
161
+ truncateContext,
162
+ };