agentboss 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,147 @@
1
+ /**
2
+ * MCP / built-in tool classifier.
3
+ *
4
+ * Neither OpenCode's nor Claude Code's session DB tags tool calls with a
5
+ * provenance field — both stash the bare tool name ('read', 'bash',
6
+ * 'atlassian_getJiraIssue', …) into a single string. But the two
7
+ * agents both follow a NAMING CONVENTION for MCP tools that we can pick
8
+ * up with a tiny rule set:
9
+ *
10
+ * • Claude Code: `mcp__<server>__<tool>` (double underscore)
11
+ * • OpenCode: `<server>_<toolName>` (single underscore;
12
+ * server name is the lowercased MCP server key)
13
+ *
14
+ * Built-in tools across both agents are short, lowercase, no separators
15
+ * (`read`, `bash`, `glob`, `todowrite`, …). When in doubt we fall back
16
+ * to a whitelist — anything OUTSIDE the whitelist with a separator we
17
+ * call MCP. Names that contain no separator and aren't whitelisted are
18
+ * reported as 'unknown' so the caller (currently: LLM prompts) can
19
+ * decide to mention the uncertainty or skip.
20
+ *
21
+ * No persistent state — pure functions, safe to call per-row.
22
+ *
23
+ * Source of truth for whitelist updates:
24
+ * - OpenCode built-ins observed in production boss.db:
25
+ * read, bash, edit, glob, grep, write, todowrite, question,
26
+ * task, skill, websearch
27
+ * - Claude Code built-ins (per docs):
28
+ * Read, Write, Edit, Bash, Glob, Grep, LS, NotebookEdit,
29
+ * NotebookRead, Task, TodoWrite, WebFetch, WebSearch
30
+ *
31
+ * Compared case-insensitively.
32
+ *
33
+ * @author Felix
34
+ */
35
+
36
+ 'use strict';
37
+
38
// Built-in tool names observed for OpenCode, stored lowercase.
const OPENCODE_BUILTINS = [
  'read',
  'bash',
  'edit',
  'glob',
  'grep',
  'write',
  'todowrite',
  'question',
  'task',
  'skill',
  'websearch',
];

// Claude Code built-ins that are not already covered by the OpenCode
// list above. Stored lowercase because lookups are done on the
// lowercased tool name.
const CLAUDE_CODE_EXTRA_BUILTINS = [
  'ls',
  'webfetch',
  'notebookedit',
  'notebookread',
];

/**
 * Whitelist of built-in (non-MCP) tool names for both agents.
 * All entries are lowercase; callers are expected to lowercase a tool
 * name before testing membership.
 */
const BUILTIN_NAMES = new Set([
  ...OPENCODE_BUILTINS,
  ...CLAUDE_CODE_EXTRA_BUILTINS,
]);
46
+
47
/**
 * Classify one raw tool name as built-in, MCP, or unknown.
 *
 * Rules, applied in order:
 *   1. `mcp__<server>__<tool>` (prefix matched case-insensitively)
 *      is reported as MCP.
 *   2. A name present in BUILTIN_NAMES (compared lowercase) is
 *      reported as built-in.
 *   3. `<server>_<tool>`, split at the FIRST underscore with both
 *      halves non-empty, is reported as MCP.
 *   4. Anything else is reported as unknown.
 *
 * The server name is lowercased in both MCP branches so that the same
 * server reached through either naming convention lands in one
 * aggregation bucket. The tool part keeps its original casing.
 * Surrounding whitespace is ignored.
 *
 * @param {string} name  raw tool_name from unified_tool_call
 * @returns {{ kind: 'builtin'|'mcp'|'unknown', server: string|null, tool: string|null }}
 *   `server` is '' when an `mcp__` name has an empty server segment;
 *   `tool` is '' when an `mcp__` name has no tool segment. Non-string,
 *   empty, or whitespace-only input yields
 *   `{ kind: 'unknown', server: null, tool: null }`.
 */
function classifyTool(name) {
  if (typeof name !== 'string') {
    return { kind: 'unknown', server: null, tool: null };
  }
  const trimmed = name.trim();
  if (!trimmed) {
    return { kind: 'unknown', server: null, tool: null };
  }
  const lower = trimmed.toLowerCase();

  // 1. Double-underscore convention: mcp__<server>__<tool>
  const prefix = 'mcp__';
  if (lower.startsWith(prefix)) {
    const rest = trimmed.slice(prefix.length);
    const sep = rest.indexOf('__');
    if (sep === -1) {
      // mcp__foo (no second separator): still MCP, tool unknown.
      return { kind: 'mcp', server: rest.toLowerCase(), tool: '' };
    }
    // sep === 0 means the server segment is empty (mcp____tool). Report
    // an empty server instead of mistaking "__tool" for a server name.
    return {
      kind: 'mcp',
      server: rest.slice(0, sep).toLowerCase(),
      tool: rest.slice(sep + 2),
    };
  }

  // 2. Built-in whitelist (covers both agents).
  if (BUILTIN_NAMES.has(lower)) {
    return { kind: 'builtin', server: null, tool: lower };
  }

  // 3. Single-underscore convention: <server>_<toolName>.
  //    Split at the first underscore; both halves must be non-empty.
  const us = trimmed.indexOf('_');
  if (us > 0 && us < trimmed.length - 1) {
    return {
      kind: 'mcp',
      server: trimmed.slice(0, us).toLowerCase(),
      tool: trimmed.slice(us + 1),
    };
  }

  // 4. No usable separator and not whitelisted: leave the decision to
  //    the caller rather than labelling it MCP.
  return { kind: 'unknown', server: null, tool: trimmed };
}
96
+
97
/**
 * Aggregate raw tool-usage rows into per-MCP-server stats plus overall
 * built-in / MCP / unknown call totals.
 *
 * Each row is classified with classifyTool(). Built-in and unknown
 * rows only contribute to their respective totals; MCP rows are also
 * grouped by server. Rows whose server is empty are grouped under the
 * placeholder key '(?)'. Several rows with the same tool_name are
 * summed, so pre-aggregated and partially aggregated input both work.
 *
 * Counts that are missing or not finite numbers are treated as 0 so a
 * single malformed row cannot turn the totals into NaN, and null /
 * undefined rows are skipped.
 *
 * @param {{tool_name:string, count:number, error_count:number}[]} rows
 * @returns {{
 *   servers: { server:string, calls:number, errors:number, tools:string[] }[],
 *   builtinCalls: number,
 *   mcpCalls: number,
 *   unknownCalls: number,
 * }}
 *   `servers` is sorted by `calls` descending; `tools` holds at most 8
 *   distinct tool names per server, in first-seen order.
 */
function summariseMcpUsage(rows) {
  // Coerce a raw count to a finite number, falling back to 0.
  const toCount = (value) => {
    const n = Number(value);
    return Number.isFinite(n) ? n : 0;
  };

  const byServer = new Map();
  let builtinCalls = 0;
  let mcpCalls = 0;
  let unknownCalls = 0;

  for (const row of rows || []) {
    if (row == null) continue;

    const calls = toCount(row.count);
    const errors = toCount(row.error_count);
    const verdict = classifyTool(row.tool_name);

    if (verdict.kind === 'builtin') {
      builtinCalls += calls;
      continue;
    }
    if (verdict.kind === 'unknown') {
      unknownCalls += calls;
      continue;
    }

    mcpCalls += calls;
    const key = verdict.server || '(?)';
    let bucket = byServer.get(key);
    if (!bucket) {
      bucket = { server: key, calls: 0, errors: 0, tools: new Set() };
      byServer.set(key, bucket);
    }
    bucket.calls += calls;
    bucket.errors += errors;
    if (verdict.tool) bucket.tools.add(verdict.tool);
  }

  const servers = Array.from(byServer.values(), (bucket) => ({
    server: bucket.server,
    calls: bucket.calls,
    errors: bucket.errors,
    // Cap the tool list so one chatty server cannot bloat the output.
    tools: Array.from(bucket.tools).slice(0, 8),
  })).sort((a, b) => b.calls - a.calls);

  return { servers, builtinCalls, mcpCalls, unknownCalls };
}
142
+
143
// Public surface of the classifier module: the two functions plus the
// built-in whitelist they rely on.
const publicApi = { classifyTool, summariseMcpUsage, BUILTIN_NAMES };

module.exports = publicApi;
@@ -209,6 +209,47 @@ function fmtWindow(ctx) {
209
209
  return `${ctx.windowFrom} → ${ctx.windowTo}`;
210
210
  }
211
211
 
212
/**
 * Render the cross-session MCP-server usage block as plain text.
 *
 * Returns null when `usage` is missing or when the built-in, MCP and
 * unknown call totals are all zero, so the caller can skip the whole
 * section.
 *
 * Output is intentionally compact: one totals line, then either a
 * "no servers" line or one line per server (at most 10 servers, at
 * most 6 tool names each) followed by a note about omitted servers.
 *
 * Malformed input is tolerated rather than thrown on: a non-array
 * `servers` is treated as empty, null entries are skipped, a missing
 * server name is shown as '(?)', and non-numeric counts are shown
 * as 0.
 *
 * @param {{
 *   servers?: { server:string, calls:number, errors:number, tools:string[] }[],
 *   builtinCalls?: number,
 *   mcpCalls?: number,
 *   unknownCalls?: number,
 * }|null|undefined} usage
 * @returns {string|null}
 */
function fmtMcpUsage(usage) {
  if (!usage) return null;

  // Coerce a raw count to a number, falling back to 0 for NaN / missing.
  const toCount = (value) => Number(value) || 0;

  const builtinCalls = toCount(usage.builtinCalls);
  const mcpCalls = toCount(usage.mcpCalls);
  const unknownCalls = toCount(usage.unknownCalls);
  if (!builtinCalls && !mcpCalls && !unknownCalls) return null;

  const servers = Array.isArray(usage.servers)
    ? usage.servers.filter((s) => s != null)
    : [];

  const unknownPart = unknownCalls ? ` · 未分类 ${unknownCalls}` : '';
  const lines = [`总调用: 内置 ${builtinCalls} · MCP ${mcpCalls}${unknownPart}`];

  if (servers.length === 0) {
    lines.push('MCP 服务器: (无)');
    return lines.join('\n');
  }

  lines.push('按 MCP 服务器(calls 降序):');
  for (const s of servers.slice(0, 10)) {
    const label = String(s.server == null ? '(?)' : s.server).padEnd(16);
    const calls = String(toCount(s.calls)).padStart(4);
    const errors = String(toCount(s.errors)).padStart(3);
    const toolList = (Array.isArray(s.tools) ? s.tools : []).slice(0, 6).join(', ');
    const toolsPart = toolList ? ` tools=[${toolList}]` : '';
    lines.push(` - ${label} calls=${calls} err=${errors}${toolsPart}`);
  }
  if (servers.length > 10) {
    lines.push(` …(其余 ${servers.length - 10} 个 MCP 服务器省略)`);
  }
  return lines.join('\n');
}
252
+
212
253
  // ---------------------------------------------------------------------------
213
254
  // Prompt
214
255
  // ---------------------------------------------------------------------------
@@ -221,6 +262,7 @@ function fmtWindow(ctx) {
221
262
  */
222
263
  function buildProjectAdvicePrompt(ctx) {
223
264
  const stats = ctx.stats || {};
265
+ const mcpBlock = fmtMcpUsage(ctx.mcpUsage);
224
266
 
225
267
  const truncatedNote =
226
268
  ctx.truncated === 'hard'
@@ -254,6 +296,7 @@ function buildProjectAdvicePrompt(ctx) {
254
296
  - 是否应建立 / 完善 opencode skill 或 subagent
255
297
  - 工具使用习惯(是否反复使用低效组合)
256
298
  - 项目级流程瓶颈(测试节奏、回退频率)
299
+ - **MCP 服务器的取舍**(详见下「MCP 服务器使用」一节)
257
300
 
258
301
  不要谈:
259
302
  - 任何具体业务 / 代码层面的对错
@@ -284,7 +327,7 @@ AdviceItem(项目级):
284
327
  "why": "1-2 句话,说明这个问题在多个会话中如何重复或累积",
285
328
  "action": "1 句话,项目级别可落地的改变(skill / 配置 / 流程)",
286
329
  "evidence": "必须引用具体会话证据,例:出现于 7/12 个会话(sess-abc, sess-def, ...)",
287
- "actionable": true | false,
330
+ "actionable": true | false, // 见硬规则 6
288
331
  "executor": "opencode" | "claude" | "manual",
289
332
  "cwd_hint": "project_root"
290
333
  }
@@ -299,10 +342,48 @@ AdviceItem(项目级):
299
342
  4. 出现「分数」「等级」「Lx」「子分」「H1」「H2」「E1」「O1」字眼一律违规。
300
343
  5. action 必须是「项目级」可落地动作(写一个 skill / 改一个配置 / 形成
301
344
  一条惯例),不是「下次开场更具体一些」这种纯口头建议。
302
- 6. actionable / executor / cwd_hint 规则与单 session 版本相同;manual
303
- actionable 必须 false。
304
- 7. 如果没有发现任何值得 project 级别报告的问题(全是个例),把所有
305
- categories 设为空数组,summary 写「未发现项目级别的系统性协作问题」。
345
+
346
+ 6. **actionable=true 必须是「AI 在用户电脑上真的会落盘的改动」**。
347
+ 只有同时满足下面三点才能填 true:
348
+
349
+ a. 明确产出物在文件系统里——新建/修改某个具体的
350
+ \`.opencode/skills/<name>/SKILL.md\` / 配置文件 / 模板 / 脚本 /
351
+ 文档。一句话能说清"AI 将创建/修改 \`<相对路径>\`"。
352
+ b. 不依赖人类专属知识——AI 看着项目根目录就能做完;不需要密码、
353
+ 内部 wiki、外部账号、决策权、跨部门沟通。
354
+ c. 是项目级可复用的改变(本来就是项目级 advice 的核心场景)。
355
+
356
+ 只要任一点不满足 → actionable=false, executor='manual'。
357
+
358
+ **反例(下面这些必须 actionable=false / executor='manual')**:
359
+ - 「项目里大家以后都先写 README 再写代码」
360
+ —— 是改人的习惯,AI 没法落盘。
361
+ - 「换更便宜的默认模型」「关闭 reasoning」
362
+ —— 模型档位是用户在 client / 终端设的,不是 AI 改文件。
363
+ - 「定期回顾会话」「定期评审 token 成本」
364
+ —— 流程性建议,需要人来做。
365
+ - 「和团队沟通 AI 使用规范」「开个分享会」
366
+ —— 显然是人的事。
367
+ - **典型 actionable=true 的例子**:
368
+ · 在 \`.opencode/skills/<name>/SKILL.md\` 创建一个 skill
369
+ 自动化"开场附文件路径"这件事;
370
+ · 在仓库根目录新建一份 \`AGENTS.md\` 写下项目惯例;
371
+ · 修改 \`.editorconfig\` / lint 配置补齐缺失规则。
372
+
373
+ 7. executor:
374
+ - 落盘类动作(写 skill、加配置、写文档文件) → 'opencode' 或 'claude';
375
+ - 一切让人类调整行为/认知/沟通/决策的建议 → 'manual'。
376
+ 不会判断时,**默认填 'manual'**——错填 manual 只是少一个按钮;
377
+ 错填 opencode 会让用户点了按钮后 AI 干一些莫名其妙的事。
378
+
379
+ 8. cwd_hint: 目前只能填 "project_root"。
380
+
381
+ 9. actionable 与 executor 必须一致:executor='manual' ⇔ actionable=false;
382
+ executor 是 'opencode'/'claude' ⇔ actionable=true。
383
+ **不允许出现 executor='opencode' 且 actionable=false 的组合**。
384
+
385
+ 10. 如果没有发现任何值得 project 级别报告的问题(全是个例),把所有
386
+ categories 设为空数组,summary 写「未发现项目级别的系统性协作问题」。
306
387
 
307
388
  # 项目基础
308
389
 
@@ -314,7 +395,26 @@ AdviceItem(项目级):
314
395
  总 token: ${fmtNum(stats.totalTokens)}
315
396
  错误总数: ${fmtNum(stats.totalErrors)}
316
397
 
317
- # 各会话的 per-session 结论 ${truncatedNote}
398
+ ${mcpBlock ? `# MCP 服务器使用(跨该项目所有会话累计)
399
+
400
+ ${mcpBlock}
401
+
402
+ # 关于 MCP 的分析角度(仅在数据真的体现出问题时谈)
403
+
404
+ - **某个 MCP 错误率高**(err / calls 偏高):证据足够时,建议项目级地
405
+ 调整该服务器的使用方式(改 query 模板、改用其它来源、加 wrapper skill)。
406
+ - **某个 MCP 调用频次极高且都来自少数会话**:可能是反复在拉同一类数据,
407
+ 考虑包成一个 skill 或落到本地缓存文件。
408
+ - **MCP 与内置工具调用比例失衡**(例如 MCP 调用为 0 但项目明显需要 Jira /
409
+ GitHub 数据,推断自会话 summary 里"上网搜"或"手抄"等迹象),
410
+ 建议引入对应 MCP 服务器并写在惯例文档。
411
+ - **多个 MCP 服务器各只用 1-2 次**:可能是探索性试用,不必报告;只在
412
+ 错误率高或明显错配时谈。
413
+
414
+ 证据里点名具体 MCP 服务器名(例如 \`atlassian\`)和涉及的会话数。
415
+ 不要为了凑够 MCP 内容硬挑;没有信号就不要写 MCP 相关 AdviceItem。
416
+
417
+ ` : ''}# 各会话的 per-session 结论 ${truncatedNote}
318
418
 
319
419
  ${fmtSessionBlocks(ctx.sessions || [])}
320
420
 
@@ -39,7 +39,8 @@ const {
39
39
  truncateContext,
40
40
  annotateContext,
41
41
  } = require('./project-advice-prompt');
42
- const { loadAdvice } = require('./advice');
42
+ const { loadAdvice, looksLikeHumanAction } = require('./advice');
43
+ const { summariseMcpUsage } = require('./mcp-classify');
43
44
  const {
44
45
  queryAll,
45
46
  queryOne,
@@ -254,6 +255,43 @@ function isCacheFresh(cached, currentIds) {
254
255
  // Stats aggregation
255
256
  // ---------------------------------------------------------------------------
256
257
 
258
/**
 * Aggregate tool usage across the given sessions and split it into
 * MCP-server-grouped + built-in counts.
 *
 * Reads `unified_tool_call`, grouping by `tool_name` and counting rows
 * whose `status` is 'error', then hands the rows to summariseMcpUsage.
 *
 * Session ids are de-duplicated and queried in chunks so the number of
 * bound parameters per statement stays below SQLite's host-parameter
 * limit even for projects with very many sessions. A tool that appears
 * in several chunks produces several rows with the same tool_name;
 * this relies on summariseMcpUsage summing such rows.
 *
 * Returns the "empty" shape without touching the database when
 * `sessionIds` is missing or contains no usable ids.
 *
 * @param {object} db
 * @param {string[]} sessionIds
 * @returns {{
 *   servers: { server, calls, errors, tools }[],
 *   builtinCalls: number,
 *   mcpCalls: number,
 *   unknownCalls: number,
 * }}
 */
function summariseProjectMcpUsage(db, sessionIds) {
  // Kept well under the historical SQLite default of 999 bound parameters.
  const CHUNK_SIZE = 500;

  const ids = Array.isArray(sessionIds)
    ? Array.from(new Set(sessionIds.filter((id) => id != null)))
    : [];
  if (ids.length === 0) {
    return { servers: [], builtinCalls: 0, mcpCalls: 0, unknownCalls: 0 };
  }

  const rows = [];
  for (let start = 0; start < ids.length; start += CHUNK_SIZE) {
    const chunk = ids.slice(start, start + CHUNK_SIZE);
    const placeholders = chunk.map(() => '?').join(',');
    const part = queryAll(
      db,
      `SELECT tool_name,
              COUNT(*) AS count,
              SUM(CASE WHEN status='error' THEN 1 ELSE 0 END) AS error_count
         FROM unified_tool_call
        WHERE session_id IN (${placeholders})
        GROUP BY tool_name`,
      chunk
    );
    for (const row of part || []) rows.push(row);
  }
  return summariseMcpUsage(rows);
}
294
+
257
295
  function summariseStats(db, sessions) {
258
296
  let totalCost = 0;
259
297
  let totalTokens = 0;
@@ -381,9 +419,16 @@ async function generateProjectAdvice(db, opts = {}) {
381
419
 
382
420
  // 6. assemble + truncate
383
421
  const stats = summariseStats(db, withAdvice);
422
+ // Cross-session MCP server usage — fed into the prompt so the LLM
423
+ // can spot under/over-used MCP servers at project scope. We use ALL
424
+ // resolved sessions (raw), not just the ones with advice cached:
425
+ // even sessions without per-session advice still contribute real
426
+ // tool-call data to the picture.
427
+ const mcpUsage = summariseProjectMcpUsage(db, raw.map((r) => r.id));
384
428
  const ctx = annotateContext({
385
429
  project, scope, windowFrom, windowTo,
386
430
  stats,
431
+ mcpUsage,
387
432
  sessions: withAdvice,
388
433
  });
389
434
  const trimmed = truncateContext(ctx);
@@ -469,11 +514,19 @@ function normaliseItem(it) {
469
514
  const cwd_hint = ALL_CWD_HINTS.includes(it.cwd_hint) ? it.cwd_hint : 'project_root';
470
515
  if (executor === 'manual') actionable = false;
471
516
  if (actionable && executor === 'manual') executor = 'opencode';
517
+
518
+ // Same human-action safety net as the session-level normalizer.
519
+ const action = typeof it.action === 'string' ? it.action : '';
520
+ if (actionable && looksLikeHumanAction(action)) {
521
+ actionable = false;
522
+ executor = 'manual';
523
+ }
524
+
472
525
  return {
473
526
  severity,
474
527
  title: typeof it.title === 'string' ? it.title.trim() : '',
475
528
  why: typeof it.why === 'string' ? it.why.trim() : '',
476
- action: typeof it.action === 'string' ? it.action.trim() : '',
529
+ action: action.trim(),
477
530
  evidence: typeof it.evidence === 'string' ? it.evidence.trim() : '',
478
531
  actionable,
479
532
  executor,