agentboss 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,6 @@
1
1
  'use strict';
2
2
 
3
3
  const { classifySession } = require('./difficulty');
4
- const problemDef = require('./dimensions/problem-definition');
5
- const judgement = require('./dimensions/judgement');
6
- const outputQuality = require('./dimensions/output-quality');
7
4
  const { analyzeSessionLLM } = require('../llm/session-analyzer');
8
5
  const { dimensionSource, mergeIndicator } = require('./dimensions/llm-merge');
9
6
  const { rollupDimension, scoreToLevel } = require('./thresholds-v2');
@@ -28,18 +25,24 @@ async function analyzeSessionV2(db, session, opts = {}) {
28
25
  catch (_) { llm = null; }
29
26
  const cells = (llm && llm.scores) || {};
30
27
 
31
- const h1 = safe(() => problemDef.analyze(db, session, difficulty.bucket, cells.H1));
32
- const h2 = safe(() => judgement.analyze(db, session, difficulty.bucket, cells.H2));
28
+ // All five dimensions are fully LLM-judged — no rule fallback. A
29
+ // sub-indicator the LLM can't score stays null ("未评估") rather than
30
+ // degrading to a keyword heuristic.
31
+ const h1 = safe(() => buildLlmDimension(cells.H1, H1_LABELS, 'H1'));
32
+ const h2 = safe(() => buildLlmDimension(cells.H2, H2_LABELS, 'H2'));
33
33
  const h3 = safe(() => buildLlmDimension(normaliseH3(cells.H3), H3_LABELS, 'H3'));
34
- // ENV — merged AI-capability environment (knowledge / tooling / currency),
35
- // fully LLM-judged. Persisted into the legacy E1 slot (E2 retired).
34
+ // ENV — merged AI-capability environment (knowledge / tooling / currency).
35
+ // Persisted into the legacy E1 slot (E2 retired).
36
36
  const env = safe(() => buildLlmDimension(cells.ENV, ENV_LABELS, 'ENV'));
37
- const o1 = safe(() => outputQuality.analyze(db, session, difficulty.bucket, cells.O1));
37
+ const o1 = safe(() => buildLlmDimension(cells.O1, O1_LABELS, 'O1'));
38
38
 
39
39
  const judgeSrc = dimensionSource([h1, h2, h3, env, o1].map((r) => r && r.judgeSource));
40
40
  return assemble(difficulty, { h1, h2, h3, e1: env, e2: null, o1 }, llm, judgeSrc);
41
41
  }
42
42
 
43
+ const H1_LABELS = { clarity: '初始指令清晰度', converge: '任务收敛轮次', drift: '方向变更次数' };
44
+ const H2_LABELS = { challenge: '合理质疑率', override: '推翻率', accept_rate: '顺从/采纳判断' };
45
+ const O1_LABELS = { first_take: '一次采纳率', code_style: '代码规范性', completeness: '方案完备性' };
43
46
  const H3_LABELS = { abstraction: '抽象层级', reuse: '复用意识', standard: '规范约束' };
44
47
  const ENV_LABELS = { knowledge: '知识覆盖', tooling: '工具运用', currency: '时效性' };
45
48
 
@@ -75,6 +78,7 @@ function buildLlmDimension(group, labels, dimKey) {
75
78
  key: k, label: labels[k],
76
79
  what: m.evidence || '未发现相关证据',
77
80
  score: m.score, level: m.level,
81
+ source: m.source,
78
82
  };
79
83
  sources.push(m.source);
80
84
  if (m.score != null) any = true;
@@ -4,6 +4,8 @@
4
4
  * POST /api/execution/start { sessionId, adviceKey, executor?, ephemeral? }
5
5
  * POST /api/execution/project/start { project, scope, from?, to?, adviceKey, executor?, ephemeral? }
6
6
  * POST /api/execution/cancel/:runId
7
+ * GET /api/execution/preview ?sessionId=&adviceKey=&executor=
8
+ * GET /api/execution/project/preview ?project=&scope=&from=&to=&adviceKey=&executor=
7
9
  * GET /api/execution/:runId ?full=1 → return full stdout/stderr
8
10
  * GET /api/execution/advice/:sessionId
9
11
  * GET /api/execution/project/advice ?project=&scope=&from=&to=
@@ -229,6 +231,98 @@ module.exports = function (db) {
229
231
  res.json({ ok: true, data: { run: projectRun(row) } });
230
232
  });
231
233
 
234
+ // -------------------------------------------------------------------------
235
+ // Preview — return the exact prompt + cwd + CLI command that "执行"
236
+ // would invoke. Lets the UI show users *what* the auto-executor is
237
+ // about to do, and serves as a manual fallback (copy the prompt and
238
+ // paste it into your own OpenCode / Claude Code session).
239
+ //
240
+ // Registered BEFORE the catch-all GET /:runId so adviceKey-based
241
+ // lookups don't collide with run-id lookups.
242
+ // -------------------------------------------------------------------------
243
+
244
// GET /preview?sessionId=&adviceKey=&executor=
// Validates the query string, then returns whatever job.previewExecution
// resolved for this session advice item (prompt, project path, CLI command).
router.get('/preview', (req, res) => {
  const { sessionId, adviceKey, executor } = req.query;

  // Single place that shapes every validation failure below.
  const badRequest = (message) => res.status(400).json({
    ok: false, error: { code: 'BAD_REQUEST', message },
  });
  // Query values may be missing or non-strings; both count as blank here.
  const isBlank = (value) => typeof value !== 'string' || !value.trim();

  if (isBlank(sessionId)) return badRequest('sessionId required');
  if (isBlank(adviceKey)) return badRequest('adviceKey required');
  // executor is optional; when supplied it must be one of VALID_EXECUTORS.
  if (executor !== undefined && !VALID_EXECUTORS.includes(executor)) {
    return badRequest('executor must be opencode or claude');
  }

  const r = job.previewExecution(db, { sessionId, adviceKey, executor });
  if (!r.ok) return failure(res, r.reason, r);

  // Expose only the preview fields; the job's `ok` flag is not forwarded.
  const data = {
    scope: r.scope,
    adviceKey: r.adviceKey,
    executor: r.executor,
    project: r.project,
    projectExists: r.projectExists,
    cli: r.cli,
    prompt: r.prompt,
    item: r.item,
  };
  res.json({ ok: true, data });
});
279
+
280
// GET /project/preview?project=&scope=&from=&to=&adviceKey=&executor=
// Validates the query string, then returns whatever
// job.previewProjectExecution resolved for this project advice item
// (prompt, project path, CLI command).
router.get('/project/preview', (req, res) => {
  const { project, scope, adviceKey, executor } = req.query;
  const rawFrom = req.query.from;
  const rawTo = req.query.to;

  // Single place that shapes every validation failure below.
  const badRequest = (message) => res.status(400).json({
    ok: false, error: { code: 'BAD_REQUEST', message },
  });
  // Query values may be missing or non-strings; both count as blank here.
  const isBlank = (value) => typeof value !== 'string' || !value.trim();
  const isOptionalString = (value) => value === undefined || typeof value === 'string';

  if (isBlank(project)) return badRequest('project required');
  if (typeof scope !== 'string' || !['daily', 'weekly', 'all'].includes(scope)) {
    return badRequest('scope must be daily|weekly|all');
  }
  if (isBlank(adviceKey)) return badRequest('adviceKey required');
  // executor is optional; when supplied it must be one of VALID_EXECUTORS.
  if (executor !== undefined && !VALID_EXECUTORS.includes(executor)) {
    return badRequest('executor must be opencode or claude');
  }
  // Repeated or bracketed query params (?from=a&from=b, ?from[x]=1) parse to
  // arrays/objects. Reject them here instead of forwarding a non-string
  // window bound to the job layer. Checked last so the pre-existing
  // validation errors keep their precedence.
  if (!isOptionalString(rawFrom) || !isOptionalString(rawTo)) {
    return badRequest('from/to must be strings');
  }

  const r = job.previewProjectExecution(db, {
    project,
    scope,
    windowFrom: rawFrom || '',
    windowTo: rawTo || '',
    adviceKey,
    executor,
  });
  if (!r.ok) return failure(res, r.reason, r);

  // Expose only the preview fields; the job's `ok` flag is not forwarded.
  const data = {
    scope: r.scope,
    adviceKey: r.adviceKey,
    executor: r.executor,
    project: r.project,
    projectExists: r.projectExists,
    cli: r.cli,
    prompt: r.prompt,
    item: r.item,
  };
  res.json({ ok: true, data });
});
325
+
232
326
  // -------------------------------------------------------------------------
233
327
  // Project-level start + list
234
328
  //
@@ -325,8 +325,11 @@ function initDatabase(db) {
325
325
  ['currency_rate', '1'],
326
326
  ['idle_threshold_minutes', '5'],
327
327
  ['llm_tool_preference', 'auto'],
328
- // v2: opt-in LLM judge for E1/O1 dimensions
329
- ['enable_llm_judge', '0'],
328
+ // v2: LLM judge for E1/O1 dimensions. On by default for new
329
+ // installs — existing users' explicit '0' is preserved by the
330
+ // INSERT OR IGNORE seed below, so toggling this only flips fresh
331
+ // boss.db files.
332
+ ['enable_llm_judge', '1'],
330
333
  ];
331
334
 
332
335
  const stmt = db.prepare(
@@ -27,6 +27,7 @@ const {
27
27
  getEtlState,
28
28
  updateEtlState,
29
29
  } = require('../db/queries');
30
+ const { canonicalProject } = require('../utils/project');
30
31
 
31
32
  // ---------------------------------------------------------------------------
32
33
  // Constants
@@ -210,7 +211,10 @@ function mapSession(row, msgCount, errCount, toolCount, agg = {}) {
210
211
  tokens_cache_read: agg.tokens_cache_read || 0,
211
212
  tokens_cache_write: agg.tokens_cache_write || 0,
212
213
  cost_usd: agg.cost_usd || 0,
213
- project: row.directory || null,
214
+ // OpenCode sometimes records Windows drives as "C//felix/code/X" (the
215
+ // colon got dropped). canonicalProject re-inserts the ":" so downstream
216
+ // cwd / whitelist / fs.existsSync checks work. See server/utils/project.js.
217
+ project: canonicalProject(row.directory) || null,
214
218
  title: row.title || null,
215
219
  model: agg.model_id || null,
216
220
  error_count: errCount,
@@ -297,6 +297,140 @@ function cleanupOrphans(db) {
297
297
  return orphans.length;
298
298
  }
299
299
 
300
+ // ---------------------------------------------------------------------------
301
+ // Public: previewExecution / previewProjectExecution
302
+ //
303
+ // Lookup-only counterpart of startExecution. Returns exactly the prompt
304
+ // text we would have piped through stdin to the executor, along with the
305
+ // resolved cwd, executor name and the shell command we would run. Used
306
+ // by the UI to let the user see (and copy) the full instruction before
307
+ // hitting "执行", and as a manual fallback for cases the auto path can't
308
+ // serve (CLI missing, item.actionable=false, …).
309
+ //
310
+ // Stays close to startExecution's resolution order so they can't drift:
311
+ // 1. session / project + advice cache lookup
312
+ // 2. AdviceItem lookup
313
+ // 3. executor pick (explicit option, else item.executor, else default opencode)
314
+ // 4. prompt assembly via the same builder used in spawnRunAsync
315
+ //
316
+ // We deliberately don't enforce actionable / whitelist / project-exists
317
+ // here — the UI wants to show this info even when the auto-run path is
318
+ // blocked.
319
+ // ---------------------------------------------------------------------------
320
+
321
/**
 * Describe the CLI invocation that corresponds to an executor, for display
 * in the preview response. The prompt is fed on stdin, which is why the
 * shell hint redirects from `prompt.txt`.
 *
 * @param {string} executor 'claude' selects `claude -p`; every other value
 *   (including 'opencode' and 'manual') selects `opencode run`.
 * @param {string} cwd Working directory; returned unchanged in `cwd`.
 * @returns {{bin: string, args: string[], cwd: string, stdinIsPrompt: boolean, shellHint: string}}
 *   `shellHint` is a copy-pasteable one-liner. The directory is wrapped in
 *   double quotes when it contains anything beyond plain path characters
 *   (spaces, backslashes, non-ASCII, shell metacharacters), and the `cd`
 *   prefix is omitted entirely when no directory is known.
 */
function describeCliCommand(executor, cwd) {
  const isClaude = executor === 'claude';
  const bin = isClaude ? 'claude' : 'opencode';
  const args = isClaude ? ['-p'] : ['run'];
  const invocation = `${[bin, ...args].join(' ')} < prompt.txt`;

  const dir = typeof cwd === 'string' ? cwd : '';
  let shellHint = invocation;
  if (dir) {
    let target = dir;
    // Plain paths stay unquoted so the hint reads naturally; everything
    // else is double-quoted, with the characters that remain special
    // inside POSIX double quotes escaped.
    if (!/^[\w@%+=:,.\/-]+$/.test(dir)) {
      const escaped = dir.replace(/["$`]/g, '\\$&');
      target = `"${escaped}"`;
    }
    shellHint = `cd ${target} && ${invocation}`;
  }

  return {
    bin,
    args,
    cwd,
    stdinIsPrompt: true,
    shellHint,
  };
}
342
+
343
/**
 * Session-scoped preview: resolve the advice item and assemble the prompt
 * for it without starting a run.
 *
 * @param {object} db Database handle passed through to the lookup helpers.
 * @param {object} opts
 * @param {string} opts.sessionId Session whose cached advice is consulted.
 * @param {string} opts.adviceKey Key of the advice item to preview.
 * @param {string} [opts.executor] Explicit executor; when absent the item's
 *   own executor is used, then 'opencode'.
 * @returns {object} `{ ok: false, reason }` with reason 'no-session',
 *   'no-advice' or 'no-advice-item' when a lookup comes back empty;
 *   otherwise `{ ok: true, scope: 'session', adviceKey, executor, item,
 *   project, projectExists, cli, prompt }`.
 */
function previewExecution(db, { sessionId, adviceKey, executor: executorOpt }) {
  const session = getSessionById(db, sessionId);
  if (!session) {
    return { ok: false, reason: 'no-session' };
  }

  const advice = loadAdvice(db, sessionId);
  if (!advice) {
    return { ok: false, reason: 'no-advice' };
  }

  const item = findAdviceItem(advice, adviceKey);
  if (!item) {
    return { ok: false, reason: 'no-advice-item' };
  }

  const executor = executorOpt || item.executor || 'opencode';

  // session.project can hold an OpenCode-recorded path such as
  // "C//felix/code/X" (drive colon dropped); canonicalProject restores it.
  // An unresolvable path collapses to '' rather than aborting the preview.
  const project = canonicalProject(session.project || '') || '';
  const recentUserMessages = fetchRecentUserMessages(db, sessionId);

  const sessionSummary = {
    project,
    title: session.title,
    model: session.model,
    durationMinutes: session.duration_minutes,
    messageCount: session.message_count,
  };
  const prompt = buildExecutionPrompt({
    advice: item,
    session: sessionSummary,
    recentUserMessages,
  });

  // Validity is reported, not enforced: the preview is still returned when
  // the project path does not check out.
  const projectExists = isValidProjectPath(project);
  const cli = describeCliCommand(executor, project);

  return {
    ok: true,
    scope: 'session',
    adviceKey,
    executor,
    item,
    project,
    projectExists,
    cli,
    prompt,
  };
}
384
+
385
/**
 * Project-scoped preview: resolve the advice item from the cached project
 * advice and assemble the prompt for it without starting a run.
 *
 * @param {object} db Database handle passed through to the lookup helpers.
 * @param {object} opts
 * @param {string} opts.project Raw project path; canonicalised before use.
 * @param {string} opts.scope Advice scope. For 'all' the window bounds are
 *   ignored (treated as ''); for any other scope both bounds are required.
 * @param {string} [opts.windowFrom=''] Window start.
 * @param {string} [opts.windowTo=''] Window end.
 * @param {string} opts.adviceKey Key of the advice item to preview.
 * @param {string} [opts.executor] Explicit executor; when absent the item's
 *   own executor is used, then 'opencode'.
 * @returns {object} `{ ok: false, reason }` with reason 'no-project',
 *   'no-window', 'no-advice' or 'no-advice-item'; otherwise
 *   `{ ok: true, scope: 'project', adviceKey, executor, item, project,
 *   projectExists, cli, prompt }`.
 */
function previewProjectExecution(db, {
  project: projectRaw, scope, windowFrom = '', windowTo = '',
  adviceKey, executor: executorOpt,
}) {
  const project = canonicalProject(projectRaw || '');
  if (!project) {
    return { ok: false, reason: 'no-project' };
  }

  const coversEverything = scope === 'all';
  const hasFullWindow = Boolean(windowFrom && windowTo);
  if (!scope || (!coversEverything && !hasFullWindow)) {
    return { ok: false, reason: 'no-window' };
  }

  // The 'all' scope is keyed with empty window bounds, both for the cache
  // lookup and in the prompt.
  const effectiveFrom = coversEverything ? '' : windowFrom;
  const effectiveTo = coversEverything ? '' : windowTo;

  const cached = loadProjectAdvice(db, project, scope, effectiveFrom, effectiveTo);
  if (!cached || !cached.payload) {
    return { ok: false, reason: 'no-advice' };
  }

  const item = findAdviceItem(cached.payload, adviceKey);
  if (!item) {
    return { ok: false, reason: 'no-advice-item' };
  }

  const executor = executorOpt || item.executor || 'opencode';

  // Anything other than an array is treated as "no patterns".
  const patterns = cached.payload.crossSessionPatterns;
  const crossSessionPatterns = Array.isArray(patterns) ? patterns : [];

  const prompt = buildProjectExecutionPrompt({
    advice: item,
    project: {
      path: project,
      scope,
      windowFrom: effectiveFrom,
      windowTo: effectiveTo,
      sessionCount: cached.sessionCount,
    },
    crossSessionPatterns,
  });

  // Validity is reported, not enforced: the preview is still returned when
  // the project path does not check out.
  const projectExists = isValidProjectPath(project);
  const cli = describeCliCommand(executor, project);

  return {
    ok: true,
    scope: 'project',
    adviceKey,
    executor,
    item,
    project,
    projectExists,
    cli,
    prompt,
  };
}
433
+
300
434
  // ---------------------------------------------------------------------------
301
435
  // Public: startExecution
302
436
  // ---------------------------------------------------------------------------
@@ -344,7 +478,10 @@ async function startExecution(db, opts) {
344
478
  }
345
479
 
346
480
  // 5. Project path validity.
347
- const project = session.project;
481
+ // Normalise the same way preview does so cwd, whitelist comparison
482
+ // and fs.existsSync all agree. OpenCode-source paths can come in
483
+ // as "C//felix/code/X" (colon dropped); canonicalProject fixes it.
484
+ const project = canonicalProject(session.project || '') || '';
348
485
  if (!isValidProjectPath(project)) {
349
486
  return { ok: false, reason: 'invalid-project-path', extra: { project } };
350
487
  }
@@ -385,7 +522,7 @@ async function startExecution(db, opts) {
385
522
  const promptBuilder = () => buildExecutionPrompt({
386
523
  advice: item,
387
524
  session: {
388
- project: session.project,
525
+ project,
389
526
  title: session.title,
390
527
  model: session.model,
391
528
  durationMinutes: session.duration_minutes,
@@ -643,6 +780,8 @@ async function cancelRun(db, runId) {
643
780
  module.exports = {
644
781
  startExecution,
645
782
  startProjectExecution,
783
+ previewExecution,
784
+ previewProjectExecution,
646
785
  cancelRun,
647
786
  getRun,
648
787
  listRunsForAdvice,
@@ -236,6 +236,7 @@ function buildAdvicePrompt(ctx) {
236
236
  - 流程节奏(回退次数、纠偏速度、是否过早收敛)
237
237
  - 是否适合引入一个 opencode skill 或 subagent 来自动化重复模式
238
238
  - 成本与 token 经济性
239
+ - **MCP 服务器使用**(见下「关于 MCP」一节)
239
240
 
240
241
  不要谈:
241
242
  - 对话中讨论的具体技术 / 代码是否正确(如「你写的 SQL JOIN 错了」、
@@ -247,6 +248,46 @@ function buildAdvicePrompt(ctx) {
247
248
  如果对话里只有业务讨论、看不出可改进的协作模式,5 个类别都给空数组,
248
249
  summary 写「本会话以业务讨论为主,协作模式无明显问题」。
249
250
 
251
+ # 关于 MCP(Model Context Protocol)
252
+
253
+ 下文的「工具使用 Top 20」里,工具可能来自两类来源:
254
+
255
+ - **内置工具**:名字短、全小写、无分隔符。例如
256
+ \`read\` \`write\` \`edit\` \`bash\` \`glob\` \`grep\` \`todowrite\`
257
+ \`task\` \`skill\` \`websearch\` \`question\` (OpenCode)
258
+ 以及 \`ls\` \`webfetch\` \`notebookread\` 等(Claude Code)。
259
+
260
+ - **MCP 工具**(由用户配置的外部 MCP 服务器提供):
261
+ - OpenCode 命名:\`<服务器名>_<方法名>\`(单下划线),例如
262
+ \`atlassian_getJiraIssue\` \`atlassian_searchConfluence\` —
263
+ "atlassian" 就是 MCP 服务器名。
264
+ - Claude Code 命名:\`mcp__<服务器名>__<方法名>\`(双下划线),例如
265
+ \`mcp__github__list_issues\`。
266
+
267
+ 请在分析时显式辨认 MCP 工具,并考虑以下角度(只在确实有迹象时谈,不要硬凑):
268
+
269
+ - **该不该用 MCP**:这一次任务里 MCP 工具是不是真的派上了用场?
270
+ 如果调用了 MCP 但没真正解决问题(只是来回查),建议下次直接给 AI
271
+ 具体信息或换一种问法。
272
+ - **MCP 调用错误率高**:某个 MCP 工具错误率明显偏高(从表中
273
+ err 列读),建议改用其他来源或先用一次手动调用确认参数。
274
+ - **重复 MCP 调用**:连续多次同名 MCP 调用拉同一类数据,建议下次
275
+ 一次性指明需要的字段,或用本地缓存/文件代替。
276
+ - **应该用 MCP 而没用**:用户多次让 AI"上网搜"或手抄外部系统数据
277
+ (Jira/GitHub/Slack/Notion 等),而项目本应配置对应 MCP 直接拉,
278
+ 可建议引入相应 MCP 服务器。
279
+
280
+ 按照上面 5 大类的归属:
281
+ - MCP 调用浪费/重复 → \`cost\`
282
+ - MCP 报错频繁、参数不对 → \`accuracy\`
283
+ - 缺少 MCP 上下文导致需要手抄数据 → \`context\`
284
+ - 反复出现的 MCP 调用模式可包成 skill → \`skills\`
285
+ - MCP 与人工/内置工具的分工节奏 → \`workflow\`
286
+
287
+ 不要新增类别,不要新增 AdviceItem 字段。
288
+ evidence 里点名具体 MCP 工具(完整工具名,例如 \`atlassian_getJiraIssue\`)。
289
+ 如果对话中没有 MCP 工具,或 MCP 用得很合理,不要硬挑毛病。
290
+
250
291
  # 输出契约
251
292
 
252
293
  只输出严格 JSON,不要 markdown 代码块,不要多余文字。结构如下:
@@ -270,7 +311,7 @@ AdviceItem:
270
311
  "why": "1 句话,说明协作上的问题",
271
312
  "action": "1 句话,具体可操作的改变(下次怎么做)",
272
313
  "evidence": "引自第 N 条消息 / 工具 X / 基础统计 — 必须是对话事实,不得引用任何评分",
273
- "actionable": true | false,
314
+ "actionable": true | false, // 见硬规则 6:仅当 AI 能在本机落盘改动时 true
274
315
  "executor": "opencode" | "claude" | "manual",
275
316
  "cwd_hint": "project_root"
276
317
  }
@@ -291,21 +332,43 @@ AdviceItem:
291
332
  每条 action 给出:skill 名 + 触发条件 + 一句话用途。
292
333
  - workflow 流程与节奏:拆解、迭代步幅、回退策略、人 ↔ AI 分工。
293
334
 
294
- 6. actionable=true 的条件必须同时满足:
295
- - 是「写代码 / 改文件 / 加 skill」类具体动作;
296
- - 在原项目根目录运行 opencode/claude 就能完成,无需补充人类专属知识。
297
- 不属于这一类(如「下次开场用模板」「以后多用缓存」「对 AI 的指令更
298
- 具体」)的 actionable=false, executor='manual'。
335
+ 6. actionable=true 必须是「AI 在用户电脑上**真的会落盘的改动**」。
336
+ 只有同时满足下面三点才能填 true:
337
+
338
+ a. **明确的产出物在文件系统里**——新建或修改某个具体的文件 /
339
+ 配置项 / skill / 脚本 / 模板。可以一句话说清"AI 将创建/修改
340
+ \`<相对路径>\`"。
341
+ b. **不依赖人类专属知识**——AI 看着项目根目录就能做完;不需要
342
+ 访问只有人才能拿到的密码、内部 wiki、外部账号、决策权。
343
+ c. **是项目级的、可重复受益的改变**——例如新增一个 skill、加一
344
+ 条 lint 规则、写一份模板,而不是"修这一次的 bug"。
345
+
346
+ 只要任何一点不满足 → actionable=false, executor='manual'。
347
+
348
+ **反例(下面这些必须是 actionable=false / executor='manual')**:
349
+ - 「下次开场用模板」「以后多用缓存」「下次先给 AI 文件路径」
350
+ —— 是让**人**改行为,不是落盘改动。
351
+ - 「换更便宜的模型」「关闭 reasoning」「用 sonnet 而非 opus」
352
+ —— 模型切换是用户在 client / 终端做的,不是 AI 改文件。
353
+ - 「检查代码是否正确」「再确认一遍 SQL」
354
+ —— 是要求人或 AI 验证,不是落盘动作。
355
+ - 「拆分任务」「分步提问」「先讨论再写代码」
356
+ —— 协作流程建议,只能人来执行。
357
+ - 「向团队同步」「写文档」(注:这条**有边界** —— 如果是让 AI
358
+ 在仓库里新建/更新某个具体 .md 文件,可 actionable=true;
359
+ 如果是"和同事开个会"则 false)。
299
360
 
300
361
  7. executor:
301
- - 显式动手做的任务(创建文件、改代码、写 skill) → 'opencode' 或 'claude';
302
- - 单纯让人类调整行为的建议 → 'manual'。
303
- 不会判断时填 'opencode'
362
+ - 落盘类动作(创建/改文件、写 skill、改配置文件) → 'opencode' 或 'claude';
363
+ - 一切让人类调整行为/认知/沟通的建议 → 'manual'。
364
+ 不会判断时,**默认填 'manual'**——错填 manual 只是少一个按钮;
365
+ 错填 opencode 会让用户点了按钮后 AI 干一些莫名其妙的事。
304
366
 
305
367
  8. cwd_hint: 目前只能填 "project_root"。
306
368
 
307
- 9. actionable 与 executor 必须一致:executor='manual' actionable 必须 false;
308
- executor 是 'opencode'/'claude' actionable 通常 true。
369
+ 9. actionable 与 executor 必须一致:executor='manual' actionable=false;
370
+ executor 是 'opencode'/'claude' actionable=true。
371
+ **不允许出现 executor='opencode' 且 actionable=false 的组合**。
309
372
 
310
373
  # 会话基础(只作事实参考,不要换算成分数)
311
374
 
@@ -357,11 +357,21 @@ function normaliseItem(it) {
357
357
  if (executor === 'manual') actionable = false;
358
358
  if (actionable && executor === 'manual') executor = 'opencode';
359
359
 
360
+ // Heuristic safety net: even with a tight prompt, LLMs sometimes mark
361
+ // "you should do X next time" advice as actionable=true, which would
362
+ // give the user an auto-execute button that does nonsense. Detect
363
+ // unmistakeably human-action phrasing in `action` and downgrade.
364
+ const action = typeof it.action === 'string' ? it.action : '';
365
+ if (actionable && looksLikeHumanAction(action)) {
366
+ actionable = false;
367
+ executor = 'manual';
368
+ }
369
+
360
370
  return {
361
371
  severity,
362
372
  title: typeof it.title === 'string' ? it.title.trim() : '',
363
373
  why: typeof it.why === 'string' ? it.why.trim() : '',
364
- action: typeof it.action === 'string' ? it.action.trim() : '',
374
+ action: action.trim(),
365
375
  evidence: typeof it.evidence === 'string' ? it.evidence.trim() : '',
366
376
  actionable,
367
377
  executor,
@@ -369,6 +379,44 @@ function normaliseItem(it) {
369
379
  };
370
380
  }
371
381
 
382
// Phrases that mark an advice `action` as something the *user* has to do.
// Every pattern that contains English words carries the `i` flag so that
// capitalised spellings ("Reasoning", "Temperature", "Sonnet", "Review")
// are caught the same way as lowercase ones.
const HUMAN_ACTION_PATTERNS = [
  // Next-time / future-tense markers
  /下次/, /以后/, /未来/, /后续/, /今后/, /日后/,
  // Switching model / runtime settings (user-side configuration, not repo files)
  /换(成|为)?[\s]*(sonnet|opus|haiku|claude|gpt|gemini|grok|deepseek)/i,
  /(切换|更换|改用|改成|降级|升级).{0,8}模型/,
  /关(掉|闭|去).{0,4}(reasoning|推理|思考)/i,
  /启用.{0,4}(reasoning|推理)/i,
  /调低.{0,6}(reasoning|temperature|温度)/i,
  // Talk-to-humans
  /和(团队|同事|领导|产品|设计)/, /与(团队|同事|领导)/,
  /(告知|通知|同步给|抄送|沟通).{0,6}(团队|同事|领导|项目组)/,
  /(开个|开一次|组织).{0,4}(会议|分享|评审|review)/i,
  // Habit / process changes (no filesystem delta)
  /(养成|形成|建立)[^。;;]{0,30}(习惯|惯例|节奏)/,
  /(定期|每周|每天|每月).{0,8}(回顾|评审|检查|总结)/,
  /(培训|学习|熟悉|掌握).{0,8}(用法|文档|规范)/,
];

/**
 * Lightweight Chinese-text heuristic. Returns true when `action` reads
 * like a behaviour change the *user* needs to make, rather than a
 * filesystem-level change an AI agent could carry out.
 *
 * Only phrasing listed in HUMAN_ACTION_PATTERNS is recognised; anything
 * else is reported as not human-only.
 *
 * @param {*} action Advice action text. Non-strings and '' yield false.
 * @returns {boolean} true when any pattern matches `action`.
 */
function looksLikeHumanAction(action) {
  if (typeof action !== 'string' || action === '') return false;
  return HUMAN_ACTION_PATTERNS.some((pattern) => pattern.test(action));
}
419
+
372
420
  // ---------------------------------------------------------------------------
373
421
  // Exports
374
422
  // ---------------------------------------------------------------------------
@@ -381,4 +429,5 @@ module.exports = {
381
429
  // exported for tests / debugging:
382
430
  assembleContext,
383
431
  normaliseAdvicePayload,
432
+ looksLikeHumanAction,
384
433
  };