npm - agentboss - Versions diffs - 0.1.0 → 0.1.2 - Mend

agentboss 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/bin/aboss.js +288 -288
package/client/dist/assets/index-DxoLOxZ8.js +141 -0
package/client/dist/index.html +1 -1
package/package.json +1 -1
package/server/analysis/dimensions/judgement.js +111 -107
package/server/analysis/dimensions/llm-merge.js +59 -57
package/server/analysis/dimensions/output-quality.js +167 -167
package/server/analysis/dimensions/problem-definition.js +109 -104
package/server/analysis/job.js +91 -14
package/server/analysis/report-builder.js +574 -581
package/server/analysis/scoring-v2.js +126 -72
package/server/analysis/thresholds-v2.js +364 -358
package/server/api/execution.js +94 -0
package/server/db/schema.js +5 -2
package/server/etl/opencode.js +5 -1
package/server/execution/job.js +141 -2
package/server/llm/advice-prompt.js +74 -11
package/server/llm/advice.js +50 -1
package/server/llm/analysis-prompt.js +173 -162
package/server/llm/cli-runner.js +18 -2
package/server/llm/judge.js +6 -1
package/server/llm/mcp-classify.js +147 -0
package/server/llm/project-advice-prompt.js +106 -6
package/server/llm/project-advice.js +55 -2
package/server/llm/session-analyzer.js +10 -1
package/client/dist/assets/index-DBj1Ujlx.js +0 -137

package/server/api/execution.js CHANGED Viewed

@@ -4,6 +4,8 @@
  *   POST   /api/execution/start              { sessionId, adviceKey, executor?, ephemeral? }
  *   POST   /api/execution/project/start      { project, scope, from?, to?, adviceKey, executor?, ephemeral? }
  *   POST   /api/execution/cancel/:runId
+ *   GET    /api/execution/preview            ?sessionId=&adviceKey=&executor=
+ *   GET    /api/execution/project/preview    ?project=&scope=&from=&to=&adviceKey=&executor=
  *   GET    /api/execution/:runId             ?full=1 → return full stdout/stderr
  *   GET    /api/execution/advice/:sessionId
  *   GET    /api/execution/project/advice     ?project=&scope=&from=&to=
@@ -229,6 +231,98 @@ module.exports = function (db) {
     res.json({ ok: true, data: { run: projectRun(row) } });
   });
+  // -------------------------------------------------------------------------
+  //  Preview — return the exact prompt + cwd + CLI command that "执行"
+  //  would invoke.  Lets the UI show users *what* the auto-executor is
+  //  about to do, and serves as a manual fallback (copy the prompt and
+  //  paste it into your own OpenCode / Claude Code session).
+  //
+  //  Registered BEFORE the catch-all GET /:runId so adviceKey-based
+  //  lookups don't collide with run-id lookups.
+  // -------------------------------------------------------------------------
+  // GET /preview — session-scoped preview.  Validates the query string,
+  // delegates to job.previewExecution and echoes the resolved fields.
+  router.get('/preview', (req, res) => {
+    const { sessionId, adviceKey, executor } = req.query;
+    // Every validation failure shares the same 400 envelope.
+    const reject = (message) => res.status(400).json({
+      ok: false, error: { code: 'BAD_REQUEST', message },
+    });
+    const isBlank = (value) => typeof value !== 'string' || !value.trim();
+    if (isBlank(sessionId)) return reject('sessionId required');
+    if (isBlank(adviceKey)) return reject('adviceKey required');
+    // executor is optional; when supplied it must be a known executor.
+    if (executor !== undefined && !VALID_EXECUTORS.includes(executor)) {
+      return reject('executor must be opencode or claude');
+    }
+    const preview = job.previewExecution(db, { sessionId, adviceKey, executor });
+    if (!preview.ok) return failure(res, preview.reason, preview);
+    const data = {
+      scope: preview.scope,
+      adviceKey: preview.adviceKey,
+      executor: preview.executor,
+      project: preview.project,
+      projectExists: preview.projectExists,
+      cli: preview.cli,
+      prompt: preview.prompt,
+      item: preview.item,
+    };
+    res.json({ ok: true, data });
+  });
+  // GET /project/preview — project-scoped preview.  Validates the query
+  // string, delegates to job.previewProjectExecution and echoes the
+  // resolved fields.
+  router.get('/project/preview', (req, res) => {
+    const { project, scope, adviceKey, executor } = req.query;
+    // Window bounds are optional at this layer; missing values become ''.
+    const windowFrom = req.query.from || '';
+    const windowTo = req.query.to || '';
+    // Every validation failure shares the same 400 envelope.
+    const reject = (message) => res.status(400).json({
+      ok: false, error: { code: 'BAD_REQUEST', message },
+    });
+    const isBlank = (value) => typeof value !== 'string' || !value.trim();
+    if (isBlank(project)) return reject('project required');
+    if (typeof scope !== 'string' || !['daily', 'weekly', 'all'].includes(scope)) {
+      return reject('scope must be daily|weekly|all');
+    }
+    if (isBlank(adviceKey)) return reject('adviceKey required');
+    // executor is optional; when supplied it must be a known executor.
+    if (executor !== undefined && !VALID_EXECUTORS.includes(executor)) {
+      return reject('executor must be opencode or claude');
+    }
+    const preview = job.previewProjectExecution(db, {
+      project, scope, windowFrom, windowTo, adviceKey, executor,
+    });
+    if (!preview.ok) return failure(res, preview.reason, preview);
+    const data = {
+      scope: preview.scope,
+      adviceKey: preview.adviceKey,
+      executor: preview.executor,
+      project: preview.project,
+      projectExists: preview.projectExists,
+      cli: preview.cli,
+      prompt: preview.prompt,
+      item: preview.item,
+    };
+    res.json({ ok: true, data });
+  });
   // -------------------------------------------------------------------------
   //  Project-level start + list
   //

package/server/db/schema.js CHANGED Viewed

@@ -325,8 +325,11 @@ function initDatabase(db) {
     ['currency_rate', '1'],
     ['idle_threshold_minutes', '5'],
     ['llm_tool_preference', 'auto'],
-    // v2: opt-in LLM judge for E1/O1 dimensions
-    ['enable_llm_judge', '0'],
+    // v2: LLM judge for E1/O1 dimensions.  On by default for new
+    // installs — existing users' explicit '0' is preserved by the
+    // INSERT OR IGNORE seed below, so toggling this only flips fresh
+    // boss.db files.
+    ['enable_llm_judge', '1'],
   ];
   const stmt = db.prepare(

package/server/etl/opencode.js CHANGED Viewed

@@ -27,6 +27,7 @@ const {
   getEtlState,
   updateEtlState,
 } = require('../db/queries');
+const { canonicalProject } = require('../utils/project');
 // ---------------------------------------------------------------------------
 // Constants
@@ -210,7 +211,10 @@ function mapSession(row, msgCount, errCount, toolCount, agg = {}) {
     tokens_cache_read: agg.tokens_cache_read || 0,
     tokens_cache_write: agg.tokens_cache_write || 0,
     cost_usd: agg.cost_usd || 0,
-    project: row.directory || null,
+    // OpenCode sometimes records Windows drives as "C//felix/code/X" (the
+    // colon got dropped).  canonicalProject re-inserts the ":" so downstream
+    // cwd / whitelist / fs.existsSync checks work.  See server/utils/project.js.
+    project: canonicalProject(row.directory) || null,
     title: row.title || null,
     model: agg.model_id || null,
     error_count: errCount,

package/server/execution/job.js CHANGED Viewed

@@ -297,6 +297,140 @@ function cleanupOrphans(db) {
   return orphans.length;
 }
+// ---------------------------------------------------------------------------
+//  Public: previewExecution / previewProjectExecution
+//
+//  Lookup-only counterpart of startExecution.  Returns exactly the prompt
+//  text we would have piped through stdin to the executor, along with the
+//  resolved cwd, executor name and the shell command we would run.  Used
+//  by the UI to let the user see (and copy) the full instruction before
+//  hitting "执行", and as a manual fallback for cases the auto path can't
+//  serve (CLI missing, item.actionable=false, …).
+//
+//  Stays close to startExecution's resolution order so they can't drift:
+//    1. session / project + advice cache lookup
+//    2. AdviceItem lookup
+//    3. executor pick (explicit option, else item.executor, else 'opencode')
+//    4. prompt assembly via the same builder used in spawnRunAsync
+//
+//  We deliberately don't enforce actionable / whitelist / project-exists
+//  here — the UI wants to show this info even when the auto-run path is
+//  blocked.
+// ---------------------------------------------------------------------------
+// Quote a directory for the copy-paste shell hint.  Paths made only of
+// characters that no shell treats specially are returned untouched, so the
+// hint for ordinary paths stays exactly as before.
+function quoteCwdForShellHint(dir) {
+  if (/^[\w@%+=:,./-]+$/.test(dir)) return dir;
+  // cmd.exe / PowerShell understand double quotes (a Windows path cannot
+  // contain one); POSIX shells get single quotes with any embedded single
+  // quote closed, escaped and reopened.
+  if (process.platform === 'win32') return `"${dir}"`;
+  const escaped = dir.replace(/'/g, "'\\''");
+  return `'${escaped}'`;
+}
+/**
+ * Describe the CLI invocation for a preview.
+ *
+ * Per the original note this mirrors server/execution/runner.js
+ * (`opencode run` / `claude -p`), with the prompt arriving on stdin.
+ *
+ * @param {string} executor - 'claude' selects the claude CLI; any other
+ *   value is described as opencode.
+ * @param {string} cwd - Working directory; returned as-is in `cwd`.
+ * @returns {{bin: string, args: string[], cwd: string,
+ *   stdinIsPrompt: boolean, shellHint: string}} `shellHint` is a one-line
+ *   command a user can paste into a terminal.
+ */
+function describeCliCommand(executor, cwd) {
+  const isClaude = executor === 'claude';
+  const bin = isClaude ? 'claude' : 'opencode';
+  const args = isClaude ? ['-p'] : ['run'];
+  // The hint is meant to be pasted into a shell: a directory containing
+  // spaces or metacharacters must be quoted, and an empty directory (the
+  // session has no project) must not produce a dangling "cd  &&".
+  const hasCwd = typeof cwd === 'string' && cwd !== '';
+  const cdPrefix = hasCwd ? `cd ${quoteCwdForShellHint(cwd)} && ` : '';
+  return {
+    bin,
+    args,
+    cwd,
+    stdinIsPrompt: true,
+    shellHint: `${cdPrefix}${bin} ${args.join(' ')} < prompt.txt`,
+  };
+}
+/**
+ * Session-scoped preview: resolve session → cached advice → advice item,
+ * then assemble the prompt and describe the CLI call.
+ *
+ * @returns {object} `{ ok: false, reason }` with reason 'no-session',
+ *   'no-advice' or 'no-advice-item' when a lookup misses; otherwise
+ *   `{ ok: true, scope: 'session', adviceKey, executor, item, project,
+ *   projectExists, cli, prompt }`.
+ */
+function previewExecution(db, { sessionId, adviceKey, executor: executorOpt }) {
+  const sessionRow = getSessionById(db, sessionId);
+  if (!sessionRow) return { ok: false, reason: 'no-session' };
+
+  const adviceDoc = loadAdvice(db, sessionId);
+  if (!adviceDoc) return { ok: false, reason: 'no-advice' };
+
+  const adviceItem = findAdviceItem(adviceDoc, adviceKey);
+  if (!adviceItem) return { ok: false, reason: 'no-advice-item' };
+
+  // Precedence: explicit option, then the item's own executor, then opencode.
+  const chosenExecutor = executorOpt || adviceItem.executor || 'opencode';
+
+  // session.project may carry an OpenCode-source path like "C//felix/code/X";
+  // canonicalProject normalises it (same step as previewProjectExecution).
+  const projectPath = canonicalProject(sessionRow.project || '') || '';
+
+  const sessionSummary = {
+    project: projectPath,
+    title: sessionRow.title,
+    model: sessionRow.model,
+    durationMinutes: sessionRow.duration_minutes,
+    messageCount: sessionRow.message_count,
+  };
+  const recentUserMessages = fetchRecentUserMessages(db, sessionId);
+  const prompt = buildExecutionPrompt({
+    advice: adviceItem,
+    session: sessionSummary,
+    recentUserMessages,
+  });
+
+  return {
+    ok: true,
+    scope: 'session',
+    adviceKey,
+    executor: chosenExecutor,
+    item: adviceItem,
+    project: projectPath,
+    projectExists: isValidProjectPath(projectPath),
+    cli: describeCliCommand(chosenExecutor, projectPath),
+    prompt,
+  };
+}
+/**
+ * Project-scoped preview: resolve project → cached project advice →
+ * advice item, then assemble the prompt and describe the CLI call.
+ *
+ * For scope 'all' the window bounds are ignored and passed on as ''.
+ *
+ * @returns {object} `{ ok: false, reason }` with reason 'no-project',
+ *   'no-window', 'no-advice' or 'no-advice-item'; otherwise
+ *   `{ ok: true, scope: 'project', adviceKey, executor, item, project,
+ *   projectExists, cli, prompt }`.
+ */
+function previewProjectExecution(db, {
+  project: projectRaw, scope, windowFrom = '', windowTo = '',
+  adviceKey, executor: executorOpt,
+}) {
+  const project = canonicalProject(projectRaw || '');
+  if (!project) return { ok: false, reason: 'no-project' };
+
+  const coversAll = scope === 'all';
+  const windowMissing = !coversAll && (!windowFrom || !windowTo);
+  if (!scope || windowMissing) return { ok: false, reason: 'no-window' };
+
+  // Bounds used for both the cache lookup and the prompt.
+  const fromKey = coversAll ? '' : windowFrom;
+  const toKey = coversAll ? '' : windowTo;
+
+  const cached = loadProjectAdvice(db, project, scope, fromKey, toKey);
+  if (!cached || !cached.payload) return { ok: false, reason: 'no-advice' };
+
+  const adviceItem = findAdviceItem(cached.payload, adviceKey);
+  if (!adviceItem) return { ok: false, reason: 'no-advice-item' };
+
+  // Precedence: explicit option, then the item's own executor, then opencode.
+  const chosenExecutor = executorOpt || adviceItem.executor || 'opencode';
+
+  const patterns = cached.payload.crossSessionPatterns;
+  const prompt = buildProjectExecutionPrompt({
+    advice: adviceItem,
+    project: {
+      path: project,
+      scope,
+      windowFrom: fromKey,
+      windowTo: toKey,
+      sessionCount: cached.sessionCount,
+    },
+    crossSessionPatterns: Array.isArray(patterns) ? patterns : [],
+  });
+
+  return {
+    ok: true,
+    scope: 'project',
+    adviceKey,
+    executor: chosenExecutor,
+    item: adviceItem,
+    project,
+    projectExists: isValidProjectPath(project),
+    cli: describeCliCommand(chosenExecutor, project),
+    prompt,
+  };
+}
 // ---------------------------------------------------------------------------
 //  Public: startExecution
 // ---------------------------------------------------------------------------
@@ -344,7 +478,10 @@ async function startExecution(db, opts) {
     }
     // 5. Project path validity.
-    const project = session.project;
+    //    Normalise the same way preview does so cwd, whitelist comparison
+    //    and fs.existsSync all agree.  OpenCode-source paths can come in
+    //    as "C//felix/code/X" (colon dropped); canonicalProject fixes it.
+    const project = canonicalProject(session.project || '') || '';
     if (!isValidProjectPath(project)) {
       return { ok: false, reason: 'invalid-project-path', extra: { project } };
     }
@@ -385,7 +522,7 @@ async function startExecution(db, opts) {
     const promptBuilder = () => buildExecutionPrompt({
       advice: item,
       session: {
-        project: session.project,
+        project,
         title: session.title,
         model: session.model,
         durationMinutes: session.duration_minutes,
@@ -643,6 +780,8 @@ async function cancelRun(db, runId) {
 module.exports = {
   startExecution,
   startProjectExecution,
+  previewExecution,
+  previewProjectExecution,
   cancelRun,
   getRun,
   listRunsForAdvice,

package/server/llm/advice-prompt.js CHANGED Viewed

@@ -236,6 +236,7 @@ function buildAdvicePrompt(ctx) {
   - 流程节奏(回退次数、纠偏速度、是否过早收敛)
   - 是否适合引入一个 opencode skill 或 subagent 来自动化重复模式
   - 成本与 token 经济性
+  - **MCP 服务器使用**(见下「关于 MCP」一节)
 不要谈:
   - 对话中讨论的具体技术 / 代码是否正确(如「你写的 SQL JOIN 错了」、
@@ -247,6 +248,46 @@ function buildAdvicePrompt(ctx) {
 如果对话里只有业务讨论、看不出可改进的协作模式,5 个类别都给空数组,
 summary 写「本会话以业务讨论为主,协作模式无明显问题」。
+# 关于 MCP(Model Context Protocol)
+下文的「工具使用 Top 20」里,工具可能来自两类来源:
+  - **内置工具**:名字短、全小写、无分隔符。例如
+    \`read\` \`write\` \`edit\` \`bash\` \`glob\` \`grep\` \`todowrite\`
+    \`task\` \`skill\` \`websearch\` \`question\` (OpenCode)
+    以及 \`ls\` \`webfetch\` \`notebookread\` 等(Claude Code)。
+  - **MCP 工具**(由用户配置的外部 MCP 服务器提供):
+    - OpenCode 命名:\`<服务器名>_<方法名>\`(单下划线),例如
+      \`atlassian_getJiraIssue\` \`atlassian_searchConfluence\` —
+      "atlassian" 就是 MCP 服务器名。
+    - Claude Code 命名:\`mcp__<服务器名>__<方法名>\`(双下划线),例如
+      \`mcp__github__list_issues\`。
+请在分析时显式辨认 MCP 工具,并考虑以下角度(只在确实有迹象时谈,不要硬凑):
+  - **该不该用 MCP**:这一次任务里 MCP 工具是不是真的派上了用场?
+    如果调用了 MCP 但没真正解决问题(只是来回查),建议下次直接给 AI
+    具体信息或换一种问法。
+  - **MCP 调用错误率高**:某个 MCP 工具错误率明显偏高(从表中
+    err 列读),建议改用其他来源或先用一次手动调用确认参数。
+  - **重复 MCP 调用**:连续多次同名 MCP 调用拉同一类数据,建议下次
+    一次性指明需要的字段,或用本地缓存/文件代替。
+  - **应该用 MCP 而没用**:用户多次让 AI"上网搜"或手抄外部系统数据
+    (Jira/GitHub/Slack/Notion 等),而项目本应配置对应 MCP 直接拉,
+    可建议引入相应 MCP 服务器。
+按照上面 5 大类的归属:
+  - MCP 调用浪费/重复 → \`cost\`
+  - MCP 报错频繁、参数不对 → \`accuracy\`
+  - 缺少 MCP 上下文导致需要手抄数据 → \`context\`
+  - 反复出现的 MCP 调用模式可包成 skill → \`skills\`
+  - MCP 与人工/内置工具的分工节奏 → \`workflow\`
+不要新增类别,不要新增 AdviceItem 字段。
+evidence 里点名具体 MCP 工具(完整工具名,例如 \`atlassian_getJiraIssue\`)。
+如果对话中没有 MCP 工具,或 MCP 用得很合理,不要硬挑毛病。
 # 输出契约
 只输出严格 JSON,不要 markdown 代码块,不要多余文字。结构如下:
@@ -270,7 +311,7 @@ AdviceItem:
   "why":        "1 句话,说明协作上的问题",
   "action":     "1 句话,具体可操作的改变(下次怎么做)",
   "evidence":   "引自第 N 条消息 / 工具 X / 基础统计 — 必须是对话事实,不得引用任何评分",
-  "actionable": true | false,
+  "actionable": true | false,    // 见硬规则 6:仅当 AI 能在本机落盘改动时 true
   "executor":   "opencode" | "claude" | "manual",
   "cwd_hint":   "project_root"
 }
@@ -291,21 +332,43 @@ AdviceItem:
                  每条 action 给出:skill 名 + 触发条件 + 一句话用途。
    - workflow 流程与节奏:拆解、迭代步幅、回退策略、人 ↔ AI 分工。
-6. actionable=true 的条件必须同时满足:
-   - 是「写代码 / 改文件 / 加 skill」类具体动作;
-   - 在原项目根目录运行 opencode/claude 就能完成,无需补充人类专属知识。
-   不属于这一类(如「下次开场用模板」「以后多用缓存」「对 AI 的指令更
-   具体」)的 → actionable=false, executor='manual'。
+6. actionable=true 必须是「AI 在用户电脑上**真的会落盘的改动**」。
+   只有同时满足下面三点才能填 true:
+   a. **明确的产出物在文件系统里**——新建或修改某个具体的文件 /
+      配置项 / skill / 脚本 / 模板。可以一句话说清"AI 将创建/修改
+      \`<相对路径>\`"。
+   b. **不依赖人类专属知识**——AI 看着项目根目录就能做完;不需要
+      访问只有人才能拿到的密码、内部 wiki、外部账号、决策权。
+   c. **是项目级的、可重复受益的改变**——例如新增一个 skill、加一
+      条 lint 规则、写一份模板,而不是"修这一次的 bug"。
+   只要任何一点不满足 → actionable=false, executor='manual'。
+   **反例(下面这些必须是 actionable=false / executor='manual')**:
+   - 「下次开场用模板」「以后多用缓存」「下次先给 AI 文件路径」
+     —— 是让**人**改行为,不是落盘改动。
+   - 「换更便宜的模型」「关闭 reasoning」「用 sonnet 而非 opus」
+     —— 模型切换是用户在 client / 终端做的,不是 AI 改文件。
+   - 「检查代码是否正确」「再确认一遍 SQL」
+     —— 是要求人或 AI 验证,不是落盘动作。
+   - 「拆分任务」「分步提问」「先讨论再写代码」
+     —— 协作流程建议,只能人来执行。
+   - 「向团队同步」「写文档」(注:这条**有边界** —— 如果是让 AI
+     在仓库里新建/更新某个具体 .md 文件,可 actionable=true;
+     如果是"和同事开个会"则 false)。
 7. executor:
-   - 显式动手做的任务(创建文件、改代码、写 skill) → 'opencode' 或 'claude';
-   - 单纯让人类调整行为的建议 → 'manual'。
-   不会判断时填 'opencode'。
+   - 落盘类动作(创建/改文件、写 skill、改配置文件) → 'opencode' 或 'claude';
+   - 一切让人类调整行为/认知/沟通的建议 → 'manual'。
+   不会判断时,**默认填 'manual'**——错填 manual 只是少一个按钮;
+   错填 opencode 会让用户点了按钮后 AI 干一些莫名其妙的事。
 8. cwd_hint: 目前只能填 "project_root"。
-9. actionable 与 executor 必须一致:executor='manual' 时 actionable 必须 false;
-   executor 是 'opencode'/'claude' 时 actionable 通常 true。
+9. actionable 与 executor 必须一致:executor='manual' ⇔ actionable=false;
+   executor 是 'opencode'/'claude' ⇔ actionable=true。
+   **不允许出现 executor='opencode' 且 actionable=false 的组合**。
 # 会话基础(只作事实参考,不要换算成分数)

package/server/llm/advice.js CHANGED Viewed

@@ -357,11 +357,21 @@ function normaliseItem(it) {
   if (executor === 'manual') actionable = false;
   if (actionable && executor === 'manual') executor = 'opencode';
+  // Heuristic safety net: even with a tight prompt, LLMs sometimes mark
+  // "you should do X next time" advice as actionable=true, which would
+  // give the user an auto-execute button that does nonsense.  Detect
+  // unmistakeably human-action phrasing in `action` and downgrade.
+  const action = typeof it.action === 'string' ? it.action : '';
+  if (actionable && looksLikeHumanAction(action)) {
+    actionable = false;
+    executor = 'manual';
+  }
   return {
     severity,
     title:    typeof it.title === 'string'    ? it.title.trim()    : '',
     why:      typeof it.why === 'string'      ? it.why.trim()      : '',
-    action:   typeof it.action === 'string'   ? it.action.trim()   : '',
+    action:   action.trim(),
     evidence: typeof it.evidence === 'string' ? it.evidence.trim() : '',
     actionable,
     executor,
@@ -369,6 +379,44 @@ function normaliseItem(it) {
   };
 }
+// Phrase patterns that mark an advice `action` as something the *user*
+// must do.  Per the original note, the phrases were collected from
+// production LLM outputs where the model wrote actionable=true but the
+// action clearly read "next time the human should …".
+// NOTE(review): the bare future-tense markers match anywhere in the text,
+// so an action that merely mentions e.g. 后续 is downgraded too — confirm
+// that breadth is intended.
+const HUMAN_ACTION_PATTERNS = [
+  // -- next-time / future-tense markers --
+  /下次/,
+  /以后/,
+  /未来/,
+  /后续/,
+  /今后/,
+  /日后/,
+  // -- switching model / runtime settings (user-side, not repo files) --
+  /换(成|为)?[\s]*(sonnet|opus|haiku|claude|gpt|gemini|grok|deepseek)/i,
+  /(切换|更换|改用|改成|降级|升级).{0,8}模型/,
+  /关(掉|闭|去).{0,4}(reasoning|推理|思考)/,
+  /启用.{0,4}(reasoning|推理)/,
+  /调低.{0,6}(reasoning|temperature|温度)/,
+  // -- talking to other humans --
+  /和(团队|同事|领导|产品|设计)/,
+  /与(团队|同事|领导)/,
+  /(告知|通知|同步给|抄送|沟通).{0,6}(团队|同事|领导|项目组)/,
+  /(开个|开一次|组织).{0,4}(会议|分享|评审|review)/i,
+  // -- habit / process changes (no filesystem delta) --
+  /(养成|形成|建立)[^。;；]{0,30}(习惯|惯例|节奏)/,
+  /(定期|每周|每天|每月).{0,8}(回顾|评审|检查|总结)/,
+  /(培训|学习|熟悉|掌握).{0,8}(用法|文档|规范)/,
+];
+/**
+ * Lightweight Chinese-text heuristic: does `action` read like a behaviour
+ * change the user has to make, rather than a filesystem-level change an
+ * AI agent could carry out?
+ *
+ * Deliberately errs toward answering "no": a missed human-only action only
+ * leaves an over-promising button, whereas a wrong "yes" hides a button
+ * that would have worked.
+ *
+ * @param {*} action - Advice action text; anything that is not a non-empty
+ *   string yields false.
+ * @returns {boolean} true when any pattern in HUMAN_ACTION_PATTERNS matches.
+ */
+function looksLikeHumanAction(action) {
+  if (typeof action !== 'string' || action === '') return false;
+  for (const pattern of HUMAN_ACTION_PATTERNS) {
+    if (pattern.test(action)) return true;
+  }
+  return false;
+}
 // ---------------------------------------------------------------------------
 //  Exports
 // ---------------------------------------------------------------------------
@@ -381,4 +429,5 @@ module.exports = {
   // exported for tests / debugging:
   assembleContext,
   normaliseAdvicePayload,
+  looksLikeHumanAction,
 };