npm - agentboss - Versions diffs - 0.1.0 - Mend

agentboss 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/README.md +34 -0
package/bin/aboss.js +288 -0
package/client/dist/assets/index-C1wFD_Vo.css +1 -0
package/client/dist/assets/index-DBj1Ujlx.js +137 -0
package/client/dist/index.html +34 -0
package/package.json +64 -0
package/server/analysis/daily-aggregator.js +258 -0
package/server/analysis/difficulty.js +129 -0
package/server/analysis/dimensions/ai-knowledge.js +172 -0
package/server/analysis/dimensions/ai-tools.js +161 -0
package/server/analysis/dimensions/judgement.js +107 -0
package/server/analysis/dimensions/llm-merge.js +57 -0
package/server/analysis/dimensions/output-quality.js +167 -0
package/server/analysis/dimensions/problem-definition.js +104 -0
package/server/analysis/dimensions/system-thinking.js +225 -0
package/server/analysis/evidence-builder.js +104 -0
package/server/analysis/job.js +273 -0
package/server/analysis/report-builder.js +581 -0
package/server/analysis/scoring-v2.js +72 -0
package/server/analysis/text-signals.js +179 -0
package/server/analysis/thresholds-v2.js +358 -0
package/server/api/advice.js +124 -0
package/server/api/analysis.js +141 -0
package/server/api/execution.js +330 -0
package/server/api/metrics.js +277 -0
package/server/api/overview.js +308 -0
package/server/api/project.js +255 -0
package/server/api/reports.js +125 -0
package/server/api/sessions.js +118 -0
package/server/api/settings.js +119 -0
package/server/db/connection.js +175 -0
package/server/db/queries.js +1051 -0
package/server/db/schema.js +487 -0
package/server/etl/active-time.js +150 -0
package/server/etl/backfill-subagents.js +178 -0
package/server/etl/claude-code.js +826 -0
package/server/etl/detect.js +341 -0
package/server/etl/judge-filter.js +117 -0
package/server/etl/opencode.js +606 -0
package/server/execution/job.js +662 -0
package/server/execution/prompt.js +227 -0
package/server/execution/runner.js +218 -0
package/server/index.js +94 -0
package/server/llm/advice-prompt.js +339 -0
package/server/llm/advice.js +384 -0
package/server/llm/analysis-prompt.js +162 -0
package/server/llm/cli-runner.js +249 -0
package/server/llm/judge-prompts.js +179 -0
package/server/llm/judge.js +118 -0
package/server/llm/project-advice-prompt.js +332 -0
package/server/llm/project-advice.js +491 -0
package/server/llm/session-analyzer.js +122 -0
package/server/utils/project.js +80 -0

package/server/llm/project-advice-prompt.js ADDED Viewed

@@ -0,0 +1,332 @@
+/**
+ * Project-level AI-advice prompt builder.
+ *
+ * Where session advice asks the model to read ONE conversation, project
+ * advice asks the model to read N already-generated session advice
+ * payloads under the same project and produce a SECOND-ORDER summary:
+ *
+ *   "Across these 12 sessions in C:/felix/code/AgentBoss this week,
+ *    what patterns recur?  Where is this developer systematically
+ *    losing money / accuracy / context?  Which skill / workflow change
+ *    would compound across multiple sessions?"
+ *
+ * # Inputs
+ *
+ * The caller (server/llm/project-advice.js) gives us a `ProjectContext`:
+ *
+ *   {
+ *     project:      'C:/felix/code/AgentBoss',
+ *     scope:        'daily' | 'weekly' | 'all',
+ *     windowFrom:   '2026-06-12',
+ *     windowTo:     '2026-06-12',
+ *     stats:        { sessionCount, totalCost, totalTokens, totalErrors, totalActiveMinutes },
+ *     sessions:     [{ id, title, model, date, cost, msgCount, errorCount, advice: {…} }, …]
+ *   }
+ *
+ * `advice` for each session is the already-cached `session_analysis.llm_advice`
+ * payload (categories, summary, rationale).  We feed the model the
+ * second-pass material — NOT the raw transcripts — so the prompt stays
+ * manageable even for projects with 50+ sessions.
+ *
+ * # Why not raw transcripts?
+ *
+ * (a) Cost / time: even 10 sessions × 80 KB each blows past any sane
+ *     prompt budget and would take minutes to judge.
+ * (b) Duplication: per-session advice already extracted the salient
+ *     facts.  Re-deriving them is just paying twice for the same insight.
+ * (c) Cross-session patterns are easier to spot when the model sees
+ *     compact summaries side by side than when buried in transcripts.
+ *
+ * # Output contract (mirrors session advice + crossSessionPatterns)
+ *
+ *   {
+ *     "summary":              "≤80 字总评",
+ *     "crossSessionPatterns": ["..."],  // 0-5 一句话总结的跨会话模式
+ *     "categories": {
+ *       "cost":     [AdviceItem, …],
+ *       "accuracy": [AdviceItem, …],
+ *       "context":  [AdviceItem, …],
+ *       "skills":   [AdviceItem, …],
+ *       "workflow": [AdviceItem, …]
+ *     },
+ *     "rationale":           "≤120 字综合理由"
+ *   }
+ *
+ * AdviceItem shape matches session advice's normaliseItem(), but
+ * `evidence` here cites SESSION IDs / patterns rather than message
+ * numbers, e.g. "出现于 12 个会话中 9 个 (sess-abc, sess-def, ...)".
+ *
+ * # Sentinel + versioning
+ *
+ * Re-uses ADVICE_SENTINEL so server/etl/judge-filter.js still filters
+ * the helper-LLM calls out of the user's own data.  Has its own
+ * VERSION constant so bumping the project prompt doesn't invalidate
+ * the session-level cache.
+ *
+ * @author Felix
+ */
+'use strict';
+const { ADVICE_SENTINEL } = require('./advice-prompt');
+/** Bump when project-advice OUTPUT contract changes. */
+const PROJECT_ADVICE_PROMPT_VERSION = 1;
+/** Hard categories — same enum the session prompt uses.  The array order
+ *  is also the order in which serialiseSessionAdvice() emits the
+ *  per-category sections. */
+const CATEGORIES = ['cost', 'accuracy', 'context', 'skills', 'workflow'];
+/** Soft prompt budget.  Project advice fits much more comfortably than
+ *  session advice because per-session payloads are compact (~2–4 KB).
+ *  Used as the default `maxBytes` of truncateContext(), which compares it
+ *  against JavaScript string lengths (UTF-16 code units), so for non-ASCII
+ *  text it is an approximation rather than an exact byte count. */
+const DEFAULT_MAX_BYTES = 80_000;
+/** Cap per-session block when summaries are unusually verbose.  Applied
+ *  by truncateContext() to each session's `_serialised` text, and only
+ *  once the overall estimate exceeds the budget. */
+const PER_SESSION_HARD_CAP_CHARS = 1_500;
+// ---------------------------------------------------------------------------
+//  Truncation
+// ---------------------------------------------------------------------------
+/**
+ * Reduce the per-session blocks so the assembled prompt stays under
+ * `maxBytes`.  Strategy:
+ *   1. If total fits → no-op.
+ *   2. Truncate each session's serialised advice to PER_SESSION_HARD_CAP_CHARS.
+ *   3. Still over? Drop the oldest sessions first (keep most recent N).
+ *   4. Mark ctx.truncated accordingly so the prompt can disclose this.
+ *
+ * @param {object} ctx
+ * @param {number} [maxBytes=DEFAULT_MAX_BYTES]
+ * @returns {object} possibly-truncated ctx (shallow copy)
+ */
+function truncateContext(ctx, maxBytes = DEFAULT_MAX_BYTES) {
+  const out = { ...ctx, truncated: false, omittedSessions: 0 };
+  const sessions = Array.isArray(ctx.sessions) ? ctx.sessions.slice() : [];
+  out.sessions = sessions;
+  const estimate = (list) => {
+    const FIXED_OVERHEAD = 6_000;
+    return FIXED_OVERHEAD + list.reduce((n, s) => n + serialisedLength(s), 0);
+  };
+  if (estimate(sessions) <= maxBytes) return out;
+  // 2. Hard-cap each session's advice serialisation.
+  for (const s of sessions) {
+    if (!s._serialised) continue;
+    if (s._serialised.length > PER_SESSION_HARD_CAP_CHARS) {
+      s._serialised = s._serialised.slice(0, PER_SESSION_HARD_CAP_CHARS) + '…[trimmed]';
+    }
+  }
+  if (estimate(sessions) <= maxBytes) {
+    out.truncated = true;
+    return out;
+  }
+  // 3. Drop oldest sessions until we fit (keep at least 2).
+  while (out.sessions.length > 2 && estimate(out.sessions) > maxBytes) {
+    out.sessions.shift();
+    out.omittedSessions++;
+  }
+  out.truncated = 'hard';
+  return out;
+}
+function serialisedLength(s) {
+  return s && s._serialised ? s._serialised.length : 0;
+}
+// ---------------------------------------------------------------------------
+//  Per-session serialisation
+// ---------------------------------------------------------------------------
+/**
+ * Render a single session's advice payload into a compact text block
+ * suitable for inclusion in the prompt.  Only writes fields that
+ * carry signal — empty categories are dropped entirely.
+ */
+function serialiseSessionAdvice(sess) {
+  const lines = [];
+  lines.push(`## SESSION ${sess.id}`);
+  const meta = [];
+  if (sess.title) meta.push(`title="${sess.title.slice(0, 80)}"`);
+  if (sess.date) meta.push(`date=${sess.date}`);
+  if (sess.model) meta.push(`model=${sess.model}`);
+  if (sess.cost != null) meta.push(`cost=$${Number(sess.cost).toFixed(4)}`);
+  if (sess.msgCount != null) meta.push(`msgs=${sess.msgCount}`);
+  if (sess.errorCount) meta.push(`errors=${sess.errorCount}`);
+  if (meta.length) lines.push(meta.join(' · '));
+  const adv = sess.advice || {};
+  if (adv.summary) lines.push(`summary: ${adv.summary}`);
+  const cats = adv.categories || {};
+  for (const key of CATEGORIES) {
+    const items = Array.isArray(cats[key]) ? cats[key] : [];
+    if (!items.length) continue;
+    lines.push(`[${key}]`);
+    for (const it of items) {
+      const sev = it.severity || 'low';
+      const title = (it.title || '').slice(0, 80);
+      const why = (it.why || '').slice(0, 200);
+      lines.push(`  - (${sev}) ${title}`);
+      if (why) lines.push(`    why: ${why}`);
+    }
+  }
+  return lines.join('\n');
+}
+/**
+ * Mutate ctx in place: attach `_serialised` to each session so
+ * truncateContext can measure / trim.
+ */
+function annotateContext(ctx) {
+  for (const s of ctx.sessions || []) {
+    s._serialised = serialiseSessionAdvice(s);
+  }
+  return ctx;
+}
+// ---------------------------------------------------------------------------
+//  Formatting helpers
+// ---------------------------------------------------------------------------
+function fmtNum(n) {
+  if (n == null || Number.isNaN(n)) return '–';
+  if (typeof n !== 'number') return String(n);
+  if (Number.isInteger(n)) return n.toLocaleString('en-US');
+  return n.toFixed(3);
+}
+function fmtSessionBlocks(sessions) {
+  if (!Array.isArray(sessions) || sessions.length === 0) return '(无会话)';
+  return sessions.map((s) => s._serialised || serialiseSessionAdvice(s)).join('\n\n');
+}
+function fmtWindow(ctx) {
+  if (ctx.scope === 'all') return '全部历史';
+  if (ctx.windowFrom === ctx.windowTo) return ctx.windowFrom;
+  return `${ctx.windowFrom} → ${ctx.windowTo}`;
+}
+// ---------------------------------------------------------------------------
+//  Prompt
+// ---------------------------------------------------------------------------
+/**
+ * Assemble the full project-advice prompt.
+ *
+ * @param {object} ctx
+ * @returns {string}
+ */
+function buildProjectAdvicePrompt(ctx) {
+  const stats = ctx.stats || {};
+  const truncatedNote =
+    ctx.truncated === 'hard'
+      ? `（注意:会话过多,已强制丢弃最早 ${ctx.omittedSessions} 个会话。）`
+      : ctx.truncated
+      ? '（注意:部分会话摘要已截断。）'
+      : '';
+  return `${ADVICE_SENTINEL}（内部标记,忽略本行）
+你是一位 AI 协作教练。下面给你的不是单次会话,而是「同一个项目下多次会话已经被
+逐个分析后的结论列表」。你的任务:阅读这些 per-session 结论,做**跨会话二次
+汇总**,找出在这个项目上反复出现、值得系统性改进的协作模式。
+# 关键原则:做总结,不要重复
+每个 SESSION 块已经写好了该次会话的具体建议。你不需要逐条复述,而是要:
+  1. 找出在多个 session 之间**重复出现**的问题(例:5/12 个 session 都缺
+     初始上下文)。
+  2. 找出在单个 session 看似小事、但**跨会话累计起来代价巨大**的模式
+     (例:每次都不开缓存)。
+  3. 找出**项目特定**的改进点(skill / 流程 / 配置),因为只看一次会话
+     时很难判断是否值得建立 skill。
+  4. 反过来,如果某个建议**只在 1 个 session 里出现一次**,不要在项目级
+     再次强调它——那属于个例,不要污染项目级总览。
+# 评估对象与禁区
+可以谈:
+  - 跨多个 session 的协作模式(开场习惯、提问粒度、上下文准备)
+  - 项目内的成本结构(模型选择、token 使用、缓存命中)
+  - 是否应建立 / 完善 opencode skill 或 subagent
+  - 工具使用习惯(是否反复使用低效组合)
+  - 项目级流程瓶颈(测试节奏、回退频率)
+不要谈:
+  - 任何具体业务 / 代码层面的对错
+  - 单次会话内的局部细节(那应该回到 session 详情看)
+  - 任何评分体系(H1/H2/E1/O1/Lx/子分)字眼一律禁止
+# 输出契约
+只输出严格 JSON,不要 markdown 代码块,不要多余文字。结构:
+{
+  "summary":              "≤80 字一句话总评(只谈协作)",
+  "crossSessionPatterns": ["≤30 字的跨会话模式描述", ...],   // 0-5 条
+  "categories": {
+    "cost":     [AdviceItem, ...],
+    "accuracy": [AdviceItem, ...],
+    "context":  [AdviceItem, ...],
+    "skills":   [AdviceItem, ...],
+    "workflow": [AdviceItem, ...]
+  },
+  "rationale": "≤120 字综合理由(只谈协作)"
+}
+AdviceItem(项目级):
+{
+  "severity":   "high" | "medium" | "low",
+  "title":      "≤24 字",
+  "why":        "1-2 句话,说明这个问题在多个会话中如何重复或累积",
+  "action":     "1 句话,项目级别可落地的改变(skill / 配置 / 流程)",
+  "evidence":   "必须引用具体会话证据,例:出现于 7/12 个会话(sess-abc, sess-def, ...)",
+  "actionable": true | false,
+  "executor":   "opencode" | "claude" | "manual",
+  "cwd_hint":   "project_root"
+}
+# 硬规则
+1. 5 个 categories 键必须存在,无内容给空数组。
+2. AdviceItem 总数 ≤ 5 条,按 severity 由高到低排;**严格优先**跨多
+   session 重复出现的问题,单例问题不要列入。
+3. evidence 必须指向具体 session id 或可量化的统计(如 "9/12 会话
+   未提供初始文件路径")。**禁止**仅引用单一会话内的某条消息。
+4. 出现「分数」「等级」「Lx」「子分」「H1」「H2」「E1」「O1」字眼一律违规。
+5. action 必须是「项目级」可落地动作(写一个 skill / 改一个配置 / 形成
+   一条惯例),不是「下次开场更具体一些」这种纯口头建议。
+6. actionable / executor / cwd_hint 规则与单 session 版本相同;manual
+   时 actionable 必须 false。
+7. 如果没有发现任何值得 project 级别报告的问题(全是个例),把所有
+   categories 设为空数组,summary 写「未发现项目级别的系统性协作问题」。
+# 项目基础
+项目:           ${ctx.project || '?'}
+分析范围:       ${ctx.scope || '?'} · ${fmtWindow(ctx)}
+分析的会话数:   ${fmtNum(stats.sessionCount)}
+活跃时长合计:   ${fmtNum(stats.totalActiveMinutes)} 分钟
+总成本:         $${typeof stats.totalCost === 'number' ? stats.totalCost.toFixed(4) : '–'}
+总 token:       ${fmtNum(stats.totalTokens)}
+错误总数:       ${fmtNum(stats.totalErrors)}
+# 各会话的 per-session 结论 ${truncatedNote}
+${fmtSessionBlocks(ctx.sessions || [])}
+—— 输出 JSON ——`;
+}
+// Public surface of this module (CommonJS): the prompt version, the
+// category list, the default budget, the prompt builder and the
+// annotate / truncate / serialise helpers.  The fmt* formatters and
+// serialisedLength() are not exported.
+module.exports = {
+  PROJECT_ADVICE_PROMPT_VERSION,
+  CATEGORIES,
+  DEFAULT_MAX_BYTES,
+  buildProjectAdvicePrompt,
+  truncateContext,
+  annotateContext,
+  serialiseSessionAdvice,
+};