npm - agentboss - Versions diffs - 0.1.0 - Mend

agentboss 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/README.md +34 -0
package/bin/aboss.js +288 -0
package/client/dist/assets/index-C1wFD_Vo.css +1 -0
package/client/dist/assets/index-DBj1Ujlx.js +137 -0
package/client/dist/index.html +34 -0
package/package.json +64 -0
package/server/analysis/daily-aggregator.js +258 -0
package/server/analysis/difficulty.js +129 -0
package/server/analysis/dimensions/ai-knowledge.js +172 -0
package/server/analysis/dimensions/ai-tools.js +161 -0
package/server/analysis/dimensions/judgement.js +107 -0
package/server/analysis/dimensions/llm-merge.js +57 -0
package/server/analysis/dimensions/output-quality.js +167 -0
package/server/analysis/dimensions/problem-definition.js +104 -0
package/server/analysis/dimensions/system-thinking.js +225 -0
package/server/analysis/evidence-builder.js +104 -0
package/server/analysis/job.js +273 -0
package/server/analysis/report-builder.js +581 -0
package/server/analysis/scoring-v2.js +72 -0
package/server/analysis/text-signals.js +179 -0
package/server/analysis/thresholds-v2.js +358 -0
package/server/api/advice.js +124 -0
package/server/api/analysis.js +141 -0
package/server/api/execution.js +330 -0
package/server/api/metrics.js +277 -0
package/server/api/overview.js +308 -0
package/server/api/project.js +255 -0
package/server/api/reports.js +125 -0
package/server/api/sessions.js +118 -0
package/server/api/settings.js +119 -0
package/server/db/connection.js +175 -0
package/server/db/queries.js +1051 -0
package/server/db/schema.js +487 -0
package/server/etl/active-time.js +150 -0
package/server/etl/backfill-subagents.js +178 -0
package/server/etl/claude-code.js +826 -0
package/server/etl/detect.js +341 -0
package/server/etl/judge-filter.js +117 -0
package/server/etl/opencode.js +606 -0
package/server/execution/job.js +662 -0
package/server/execution/prompt.js +227 -0
package/server/execution/runner.js +218 -0
package/server/index.js +94 -0
package/server/llm/advice-prompt.js +339 -0
package/server/llm/advice.js +384 -0
package/server/llm/analysis-prompt.js +162 -0
package/server/llm/cli-runner.js +249 -0
package/server/llm/judge-prompts.js +179 -0
package/server/llm/judge.js +118 -0
package/server/llm/project-advice-prompt.js +332 -0
package/server/llm/project-advice.js +491 -0
package/server/llm/session-analyzer.js +122 -0
package/server/utils/project.js +80 -0

package/server/analysis/dimensions/system-thinking.js ADDED Viewed

@@ -0,0 +1,225 @@
+/**
+ * H3 — System Thinking (rolling-window aggregate, NOT per-session).
+ *
+ * Captures whether the operator's prompting style gets more consistent,
+ * more abstract, and less repetitive over time.
+ *   • consistency  — mean pairwise Jaccard similarity of the first
+ *                    user-prompt token sets of same-project sessions
+ *                    (all sessions when no project has two or more)
+ *   • dedup        — share of sessions whose first prompt is highly
+ *                    similar (>=0.6 Jaccard) to a previous one
+ *   • refactor     — user messages matching refactor vocabulary, per 100 sessions
+ *   • abstraction  — abstraction-vocabulary token share in user messages
+ *
+ * Result is keyed by (period, end_date, window_days) and stored in
+ * capability_rollup_v2.  Caller decides the window (7d weekly, 30d
+ * monthly).
+ *
+ * See spec §4.3.
+ *
+ * @author Felix
+ */
+'use strict';
+const { queryAll } = require('../../db/queries');
+const {
+  matchesAny,
+  termOccurrences,
+  REFACTOR_PATTERNS,
+  ABSTRACTION_TERMS,
+} = require('../text-signals');
+const { evalIndicator, rollupDimension, scoreToLevel, H3 } = require('../thresholds-v2');
+// ---------------------------------------------------------------------------
+//  Helpers
+// ---------------------------------------------------------------------------
+const STOP = new Set([
+  'the','a','an','is','are','to','of','in','on','for','and','or','but','if','it',
+  '我','你','他','她','它','的','了','在','是','和','与','也','就','都','或',
+]);
+/** Tokenise a string into a Set of lowercased word-like tokens. */
+function tokenSet(text) {
+  if (!text) return new Set();
+  const tokens = String(text)
+    .toLowerCase()
+    .split(/[^\p{L}\p{N}_]+/u)
+    .filter((t) => t && t.length >= 2 && !STOP.has(t));
+  return new Set(tokens);
+}
+function jaccard(a, b) {
+  if (a.size === 0 && b.size === 0) return 1;
+  if (a.size === 0 || b.size === 0) return 0;
+  let inter = 0;
+  for (const t of a) if (b.has(t)) inter++;
+  const uni = a.size + b.size - inter;
+  return uni === 0 ? 0 : inter / uni;
+}
+// ---------------------------------------------------------------------------
+//  Aggregator
+// ---------------------------------------------------------------------------
+/**
+ * @param {object} db
+ * @param {string} fromDate  YYYY-MM-DD inclusive
+ * @param {string} toDate    YYYY-MM-DD inclusive
+ * @returns {{
+ *   subScores: object,
+ *   subLevels: object,
+ *   raw: object,
+ *   score: number|null,
+ *   level: number|null,
+ *   sessionCount: number
+ * }}
+ */
+function analyzeRange(db, fromDate, toDate) {
+  // Pull all sessions in window
+  const sessions = queryAll(
+    db,
+    `SELECT id, project, title, date
+       FROM unified_session
+      WHERE date >= ? AND date <= ?
+      ORDER BY started_at ASC`,
+    [fromDate, toDate]
+  );
+  if (sessions.length === 0) {
+    return emptyResult(0);
+  }
+  // Pull all user messages in window (one query is cheaper than N)
+  const userMsgs = queryAll(
+    db,
+    `SELECT m.session_id, m.timestamp, m.text
+       FROM unified_message m
+       JOIN unified_session s ON s.id = m.session_id
+      WHERE m.role = 'user'
+        AND s.date >= ? AND s.date <= ?
+        AND m.text IS NOT NULL
+      ORDER BY m.timestamp ASC`,
+    [fromDate, toDate]
+  );
+  // Group user messages by session
+  const bySession = Object.create(null);
+  for (const m of userMsgs) {
+    (bySession[m.session_id] || (bySession[m.session_id] = [])).push(m);
+  }
+  // ---- consistency: average pairwise Jaccard between session FIRST user msgs ----
+  const firstPrompts = sessions.map((s) => {
+    const msgs = bySession[s.id] || [];
+    return { session: s, text: msgs[0]?.text || '', tokens: tokenSet(msgs[0]?.text) };
+  }).filter((p) => p.tokens.size > 0);
+  let consistency = null;
+  if (firstPrompts.length >= 2) {
+    // Pairwise within the same project; if no project repeats, fall back
+    // to global pairwise so we still produce a number for solo projects.
+    const byProj = Object.create(null);
+    for (const p of firstPrompts) {
+      const k = p.session.project || '__none__';
+      (byProj[k] || (byProj[k] = [])).push(p);
+    }
+    const sims = [];
+    for (const group of Object.values(byProj)) {
+      if (group.length < 2) continue;
+      for (let i = 0; i < group.length; i++) {
+        for (let j = i + 1; j < group.length; j++) {
+          sims.push(jaccard(group[i].tokens, group[j].tokens));
+        }
+      }
+    }
+    if (sims.length === 0) {
+      // global fallback
+      for (let i = 0; i < firstPrompts.length; i++) {
+        for (let j = i + 1; j < firstPrompts.length; j++) {
+          sims.push(jaccard(firstPrompts[i].tokens, firstPrompts[j].tokens));
+        }
+      }
+    }
+    if (sims.length > 0) {
+      consistency = sims.reduce((a, b) => a + b, 0) / sims.length;
+    }
+  }
+  // ---- dedup: % of sessions whose first prompt is highly similar (>=0.6) to a previous session ----
+  let dedup = null;
+  if (firstPrompts.length >= 2) {
+    let dupes = 0;
+    for (let i = 1; i < firstPrompts.length; i++) {
+      for (let j = 0; j < i; j++) {
+        if (jaccard(firstPrompts[i].tokens, firstPrompts[j].tokens) >= 0.6) {
+          dupes++;
+          break;
+        }
+      }
+    }
+    dedup = dupes / firstPrompts.length;
+  }
+  // ---- refactor: occurrences per 100 sessions ----
+  let refactorHits = 0;
+  for (const m of userMsgs) {
+    if (m.text && matchesAny(m.text, REFACTOR_PATTERNS)) refactorHits++;
+  }
+  const refactor = sessions.length > 0 ? (refactorHits / sessions.length) * 100 : null;
+  // ---- abstraction: vocab share in user messages ----
+  let totalTokens = 0;
+  let abstractTokens = 0;
+  for (const m of userMsgs) {
+    if (!m.text) continue;
+    const toks = m.text.split(/[^\p{L}\p{N}_]+/u).filter(Boolean);
+    totalTokens += toks.length;
+    abstractTokens += termOccurrences(m.text, ABSTRACTION_TERMS);
+  }
+  const abstraction = totalTokens > 0 ? abstractTokens / totalTokens : null;
+  // ---- evaluate against thresholds (difficulty-agnostic — 'all') ----
+  const consE  = evalIndicator(H3.consistency, consistency, 2);
+  const dedupE = evalIndicator(H3.dedup,       dedup,       2);
+  const refE   = evalIndicator(H3.refactor,    refactor,    2);
+  const absE   = evalIndicator(H3.abstraction, abstraction, 2);
+  const subScores = {
+    consistency: consE.score,
+    dedup:       dedupE.score,
+    refactor:    refE.score,
+    abstraction: absE.score,
+  };
+  const subLevels = {
+    consistency: consE.level,
+    dedup:       dedupE.level,
+    refactor:    refE.level,
+    abstraction: absE.level,
+  };
+  const score = rollupDimension('H3', subScores);
+  const level = scoreToLevel(score);
+  return {
+    subScores,
+    subLevels,
+    raw: { consistency, dedup, refactor, abstraction, refactorHits, abstractTokens, totalTokens },
+    score,
+    level,
+    sessionCount: sessions.length,
+  };
+}
+function emptyResult(n) {
+  return {
+    subScores: { consistency: null, dedup: null, refactor: null, abstraction: null },
+    subLevels: { consistency: null, dedup: null, refactor: null, abstraction: null },
+    raw: {},
+    score: null,
+    level: null,
+    sessionCount: n,
+  };
+}
+module.exports = { analyzeRange };

package/server/analysis/evidence-builder.js ADDED Viewed

@@ -0,0 +1,104 @@
+/**
+ * Shared evidence-builder for dimension scorers.
+ *
+ * Produces a structured "why" object the UI tooltip renders without
+ * needing to re-implement any threshold tables on the client.  Each
+ * dimension scorer feeds in:
+ *   - key / label / what : indicator identity + 1-sentence "how it's measured"
+ *   - expl               : result of thresholds-v2.explainIndicator()
+ *   - unit               : '次' / '%' / '轮' / ratio etc. Formats the
+ *                          observed value the same way thresholds are
+ *                          formatted, so band text reads naturally.
+ *
+ * @author Felix
+ */
+'use strict';
+const DIFFICULTY_LABEL = { 1: '琐碎', 2: '常规', 3: '复杂', 4: '重型' }; // display labels for difficulty 1–4: trivial, routine, complex, heavy
+/**
+ * Format a raw scalar according to the indicator's `unit`.
+ *   '%'   → 12.3%
+ *   'x'   → 1.45x   (multiplier / ratio)
+ *   else  → integer if integer else 2-decimal
+ */
+function fmtNum(v, unit) {
+  if (v == null || Number.isNaN(v)) return '-';
+  if (unit === '%') return (v * 100).toFixed(1) + '%';
+  if (unit === 'x') return v.toFixed(2) + '×';
+  if (typeof v === 'number') return Number.isInteger(v) ? String(v) : v.toFixed(2);
+  return String(v);
+}
+function fmtObserved(v, unit) {
+  const num = fmtNum(v, unit);
+  if (!unit || unit === '%' || unit === 'x') return num;
+  return num + ' ' + unit;
+}
+/**
+ * Describe in plain Chinese which band the value fell into.
+ */
+function describeBand(direction, bounds, level, unit) {
+  if (!bounds || level == null) return '';
+  if (direction === 'lower') {
+    if (level === 4) return `≤ ${fmtNum(bounds.L4, unit)}（L4 专家档）`;
+    if (level === 3) return `> ${fmtNum(bounds.L4, unit)} 且 ≤ ${fmtNum(bounds.L3, unit)}（L3 精通档）`;
+    if (level === 2) return `> ${fmtNum(bounds.L3, unit)} 且 ≤ ${fmtNum(bounds.L2, unit)}（L2 熟练档）`;
+    return `> ${fmtNum(bounds.L2, unit)}（L1 新手档）`;
+  }
+  if (direction === 'higher') {
+    if (level === 4) return `≥ ${fmtNum(bounds.L4, unit)}（L4 专家档）`;
+    if (level === 3) return `≥ ${fmtNum(bounds.L3, unit)} 且 < ${fmtNum(bounds.L4, unit)}（L3 精通档）`;
+    if (level === 2) return `≥ ${fmtNum(bounds.L2, unit)} 且 < ${fmtNum(bounds.L3, unit)}（L2 熟练档）`;
+    return `< ${fmtNum(bounds.L2, unit)}（L1 新手档）`;
+  }
+  if (direction === 'band') {
+    const b = bounds[`L${level}`];
+    if (Array.isArray(b)) {
+      return `落在 [${fmtNum(b[0], unit)}, ${fmtNum(b[1], unit)}] 区间（L${level} 档）`;
+    }
+    return `落在 L${level} 档`;
+  }
+  return '';
+}
+/**
+ * Build the evidence object the API ships to the UI tooltip.
+ */
+function makeEvidence({ key, label, what, expl, unit, difficulty }) {
+  const { value, level, score, direction, bounds } = expl || {};
+  const diffLabel = `${difficulty} ${DIFFICULTY_LABEL[difficulty] || ''}`.trim();
+  if (value == null || level == null) {
+    return {
+      key, label, what,
+      observed: null,
+      level: null,
+      score: null,
+      direction,
+      bounds,
+      difficulty,
+      reason: '数据不足，未计分（信号缺失或会话过短）',
+    };
+  }
+  const observed = fmtObserved(value, unit);
+  const bandText = describeBand(direction, bounds, level, unit);
+  const reason = `难度档位 ${diffLabel}，观测到 ${observed}，${bandText} → 得分 ${score}`;
+  return {
+    key, label, what,
+    observed,
+    rawValue: value,
+    level,
+    score,
+    direction,
+    bounds,
+    difficulty,
+    reason,
+  };
+}
+module.exports = { makeEvidence, fmtNum, fmtObserved, describeBand, DIFFICULTY_LABEL };

package/server/analysis/job.js ADDED Viewed

@@ -0,0 +1,273 @@
+/**
+ * Analysis Job Scheduler for Agent Boss
+ *
+ * Orchestrates session analysis: walks unanalyzed sessions in reverse
+ * chronological order (most-recent date first), scores each one, then
+ * aggregates daily summaries. See design doc §6.6.
+ *
+ * @author Felix
+ */
+const {
+  getUnanalyzedSessions,
+  upsertSessionAnalysis,
+  getAnalysisState,
+  updateAnalysisState,
+  getSessionsByDate,
+} = require('../db/queries');
+const { analyzeSessionV2 } = require('./scoring-v2');
+const { normaliseAdvicePayload } = require('../llm/advice');
+const { aggregateDailySummary } = require('./daily-aggregator');
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+/**
+ * Build a list of YYYY-MM-DD strings starting from today going back
+ * `days` days, ordered most-recent first.  Today is included so the
+ * current day's sessions get scored too; sessions that keep growing
+ * after being scored are re-picked by getUnanalyzedSessions (ended_at
+ * newer than analyzed_at).
+ *
+ * @param {number} days
+ * @returns {string[]}
+ */
+function buildDateList(days) {
+  const dates = [];
+  const now = new Date();
+  for (let i = 0; i <= days; i++) {
+    const d = new Date(now);
+    d.setDate(d.getDate() - i);
+    const yyyy = d.getFullYear();
+    const mm = String(d.getMonth() + 1).padStart(2, '0');
+    const dd = String(d.getDate()).padStart(2, '0');
+    dates.push(`${yyyy}-${mm}-${dd}`);
+  }
+  return dates; // already most-recent first
+}
+// ---------------------------------------------------------------------------
+// Per-session analyze + persist (shared by the job loop and the
+// per-session reanalyze endpoint)
+// ---------------------------------------------------------------------------
+/**
+ * Run v2 analysis for ONE session and persist everything:
+ *   - session_analysis v2 columns (via upsertSessionAnalysis)
+ *   - llm_advice (the advice half of the combined v2 LLM call)
+ *
+ * @param {object}  db
+ * @param {object}  session   unified_session row
+ * @param {object} [opts]     { force?: boolean } bypass the analyzer cache
+ * @returns {Promise<object|null>}  the v2 result (scores/levels/advice) or null
+ */
+async function analyzeAndStoreSession(db, session, opts = {}) {
+  const analysis = {
+    session_id: session.id,
+    source: session.source,
+    analyzed_at: new Date().toISOString(),
+    status: 'done',
+  };
+  let v2 = null;
+  try { v2 = await analyzeSessionV2(db, session, { force: opts.force === true }); }
+  catch (_) { /* fall through with status=done but empty v2 fields */ }
+  if (v2) {
+    analysis.difficulty    = v2.difficulty.bucket;
+    analysis.score_h1      = v2.scores.H1;  analysis.level_h1 = v2.levels.H1;
+    analysis.score_h2      = v2.scores.H2;  analysis.level_h2 = v2.levels.H2;
+    analysis.score_h3      = v2.scores.H3;  analysis.level_h3 = v2.levels.H3;
+    analysis.score_e1      = v2.scores.E1;  analysis.level_e1 = v2.levels.E1;
+    analysis.score_e2      = v2.scores.E2;  analysis.level_e2 = v2.levels.E2;
+    analysis.score_o1      = v2.scores.O1;  analysis.level_o1 = v2.levels.O1;
+    analysis.sub_scores_v2 = JSON.stringify({
+      subScores:   v2.subScores,
+      subLevels:   v2.subLevels,
+      subEvidence: v2.subEvidence,
+    });
+    analysis.llm_judge_v2  = v2.llmJudge ? JSON.stringify(v2.llmJudge) : null;
+    analysis.judge_source  = v2.judgeSource;
+  }
+  upsertSessionAnalysis(db, analysis);
+  // Persist the advice half to llm_advice (separate column; the upsert
+  // above doesn't touch it).
+  if (v2 && v2.llmAdvice) {
+    try {
+      const m = v2.llmAdviceMeta || {};
+      const norm = normaliseAdvicePayload(v2.llmAdvice, {
+        msgCount: m.msgCount || session.message_count || 0,
+        cli: m.cli || null,
+        truncated: false,
+        omittedMessages: 0,
+      });
+      db.run('UPDATE session_analysis SET llm_advice = ? WHERE session_id = ?',
+        [JSON.stringify(norm), session.id]);
+    } catch (_) { /* advice persistence is best-effort */ }
+  }
+  return v2;
+}
+// ---------------------------------------------------------------------------
+// Main job
+// ---------------------------------------------------------------------------
+/**
+ * Run analysis job: analyze unanalyzed sessions in reverse chronological order.
+ * Default: last 7 days. Processes one date at a time, most recent first.
+ *
+ * @param {object} db - sql.js boss.db instance
+ * @param {object} options - {
+ *   days: 7,
+ *   onProgress: fn,
+ *   forceReanalyze: false,
+ *   dates: string[]  // optional explicit YYYY-MM-DD list; overrides `days`
+ * }
+ * @returns {Promise<{analyzed: number, errors: number, skipped: number}>}
+ */
+async function runAnalysisJob(db, options = {}) {
+  const {
+    days = 7,
+    onProgress = null,
+    forceReanalyze = false,
+    dates: explicitDates = null,
+  } = options;
+  const result = { analyzed: 0, errors: 0, skipped: 0 };
+  // 1. Mark analysis as running
+  updateAnalysisState(db, {
+    status: 'running',
+    current_date: null,
+    analyzed_count: 0,
+    total_count: 0,
+    last_analyzed_at: null,
+  });
+  const dates = Array.isArray(explicitDates) && explicitDates.length > 0
+    ? explicitDates.slice().sort().reverse()  // most-recent first, matching buildDateList
+    : buildDateList(days);
+  try {
+    // Pre-calculate total count for progress reporting
+    let totalSessions = 0;
+    for (const date of dates) {
+      const sessions = forceReanalyze
+        ? getSessionsByDate(db, date)
+        : getUnanalyzedSessions(db, date);
+      totalSessions += sessions.length;
+    }
+    updateAnalysisState(db, {
+      status: 'running',
+      current_date: dates[0] || null,
+      analyzed_count: 0,
+      total_count: totalSessions,
+      last_analyzed_at: null,
+    });
+    // 2. Process each date (most recent first)
+    for (const date of dates) {
+      updateAnalysisState(db, {
+        status: 'running',
+        current_date: date,
+        analyzed_count: result.analyzed,
+        total_count: totalSessions,
+        last_analyzed_at: null,
+      });
+      // 2a. Get unanalyzed sessions for this date
+      const sessions = forceReanalyze
+        ? getSessionsByDate(db, date)
+        : getUnanalyzedSessions(db, date);
+      if (sessions.length === 0) {
+        result.skipped++;
+        continue;
+      }
+      // 2b. Analyze each session
+      for (const session of sessions) {
+        try {
+          // One combined LLM call (scores + advice); persisted to the v2
+          // columns + llm_advice.  forceReanalyze bypasses the analyzer cache.
+          await analyzeAndStoreSession(db, session, { force: forceReanalyze });
+          result.analyzed++;
+          // Update progress
+          updateAnalysisState(db, {
+            status: 'running',
+            current_date: date,
+            analyzed_count: result.analyzed,
+            total_count: totalSessions,
+            last_analyzed_at: new Date().toISOString(),
+          });
+          if (onProgress) {
+            onProgress({
+              date,
+              sessionId: session.id,
+              analyzed: result.analyzed,
+              total: totalSessions,
+              errors: result.errors,
+            });
+          }
+        } catch (err) {
+          // Mark session analysis as error, continue with next
+          result.errors++;
+          upsertSessionAnalysis(db, {
+            session_id: session.id,
+            source: session.source,
+            analyzed_at: new Date().toISOString(),
+            status: 'error',
+          });
+          if (onProgress) {
+            onProgress({
+              date,
+              sessionId: session.id,
+              analyzed: result.analyzed,
+              total: totalSessions,
+              errors: result.errors,
+              error: err.message,
+            });
+          }
+        }
+      }
+      // 2d. Aggregate daily summary after processing all sessions for this date
+      try {
+        aggregateDailySummary(db, date);
+      } catch (err) {
+        // Non-fatal: log but continue with next date
+        if (onProgress) {
+          onProgress({
+            date,
+            aggregationError: err.message,
+          });
+        }
+      }
+    }
+  } finally {
+    // 5. Always reset analysis state to idle when done
+    updateAnalysisState(db, {
+      status: 'idle',
+      current_date: null,
+      analyzed_count: result.analyzed,
+      total_count: result.analyzed + result.errors + result.skipped,
+      last_analyzed_at: new Date().toISOString(),
+    });
+  }
+  return result;
+}
+module.exports = { runAnalysisJob, buildDateList, analyzeAndStoreSession };