npm - agentboss - Versions diffs - 0.1.0 → 0.1.2 - Mend

agentboss 0.1.0 → 0.1.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those versions as they appear in their respective public registries.

Files changed (26) hide show

package/bin/aboss.js +288 -288
package/client/dist/assets/index-DxoLOxZ8.js +141 -0
package/client/dist/index.html +1 -1
package/package.json +1 -1
package/server/analysis/dimensions/judgement.js +111 -107
package/server/analysis/dimensions/llm-merge.js +59 -57
package/server/analysis/dimensions/output-quality.js +167 -167
package/server/analysis/dimensions/problem-definition.js +109 -104
package/server/analysis/job.js +91 -14
package/server/analysis/report-builder.js +574 -581
package/server/analysis/scoring-v2.js +126 -72
package/server/analysis/thresholds-v2.js +364 -358
package/server/api/execution.js +94 -0
package/server/db/schema.js +5 -2
package/server/etl/opencode.js +5 -1
package/server/execution/job.js +141 -2
package/server/llm/advice-prompt.js +74 -11
package/server/llm/advice.js +50 -1
package/server/llm/analysis-prompt.js +173 -162
package/server/llm/cli-runner.js +18 -2
package/server/llm/judge.js +6 -1
package/server/llm/mcp-classify.js +147 -0
package/server/llm/project-advice-prompt.js +106 -6
package/server/llm/project-advice.js +55 -2
package/server/llm/session-analyzer.js +10 -1
package/client/dist/assets/index-DBj1Ujlx.js +0 -137

package/server/analysis/scoring-v2.js CHANGED Viewed

@@ -1,72 +1,126 @@
-'use strict';
-const { classifySession } = require('./difficulty');
-const problemDef    = require('./dimensions/problem-definition');
-const judgement     = require('./dimensions/judgement');
-const aiTools       = require('./dimensions/ai-tools');
-const aiKnowledge   = require('./dimensions/ai-knowledge');
-const outputQuality = require('./dimensions/output-quality');
-const { analyzeSessionLLM } = require('../llm/session-analyzer');
-const { dimensionSource } = require('./dimensions/llm-merge');
-const { rollupDimension, scoreToLevel } = require('./thresholds-v2');
-/**
- * Score a single session through the v2.1 dimensions.
- * ONE consolidated LLM call per session returns BOTH the dimension scores
- * AND the collaboration advice.  All difficulties (including trivial) are
- * assessed; the prompt is difficulty-conditioned so short sessions are
- * graded on an easier curve.
- *
- * @param {object} db
- * @param {object} session
- * @param {object} [opts]   { force?: boolean }  bypass the analyzer cache
- */
-async function analyzeSessionV2(db, session, opts = {}) {
-  const difficulty = classifySession(session);
-  // E2 (tool coverage) is structural — always computed.
-  const e2 = safe(() => aiTools.analyze(db, session, difficulty.bucket));
-  // One consolidated LLM call → { scores, advice } (null if disabled/failed).
-  let llm = null;
-  try { llm = await analyzeSessionLLM(db, session, { force: opts.force === true }); }
-  catch (_) { llm = null; }
-  const cells = (llm && llm.scores) || {};
-  const h1 = safe(() => problemDef.analyze(db, session, difficulty.bucket, cells.H1));
-  const h2 = safe(() => judgement.analyze(db, session, difficulty.bucket, cells.H2));
-  const e1 = safe(() => aiKnowledge.analyze(db, session, difficulty.bucket, cells.E1));
-  const o1 = safe(() => outputQuality.analyze(db, session, difficulty.bucket, cells.O1));
-  const judgeSrc = dimensionSource([h1, h2, e1, o1].map((r) => r && r.judgeSource));
-  return assemble(difficulty, { h1, h2, e1, e2, o1 }, llm, judgeSrc);
-}
-/** Build the standard result object from the per-dimension scorer outputs. */
-function assemble(difficulty, dims, llm, judgeSource) {
-  const { h1, h2, e1, e2, o1 } = dims;
-  const pick = (r, f) => (r ? r[f] : null);
-  const subScores = { H1: pick(h1, 'subScores'), H2: pick(h2, 'subScores'), H3: null, E1: pick(e1, 'subScores'), E2: pick(e2, 'subScores'), O1: pick(o1, 'subScores') };
-  const subLevels = { H1: pick(h1, 'subLevels'), H2: pick(h2, 'subLevels'), H3: null, E1: pick(e1, 'subLevels'), E2: pick(e2, 'subLevels'), O1: pick(o1, 'subLevels') };
-  const subEvidence = { H1: pick(h1, 'subEvidence'), H2: pick(h2, 'subEvidence'), H3: null, E1: pick(e1, 'subEvidence'), E2: pick(e2, 'subEvidence'), O1: pick(o1, 'subEvidence') };
-  const scores = { H1: pick(h1, 'score'), H2: pick(h2, 'score'), H3: null, E1: pick(e1, 'score'), E2: pick(e2, 'score'), O1: pick(o1, 'score') };
-  const levels = { H1: scoreToLevel(scores.H1), H2: scoreToLevel(scores.H2), H3: null, E1: scoreToLevel(scores.E1), E2: scoreToLevel(scores.E2), O1: scoreToLevel(scores.O1) };
-  return {
-    difficulty, subScores, subLevels, subEvidence, scores, levels, judgeSource,
-    // Full analyzer payload (scores+advice) cached verbatim in llm_judge_v2.
-    llmJudge: llm || null,
-    // Raw advice + the meta the advice normaliser needs (job persists to llm_advice).
-    llmAdvice: llm && llm.advice ? llm.advice : null,
-    llmAdviceMeta: llm ? { msgCount: llm.msgCount, cli: llm.cli } : null,
-  };
-}
-function safe(fn) {
-  try { return fn(); }
-  catch (err) { console.warn('[scoring-v2] dimension error:', err.message); return null; }
-}
-module.exports = { analyzeSessionV2, rollupDimension };
+'use strict';
+const { classifySession } = require('./difficulty');
+const { analyzeSessionLLM } = require('../llm/session-analyzer');
+const { dimensionSource, mergeIndicator } = require('./dimensions/llm-merge');
+const { rollupDimension, scoreToLevel } = require('./thresholds-v2');
+/**
+ * Score a single session through the v2.1 dimensions (H1, H2, H3, ENV, O1).
+ * ONE consolidated analyzeSessionLLM call per session supplies BOTH the score
+ * cells AND the collaboration advice; if it throws, the error is swallowed and
+ * every dimension is built from an empty cell group.  Nothing here branches on
+ * difficulty, so all sessions (including trivial ones) are assessed.
+ * @param {object} db        forwarded to analyzeSessionLLM
+ * @param {object} session   session to classify and score
+ * @param {object} [opts]   { force?: boolean }  forwarded as `force` only when strictly true (bypass the analyzer cache)
+ * @returns {Promise<object>} the object built by assemble(); ENV is passed in the e1 slot and e2 is passed as null
+ */
+async function analyzeSessionV2(db, session, opts = {}) {
+  const difficulty = classifySession(session);
+  // One consolidated LLM call → { scores, advice } (null if disabled/failed).
+  let llm = null;
+  try { llm = await analyzeSessionLLM(db, session, { force: opts.force === true }); }
+  catch (_) { llm = null; } // best-effort: a thrown analyzer error is treated the same as "no LLM result"
+  const cells = (llm && llm.scores) || {};
+  // All five dimensions are fully LLM-judged — no rule fallback.  A
+  // sub-indicator the LLM can't score stays null ("未评估" = "not assessed") rather than
+  // degrading to a keyword heuristic.
+  const h1  = safe(() => buildLlmDimension(cells.H1, H1_LABELS, 'H1'));
+  const h2  = safe(() => buildLlmDimension(cells.H2, H2_LABELS, 'H2'));
+  const h3  = safe(() => buildLlmDimension(normaliseH3(cells.H3), H3_LABELS, 'H3'));
+  // ENV — merged AI-capability environment (knowledge / tooling / currency).
+  // Persisted into the legacy E1 slot (E2 retired).
+  const env = safe(() => buildLlmDimension(cells.ENV, ENV_LABELS, 'ENV'));
+  const o1  = safe(() => buildLlmDimension(cells.O1, O1_LABELS, 'O1'));
+  const judgeSrc = dimensionSource([h1, h2, h3, env, o1].map((r) => r && r.judgeSource));
+  return assemble(difficulty, { h1, h2, h3, e1: env, e2: null, o1 }, llm, judgeSrc);
+}
+const H1_LABELS  = { clarity: '初始指令清晰度', converge: '任务收敛轮次', drift: '方向变更次数' }; // H1 display labels: initial-instruction clarity / task convergence rounds / direction-change count
+const H2_LABELS  = { challenge: '合理质疑率', override: '推翻率', accept_rate: '顺从/采纳判断' }; // H2 display labels: reasonable-challenge rate / override rate / compliance-vs-adoption judgement
+const O1_LABELS  = { first_take: '一次采纳率', code_style: '代码规范性', completeness: '方案完备性' }; // O1 display labels: first-take adoption rate / code-style conformance / solution completeness
+const H3_LABELS  = { abstraction: '抽象层级', reuse: '复用意识', standard: '规范约束' }; // H3 display labels: abstraction level / reuse awareness / standards constraints
+const ENV_LABELS = { knowledge: '知识覆盖', tooling: '工具运用', currency: '时效性' }; // ENV display labels: knowledge coverage / tool usage / currency (timeliness)
+/** Tolerate the older single-cell H3 shape ({system:{…}}): when `system` is set and `abstraction` is not, alias `system` as `abstraction`. Returns the passed object itself (mutated in place), or a fresh {} when group is falsy. */
+function normaliseH3(group) {
+  const g = group || {};
+  if (g.system && !g.abstraction) g.abstraction = g.system; // NOTE(review): writes into the caller's object — analyzeSessionV2 passes cells.H3, which belongs to the llm payload later returned as llmJudge; confirm that is intended
+  return g;
+}
+/**
+ * Build one fully LLM-judged dimension from a group of sub-indicator cells.
+ * Every key of `labels` is run through mergeIndicator(cell, null, null) (no rule
+ * fallback is supplied); returns null when no sub-indicator ends up with a score.
+ * @param {object|null} group   { <subKey>: {score,confidence,evidence}, … }; keys absent from `labels` are ignored
+ * @param {object} labels       { <subKey>: display label (Chinese) }; its keys define which sub-indicators are built
+ * @param {string} dimKey       key handed to rollupDimension; callers in this file pass H1 / H2 / H3 / ENV / O1
+ * @returns {object|null} { subScores, subLevels, subEvidence, score, level, judgeSource }, or null
+ */
+function buildLlmDimension(group, labels, dimKey) {
+  const g = group || {};
+  const subScores = {};
+  const subLevels = {};
+  const subEvidence = {};
+  const sources = [];
+  let any = false;
+  for (const k of Object.keys(labels)) {
+    const m = mergeIndicator(g[k], null, null);
+    subScores[k] = m.score;
+    subLevels[k] = m.level;
+    subEvidence[k] = {
+      key: k, label: labels[k],
+      what: m.evidence || '未发现相关证据', // fallback text reads "no relevant evidence found"
+      score: m.score, level: m.level,
+      source: m.source,
+    };
+    sources.push(m.source);
+    if (m.score != null) any = true; // loose != so both null and undefined count as "not scored"
+  }
+  if (!any) return null; // nothing scored: the whole dimension is reported as absent
+  const score = rollupDimension(dimKey, subScores);
+  return {
+    subScores,
+    subLevels,
+    subEvidence,
+    score,
+    level: scoreToLevel(score),
+    judgeSource: dimensionSource(sources),
+  };
+}
+/** Build the standard result object from the per-dimension scorer outputs; a missing (falsy) dimension contributes null for its sub-scores, sub-levels, sub-evidence and score. */
+function assemble(difficulty, dims, llm, judgeSource) {
+  const { h1, h2, h3, e1, e2, o1 } = dims;
+  const pick = (r, f) => (r ? r[f] : null);
+  const subScores = { H1: pick(h1, 'subScores'), H2: pick(h2, 'subScores'), H3: pick(h3, 'subScores'), E1: pick(e1, 'subScores'), E2: pick(e2, 'subScores'), O1: pick(o1, 'subScores') };
+  const subLevels = { H1: pick(h1, 'subLevels'), H2: pick(h2, 'subLevels'), H3: pick(h3, 'subLevels'), E1: pick(e1, 'subLevels'), E2: pick(e2, 'subLevels'), O1: pick(o1, 'subLevels') };
+  const subEvidence = { H1: pick(h1, 'subEvidence'), H2: pick(h2, 'subEvidence'), H3: pick(h3, 'subEvidence'), E1: pick(e1, 'subEvidence'), E2: pick(e2, 'subEvidence'), O1: pick(o1, 'subEvidence') };
+  const scores = { H1: pick(h1, 'score'), H2: pick(h2, 'score'), H3: pick(h3, 'score'), E1: pick(e1, 'score'), E2: pick(e2, 'score'), O1: pick(o1, 'score') };
+  const levels = { H1: scoreToLevel(scores.H1), H2: scoreToLevel(scores.H2), H3: scoreToLevel(scores.H3), E1: scoreToLevel(scores.E1), E2: scoreToLevel(scores.E2), O1: scoreToLevel(scores.O1) }; // re-derived from scores; each dimension's own `level` field is not read here
+  return {
+    difficulty, subScores, subLevels, subEvidence, scores, levels, judgeSource,
+    // Full analyzer payload (scores+advice) cached verbatim in llm_judge_v2.
+    llmJudge: llm || null,
+    // Raw advice + the meta the advice normaliser needs (job persists to llm_advice).
+    llmAdvice: llm && llm.advice ? llm.advice : null,
+    llmAdviceMeta: llm ? { msgCount: llm.msgCount, cli: llm.cli } : null,
+  };
+}
+function safe(fn) { // Run fn() and return its result; a thrown error is logged and turned into null.
+  try { return fn(); }
+  catch (err) { console.warn('[scoring-v2] dimension error:', err.message); return null; } // only err.message is logged; the stack is dropped
+}
+module.exports = { analyzeSessionV2, rollupDimension };