agentboss 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,72 +1,126 @@
1
- 'use strict';
2
-
3
- const { classifySession } = require('./difficulty');
4
- const problemDef = require('./dimensions/problem-definition');
5
- const judgement = require('./dimensions/judgement');
6
- const aiTools = require('./dimensions/ai-tools');
7
- const aiKnowledge = require('./dimensions/ai-knowledge');
8
- const outputQuality = require('./dimensions/output-quality');
9
- const { analyzeSessionLLM } = require('../llm/session-analyzer');
10
- const { dimensionSource } = require('./dimensions/llm-merge');
11
- const { rollupDimension, scoreToLevel } = require('./thresholds-v2');
12
-
13
- /**
14
- * Score a single session through the v2.1 dimensions.
15
- * ONE consolidated LLM call per session returns BOTH the dimension scores
16
- * AND the collaboration advice. All difficulties (including trivial) are
17
- * assessed; the prompt is difficulty-conditioned so short sessions are
18
- * graded on an easier curve.
19
- *
20
- * @param {object} db
21
- * @param {object} session
22
- * @param {object} [opts] { force?: boolean } bypass the analyzer cache
23
- */
24
- async function analyzeSessionV2(db, session, opts = {}) {
25
- const difficulty = classifySession(session);
26
-
27
- // E2 (tool coverage) is structural — always computed.
28
- const e2 = safe(() => aiTools.analyze(db, session, difficulty.bucket));
29
-
30
- // One consolidated LLM call → { scores, advice } (null if disabled/failed).
31
- let llm = null;
32
- try { llm = await analyzeSessionLLM(db, session, { force: opts.force === true }); }
33
- catch (_) { llm = null; }
34
- const cells = (llm && llm.scores) || {};
35
-
36
- const h1 = safe(() => problemDef.analyze(db, session, difficulty.bucket, cells.H1));
37
- const h2 = safe(() => judgement.analyze(db, session, difficulty.bucket, cells.H2));
38
- const e1 = safe(() => aiKnowledge.analyze(db, session, difficulty.bucket, cells.E1));
39
- const o1 = safe(() => outputQuality.analyze(db, session, difficulty.bucket, cells.O1));
40
-
41
- const judgeSrc = dimensionSource([h1, h2, e1, o1].map((r) => r && r.judgeSource));
42
- return assemble(difficulty, { h1, h2, e1, e2, o1 }, llm, judgeSrc);
43
- }
44
-
45
- /** Build the standard result object from the per-dimension scorer outputs. */
46
- function assemble(difficulty, dims, llm, judgeSource) {
47
- const { h1, h2, e1, e2, o1 } = dims;
48
- const pick = (r, f) => (r ? r[f] : null);
49
-
50
- const subScores = { H1: pick(h1, 'subScores'), H2: pick(h2, 'subScores'), H3: null, E1: pick(e1, 'subScores'), E2: pick(e2, 'subScores'), O1: pick(o1, 'subScores') };
51
- const subLevels = { H1: pick(h1, 'subLevels'), H2: pick(h2, 'subLevels'), H3: null, E1: pick(e1, 'subLevels'), E2: pick(e2, 'subLevels'), O1: pick(o1, 'subLevels') };
52
- const subEvidence = { H1: pick(h1, 'subEvidence'), H2: pick(h2, 'subEvidence'), H3: null, E1: pick(e1, 'subEvidence'), E2: pick(e2, 'subEvidence'), O1: pick(o1, 'subEvidence') };
53
-
54
- const scores = { H1: pick(h1, 'score'), H2: pick(h2, 'score'), H3: null, E1: pick(e1, 'score'), E2: pick(e2, 'score'), O1: pick(o1, 'score') };
55
- const levels = { H1: scoreToLevel(scores.H1), H2: scoreToLevel(scores.H2), H3: null, E1: scoreToLevel(scores.E1), E2: scoreToLevel(scores.E2), O1: scoreToLevel(scores.O1) };
56
-
57
- return {
58
- difficulty, subScores, subLevels, subEvidence, scores, levels, judgeSource,
59
- // Full analyzer payload (scores+advice) cached verbatim in llm_judge_v2.
60
- llmJudge: llm || null,
61
- // Raw advice + the meta the advice normaliser needs (job persists to llm_advice).
62
- llmAdvice: llm && llm.advice ? llm.advice : null,
63
- llmAdviceMeta: llm ? { msgCount: llm.msgCount, cli: llm.cli } : null,
64
- };
65
- }
66
-
67
- function safe(fn) {
68
- try { return fn(); }
69
- catch (err) { console.warn('[scoring-v2] dimension error:', err.message); return null; }
70
- }
71
-
72
- module.exports = { analyzeSessionV2, rollupDimension };
1
+ 'use strict';
2
+
3
+ const { classifySession } = require('./difficulty');
4
+ const { analyzeSessionLLM } = require('../llm/session-analyzer');
5
+ const { dimensionSource, mergeIndicator } = require('./dimensions/llm-merge');
6
+ const { rollupDimension, scoreToLevel } = require('./thresholds-v2');
7
+
8
+ /**
9
+ * Score a single session through the v2.1 dimensions.
10
+ * ONE consolidated LLM call per session returns BOTH the dimension scores
11
+ * AND the collaboration advice. All difficulties (including trivial) are
12
+ * assessed; the prompt is difficulty-conditioned so short sessions are
13
+ * graded on an easier curve.
14
+ *
15
+ * @param {object} db
16
+ * @param {object} session
17
+ * @param {object} [opts] { force?: boolean } bypass the analyzer cache
18
+ */
19
+ async function analyzeSessionV2(db, session, opts = {}) {
20
+ const difficulty = classifySession(session);
21
+
22
+ // One consolidated LLM call → { scores, advice } (null if disabled/failed).
23
+ let llm = null;
24
+ try { llm = await analyzeSessionLLM(db, session, { force: opts.force === true }); }
25
+ catch (_) { llm = null; }
26
+ const cells = (llm && llm.scores) || {};
27
+
28
+ // All five dimensions are fully LLM-judged — no rule fallback. A
29
+ // sub-indicator the LLM can't score stays null ("未评估") rather than
30
+ // degrading to a keyword heuristic.
31
+ const h1 = safe(() => buildLlmDimension(cells.H1, H1_LABELS, 'H1'));
32
+ const h2 = safe(() => buildLlmDimension(cells.H2, H2_LABELS, 'H2'));
33
+ const h3 = safe(() => buildLlmDimension(normaliseH3(cells.H3), H3_LABELS, 'H3'));
34
+ // ENV — the merged AI-capability environment dimension (knowledge / tooling / currency).
35
+ // Persisted into the legacy E1 slot (E2 retired).
36
+ const env = safe(() => buildLlmDimension(cells.ENV, ENV_LABELS, 'ENV'));
37
+ const o1 = safe(() => buildLlmDimension(cells.O1, O1_LABELS, 'O1'));
38
+
39
+ const judgeSrc = dimensionSource([h1, h2, h3, env, o1].map((r) => r && r.judgeSource));
40
+ return assemble(difficulty, { h1, h2, h3, e1: env, e2: null, o1 }, llm, judgeSrc);
41
+ }
42
+
43
+ const H1_LABELS = { clarity: '初始指令清晰度', converge: '任务收敛轮次', drift: '方向变更次数' };
44
+ const H2_LABELS = { challenge: '合理质疑率', override: '推翻率', accept_rate: '顺从/采纳判断' };
45
+ const O1_LABELS = { first_take: '一次采纳率', code_style: '代码规范性', completeness: '方案完备性' };
46
+ const H3_LABELS = { abstraction: '抽象层级', reuse: '复用意识', standard: '规范约束' };
47
+ const ENV_LABELS = { knowledge: '知识覆盖', tooling: '工具运用', currency: '时效性' };
48
+
49
+ /** Tolerate the older single-cell H3 shape ({system:{…}}). */
50
+ function normaliseH3(group) {
51
+ const g = group || {};
52
+ if (g.system && !g.abstraction) g.abstraction = g.system;
53
+ return g;
54
+ }
55
+
56
+ /**
57
+ * Build a fully LLM-judged dimension from a group of sub-indicator cells
58
+ * (no rule fallback — these dimensions have no single-session structural
59
+ * proxy). Returns null when the LLM scored none of them.
60
+ *
61
+ * @param {object|null} group { <subKey>: {score,confidence,evidence}, … }
62
+ * @param {object} labels { <subKey>: 中文label }
63
+ * @param {string} dimKey rollup weights key (H1 / H2 / H3 / ENV / O1)
64
+ */
65
+ function buildLlmDimension(group, labels, dimKey) {
66
+ const g = group || {};
67
+ const subScores = {};
68
+ const subLevels = {};
69
+ const subEvidence = {};
70
+ const sources = [];
71
+ let any = false;
72
+
73
+ for (const k of Object.keys(labels)) {
74
+ const m = mergeIndicator(g[k], null, null);
75
+ subScores[k] = m.score;
76
+ subLevels[k] = m.level;
77
+ subEvidence[k] = {
78
+ key: k, label: labels[k],
79
+ what: m.evidence || '未发现相关证据',
80
+ score: m.score, level: m.level,
81
+ source: m.source,
82
+ };
83
+ sources.push(m.source);
84
+ if (m.score != null) any = true;
85
+ }
86
+ if (!any) return null;
87
+
88
+ const score = rollupDimension(dimKey, subScores);
89
+ return {
90
+ subScores,
91
+ subLevels,
92
+ subEvidence,
93
+ score,
94
+ level: scoreToLevel(score),
95
+ judgeSource: dimensionSource(sources),
96
+ };
97
+ }
98
+
99
+ /** Build the standard result object from the per-dimension scorer outputs. */
100
+ function assemble(difficulty, dims, llm, judgeSource) {
101
+ const { h1, h2, h3, e1, e2, o1 } = dims;
102
+ const pick = (r, f) => (r ? r[f] : null);
103
+
104
+ const subScores = { H1: pick(h1, 'subScores'), H2: pick(h2, 'subScores'), H3: pick(h3, 'subScores'), E1: pick(e1, 'subScores'), E2: pick(e2, 'subScores'), O1: pick(o1, 'subScores') };
105
+ const subLevels = { H1: pick(h1, 'subLevels'), H2: pick(h2, 'subLevels'), H3: pick(h3, 'subLevels'), E1: pick(e1, 'subLevels'), E2: pick(e2, 'subLevels'), O1: pick(o1, 'subLevels') };
106
+ const subEvidence = { H1: pick(h1, 'subEvidence'), H2: pick(h2, 'subEvidence'), H3: pick(h3, 'subEvidence'), E1: pick(e1, 'subEvidence'), E2: pick(e2, 'subEvidence'), O1: pick(o1, 'subEvidence') };
107
+
108
+ const scores = { H1: pick(h1, 'score'), H2: pick(h2, 'score'), H3: pick(h3, 'score'), E1: pick(e1, 'score'), E2: pick(e2, 'score'), O1: pick(o1, 'score') };
109
+ const levels = { H1: scoreToLevel(scores.H1), H2: scoreToLevel(scores.H2), H3: scoreToLevel(scores.H3), E1: scoreToLevel(scores.E1), E2: scoreToLevel(scores.E2), O1: scoreToLevel(scores.O1) };
110
+
111
+ return {
112
+ difficulty, subScores, subLevels, subEvidence, scores, levels, judgeSource,
113
+ // Full analyzer payload (scores+advice) cached verbatim in llm_judge_v2.
114
+ llmJudge: llm || null,
115
+ // Raw advice + the meta the advice normaliser needs (job persists to llm_advice).
116
+ llmAdvice: llm && llm.advice ? llm.advice : null,
117
+ llmAdviceMeta: llm ? { msgCount: llm.msgCount, cli: llm.cli } : null,
118
+ };
119
+ }
120
+
121
+ function safe(fn) {
122
+ try { return fn(); }
123
+ catch (err) { console.warn('[scoring-v2] dimension error:', err.message); return null; }
124
+ }
125
+
126
+ module.exports = { analyzeSessionV2, rollupDimension };