agentboss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,72 +1,122 @@
1
- 'use strict';
2
-
3
- const { classifySession } = require('./difficulty');
4
- const problemDef = require('./dimensions/problem-definition');
5
- const judgement = require('./dimensions/judgement');
6
- const aiTools = require('./dimensions/ai-tools');
7
- const aiKnowledge = require('./dimensions/ai-knowledge');
8
- const outputQuality = require('./dimensions/output-quality');
9
- const { analyzeSessionLLM } = require('../llm/session-analyzer');
10
- const { dimensionSource } = require('./dimensions/llm-merge');
11
- const { rollupDimension, scoreToLevel } = require('./thresholds-v2');
12
-
/**
 * Score a single session through the v2.1 dimensions.
 * ONE consolidated LLM call per session returns BOTH the dimension scores
 * AND the collaboration advice. All difficulties (including trivial) are
 * assessed; the prompt is difficulty-conditioned so short sessions are
 * graded on an easier curve.
 *
 * The LLM call is best-effort: when it throws, the failure is logged and the
 * dimension scorers run with no LLM cells.
 *
 * @param {object} db
 * @param {object} session
 * @param {object} [opts]  { force?: boolean } bypass the analyzer cache
 * @returns {Promise<object>} the result object built by `assemble`
 */
async function analyzeSessionV2(db, session, opts = {}) {
  const difficulty = classifySession(session);

  // E2 (tool coverage) is structural — always computed.
  const e2 = safe(() => aiTools.analyze(db, session, difficulty.bucket));

  // One consolidated LLM call → { scores, advice } (null if disabled/failed).
  let llm = null;
  try {
    llm = await analyzeSessionLLM(db, session, { force: opts.force === true });
  } catch (err) {
    // Keep scoring without LLM cells, but leave a trace of the failure
    // instead of discarding it. Non-Error throws are logged as-is.
    const detail = err == null || err.message === undefined ? err : err.message;
    console.warn('[scoring-v2] LLM analysis error:', detail);
    llm = null;
  }
  const cells = (llm && llm.scores) || {};

  const h1 = safe(() => problemDef.analyze(db, session, difficulty.bucket, cells.H1));
  const h2 = safe(() => judgement.analyze(db, session, difficulty.bucket, cells.H2));
  const e1 = safe(() => aiKnowledge.analyze(db, session, difficulty.bucket, cells.E1));
  const o1 = safe(() => outputQuality.analyze(db, session, difficulty.bucket, cells.O1));

  // A scorer that failed is null here, so its judge source is passed through as null.
  const judgeSrc = dimensionSource([h1, h2, e1, o1].map((r) => r && r.judgeSource));
  return assemble(difficulty, { h1, h2, e1, e2, o1 }, llm, judgeSrc);
}
44
-
/**
 * Combine the per-dimension scorer outputs into the standard result object.
 *
 * @param {object} difficulty   difficulty classification, passed through unchanged
 * @param {object} dims         { h1, h2, e1, e2, o1 } scorer results (each may be null)
 * @param {object|null} llm     analyzer payload, or null when unavailable
 * @param {*} judgeSource       judge source, passed through unchanged
 * @returns {object} result with per-dimension maps plus the LLM payload fields
 */
function assemble(difficulty, dims, llm, judgeSource) {
  // H3 has no scorer in this version, so its slot is always null.
  const results = {
    H1: dims.h1,
    H2: dims.h2,
    H3: null,
    E1: dims.e1,
    E2: dims.e2,
    O1: dims.o1,
  };
  const dimKeys = Object.keys(results);

  // Read one field from every scorer result; a missing scorer yields null.
  const gather = (field) => {
    const out = {};
    for (const key of dimKeys) {
      const result = results[key];
      out[key] = result ? result[field] : null;
    }
    return out;
  };

  const subScores = gather('subScores');
  const subLevels = gather('subLevels');
  const subEvidence = gather('subEvidence');
  const scores = gather('score');

  // Levels are derived from the rolled-up scores; H3 stays null without a lookup.
  const levels = {};
  for (const key of dimKeys) {
    levels[key] = key === 'H3' ? null : scoreToLevel(scores[key]);
  }

  let llmAdvice = null;
  if (llm && llm.advice) {
    llmAdvice = llm.advice;
  }

  return {
    difficulty,
    subScores,
    subLevels,
    subEvidence,
    scores,
    levels,
    judgeSource,
    // Full analyzer payload (scores+advice) cached verbatim in llm_judge_v2.
    llmJudge: llm || null,
    // Raw advice + the meta the advice normaliser needs (job persists to llm_advice).
    llmAdvice,
    llmAdviceMeta: llm ? { msgCount: llm.msgCount, cli: llm.cli } : null,
  };
}
66
-
/**
 * Run a callback and turn any throw into a logged warning.
 *
 * @param {Function} fn  zero-argument callback to invoke
 * @returns {*} whatever `fn` returns, or null when it threw
 */
function safe(fn) {
  try {
    return fn();
  } catch (err) {
    // The thrown value may not be an Error (it can even be null/undefined);
    // reading `.message` off those would throw again from inside this handler.
    const detail = err == null || err.message === undefined ? err : err.message;
    console.warn('[scoring-v2] dimension error:', detail);
    return null;
  }
}
71
-
72
- module.exports = { analyzeSessionV2, rollupDimension };
1
+ 'use strict';
2
+
3
+ const { classifySession } = require('./difficulty');
4
+ const problemDef = require('./dimensions/problem-definition');
5
+ const judgement = require('./dimensions/judgement');
6
+ const outputQuality = require('./dimensions/output-quality');
7
+ const { analyzeSessionLLM } = require('../llm/session-analyzer');
8
+ const { dimensionSource, mergeIndicator } = require('./dimensions/llm-merge');
9
+ const { rollupDimension, scoreToLevel } = require('./thresholds-v2');
10
+
/**
 * Score a single session through the v2.1 dimensions.
 * ONE consolidated LLM call per session returns BOTH the dimension scores
 * AND the collaboration advice. All difficulties (including trivial) are
 * assessed; the prompt is difficulty-conditioned so short sessions are
 * graded on an easier curve.
 *
 * The LLM call is best-effort: when it throws, the failure is logged and the
 * dimension scorers run with no LLM cells.
 *
 * @param {object} db
 * @param {object} session
 * @param {object} [opts]  { force?: boolean } bypass the analyzer cache
 * @returns {Promise<object>} the result object built by `assemble`
 */
async function analyzeSessionV2(db, session, opts = {}) {
  const difficulty = classifySession(session);

  // One consolidated LLM call → { scores, advice } (null if disabled/failed).
  let llm = null;
  try {
    llm = await analyzeSessionLLM(db, session, { force: opts.force === true });
  } catch (err) {
    // Keep scoring without LLM cells, but leave a trace of the failure
    // instead of discarding it. Non-Error throws are logged as-is.
    const detail = err == null || err.message === undefined ? err : err.message;
    console.warn('[scoring-v2] LLM analysis error:', detail);
    llm = null;
  }
  const cells = (llm && llm.scores) || {};

  const h1 = safe(() => problemDef.analyze(db, session, difficulty.bucket, cells.H1));
  const h2 = safe(() => judgement.analyze(db, session, difficulty.bucket, cells.H2));
  const h3 = safe(() => buildLlmDimension(normaliseH3(cells.H3), H3_LABELS, 'H3'));
  // ENV — merged AI-capability environment (knowledge / tooling / currency),
  // fully LLM-judged. Persisted into the legacy E1 slot (E2 retired).
  const env = safe(() => buildLlmDimension(cells.ENV, ENV_LABELS, 'ENV'));
  const o1 = safe(() => outputQuality.analyze(db, session, difficulty.bucket, cells.O1));

  // A scorer that failed is null here, so its judge source is passed through as null.
  const judgeSrc = dimensionSource([h1, h2, h3, env, o1].map((r) => r && r.judgeSource));
  return assemble(difficulty, { h1, h2, h3, e1: env, e2: null, o1 }, llm, judgeSrc);
}
42
+
// Display labels for the H3 sub-indicators, keyed by sub-indicator key.
// Frozen: this is a shared module-level lookup table that is only ever read.
const H3_LABELS = Object.freeze({ abstraction: '抽象层级', reuse: '复用意识', standard: '规范约束' });
// Display labels for the ENV sub-indicators, keyed by sub-indicator key (frozen likewise).
const ENV_LABELS = Object.freeze({ knowledge: '知识覆盖', tooling: '工具运用', currency: '时效性' });
45
+
/**
 * Tolerate the older single-cell H3 shape ({system:{…}}).
 *
 * When `group` carries a truthy `system` cell and no truthy `abstraction`
 * cell, the `system` cell is exposed under `abstraction` as well. The alias
 * is added to a shallow copy, so the caller's object is never modified.
 *
 * @param {object|null|undefined} group  H3 cell group
 * @returns {object} `group` itself when no alias is needed (or `{}` when
 *   `group` is falsy), otherwise a shallow copy with `abstraction` set
 */
function normaliseH3(group) {
  const cells = group || {};
  if (!cells.system || cells.abstraction) return cells;
  // Copy instead of assigning onto the argument: the argument belongs to the caller.
  return Object.assign({}, cells, { abstraction: cells.system });
}
52
+
/**
 * Assemble a dimension whose sub-indicators are all judged by the LLM.
 * There is no rule-based fallback: every sub-indicator listed in `labels`
 * is merged with null rule inputs. Yields null when none of the
 * sub-indicators ended up with a score.
 *
 * @param {object|null} group   { <subKey>: {score,confidence,evidence}, … }
 * @param {object} labels       { <subKey>: display label }
 * @param {string} dimKey       rollup weights key (H3 / ENV)
 * @returns {object|null} { subScores, subLevels, subEvidence, score, level, judgeSource }
 */
function buildLlmDimension(group, labels, dimKey) {
  const cells = group || {};
  const subScores = {};
  const subLevels = {};
  const subEvidence = {};
  const sources = [];

  for (const [subKey, label] of Object.entries(labels)) {
    const merged = mergeIndicator(cells[subKey], null, null);
    const { score: subScore, level: subLevel } = merged;

    subScores[subKey] = subScore;
    subLevels[subKey] = subLevel;
    subEvidence[subKey] = {
      key: subKey,
      label,
      // Placeholder text is used when the merge produced no evidence.
      what: merged.evidence || '未发现相关证据',
      score: subScore,
      level: subLevel,
    };
    sources.push(merged.source);
  }

  // Nothing scored at all → the dimension is absent rather than empty.
  const hasAnyScore = Object.values(subScores).some((value) => value != null);
  if (!hasAnyScore) {
    return null;
  }

  const score = rollupDimension(dimKey, subScores);
  const level = scoreToLevel(score);
  const judgeSource = dimensionSource(sources);
  return { subScores, subLevels, subEvidence, score, level, judgeSource };
}
94
+
/**
 * Combine the per-dimension scorer outputs into the standard result object.
 *
 * @param {object} difficulty   difficulty classification, passed through unchanged
 * @param {object} dims         { h1, h2, h3, e1, e2, o1 } scorer results (each may be null)
 * @param {object|null} llm     analyzer payload, or null when unavailable
 * @param {*} judgeSource       judge source, passed through unchanged
 * @returns {object} result with per-dimension maps plus the LLM payload fields
 */
function assemble(difficulty, dims, llm, judgeSource) {
  const results = {
    H1: dims.h1,
    H2: dims.h2,
    H3: dims.h3,
    E1: dims.e1,
    E2: dims.e2,
    O1: dims.o1,
  };
  const dimKeys = Object.keys(results);

  // Read one field from every scorer result; a missing scorer yields null.
  const gather = (field) => {
    const out = {};
    for (const key of dimKeys) {
      const result = results[key];
      out[key] = result ? result[field] : null;
    }
    return out;
  };

  const subScores = gather('subScores');
  const subLevels = gather('subLevels');
  const subEvidence = gather('subEvidence');
  const scores = gather('score');

  // Levels are derived from the rolled-up scores, not copied from the scorers.
  const levels = {};
  for (const key of dimKeys) {
    levels[key] = scoreToLevel(scores[key]);
  }

  let llmAdvice = null;
  if (llm && llm.advice) {
    llmAdvice = llm.advice;
  }

  return {
    difficulty,
    subScores,
    subLevels,
    subEvidence,
    scores,
    levels,
    judgeSource,
    // Full analyzer payload (scores+advice) cached verbatim in llm_judge_v2.
    llmJudge: llm || null,
    // Raw advice + the meta the advice normaliser needs (job persists to llm_advice).
    llmAdvice,
    llmAdviceMeta: llm ? { msgCount: llm.msgCount, cli: llm.cli } : null,
  };
}
116
+
/**
 * Run a callback and turn any throw into a logged warning.
 *
 * @param {Function} fn  zero-argument callback to invoke
 * @returns {*} whatever `fn` returns, or null when it threw
 */
function safe(fn) {
  try {
    return fn();
  } catch (err) {
    // The thrown value may not be an Error (it can even be null/undefined);
    // reading `.message` off those would throw again from inside this handler.
    const detail = err == null || err.message === undefined ? err : err.message;
    console.warn('[scoring-v2] dimension error:', detail);
    return null;
  }
}
121
+
122
+ module.exports = { analyzeSessionV2, rollupDimension };